From 887e6ee81ba172374bda9441360a6a76d668647e Mon Sep 17 00:00:00 2001 From: NAUD Maxence <maxence.naud@cea.fr> Date: Fri, 29 Nov 2024 17:11:24 +0000 Subject: [PATCH] feat: add paddedconv implementation --- include/aidge/backend/cpu.hpp | 1 + .../backend/cpu/operator/PaddedConvImpl.hpp | 59 +++++ .../cpu/operator/PaddedConvImpl_kernels.hpp | 228 ++++++++++++++++++ src/operator/PaddedConvImpl.cpp | 128 ++++++++++ 4 files changed, 416 insertions(+) create mode 100644 include/aidge/backend/cpu/operator/PaddedConvImpl.hpp create mode 100644 include/aidge/backend/cpu/operator/PaddedConvImpl_kernels.hpp create mode 100644 src/operator/PaddedConvImpl.cpp diff --git a/include/aidge/backend/cpu.hpp b/include/aidge/backend/cpu.hpp index a4586edf..caa75328 100644 --- a/include/aidge/backend/cpu.hpp +++ b/include/aidge/backend/cpu.hpp @@ -37,6 +37,7 @@ #include "aidge/backend/cpu/operator/MatMulImpl.hpp" #include "aidge/backend/cpu/operator/MulImpl.hpp" #include "aidge/backend/cpu/operator/PadImpl.hpp" +#include "aidge/backend/cpu/operator/PaddedConvImpl.hpp" #include "aidge/backend/cpu/operator/PowImpl.hpp" #include "aidge/backend/cpu/operator/ReduceMeanImpl.hpp" #include "aidge/backend/cpu/operator/ReduceSumImpl.hpp" diff --git a/include/aidge/backend/cpu/operator/PaddedConvImpl.hpp b/include/aidge/backend/cpu/operator/PaddedConvImpl.hpp new file mode 100644 index 00000000..e1e2a89c --- /dev/null +++ b/include/aidge/backend/cpu/operator/PaddedConvImpl.hpp @@ -0,0 +1,59 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. + * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#ifndef AIDGE_CPU_OPERATOR_PADDEDCONVIMPL_H_ +#define AIDGE_CPU_OPERATOR_PADDEDCONVIMPL_H_ + +#include <array> +#include <memory> +#include <tuple> +#include <vector> + +#include "aidge/backend/cpu/operator/OperatorImpl.hpp" +#include "aidge/operator/MetaOperatorDefs.hpp" +#include "aidge/utils/Registrar.hpp" +#include "aidge/utils/Types.h" + +namespace Aidge { +// Operator implementation entry point for the backend +using PaddedConv1D_Op = MetaOperator_Op; +using PaddedConvImpl1D_cpu = OperatorImpl_cpu<MetaOperator_Op, + void(const std::array<DimSize_t, 2>&, + const std::array<DimSize_t, 1>&, + const std::array<DimSize_t, 1>&, + const std::array<DimSize_t, 1>&, + const std::array<DimSize_t, 3> &, + DimSize_t, + const void *, + const void *, + const void *, + void *)>; + +using PaddedConv2D_Op = MetaOperator_Op; +using PaddedConvImpl2D_cpu = OperatorImpl_cpu<MetaOperator_Op, + void(const std::array<DimSize_t, 4>&, + const std::array<DimSize_t, 2>&, + const std::array<DimSize_t, 2>&, + const std::array<DimSize_t, 2>&, + const std::array<DimSize_t, 4> &, + DimSize_t, + const void *, + const void *, + const void *, + void *)>; + +// Implementation entry point registration to Operator +// Uncomment to activate implementation for PaddedConv. It is currently less efficient, hence why it is commented. 
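+// Note (editor's assumption): while these registrations stay commented out, the
+// PaddedConv MetaOperator is expected to fall back to scheduling its internal
+// Pad -> Conv micro-graph, which reuses the already optimized Pad and Conv CPU kernels.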
+// REGISTRAR(PaddedConv1D_Op, std::array<std::string, 2>({"cpu", "PaddedConv1D"}), Aidge::PaddedConvImpl1D_cpu::create); +// REGISTRAR(PaddedConv2D_Op, std::array<std::string, 2>({"cpu", "PaddedConv2D"}), Aidge::PaddedConvImpl2D_cpu::create); +} // namespace Aidge + +#endif /* AIDGE_CPU_OPERATOR_PADDEDCONVIMPL_H_ */ diff --git a/include/aidge/backend/cpu/operator/PaddedConvImpl_kernels.hpp b/include/aidge/backend/cpu/operator/PaddedConvImpl_kernels.hpp new file mode 100644 index 00000000..85fb7243 --- /dev/null +++ b/include/aidge/backend/cpu/operator/PaddedConvImpl_kernels.hpp @@ -0,0 +1,228 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. + * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#ifndef AIDGE_CPU_OPERATOR_PADDEDCONVIMPL_KERNELS_H_ +#define AIDGE_CPU_OPERATOR_PADDEDCONVIMPL_KERNELS_H_ + +#include <array> +#include <cstddef> +#include <vector> + +#include "aidge/backend/cpu/operator/OperatorImpl.hpp" +#include "aidge/backend/cpu/operator/PaddedConvImpl.hpp" +#include "aidge/operator/Pad.hpp" +#include "aidge/utils/Registrar.hpp" +#include "aidge/utils/Types.h" + +namespace Aidge { +// Only works for constant padding zero +/** + * @brief Forward kernel for 1D Convolution on CPU backend. + * @tparam I Input data type. + * @tparam W Weight data type. + * @tparam B Bias data type. + * @tparam O Output data type. + * @param params tuple of Attributes from the Operator + * @param inputDims Array of input dimensions. + * @param input_ const input Tensor. + * @param weights_ const weight Tensor. + * @param biases_ const Biais Tensor. + * @param output_ Output Tensor. + */ +template <class I, class W, class B, class O> +void PaddedConvImpl1D_cpu_forward_kernel(const std::array<DimSize_t, 2>& beginEndBorders, + const std::array<DimSize_t, 1>& strideDims, + const std::array<DimSize_t, 1>& dilationDims, + const std::array<DimSize_t, 1>& kernelDims, + const std::array<DimSize_t, 3>& inputDims, + DimSize_t outChannels, + const void *input_, + const void *weights_, + const void *biases_, + void *output_) +{ + // FIXME: missing convolution attributes as arguments + const I *input = static_cast<const I *>(input_); + const W *weights = static_cast<const W *>(weights_); + const B *biases = static_cast<const B *>(biases_); + O *output = static_cast<O *>(output_); + + // output H size + const DimSize_t dilated_kernel_x = dilationDims[0]*(kernelDims[0] - 1) + 1; + const std::size_t oxSize = + static_cast<std::size_t>(std::floor(static_cast<float>(inputDims[2] - dilated_kernel_x + strideDims[0]) / + static_cast<float>(strideDims[0]))); + + // TODO: kernel computation + // output (batch, outCh, Xout, Yout) + // input (batch, inCh, Xin, Yin) + // weight (outCh, inCh, kernelX, kernelY) + // does not take Dilation attribute into account + using signedsize = std::make_signed<std::size_t>::type; + for (std::size_t batch = 0; batch < inputDims[0]; ++batch) { + for (std::size_t outCh = 0; outCh < outChannels; ++outCh) { + const std::size_t oIndex = (outCh + batch*outChannels) * oxSize; + // If bias = nullptr, set B(0) + B biasVal = (biases != nullptr) ? 
biases[outCh] : B(0); + std::fill(output + oIndex, output+(oIndex+oxSize), biasVal); + for (std::size_t inCh = 0; inCh < inputDims[1]; ++inCh) { + const std::size_t iIndex = (inCh + batch*inputDims[1]) * inputDims[2]; + const std::size_t wIndex = (inCh + outCh*inputDims[1]) * kernelDims[0]; + for (std::size_t ox = 0; ox < oxSize; ++ox) { + const signedsize difx = static_cast<signedsize>(ox * strideDims[0]); + const std::size_t sxMin = static_cast<std::size_t>(std::max(static_cast<signedsize>(beginEndBorders[0]) - difx, signedsize(0))); + const std::size_t sxMax = (static_cast<signedsize>(inputDims[2]) + static_cast<signedsize>(beginEndBorders[1]) - difx) < 0 ? 0 : ((inputDims[2] + difx) > kernelDims[0] ? kernelDims[0] : inputDims[2] + difx); + + const std::size_t oIndexFull = oIndex + ox; + const signedsize ix = static_cast<signedsize>(ox * strideDims[0]) - static_cast<signedsize>(beginEndBorders[0]); + + for (std::size_t sx = sxMin; sx*dilationDims[0] < sxMax; ++sx) { + output[oIndexFull] += weights[wIndex + sx] * + input[iIndex + static_cast<std::size_t>(ix+static_cast<signedsize>(sx*dilationDims[0]))]; + } + } + } + } + } +} + +// Kernels registration to implementation entry point +REGISTRAR(PaddedConvImpl1D_cpu, + {{DataType::Any, DataFormat::NCHW}, {DataType::Float32, DataFormat::NCHW}, DynamicAttributes(std::map<std::string, future_std::any>({std::make_pair("type", future_std::any(std::string("PaddedConv1D")))}))}, + {ProdConso::inPlaceModel, Aidge::PaddedConvImpl1D_cpu_forward_kernel<float, float, float, float>, nullptr}); +REGISTRAR(PaddedConvImpl1D_cpu, + {{DataType::Any, DataFormat::NCHW}, {DataType::Float16, DataFormat::NCHW}, DynamicAttributes(std::map<std::string, future_std::any>({std::make_pair("type", future_std::any(std::string("PaddedConv1D")))}))}, + {ProdConso::inPlaceModel, Aidge::PaddedConvImpl1D_cpu_forward_kernel<half_float::half, half_float::half, half_float::half, half_float::half>, nullptr}); +REGISTRAR(PaddedConvImpl1D_cpu, + {{DataType::Any, DataFormat::NCHW}, {DataType::Int32, DataFormat::NCHW}, DynamicAttributes(std::map<std::string, future_std::any>({std::make_pair("type", future_std::any(std::string("PaddedConv1D")))}))}, + {ProdConso::inPlaceModel, Aidge::PaddedConvImpl1D_cpu_forward_kernel<int32_t, int32_t, int32_t, int32_t>, nullptr}); +REGISTRAR(PaddedConvImpl1D_cpu, + {{DataType::Any, DataFormat::NCHW}, {DataType::Float64, DataFormat::NCHW}, DynamicAttributes(std::map<std::string, future_std::any>({std::make_pair("type", future_std::any(std::string("PaddedConv1D")))}))}, + {ProdConso::inPlaceModel, Aidge::PaddedConvImpl1D_cpu_forward_kernel<double, double, double, double>, nullptr}); + + +/** + * @brief Forward kernel for 2D Convolution on CPU backend. + * @tparam I Input data type. + * @tparam W Weight data type. + * @tparam B Bias data type. + * @tparam O Output data type. + * @param params tuple of Attributes from the Operator + * @param inputDims Array of input dimensions. + * @param input_ const input Tensor. + * @param weights_ const weight Tensor. + * @param biases_ const Biais Tensor. + * @param output_ Output Tensor. 
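+ * @param beginEndBorders Array of begin/end zero-padding borders for each spatial dimension.
+ * @param strideDims Array of stride dimensions.
+ * @param dilationDims Array of dilation dimensions.
+ * @param kernelDims Array of kernel dimensions.
+ * @param outChannels Number of output channels.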
+ */ +template <class I, class W, class B, class O> +void PaddedConvImpl2D_cpu_forward_kernel( + const std::array<DimSize_t, 4>& beginEndBorders, + const std::array<DimSize_t, 2>& strideDims, + const std::array<DimSize_t, 2>& dilationDims, + const std::array<DimSize_t, 2>& kernelDims, + const std::array<DimSize_t, 4> &inputDims, + DimSize_t outChannels, + const void *input_, + const void *weights_, + const void *biases_, + void *output_) +{ + // FIXME: missing convolution attributes as arguments + const I *input = static_cast<const I *>(input_); + const W *weights = static_cast<const W *>(weights_); + const B *biases = static_cast<const B *>(biases_); + O *output = static_cast<O *>(output_); + + // output H size + const DimSize_t dilated_kernel_x = dilationDims[0]*(kernelDims[0] - 1) + 1; + const std::size_t oxSize = + static_cast<std::size_t>(std::floor(static_cast<float>(inputDims[2] - dilated_kernel_x + beginEndBorders[0] + beginEndBorders[2] + strideDims[0]) / + static_cast<float>(strideDims[0]))); + // output W size + const DimSize_t dilated_kernel_y = dilationDims[1]*(kernelDims[1] - 1) + 1; + const std::size_t oySize = + static_cast<std::size_t>(std::floor(static_cast<float>(inputDims[3] - dilated_kernel_y + beginEndBorders[1] + beginEndBorders[3] + strideDims[1]) / + static_cast<float>(strideDims[1]))); + + for (std::size_t batch = 0; batch < inputDims[0]; ++batch) { + for (std::size_t outCh = 0; outCh < outChannels; ++outCh) { + const std::size_t oIndex = (outCh + batch*outChannels) * oxSize * oySize; + // If bias = nullptr, set B(0) + B biasVal = (biases != nullptr) ? biases[outCh] : B(0); + std::fill(output + oIndex, output+(oIndex+oxSize*oySize), biasVal); + for (std::size_t inCh = 0; inCh < inputDims[1]; ++inCh) { + const std::size_t iIndex = (inCh + batch*inputDims[1]) * inputDims[2] * inputDims[3]; + const std::size_t wIndex = (inCh + outCh*inputDims[1]) * kernelDims[0] * kernelDims[1]; + for (std::size_t ox = 0; ox < oxSize; ++ox) { + const std::size_t difx = ox * strideDims[0]; + const std::size_t sxMin = beginEndBorders[0] < difx ? std::size_t(0) : beginEndBorders[0] - difx; + const std::size_t sxMax = (inputDims[2] + beginEndBorders[2]) < difx ? + 0 : + ((inputDims[2] + beginEndBorders[2]) > dilated_kernel_x + difx ? + dilated_kernel_x : + (inputDims[2] + beginEndBorders[2] - difx)); + + for (std::size_t oy = 0; oy < oySize; ++oy) { + const std::size_t dify = oy * strideDims[1]; + const std::size_t syMin = beginEndBorders[1] < dify ? std::size_t(0) : beginEndBorders[1] - dify; + const std::size_t syMax = (inputDims[3] + beginEndBorders[3]) < dify ? + 0 : + ((inputDims[3] + beginEndBorders[3]) > dilated_kernel_y + dify ? 
+ dilated_kernel_y : + (inputDims[3] + beginEndBorders[3] - dify)); + const std::size_t oIndexFull = oIndex + ox*oySize + oy; + const std::size_t ix = ox * strideDims[0] - beginEndBorders[0]; + const std::size_t iy = oy * strideDims[1] - beginEndBorders[1]; + + + if (sxMin == 0 && syMin == 0 && sxMax == 3 && syMax == 3) { + output[oIndexFull] += (weights[wIndex + 0*kernelDims[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+0)*inputDims[3] + static_cast<std::size_t>(iy+0)] + + weights[wIndex + 0*kernelDims[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+0)*inputDims[3] + static_cast<std::size_t>(iy+1)] + + weights[wIndex + 0*kernelDims[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+0)*inputDims[3] + static_cast<std::size_t>(iy+2)] + + weights[wIndex + 1*kernelDims[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+1)*inputDims[3] + static_cast<std::size_t>(iy+0)] + + weights[wIndex + 1*kernelDims[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+1)*inputDims[3] + static_cast<std::size_t>(iy+1)] + + weights[wIndex + 1*kernelDims[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+1)*inputDims[3] + static_cast<std::size_t>(iy+2)] + + weights[wIndex + 2*kernelDims[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+2)*inputDims[3] + static_cast<std::size_t>(iy+0)] + + weights[wIndex + 2*kernelDims[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+2)*inputDims[3] + static_cast<std::size_t>(iy+1)] + + weights[wIndex + 2*kernelDims[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+2)*inputDims[3] + static_cast<std::size_t>(iy+2)]); + } else { + for (std::size_t sx = sxMin; sx*dilationDims[0] < sxMax; ++sx) { + for (std::size_t sy = syMin; sy*dilationDims[1] < syMax; ++sy) { + output[oIndexFull] += weights[wIndex + sx*kernelDims[1] + sy] * + input[iIndex + (sx*dilationDims[0] + ix)*inputDims[3] + sy*dilationDims[1] + iy]; + } + } + } + } + } + } + } + } +} + + +// Kernels registration to implementation entry point +REGISTRAR(PaddedConvImpl2D_cpu, + // ImplSpec{std::vector<ImplSpec::IOSpec>({ImplSpec::IOSpec{DataType::Any, DataFormat::NCHW}, ImplSpec::IOSpec{DataType::Any, DataFormat::NCHW}}) , std::vector<ImplSpec::IOSpec>({ImplSpec::IOSpec{DataType::Int32, DataFormat::NCHW}})}, + {{DataType::Any, DataFormat::NCHW}, {DataType::Int32, DataFormat::NCHW}, DynamicAttributes(std::map<std::string, future_std::any>({std::make_pair("type", future_std::any(std::string("PaddedConv2D")))}))}, + {ProdConso::inPlaceModel, Aidge::PaddedConvImpl2D_cpu_forward_kernel<std::int32_t, std::int32_t, std::int32_t, std::int32_t>, nullptr}); +REGISTRAR(PaddedConvImpl2D_cpu, + {{DataType::Any, DataFormat::NCHW}, {DataType::Float16, DataFormat::NCHW}, DynamicAttributes(std::map<std::string, future_std::any>({std::make_pair("type", future_std::any(std::string("PaddedConv2D")))}))}, + {ProdConso::inPlaceModel, Aidge::PaddedConvImpl2D_cpu_forward_kernel<half_float::half, half_float::half, half_float::half, half_float::half>, nullptr}); +REGISTRAR(PaddedConvImpl2D_cpu, + {{DataType::Any, DataFormat::NCHW}, {DataType::Float32, DataFormat::NCHW}, DynamicAttributes(std::map<std::string, future_std::any>({std::make_pair("type", future_std::any(std::string("PaddedConv2D")))}))}, + {ProdConso::inPlaceModel, Aidge::PaddedConvImpl2D_cpu_forward_kernel<float, float, float, float>, nullptr}); +REGISTRAR(PaddedConvImpl2D_cpu, + {{DataType::Any, DataFormat::NCHW}, {DataType::Float64, DataFormat::NCHW}, DynamicAttributes(std::map<std::string, future_std::any>({std::make_pair("type", 
future_std::any(std::string("PaddedConv2D")))}))}, + {ProdConso::inPlaceModel, Aidge::PaddedConvImpl2D_cpu_forward_kernel<double, double, double, double>, nullptr}); +} // namespace Aidge + +#endif /* AIDGE_CPU_OPERATOR_PADDEDCONVIMPL_KERNELS_H_ */ diff --git a/src/operator/PaddedConvImpl.cpp b/src/operator/PaddedConvImpl.cpp new file mode 100644 index 00000000..b85039d1 --- /dev/null +++ b/src/operator/PaddedConvImpl.cpp @@ -0,0 +1,128 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. + * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#include "aidge/backend/cpu/operator/PaddedConvImpl.hpp" +#include "aidge/backend/cpu/operator/PaddedConvImpl_kernels.hpp" + +#include <memory> +#include <vector> + +#include "aidge/backend/cpu/data/GetCPUPtr.h" +#include "aidge/backend/OperatorImpl.hpp" +#include "aidge/data/Tensor.hpp" +#include "aidge/operator/MetaOperator.hpp" +#include "aidge/operator/Conv.hpp" +#include "aidge/operator/Pad.hpp" +#include "aidge/utils/ErrorHandling.hpp" +#include "aidge/utils/Registrar.hpp" +#include "aidge/utils/Types.h" +#include "aidge/backend/cpu/data/GetCPUPtr.h" + +template <> +void Aidge::PaddedConvImpl1D_cpu::forward() { + const auto& op_ = static_cast<const MetaOperator_Op&>(mOp); + + // FIXME: uncomment the following code once memory handling will work + AIDGE_ASSERT(op_.getInput(0), "missing input #0 in Conv Operator."); + AIDGE_ASSERT(op_.getInput(1), "missing input #1 in Conv Operator."); + + // Find the correct kernel type + const auto impl = Registrar<PaddedConvImpl1D_cpu>::create(getBestMatch(getRequiredSpec())); + + // Convert input data (no overhead if not needed!) + // TODO: right now, if needed, memory will be allocated/deallocated at each + // call to forward(). We might put the following shared_ptr as members of + // this class to avoid that. + std::shared_ptr<Tensor> input0Fallback, input1Fallback, input2Fallback; + const auto& input0 = op_.getInput(0)->refCastFrom(input0Fallback, *op_.getOutput(0)); + const auto& input1 = op_.getInput(1)->refCastFrom(input1Fallback, *op_.getOutput(0)); + const auto& input2 = (op_.getInput(2)) ? op_.getInput(2)->refCastFrom(input2Fallback, *op_.getOutput(0)) : Tensor(); + + std::shared_ptr<Conv_Op<1>> conv_op; + std::shared_ptr<Pad_Op<1>> pad_op; + for (const auto& n : op_.getMicroGraph()->getNodes()) { + if (n->getOperator()->type() == Conv_Op<1>::Type) { + conv_op = std::static_pointer_cast<Conv_Op<1>>(n->getOperator()); + } else { + pad_op = std::static_pointer_cast<Pad_Op<1>>(n->getOperator()); + } + } + + // Call kernel + impl.forward( + pad_op->beginEndBorders(), + conv_op->strideDims(), + conv_op->dilationDims(), + conv_op->kernelDims(), + op_.getInput(0)->template dims<3>(), // input dimensions + conv_op->outChannels(), // outChannels + input0.getImpl()->rawPtr(), // input + input1.getImpl()->rawPtr(), // weight + op_.getInput(2) ? 
input2.getImpl()->rawPtr() : nullptr, // bias + getCPUPtr(mOp.getRawOutput(0)) // output + ); +} + +template <> +void Aidge::PaddedConvImpl1D_cpu::backward() { + AIDGE_THROW_OR_ABORT(std::runtime_error, "Backward not yet implemented for Conv_Op<1> on backend cpu"); +} + +template <> +void Aidge::PaddedConvImpl2D_cpu::forward() { + const auto& op_ = dynamic_cast<const MetaOperator_Op&>(mOp); + + // FIXME: uncomment the following code once memory handling will work + AIDGE_ASSERT(op_.getInput(0), "missing input #0 in Conv Operator."); + AIDGE_ASSERT(op_.getInput(1), "missing input #1 in Conv Operator."); + + // Find the correct kernel type + const auto impl = Registrar<PaddedConvImpl2D_cpu>::create(getBestMatch(getRequiredSpec())); + + // Convert input data (no overhead if not needed!) + // TODO: right now, if needed, memory will be allocated/deallocated at each + // call to forward(). We might put the following shared_ptr as members of + // this class to avoid that. + std::shared_ptr<Tensor> input0Fallback, input1Fallback, input2Fallback; + const auto& input0 = op_.getInput(0)->refCastFrom(input0Fallback, *op_.getOutput(0)); + const auto& input1 = op_.getInput(1)->refCastFrom(input1Fallback, *op_.getOutput(0)); + const auto& input2 = (op_.getInput(2)) ? op_.getInput(2)->refCastFrom(input2Fallback, *op_.getOutput(0)) : Tensor(); + + std::shared_ptr<Conv_Op<2>> conv_op; + std::shared_ptr<Pad_Op<2>> pad_op; + + for (const auto& n : op_.getMicroGraph()->getNodes()) { + if (n->getOperator()->type() == Conv_Op<2>::Type) { + conv_op = std::static_pointer_cast<Conv_Op<2>>(n->getOperator()); + } else { + pad_op = std::static_pointer_cast<Pad_Op<2>>(n->getOperator()); + } + } + + // Call kernel + impl.forward( + pad_op->beginEndBorders(), + conv_op->strideDims(), + conv_op->dilationDims(), + conv_op->kernelDims(), + op_.getInput(0)->template dims<4>(), // input dimensions + conv_op->outChannels(), // outChannels + input0.getImpl()->rawPtr(), // input + input1.getImpl()->rawPtr(), // weight + op_.getInput(2) ? input2.getImpl()->rawPtr() : nullptr, // bias + getCPUPtr(mOp.getRawOutput(0)) // output + ); +} + +template <> +void Aidge::PaddedConvImpl2D_cpu::backward() { + AIDGE_THROW_OR_ABORT(std::runtime_error, "Backward not yet implemented for Conv_Op<2> on backend cpu"); +} -- GitLab
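Editor's note, for reviewers unfamiliar with the indexing in these kernels: rather than materializing a zero-padded tensor, each kernel clamps, for every output position, the range of kernel taps that still falls inside the real input, so padded positions are simply skipped. Below is a minimal standalone C++ sketch of that scheme under simplifying assumptions (single batch and channel, unit stride and dilation, symmetric zero padding); all names and sizes are illustrative and not part of this patch.

    // pad_conv_sketch.cpp -- illustrative only; sizes and names are hypothetical.
    #include <algorithm>
    #include <cstddef>
    #include <iostream>
    #include <vector>

    int main() {
        // Single batch/channel, H x W input, K x K kernel, stride 1, dilation 1,
        // symmetric zero padding of `pad` pixels on every border.
        const std::size_t H = 5, W = 5, K = 3, pad = 1, stride = 1;
        std::vector<float> in(H * W, 1.0f); // dummy all-ones input
        std::vector<float> w(K * K, 1.0f);  // dummy all-ones 3x3 kernel

        // Same output-size formula as the kernels above, written without the "+ stride" trick:
        // floor((H + padBegin + padEnd - K) / stride) + 1
        const std::size_t oH = (H + 2 * pad - K) / stride + 1;
        const std::size_t oW = (W + 2 * pad - K) / stride + 1;
        std::vector<float> out(oH * oW, 0.0f);

        for (std::size_t ox = 0; ox < oH; ++ox) {
            for (std::size_t oy = 0; oy < oW; ++oy) {
                // Top-left corner of the window, expressed in (unpadded) input coordinates;
                // it can be negative when the window overlaps the zero-padding.
                const std::ptrdiff_t ix = static_cast<std::ptrdiff_t>(ox * stride) - static_cast<std::ptrdiff_t>(pad);
                const std::ptrdiff_t iy = static_cast<std::ptrdiff_t>(oy * stride) - static_cast<std::ptrdiff_t>(pad);

                // Clamp the kernel-tap range [sMin, sMax) so only in-image samples are read;
                // taps landing in the padding would multiply by zero, so they are skipped.
                const std::size_t sxMin = ix < 0 ? static_cast<std::size_t>(-ix) : 0;
                const std::size_t syMin = iy < 0 ? static_cast<std::size_t>(-iy) : 0;
                const std::size_t sxMax = static_cast<std::size_t>(
                    std::min<std::ptrdiff_t>(static_cast<std::ptrdiff_t>(K), static_cast<std::ptrdiff_t>(H) - ix));
                const std::size_t syMax = static_cast<std::size_t>(
                    std::min<std::ptrdiff_t>(static_cast<std::ptrdiff_t>(K), static_cast<std::ptrdiff_t>(W) - iy));

                float acc = 0.0f;
                for (std::size_t sx = sxMin; sx < sxMax; ++sx) {
                    for (std::size_t sy = syMin; sy < syMax; ++sy) {
                        const std::size_t row = static_cast<std::size_t>(ix + static_cast<std::ptrdiff_t>(sx));
                        const std::size_t col = static_cast<std::size_t>(iy + static_cast<std::ptrdiff_t>(sy));
                        acc += w[sx * K + sy] * in[row * W + col];
                    }
                }
                out[ox * oW + oy] = acc;
            }
        }

        // Corners see 4 in-image taps, the centre all 9, as expected for a 3x3
        // convolution over a zero-padded 5x5 all-ones input.
        std::cout << "corner=" << out[0] << " centre=" << out[(oH / 2) * oW + oW / 2] << '\n';
        return 0;
    }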