diff --git a/include/aidge/backend/cpu/operator/ConvImpl.hpp b/include/aidge/backend/cpu/operator/ConvImpl.hpp
index c06d0912f419909013f930867ce3c3238c1a5555..41784ac96f4345aa2653e40d1fd575efa284e3b7 100644
--- a/include/aidge/backend/cpu/operator/ConvImpl.hpp
+++ b/include/aidge/backend/cpu/operator/ConvImpl.hpp
@@ -26,16 +26,16 @@ namespace Aidge {
 // Operator implementation entry point for the backend
 using Conv1D_Op = Conv_Op<1>;
-using ConvImpl1D_cpu = OperatorImpl_cpu<Conv_Op<1>,
-    void(const std::array<DimSize_t, 1>&,
-    const std::array<DimSize_t, 1>&,
-    const std::array<DimSize_t, 1>&,
-    const std::array<DimSize_t, 3> &,
-    DimSize_t,
-    const void *,
-    const void *,
-    const void *,
-    void *)>;
+using ConvImpl1D_cpu = OperatorImpl_cpu<Conv1D_Op,
+                                        void(const DimSize_t &,
+                                             const DimSize_t &,
+                                             const DimSize_t &,
+                                             const std::array<DimSize_t, 3> &,
+                                             DimSize_t,
+                                             const void *,
+                                             const void *,
+                                             const void *,
+                                             void *)>;
 
 using Conv2D_Op = Conv_Op<2>;
 using ConvImpl2D_cpu = OperatorImpl_cpu<Conv_Op<2>,
diff --git a/include/aidge/backend/cpu/operator/ConvImpl_kernels.hpp b/include/aidge/backend/cpu/operator/ConvImpl_kernels.hpp
index 1229d5714e6b0cbae4e42ece9130c2c2305f133e..93230f54713ad7063ffbcd76f0a4545035fd5267 100644
--- a/include/aidge/backend/cpu/operator/ConvImpl_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/ConvImpl_kernels.hpp
@@ -39,16 +39,15 @@ namespace Aidge {
  * @param output_ Output Tensor.
  */
 template <class I, class W, class B, class O>
-void ConvImpl1D_cpu_forward_kernel(const std::array<DimSize_t, 1>& strideDims,
-                            const std::array<DimSize_t, 1>& dilationDims,
-                            const std::array<DimSize_t, 1>& kernelDims,
-                            const std::array<DimSize_t, 3>& inputDims,
-                            DimSize_t outChannels,
-                            const void *input_,
-                            const void *weights_,
-                            const void *biases_,
-                            void *output_)
-{
+void ConvImpl1D_cpu_forward_kernel(const DimSize_t &strideDim,
+                                   const DimSize_t &dilationDim,
+                                   const DimSize_t &kernelDim,
+                                   const std::array<DimSize_t, 3> &inputDims,
+                                   DimSize_t outChannels,
+                                   const void *input_,
+                                   const void *weights_,
+                                   const void *biases_,
+                                   void *output_) {
     // FIXME: missing convolution attributes as arguments
     const I *input = static_cast<const I *>(input_);
     const W *weights = static_cast<const W *>(weights_);
@@ -56,34 +55,51 @@ void ConvImpl1D_cpu_forward_kernel(const std::array<DimSize_t, 1>& strideDims,
     O *output = static_cast<O *>(output_);
 
     // output H size
-    const std::size_t oxSize =
-            static_cast<std::size_t>(std::floor(static_cast<float>(inputDims[2] - dilationDims[0]*(kernelDims[0] - 1) - 1 + strideDims[0]) /
-                                static_cast<float>(strideDims[0])));
-    const DimSize_t dilated_kernel_x = dilationDims[0]*(kernelDims[0] - 1) + 1;
+    const std::size_t oxSize = static_cast<std::size_t>(std::floor(
+        static_cast<float>(inputDims[2] - dilationDim * (kernelDim - 1) - 1 +
+                           strideDim) /
+        static_cast<float>(strideDim)));
+    const DimSize_t dilated_kernel_x = dilationDim * (kernelDim - 1) + 1;
 
-    // TODO: kernel computation
-    // output (batch, outCh, Xout, Yout)
-    // input (batch, inCh, Xin, Yin)
-    // weight (outCh, inCh, kernelX, kernelY)
-    // does not take Dilation attribute into account
     using signedsize = std::make_signed<std::size_t>::type;
     for (std::size_t batch = 0; batch < inputDims[0]; ++batch) {
         for (std::size_t outCh = 0; outCh < outChannels; ++outCh) {
-            const std::size_t oIndex = (outCh + batch*outChannels) * oxSize;
+            const std::size_t oIndex = (outCh + batch * outChannels) * oxSize;
             // If bias = nullptr, set B(0)
             B biasVal = (biases != nullptr) ? biases[outCh] : B(0);
-            std::fill(output + oIndex, output+(oIndex+oxSize), biasVal);
+            std::fill(output + oIndex, output + (oIndex + oxSize), biasVal);
             for (std::size_t inCh = 0; inCh < inputDims[1]; ++inCh) {
-                const std::size_t iIndex = (inCh + batch*inputDims[1]) * inputDims[2];
-                const std::size_t wIndex = (inCh + outCh*inputDims[1]) * kernelDims[0];
+                const std::size_t iIndex =
+                    (inCh + batch * inputDims[1]) * inputDims[2];
+                const std::size_t wIndex =
+                    (inCh + outCh * inputDims[1]) * kernelDim;
                 for (std::size_t ox = 0; ox < oxSize; ++ox) {
-                    // const signedsize difx = static_cast<signedsize>(- ox * strideDims[0]);
-                    // const std::size_t sxMin = static_cast<std::size_t>(std::max(difx, signedsize(0)));
-                    // const std::size_t sxMax = (static_cast<signedsize>(inputDims[2]) + difx) < 0 ? 0 : ((inputDims[2] + difx) > kernelDims[0] ? kernelDims[0] : inputDims[2] + difx);
+                    // const signedsize difx = static_cast<signedsize>(- ox *
+                    // strideDim); const std::size_t sxMin =
+                    // static_cast<std::size_t>(std::max(difx, signedsize(0)));
+                    // const std::size_t sxMax =
+                    // (static_cast<signedsize>(inputDims[2]) + difx) < 0 ? 0 :
+                    // ((inputDims[2] + difx) > kernelDim ? kernelDim :
+                    // inputDims[2] + difx);
                     const std::size_t sxMin = 0;
                     const std::size_t sxMax = dilated_kernel_x;
                     const std::size_t oIndexFull = oIndex + ox;
-                    const signedsize ix = static_cast<signedsize>(ox * strideDims[0]);
+                    const signedsize ix =
+                        static_cast<signedsize>(ox * strideDim);
+
+                    for (std::size_t sx = sxMin; sx * dilationDim < sxMax;
+                         ++sx) {
+                        output[oIndexFull] +=
+                            weights[wIndex + sx] *
+                            input[iIndex + static_cast<std::size_t>(
+                                               ix + static_cast<signedsize>(
+                                                        sx * dilationDim))];
+                    }
+                }
+            }
+        }
+    }
+}
 
-                    for (std::size_t sx = sxMin; sx*dilationDims[0] < sxMax; ++sx) {
-                        output[oIndexFull] += weights[wIndex + sx] *
-                                              input[iIndex + static_cast<std::size_t>(ix + static_cast<signedsize>(sx*dilationDims[0]))];
-                    }
-                }
-            }
-        }
-    }
-}
diff --git a/src/operator/ConvImpl.cpp b/src/operator/ConvImpl.cpp
index fdfe19fbf4bf3e71c86aa28b966cfb21a1b5ba40..20f03708c00741992eb24064face411149af4662 100644
--- a/src/operator/ConvImpl.cpp
+++ b/src/operator/ConvImpl.cpp
@@ -43,16 +43,17 @@ void Aidge::ConvImpl1D_cpu::forward() {
     const auto& input2 = (op_.getInput(2)) ? op_.getInput(2)->refCastFrom(input2Fallback, *op_.getOutput(0)) : Tensor();
 
     // Call kernel
-    impl.forward(op_.strideDims(),
-            op_.dilationDims(),
-            op_.kernelDims(),
-            op_.getInput(0)->template dims<3>(), // input dimensions
-            dynamic_cast<const Conv_Op<1>&>(mOp).outChannels(), // outChannels
-            input0.getImpl()->rawPtr(), // input
-            input1.getImpl()->rawPtr(), // weight
-            op_.getInput(2) ? input2.getImpl()->rawPtr() : nullptr, // bias
-            getCPUPtr(mOp.getRawOutput(0)) // output
-            );
+    impl.forward(
+        op_.strideDims()[0],
+        op_.dilationDims()[0],
+        op_.kernelDims()[0],
+        op_.getInput(0)->template dims<3>(), // input dimensions
+        dynamic_cast<const Conv_Op<1> &>(mOp).outChannels(), // outChannels
+        input0.getImpl()->rawPtr(), // input
+        input1.getImpl()->rawPtr(), // weight
+        op_.getInput(2) ? input2.getImpl()->rawPtr() : nullptr, // bias
+        getCPUPtr(mOp.getRawOutput(0)) // output
+        );
 }
 
 template <>
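
For reference, below is a minimal standalone sketch (not part of the patch) of the output-length arithmetic the kernel relies on, using the new scalar stride/dilation/kernel parameters. The names and values are illustrative assumptions only; DimSize_t is taken to be std::size_t here, and no Aidge headers are needed.

    #include <cstddef>
    #include <iostream>

    int main() {
        // Illustrative 1D conv geometry: input width 10, kernel 3, stride 2, dilation 2.
        const std::size_t inW = 10, kernelDim = 3, strideDim = 2, dilationDim = 2;
        // Effective kernel extent once dilation is applied: d*(k-1)+1 = 5.
        const std::size_t dilated_kernel_x = dilationDim * (kernelDim - 1) + 1;
        // Same formula as oxSize in the kernel; unsigned division floors:
        // (10 - 4 - 1 + 2) / 2 = 3.
        const std::size_t oxSize =
            (inW - dilationDim * (kernelDim - 1) - 1 + strideDim) / strideDim;
        std::cout << "dilated kernel: " << dilated_kernel_x // 5
                  << ", output length: " << oxSize << '\n'; // 3
        return 0;
    }

Each output position ox then reads inputs at ix + sx * dilationDim for sx in [0, kernelDim), which is exactly the index pattern of the rewritten inner loop (its condition sx * dilationDim < dilated_kernel_x admits kernelDim values of sx).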