From 10d79752d05c990575f30a9bfa6b6e225d9117f6 Mon Sep 17 00:00:00 2001 From: Maxence Naud <maxence.naud@cea.fr> Date: Sun, 20 Oct 2024 14:31:29 +0000 Subject: [PATCH] Resolve "Missing ``dilation`` parameter in Conv[DepthWise] forward kernel" --- .../operator/ConvDepthWiseImpl_kernels.hpp | 42 +++++++++------- .../backend/cpu/operator/ConvImpl_kernels.hpp | 48 +++++++++++-------- 2 files changed, 55 insertions(+), 35 deletions(-) diff --git a/include/aidge/backend/cpu/operator/ConvDepthWiseImpl_kernels.hpp b/include/aidge/backend/cpu/operator/ConvDepthWiseImpl_kernels.hpp index ff9bb148..39a35703 100644 --- a/include/aidge/backend/cpu/operator/ConvDepthWiseImpl_kernels.hpp +++ b/include/aidge/backend/cpu/operator/ConvDepthWiseImpl_kernels.hpp @@ -38,7 +38,7 @@ namespace Aidge { */ template <class I, class W, class B, class O> void ConvDepthWiseImpl1D_cpu_forward_kernel(const std::array<DimSize_t, 1>& strideDims, - const std::array<DimSize_t, 1>& /*dilationDims*/, + const std::array<DimSize_t, 1>& dilationDims, const std::array<DimSize_t, 1>& kernelDims, const std::array<DimSize_t, 3>& inputDims, const void *input_, @@ -56,6 +56,8 @@ void ConvDepthWiseImpl1D_cpu_forward_kernel(const std::array<DimSize_t, 1>& stri const std::size_t oxSize = static_cast<std::size_t>(std::floor(static_cast<float>(inputDims[2] - kernelDims[0] + strideDims[0]) / static_cast<float>(strideDims[0]))); + const DimSize_t dilated_kernel_x = dilationDims[0]*(kernelDims[0] - 1) + 1; + // TODO: kernel computation // output (batch, outCh, Xout, Yout) @@ -71,15 +73,17 @@ void ConvDepthWiseImpl1D_cpu_forward_kernel(const std::array<DimSize_t, 1>& stri const std::size_t iIndex = (ch + batch*inputDims[1]) * inputDims[2]; const std::size_t wIndex = ch * kernelDims[0]; for (std::size_t ox = 0; ox < oxSize; ++ox) { - const signedsize difx = static_cast<signedsize>(- ox * strideDims[0]); - const std::size_t sxMin = static_cast<std::size_t>(std::max(difx, signedsize(0))); - const std::size_t sxMax = (static_cast<signedsize>(inputDims[2]) + difx) < 0 ? 0 : ((inputDims[2] + difx) > kernelDims[0] ? kernelDims[0] : inputDims[2] + difx); + // const signedsize difx = static_cast<signedsize>(- ox * strideDims[0]); + // const std::size_t sxMin = static_cast<std::size_t>(std::max(difx, signedsize(0))); + // const std::size_t sxMax = (static_cast<signedsize>(inputDims[2]) + difx) < 0 ? 0 : ((inputDims[2] + difx) > kernelDims[0] ? kernelDims[0] : inputDims[2] + difx); + const std::size_t sxMin = 0; + const std::size_t sxMax = dilated_kernel_x; const std::size_t oIndexFull = oIndex + ox; const signedsize ix = static_cast<signedsize>(ox * strideDims[0]); - for (std::size_t sx = sxMin; sx < sxMax; ++sx) { + for (std::size_t sx = sxMin; sx*dilationDims[0] < sxMax; ++sx) { output[oIndexFull] += weights[wIndex + sx] * - input[iIndex + static_cast<std::size_t>(ix+static_cast<signedsize>(sx))]; + input[iIndex + static_cast<std::size_t>(ix+static_cast<signedsize>(sx*dilationDims[0]))]; } } } @@ -113,7 +117,7 @@ REGISTRAR(ConvDepthWiseImpl1D_cpu, */ template <class I, class W, class B, class O> void ConvDepthWiseImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& strideDims, - const std::array<DimSize_t, 2>& /*dilationDims*/, + const std::array<DimSize_t, 2>& dilationDims, const std::array<DimSize_t, 2>& kernelDims, const std::array<DimSize_t, 4>& inputDims, const void *input_, @@ -132,10 +136,12 @@ void ConvDepthWiseImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& stri const std::size_t oxSize = static_cast<std::size_t>(std::floor(static_cast<float>(inputDims[2] - kernelDims[0] + strideDims[0]) / static_cast<float>(strideDims[0]))); + const DimSize_t dilated_kernel_x = dilationDims[0]*(kernelDims[0] - 1) + 1; // output W size const std::size_t oySize = static_cast<std::size_t>(std::floor(static_cast<float>(inputDims[3] - kernelDims[1] + strideDims[1]) / static_cast<float>(strideDims[1]))); + const DimSize_t dilated_kernel_y = dilationDims[1]*(kernelDims[1] - 1) + 1; // TODO: kernel computation // output (batch, outCh, Xout, Yout) @@ -151,13 +157,17 @@ void ConvDepthWiseImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& stri const std::size_t iIndex = (ch + batch*inputDims[1]) * inputDims[2] * inputDims[3]; const std::size_t wIndex = ch * kernelDims[0] * kernelDims[1]; for (std::size_t ox = 0; ox < oxSize; ++ox) { - const signedsize difx = static_cast<signedsize>(- ox * strideDims[0]); - const std::size_t sxMin = static_cast<std::size_t>(std::max(difx, signedsize(0))); - const std::size_t sxMax = (static_cast<signedsize>(inputDims[2]) + difx) < 0 ? 0 : ((inputDims[2] + difx) > kernelDims[0] ? kernelDims[0] : inputDims[2] + difx); + // const signedsize difx = static_cast<signedsize>(- ox * strideDims[0]); + // const std::size_t sxMin = static_cast<std::size_t>(std::max(difx, signedsize(0))); + // const std::size_t sxMax = (static_cast<signedsize>(inputDims[2]) + difx) < 0 ? 0 : ((inputDims[2] + difx) > kernelDims[0] ? kernelDims[0] : inputDims[2] + difx); + const std::size_t sxMin = 0; + const std::size_t sxMax = dilated_kernel_x; for (std::size_t oy = 0; oy < oySize; ++oy) { - const signedsize dify = static_cast<signedsize>(- oy * strideDims[1]); - const std::size_t syMin = static_cast<std::size_t>(std::max(dify, signedsize(0))); - const std::size_t syMax = (static_cast<signedsize>(inputDims[3]) + dify) < 0 ? 0 : ((inputDims[3] + dify) > kernelDims[1] ? kernelDims[1] : inputDims[3] + dify); + // const signedsize dify = static_cast<signedsize>(- oy * strideDims[1]); + // const std::size_t syMin = static_cast<std::size_t>(std::max(dify, signedsize(0))); + // const std::size_t syMax = (static_cast<signedsize>(inputDims[3]) + dify) < 0 ? 0 : ((inputDims[3] + dify) > kernelDims[1] ? kernelDims[1] : inputDims[3] + dify); + const std::size_t syMin = 0; + const std::size_t syMax = dilated_kernel_y; const std::size_t oIndexFull = oIndex + ox*oySize + oy; const signedsize ix = static_cast<signedsize>(ox * strideDims[0]); const signedsize iy = static_cast<signedsize>(oy * strideDims[1]); @@ -173,10 +183,10 @@ void ConvDepthWiseImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& stri weights[wIndex + 2*kernelDims[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+2)*inputDims[3] + static_cast<std::size_t>(iy+1)] + weights[wIndex + 2*kernelDims[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+2)*inputDims[3] + static_cast<std::size_t>(iy+2)]); } else { - for (std::size_t sx = sxMin; sx < sxMax; ++sx) { - for (std::size_t sy = syMin; sy < syMax; ++sy) { + for (std::size_t sx = sxMin; sx*dilationDims[0] < sxMax; ++sx) { + for (std::size_t sy = syMin; sy*dilationDims[1] < syMax; ++sy) { output[oIndexFull] += weights[wIndex + sx*kernelDims[1] + sy] * - input[iIndex + static_cast<std::size_t>(ix+static_cast<signedsize>(sx))*inputDims[3] + static_cast<std::size_t>(iy+static_cast<signedsize>(sy))]; + input[iIndex + static_cast<std::size_t>(ix+static_cast<signedsize>(sx*dilationDims[0]))*inputDims[3] + static_cast<std::size_t>(iy+static_cast<signedsize>(sy*dilationDims[1]))]; } } } diff --git a/include/aidge/backend/cpu/operator/ConvImpl_kernels.hpp b/include/aidge/backend/cpu/operator/ConvImpl_kernels.hpp index cc3bd57c..e800c252 100644 --- a/include/aidge/backend/cpu/operator/ConvImpl_kernels.hpp +++ b/include/aidge/backend/cpu/operator/ConvImpl_kernels.hpp @@ -40,7 +40,7 @@ namespace Aidge { */ template <class I, class W, class B, class O> void ConvImpl1D_cpu_forward_kernel(const std::array<DimSize_t, 1>& strideDims, - const std::array<DimSize_t, 1>& /*dilationDims*/, + const std::array<DimSize_t, 1>& dilationDims, const std::array<DimSize_t, 1>& kernelDims, const std::array<DimSize_t, 3>& inputDims, DimSize_t outChannels, @@ -57,8 +57,9 @@ void ConvImpl1D_cpu_forward_kernel(const std::array<DimSize_t, 1>& strideDims, // output H size const std::size_t oxSize = - static_cast<std::size_t>(std::floor(static_cast<float>(inputDims[2] - kernelDims[0] + strideDims[0]) / + static_cast<std::size_t>(std::floor(static_cast<float>(inputDims[2] - dilationDims[0]*(kernelDims[0] - 1) - 1 + strideDims[0]) / static_cast<float>(strideDims[0]))); + const DimSize_t dilated_kernel_x = dilationDims[0]*(kernelDims[0] - 1) + 1; // TODO: kernel computation // output (batch, outCh, Xout, Yout) @@ -76,15 +77,17 @@ void ConvImpl1D_cpu_forward_kernel(const std::array<DimSize_t, 1>& strideDims, const std::size_t iIndex = (inCh + batch*inputDims[1]) * inputDims[2]; const std::size_t wIndex = (inCh + outCh*inputDims[1]) * kernelDims[0]; for (std::size_t ox = 0; ox < oxSize; ++ox) { - const signedsize difx = static_cast<signedsize>(- ox * strideDims[0]); - const std::size_t sxMin = static_cast<std::size_t>(std::max(difx, signedsize(0))); - const std::size_t sxMax = (static_cast<signedsize>(inputDims[2]) + difx) < 0 ? 0 : ((inputDims[2] + difx) > kernelDims[0] ? kernelDims[0] : inputDims[2] + difx); + // const signedsize difx = static_cast<signedsize>(- ox * strideDims[0]); + // const std::size_t sxMin = static_cast<std::size_t>(std::max(difx, signedsize(0))); + // const std::size_t sxMax = (static_cast<signedsize>(inputDims[2]) + difx) < 0 ? 0 : ((inputDims[2] + difx) > kernelDims[0] ? kernelDims[0] : inputDims[2] + difx); + const std::size_t sxMin = 0; + const std::size_t sxMax = dilated_kernel_x; const std::size_t oIndexFull = oIndex + ox; const signedsize ix = static_cast<signedsize>(ox * strideDims[0]); - for (std::size_t sx = sxMin; sx < sxMax; ++sx) { + for (std::size_t sx = sxMin; sx*dilationDims[0] < sxMax; ++sx) { output[oIndexFull] += weights[wIndex + sx] * - input[iIndex + static_cast<std::size_t>(ix+static_cast<signedsize>(sx))]; + input[iIndex + static_cast<std::size_t>(ix+static_cast<signedsize>(sx*dilationDims[0]))]; } } } @@ -122,7 +125,7 @@ REGISTRAR(ConvImpl1D_cpu, */ template <class I, class W, class B, class O> void ConvImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& strideDims, - const std::array<DimSize_t, 2>& /*dilationDims*/, + const std::array<DimSize_t, 2>& dilationDims, const std::array<DimSize_t, 2>& kernelDims, const std::array<DimSize_t, 4> &inputDims, DimSize_t outChannels, @@ -139,12 +142,15 @@ void ConvImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& strideDims, // output H size const std::size_t oxSize = - static_cast<std::size_t>(std::floor(static_cast<float>(inputDims[2] - kernelDims[0] + strideDims[0]) / + static_cast<std::size_t>(std::floor(static_cast<float>(inputDims[2] - dilationDims[0]*(kernelDims[0] - 1) - 1 + strideDims[0]) / static_cast<float>(strideDims[0]))); + const DimSize_t dilated_kernel_x = dilationDims[0]*(kernelDims[0] - 1) + 1; // output W size const std::size_t oySize = - static_cast<std::size_t>(std::floor(static_cast<float>(inputDims[3] - kernelDims[1] + strideDims[1]) / + static_cast<std::size_t>(std::floor(static_cast<float>(inputDims[3] - dilationDims[1]*(kernelDims[1] - 1) - 1 + strideDims[1]) / static_cast<float>(strideDims[1]))); + const DimSize_t dilated_kernel_y = dilationDims[1]*(kernelDims[1] - 1) + 1; + // TODO: kernel computation // output (batch, outCh, Xout, Yout) @@ -162,13 +168,17 @@ void ConvImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& strideDims, const std::size_t iIndex = (inCh + batch*inputDims[1]) * inputDims[2] * inputDims[3]; const std::size_t wIndex = (inCh + outCh*inputDims[1]) * kernelDims[0] * kernelDims[1]; for (std::size_t ox = 0; ox < oxSize; ++ox) { - const signedsize difx = static_cast<signedsize>(- ox * strideDims[0]); - const std::size_t sxMin = static_cast<std::size_t>(std::max(difx, signedsize(0))); - const std::size_t sxMax = (static_cast<signedsize>(inputDims[2]) + difx) < 0 ? 0 : ((inputDims[2] + difx) > kernelDims[0] ? kernelDims[0] : inputDims[2] + difx); + // const signedsize difx = static_cast<signedsize>(- ox * strideDims[0]); + // const std::size_t sxMin = static_cast<std::size_t>(std::max(difx, signedsize(0))); + // const std::size_t sxMax = (static_cast<signedsize>(inputDims[2]) + difx) < 0 ? 0 : ((inputDims[2] + difx) > kernelDims[0] ? kernelDims[0] : inputDims[2] + difx); + const std::size_t sxMin = 0; + const std::size_t sxMax = dilated_kernel_x; for (std::size_t oy = 0; oy < oySize; ++oy) { - const signedsize dify = static_cast<signedsize>(- oy * strideDims[1]); - const std::size_t syMin = static_cast<std::size_t>(std::max(dify, signedsize(0))); - const std::size_t syMax = (static_cast<signedsize>(inputDims[3]) + dify) < 0 ? 0 : ((inputDims[3] + dify) > kernelDims[1] ? kernelDims[1] : inputDims[3] + dify); + // const signedsize dify = static_cast<signedsize>(- oy * strideDims[1]); + // const std::size_t syMin = static_cast<std::size_t>(std::max(dify, signedsize(0))); + // const std::size_t syMax = (static_cast<signedsize>(inputDims[3]) + dify) < 0 ? 0 : ((inputDims[3] + dify) > kernelDims[1] ? kernelDims[1] : inputDims[3] + dify); + const std::size_t syMin = 0; + const std::size_t syMax = dilated_kernel_y; const std::size_t oIndexFull = oIndex + ox*oySize + oy; const signedsize ix = static_cast<signedsize>(ox * strideDims[0]); const signedsize iy = static_cast<signedsize>(oy * strideDims[1]); @@ -184,10 +194,10 @@ void ConvImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& strideDims, weights[wIndex + 2*kernelDims[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+2)*inputDims[3] + static_cast<std::size_t>(iy+1)] + weights[wIndex + 2*kernelDims[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+2)*inputDims[3] + static_cast<std::size_t>(iy+2)]); } else { - for (std::size_t sx = sxMin; sx < sxMax; ++sx) { - for (std::size_t sy = syMin; sy < syMax; ++sy) { + for (std::size_t sx = sxMin; sx*dilationDims[0] < sxMax; ++sx) { + for (std::size_t sy = syMin; sy*dilationDims[1] < syMax; ++sy) { output[oIndexFull] += weights[wIndex + sx*kernelDims[1] + sy] * - input[iIndex + static_cast<std::size_t>(ix+static_cast<signedsize>(sx))*inputDims[3] + static_cast<std::size_t>(iy+static_cast<signedsize>(sy))]; + input[iIndex + static_cast<std::size_t>(ix+static_cast<signedsize>(sx*dilationDims[0]))*inputDims[3] + static_cast<std::size_t>(iy+static_cast<signedsize>(sy*dilationDims[1]))]; } } } -- GitLab