Skip to content
Snippets Groups Projects
Commit 00a0a61c authored by Maxence Naud
Browse files

[Add] 'dilation' parameter to Conv[DepthWise] forward kernel

parent f9d74d9f
No related branches found
No related tags found
No related merge requests found
Pipeline #56893 failed
...@@ -38,7 +38,7 @@ namespace Aidge { ...@@ -38,7 +38,7 @@ namespace Aidge {
*/ */
template <class I, class W, class B, class O> template <class I, class W, class B, class O>
void ConvDepthWiseImpl1D_cpu_forward_kernel(const std::array<DimSize_t, 1>& strideDims, void ConvDepthWiseImpl1D_cpu_forward_kernel(const std::array<DimSize_t, 1>& strideDims,
const std::array<DimSize_t, 1>& /*dilationDims*/, const std::array<DimSize_t, 1>& dilationDims,
const std::array<DimSize_t, 1>& kernelDims, const std::array<DimSize_t, 1>& kernelDims,
const std::array<DimSize_t, 3>& inputDims, const std::array<DimSize_t, 3>& inputDims,
const void *input_, const void *input_,
...@@ -56,6 +56,8 @@ void ConvDepthWiseImpl1D_cpu_forward_kernel(const std::array<DimSize_t, 1>& stri ...@@ -56,6 +56,8 @@ void ConvDepthWiseImpl1D_cpu_forward_kernel(const std::array<DimSize_t, 1>& stri
const std::size_t oxSize = const std::size_t oxSize =
static_cast<std::size_t>(std::floor(static_cast<float>(inputDims[2] - kernelDims[0] + strideDims[0]) / static_cast<std::size_t>(std::floor(static_cast<float>(inputDims[2] - kernelDims[0] + strideDims[0]) /
static_cast<float>(strideDims[0]))); static_cast<float>(strideDims[0])));
const DimSize_t dilated_kernel_x = dilationDims[0]*(kernelDims[0] - 1) + 1;
// TODO: kernel computation // TODO: kernel computation
// output (batch, outCh, Xout, Yout) // output (batch, outCh, Xout, Yout)
...@@ -71,15 +73,17 @@ void ConvDepthWiseImpl1D_cpu_forward_kernel(const std::array<DimSize_t, 1>& stri ...@@ -71,15 +73,17 @@ void ConvDepthWiseImpl1D_cpu_forward_kernel(const std::array<DimSize_t, 1>& stri
const std::size_t iIndex = (ch + batch*inputDims[1]) * inputDims[2]; const std::size_t iIndex = (ch + batch*inputDims[1]) * inputDims[2];
const std::size_t wIndex = ch * kernelDims[0]; const std::size_t wIndex = ch * kernelDims[0];
for (std::size_t ox = 0; ox < oxSize; ++ox) { for (std::size_t ox = 0; ox < oxSize; ++ox) {
const signedsize difx = static_cast<signedsize>(- ox * strideDims[0]); // const signedsize difx = static_cast<signedsize>(- ox * strideDims[0]);
const std::size_t sxMin = static_cast<std::size_t>(std::max(difx, signedsize(0))); // const std::size_t sxMin = static_cast<std::size_t>(std::max(difx, signedsize(0)));
const std::size_t sxMax = (static_cast<signedsize>(inputDims[2]) + difx) < 0 ? 0 : ((inputDims[2] + difx) > kernelDims[0] ? kernelDims[0] : inputDims[2] + difx); // const std::size_t sxMax = (static_cast<signedsize>(inputDims[2]) + difx) < 0 ? 0 : ((inputDims[2] + difx) > kernelDims[0] ? kernelDims[0] : inputDims[2] + difx);
const std::size_t sxMin = 0;
const std::size_t sxMax = dilated_kernel_x;
const std::size_t oIndexFull = oIndex + ox; const std::size_t oIndexFull = oIndex + ox;
const signedsize ix = static_cast<signedsize>(ox * strideDims[0]); const signedsize ix = static_cast<signedsize>(ox * strideDims[0]);
for (std::size_t sx = sxMin; sx < sxMax; ++sx) { for (std::size_t sx = sxMin; sx*dilationDims[0] < sxMax; ++sx) {
output[oIndexFull] += weights[wIndex + sx] * output[oIndexFull] += weights[wIndex + sx] *
input[iIndex + static_cast<std::size_t>(ix+static_cast<signedsize>(sx))]; input[iIndex + static_cast<std::size_t>(ix+static_cast<signedsize>(sx*dilationDims[0]))];
} }
} }
} }
...@@ -113,7 +117,7 @@ REGISTRAR(ConvDepthWiseImpl1D_cpu, ...@@ -113,7 +117,7 @@ REGISTRAR(ConvDepthWiseImpl1D_cpu,
*/ */
template <class I, class W, class B, class O> template <class I, class W, class B, class O>
void ConvDepthWiseImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& strideDims, void ConvDepthWiseImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& strideDims,
const std::array<DimSize_t, 2>& /*dilationDims*/, const std::array<DimSize_t, 2>& dilationDims,
const std::array<DimSize_t, 2>& kernelDims, const std::array<DimSize_t, 2>& kernelDims,
const std::array<DimSize_t, 4>& inputDims, const std::array<DimSize_t, 4>& inputDims,
const void *input_, const void *input_,
...@@ -132,10 +136,12 @@ void ConvDepthWiseImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& stri ...@@ -132,10 +136,12 @@ void ConvDepthWiseImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& stri
const std::size_t oxSize = const std::size_t oxSize =
static_cast<std::size_t>(std::floor(static_cast<float>(inputDims[2] - kernelDims[0] + strideDims[0]) / static_cast<std::size_t>(std::floor(static_cast<float>(inputDims[2] - kernelDims[0] + strideDims[0]) /
static_cast<float>(strideDims[0]))); static_cast<float>(strideDims[0])));
const DimSize_t dilated_kernel_x = dilationDims[0]*(kernelDims[0] - 1) + 1;
// output W size // output W size
const std::size_t oySize = const std::size_t oySize =
static_cast<std::size_t>(std::floor(static_cast<float>(inputDims[3] - kernelDims[1] + strideDims[1]) / static_cast<std::size_t>(std::floor(static_cast<float>(inputDims[3] - kernelDims[1] + strideDims[1]) /
static_cast<float>(strideDims[1]))); static_cast<float>(strideDims[1])));
const DimSize_t dilated_kernel_y = dilationDims[1]*(kernelDims[1] - 1) + 1;
// TODO: kernel computation // TODO: kernel computation
// output (batch, outCh, Xout, Yout) // output (batch, outCh, Xout, Yout)
...@@ -151,13 +157,17 @@ void ConvDepthWiseImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& stri ...@@ -151,13 +157,17 @@ void ConvDepthWiseImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& stri
const std::size_t iIndex = (ch + batch*inputDims[1]) * inputDims[2] * inputDims[3]; const std::size_t iIndex = (ch + batch*inputDims[1]) * inputDims[2] * inputDims[3];
const std::size_t wIndex = ch * kernelDims[0] * kernelDims[1]; const std::size_t wIndex = ch * kernelDims[0] * kernelDims[1];
for (std::size_t ox = 0; ox < oxSize; ++ox) { for (std::size_t ox = 0; ox < oxSize; ++ox) {
const signedsize difx = static_cast<signedsize>(- ox * strideDims[0]); // const signedsize difx = static_cast<signedsize>(- ox * strideDims[0]);
const std::size_t sxMin = static_cast<std::size_t>(std::max(difx, signedsize(0))); // const std::size_t sxMin = static_cast<std::size_t>(std::max(difx, signedsize(0)));
const std::size_t sxMax = (static_cast<signedsize>(inputDims[2]) + difx) < 0 ? 0 : ((inputDims[2] + difx) > kernelDims[0] ? kernelDims[0] : inputDims[2] + difx); // const std::size_t sxMax = (static_cast<signedsize>(inputDims[2]) + difx) < 0 ? 0 : ((inputDims[2] + difx) > kernelDims[0] ? kernelDims[0] : inputDims[2] + difx);
const std::size_t sxMin = 0;
const std::size_t sxMax = dilated_kernel_x;
for (std::size_t oy = 0; oy < oySize; ++oy) { for (std::size_t oy = 0; oy < oySize; ++oy) {
const signedsize dify = static_cast<signedsize>(- oy * strideDims[1]); // const signedsize dify = static_cast<signedsize>(- oy * strideDims[1]);
const std::size_t syMin = static_cast<std::size_t>(std::max(dify, signedsize(0))); // const std::size_t syMin = static_cast<std::size_t>(std::max(dify, signedsize(0)));
const std::size_t syMax = (static_cast<signedsize>(inputDims[3]) + dify) < 0 ? 0 : ((inputDims[3] + dify) > kernelDims[1] ? kernelDims[1] : inputDims[3] + dify); // const std::size_t syMax = (static_cast<signedsize>(inputDims[3]) + dify) < 0 ? 0 : ((inputDims[3] + dify) > kernelDims[1] ? kernelDims[1] : inputDims[3] + dify);
const std::size_t syMin = 0;
const std::size_t syMax = dilated_kernel_y;
const std::size_t oIndexFull = oIndex + ox*oySize + oy; const std::size_t oIndexFull = oIndex + ox*oySize + oy;
const signedsize ix = static_cast<signedsize>(ox * strideDims[0]); const signedsize ix = static_cast<signedsize>(ox * strideDims[0]);
const signedsize iy = static_cast<signedsize>(oy * strideDims[1]); const signedsize iy = static_cast<signedsize>(oy * strideDims[1]);
...@@ -173,10 +183,10 @@ void ConvDepthWiseImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& stri ...@@ -173,10 +183,10 @@ void ConvDepthWiseImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& stri
weights[wIndex + 2*kernelDims[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+2)*inputDims[3] + static_cast<std::size_t>(iy+1)] + weights[wIndex + 2*kernelDims[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+2)*inputDims[3] + static_cast<std::size_t>(iy+1)] +
weights[wIndex + 2*kernelDims[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+2)*inputDims[3] + static_cast<std::size_t>(iy+2)]); weights[wIndex + 2*kernelDims[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+2)*inputDims[3] + static_cast<std::size_t>(iy+2)]);
} else { } else {
for (std::size_t sx = sxMin; sx < sxMax; ++sx) { for (std::size_t sx = sxMin; sx*dilationDims[0] < sxMax; ++sx) {
for (std::size_t sy = syMin; sy < syMax; ++sy) { for (std::size_t sy = syMin; sy*dilationDims[1] < syMax; ++sy) {
output[oIndexFull] += weights[wIndex + sx*kernelDims[1] + sy] * output[oIndexFull] += weights[wIndex + sx*kernelDims[1] + sy] *
input[iIndex + static_cast<std::size_t>(ix+static_cast<signedsize>(sx))*inputDims[3] + static_cast<std::size_t>(iy+static_cast<signedsize>(sy))]; input[iIndex + static_cast<std::size_t>(ix+static_cast<signedsize>(sx*dilationDims[0]))*inputDims[3] + static_cast<std::size_t>(iy+static_cast<signedsize>(sy*dilationDims[1]))];
} }
} }
} }
......
...@@ -40,7 +40,7 @@ namespace Aidge { ...@@ -40,7 +40,7 @@ namespace Aidge {
*/ */
template <class I, class W, class B, class O> template <class I, class W, class B, class O>
void ConvImpl1D_cpu_forward_kernel(const std::array<DimSize_t, 1>& strideDims, void ConvImpl1D_cpu_forward_kernel(const std::array<DimSize_t, 1>& strideDims,
const std::array<DimSize_t, 1>& /*dilationDims*/, const std::array<DimSize_t, 1>& dilationDims,
const std::array<DimSize_t, 1>& kernelDims, const std::array<DimSize_t, 1>& kernelDims,
const std::array<DimSize_t, 3>& inputDims, const std::array<DimSize_t, 3>& inputDims,
DimSize_t outChannels, DimSize_t outChannels,
...@@ -57,8 +57,9 @@ void ConvImpl1D_cpu_forward_kernel(const std::array<DimSize_t, 1>& strideDims, ...@@ -57,8 +57,9 @@ void ConvImpl1D_cpu_forward_kernel(const std::array<DimSize_t, 1>& strideDims,
// output H size // output H size
const std::size_t oxSize = const std::size_t oxSize =
static_cast<std::size_t>(std::floor(static_cast<float>(inputDims[2] - kernelDims[0] + strideDims[0]) / static_cast<std::size_t>(std::floor(static_cast<float>(inputDims[2] - dilationDims[0]*(kernelDims[0] - 1) - 1 + strideDims[0]) /
static_cast<float>(strideDims[0]))); static_cast<float>(strideDims[0])));
const DimSize_t dilated_kernel_x = dilationDims[0]*(kernelDims[0] - 1) + 1;
// TODO: kernel computation // TODO: kernel computation
// output (batch, outCh, Xout, Yout) // output (batch, outCh, Xout, Yout)
...@@ -76,15 +77,17 @@ void ConvImpl1D_cpu_forward_kernel(const std::array<DimSize_t, 1>& strideDims, ...@@ -76,15 +77,17 @@ void ConvImpl1D_cpu_forward_kernel(const std::array<DimSize_t, 1>& strideDims,
const std::size_t iIndex = (inCh + batch*inputDims[1]) * inputDims[2]; const std::size_t iIndex = (inCh + batch*inputDims[1]) * inputDims[2];
const std::size_t wIndex = (inCh + outCh*inputDims[1]) * kernelDims[0]; const std::size_t wIndex = (inCh + outCh*inputDims[1]) * kernelDims[0];
for (std::size_t ox = 0; ox < oxSize; ++ox) { for (std::size_t ox = 0; ox < oxSize; ++ox) {
const signedsize difx = static_cast<signedsize>(- ox * strideDims[0]); // const signedsize difx = static_cast<signedsize>(- ox * strideDims[0]);
const std::size_t sxMin = static_cast<std::size_t>(std::max(difx, signedsize(0))); // const std::size_t sxMin = static_cast<std::size_t>(std::max(difx, signedsize(0)));
const std::size_t sxMax = (static_cast<signedsize>(inputDims[2]) + difx) < 0 ? 0 : ((inputDims[2] + difx) > kernelDims[0] ? kernelDims[0] : inputDims[2] + difx); // const std::size_t sxMax = (static_cast<signedsize>(inputDims[2]) + difx) < 0 ? 0 : ((inputDims[2] + difx) > kernelDims[0] ? kernelDims[0] : inputDims[2] + difx);
const std::size_t sxMin = 0;
const std::size_t sxMax = dilated_kernel_x;
const std::size_t oIndexFull = oIndex + ox; const std::size_t oIndexFull = oIndex + ox;
const signedsize ix = static_cast<signedsize>(ox * strideDims[0]); const signedsize ix = static_cast<signedsize>(ox * strideDims[0]);
for (std::size_t sx = sxMin; sx < sxMax; ++sx) { for (std::size_t sx = sxMin; sx*dilationDims[0] < sxMax; ++sx) {
output[oIndexFull] += weights[wIndex + sx] * output[oIndexFull] += weights[wIndex + sx] *
input[iIndex + static_cast<std::size_t>(ix+static_cast<signedsize>(sx))]; input[iIndex + static_cast<std::size_t>(ix+static_cast<signedsize>(sx*dilationDims[0]))];
} }
} }
} }
...@@ -122,7 +125,7 @@ REGISTRAR(ConvImpl1D_cpu, ...@@ -122,7 +125,7 @@ REGISTRAR(ConvImpl1D_cpu,
*/ */
template <class I, class W, class B, class O> template <class I, class W, class B, class O>
void ConvImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& strideDims, void ConvImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& strideDims,
const std::array<DimSize_t, 2>& /*dilationDims*/, const std::array<DimSize_t, 2>& dilationDims,
const std::array<DimSize_t, 2>& kernelDims, const std::array<DimSize_t, 2>& kernelDims,
const std::array<DimSize_t, 4> &inputDims, const std::array<DimSize_t, 4> &inputDims,
DimSize_t outChannels, DimSize_t outChannels,
...@@ -139,12 +142,15 @@ void ConvImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& strideDims, ...@@ -139,12 +142,15 @@ void ConvImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& strideDims,
// output H size // output H size
const std::size_t oxSize = const std::size_t oxSize =
static_cast<std::size_t>(std::floor(static_cast<float>(inputDims[2] - kernelDims[0] + strideDims[0]) / static_cast<std::size_t>(std::floor(static_cast<float>(inputDims[2] - dilationDims[0]*(kernelDims[0] - 1) - 1 + strideDims[0]) /
static_cast<float>(strideDims[0]))); static_cast<float>(strideDims[0])));
const DimSize_t dilated_kernel_x = dilationDims[0]*(kernelDims[0] - 1) + 1;
// output W size // output W size
const std::size_t oySize = const std::size_t oySize =
static_cast<std::size_t>(std::floor(static_cast<float>(inputDims[3] - kernelDims[1] + strideDims[1]) / static_cast<std::size_t>(std::floor(static_cast<float>(inputDims[3] - dilationDims[1]*(kernelDims[1] - 1) - 1 + strideDims[1]) /
static_cast<float>(strideDims[1]))); static_cast<float>(strideDims[1])));
const DimSize_t dilated_kernel_y = dilationDims[1]*(kernelDims[1] - 1) + 1;
// TODO: kernel computation // TODO: kernel computation
// output (batch, outCh, Xout, Yout) // output (batch, outCh, Xout, Yout)
...@@ -162,13 +168,17 @@ void ConvImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& strideDims, ...@@ -162,13 +168,17 @@ void ConvImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& strideDims,
const std::size_t iIndex = (inCh + batch*inputDims[1]) * inputDims[2] * inputDims[3]; const std::size_t iIndex = (inCh + batch*inputDims[1]) * inputDims[2] * inputDims[3];
const std::size_t wIndex = (inCh + outCh*inputDims[1]) * kernelDims[0] * kernelDims[1]; const std::size_t wIndex = (inCh + outCh*inputDims[1]) * kernelDims[0] * kernelDims[1];
for (std::size_t ox = 0; ox < oxSize; ++ox) { for (std::size_t ox = 0; ox < oxSize; ++ox) {
const signedsize difx = static_cast<signedsize>(- ox * strideDims[0]); // const signedsize difx = static_cast<signedsize>(- ox * strideDims[0]);
const std::size_t sxMin = static_cast<std::size_t>(std::max(difx, signedsize(0))); // const std::size_t sxMin = static_cast<std::size_t>(std::max(difx, signedsize(0)));
const std::size_t sxMax = (static_cast<signedsize>(inputDims[2]) + difx) < 0 ? 0 : ((inputDims[2] + difx) > kernelDims[0] ? kernelDims[0] : inputDims[2] + difx); // const std::size_t sxMax = (static_cast<signedsize>(inputDims[2]) + difx) < 0 ? 0 : ((inputDims[2] + difx) > kernelDims[0] ? kernelDims[0] : inputDims[2] + difx);
const std::size_t sxMin = 0;
const std::size_t sxMax = dilated_kernel_x;
for (std::size_t oy = 0; oy < oySize; ++oy) { for (std::size_t oy = 0; oy < oySize; ++oy) {
const signedsize dify = static_cast<signedsize>(- oy * strideDims[1]); // const signedsize dify = static_cast<signedsize>(- oy * strideDims[1]);
const std::size_t syMin = static_cast<std::size_t>(std::max(dify, signedsize(0))); // const std::size_t syMin = static_cast<std::size_t>(std::max(dify, signedsize(0)));
const std::size_t syMax = (static_cast<signedsize>(inputDims[3]) + dify) < 0 ? 0 : ((inputDims[3] + dify) > kernelDims[1] ? kernelDims[1] : inputDims[3] + dify); // const std::size_t syMax = (static_cast<signedsize>(inputDims[3]) + dify) < 0 ? 0 : ((inputDims[3] + dify) > kernelDims[1] ? kernelDims[1] : inputDims[3] + dify);
const std::size_t syMin = 0;
const std::size_t syMax = dilated_kernel_y;
const std::size_t oIndexFull = oIndex + ox*oySize + oy; const std::size_t oIndexFull = oIndex + ox*oySize + oy;
const signedsize ix = static_cast<signedsize>(ox * strideDims[0]); const signedsize ix = static_cast<signedsize>(ox * strideDims[0]);
const signedsize iy = static_cast<signedsize>(oy * strideDims[1]); const signedsize iy = static_cast<signedsize>(oy * strideDims[1]);
...@@ -184,10 +194,10 @@ void ConvImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& strideDims, ...@@ -184,10 +194,10 @@ void ConvImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& strideDims,
weights[wIndex + 2*kernelDims[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+2)*inputDims[3] + static_cast<std::size_t>(iy+1)] + weights[wIndex + 2*kernelDims[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+2)*inputDims[3] + static_cast<std::size_t>(iy+1)] +
weights[wIndex + 2*kernelDims[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+2)*inputDims[3] + static_cast<std::size_t>(iy+2)]); weights[wIndex + 2*kernelDims[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+2)*inputDims[3] + static_cast<std::size_t>(iy+2)]);
} else { } else {
for (std::size_t sx = sxMin; sx < sxMax; ++sx) { for (std::size_t sx = sxMin; sx*dilationDims[0] < sxMax; ++sx) {
for (std::size_t sy = syMin; sy < syMax; ++sy) { for (std::size_t sy = syMin; sy*dilationDims[1] < syMax; ++sy) {
output[oIndexFull] += weights[wIndex + sx*kernelDims[1] + sy] * output[oIndexFull] += weights[wIndex + sx*kernelDims[1] + sy] *
input[iIndex + static_cast<std::size_t>(ix+static_cast<signedsize>(sx))*inputDims[3] + static_cast<std::size_t>(iy+static_cast<signedsize>(sy))]; input[iIndex + static_cast<std::size_t>(ix+static_cast<signedsize>(sx*dilationDims[0]))*inputDims[3] + static_cast<std::size_t>(iy+static_cast<signedsize>(sy*dilationDims[1]))];
} }
} }
} }
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment