Skip to content
Snippets Groups Projects
Commit 9c03c1f6 authored by Olivier BICHLER
Browse files

Added dilation support

parent 490df27c
No related branches found
No related tags found
2 merge requests: !49 (0.3.1), !40 (Add ConvDepthWise support)
Pipeline #71607 failed
......@@ -179,23 +179,25 @@ void convolution_depthwise_forward(
{
static_assert(NB_OUTPUTS % NB_CHANNELS == 0,
"NB_OUTPUTS should be a multiple of NB_CHANNELS.");
static_assert(DILATION_Y == 1,
"DILATION_Y != 1 not supported.");
static_assert(DILATION_X == 1,
"DILATION_X != 1 not supported.");
constexpr int DILATED_KERNEL_HEIGHT
= KERNEL_HEIGHT + (DILATION_Y - 1) * (KERNEL_HEIGHT - 1);
constexpr int DILATED_KERNEL_WIDTH
= KERNEL_WIDTH + (DILATION_X - 1) * (KERNEL_WIDTH - 1);
constexpr int OUTPUTS_HEIGHT_NOPAD
= (CHANNELS_HEIGHT - KERNEL_HEIGHT + STRIDE_Y) / STRIDE_Y;
= (CHANNELS_HEIGHT - DILATION_Y * (KERNEL_HEIGHT - 1) - 1 + STRIDE_Y) / STRIDE_Y;
constexpr int OUTPUTS_WIDTH_NOPAD
= (CHANNELS_WIDTH - KERNEL_WIDTH + STRIDE_X) / STRIDE_X;
= (CHANNELS_WIDTH - DILATION_X * (KERNEL_WIDTH - 1) - 1 + STRIDE_X) / STRIDE_X;
for (int oy = 0; oy < OUTPUTS_HEIGHT; ++oy) {
const int syMin = (PADDING_Y == 0) ? 0
: max(PADDING_Y - (oy * STRIDE_Y), 0);
const int syMax = (PADDING_Y == 0
&& OUTPUTS_HEIGHT == OUTPUTS_HEIGHT_NOPAD) ? KERNEL_HEIGHT
&& OUTPUTS_HEIGHT == OUTPUTS_HEIGHT_NOPAD) ? DILATED_KERNEL_HEIGHT
: clamp(CHANNELS_HEIGHT + PADDING_Y - (oy * STRIDE_Y),
0, KERNEL_HEIGHT);
0, DILATED_KERNEL_HEIGHT);
const int iy = (oy * STRIDE_Y) - PADDING_Y;
#pragma omp parallel for collapse(2)
......@@ -206,9 +208,9 @@ void convolution_depthwise_forward(
: max(PADDING_X - (ox * STRIDE_X), 0);
const int sxMax = (PADDING_X == 0
&& OUTPUTS_WIDTH == OUTPUTS_WIDTH_NOPAD)
? KERNEL_WIDTH
? DILATED_KERNEL_WIDTH
: clamp(CHANNELS_WIDTH + PADDING_X - (ox * STRIDE_X),
0, KERNEL_WIDTH);
0, DILATED_KERNEL_WIDTH);
const int ix = (ox * STRIDE_X) - PADDING_X;
const int oPos = (ox + OUTPUTS_WIDTH * oy);
......@@ -222,21 +224,20 @@ void convolution_depthwise_forward(
for (int sy = 0; sy < KERNEL_HEIGHT; ++sy) {
if ((PADDING_Y != 0
|| OUTPUTS_HEIGHT != OUTPUTS_HEIGHT_NOPAD)
&& sy >= syMax - syMin)
&& ((sy*DILATION_Y < syMin) || (sy*DILATION_Y >= syMax)))
{
break;
continue;
}
const int iPos = ((sxMin + ix)
+ CHANNELS_WIDTH * (iy + syMin + sy));
int iOffset = NB_CHANNELS * iPos;
const int iPos = ix + CHANNELS_WIDTH * (iy + sy*DILATION_Y);
const int iOffset = NB_CHANNELS * iPos;
const int wOffset = (sxMin
+ KERNEL_WIDTH * (syMin + sy + KERNEL_HEIGHT * output));
const int wOffset = (output*KERNEL_HEIGHT + sy)
* KERNEL_WIDTH * NB_CHANNELS;
if ((PADDING_X == 0
if (DILATION_X == 1 && ((PADDING_X == 0
&& OUTPUTS_WIDTH == OUTPUTS_WIDTH_NOPAD)
|| sxMax - sxMin == KERNEL_WIDTH)
|| sxMax - sxMin == KERNEL_WIDTH))
{
macsOnRange<KERNEL_WIDTH, NB_CHANNELS>(
inputs + iOffset + channel,
......@@ -247,13 +248,13 @@ void convolution_depthwise_forward(
for (int sx = 0; sx < KERNEL_WIDTH; ++sx) {
if ((PADDING_X != 0
|| OUTPUTS_WIDTH != OUTPUTS_WIDTH_NOPAD)
&& sx >= sxMax - sxMin)
&& ((sx*DILATION_X < sxMin) || (sx*DILATION_X >= sxMax)))
{
break;
continue;
}
const int iOffsetInRange = iOffset
+ sx * NB_CHANNELS;
+ sx * DILATION_X * NB_CHANNELS;
weightedSum += inputs[iOffsetInRange + channel]
* weights[wOffset + sx];
......
0% Loading. Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment