Commit 490df27c authored by Olivier BICHLER

Added ConvDepthWise support

parent 07226da3
2 merge requests: !49 (0.3.1), !40 (Add ConvDepthWise support)
Pipeline #71606 passed
@@ -62,7 +62,7 @@ void convolution_forward(
                 const int ix = (ox * STRIDE_X) - PADDING_X;
                 const int oPos = (ox + OUTPUTS_WIDTH * oy);
-                int oOffset = NB_OUTPUTS * oPos;
+                const int oOffset = NB_OUTPUTS * oPos;
                 // <--
                 // Check if the biases are defined
@@ -77,7 +77,7 @@ void convolution_forward(
                     }
                     const int iPos = ix + CHANNELS_WIDTH * (iy + sy*DILATION_Y);
-                    int iOffset = NB_CHANNELS * iPos;
+                    const int iOffset = NB_CHANNELS * iPos;
                     const int wOffset = (output*KERNEL_HEIGHT + sy) * KERNEL_WIDTH * NB_CHANNELS;
@@ -98,7 +98,7 @@ void convolution_forward(
                             continue;
                         }
-                        int iOffsetInRange = iOffset
+                        const int iOffsetInRange = iOffset
                             + sx * DILATION_X * NB_CHANNELS;
                         macsOnRange<NB_CHANNELS>(
@@ -157,4 +157,114 @@ void convolution_forward(
        (inputs, outputs, weights, b, rescaling);
}

template<int NB_CHANNELS,
         int CHANNELS_HEIGHT, int CHANNELS_WIDTH,
         int NB_OUTPUTS,
         int OUTPUTS_HEIGHT, int OUTPUTS_WIDTH,
         int PADDING_Y, int PADDING_X,
         int STRIDE_Y, int STRIDE_X,
         int DILATION_Y, int DILATION_X,
         int KERNEL_HEIGHT, int KERNEL_WIDTH,
         ActivationFunction_T ACTIVATION,
         typename Input_T, typename Output_T,
         typename Weight_T, typename Bias_T,
         typename Rescaling_T>
__attribute__((always_inline)) inline
void convolution_depthwise_forward(
    const Input_T* __restrict inputs,
    Output_T* __restrict outputs,
    const Weight_T* __restrict weights,
    const Bias_T* __restrict biases,
    const Rescaling_T& __restrict rescaling)
{
    static_assert(NB_OUTPUTS % NB_CHANNELS == 0,
        "NB_OUTPUTS should be a multiple of NB_CHANNELS.");
    static_assert(DILATION_Y == 1,
        "DILATION_Y != 1 not supported.");
    static_assert(DILATION_X == 1,
        "DILATION_X != 1 not supported.");
    constexpr int OUTPUTS_HEIGHT_NOPAD
        = (CHANNELS_HEIGHT - KERNEL_HEIGHT + STRIDE_Y) / STRIDE_Y;
    constexpr int OUTPUTS_WIDTH_NOPAD
        = (CHANNELS_WIDTH - KERNEL_WIDTH + STRIDE_X) / STRIDE_X;

    for (int oy = 0; oy < OUTPUTS_HEIGHT; ++oy) {
        const int syMin = (PADDING_Y == 0) ? 0
            : max(PADDING_Y - (oy * STRIDE_Y), 0);
        const int syMax = (PADDING_Y == 0
                && OUTPUTS_HEIGHT == OUTPUTS_HEIGHT_NOPAD) ? KERNEL_HEIGHT
            : clamp(CHANNELS_HEIGHT + PADDING_Y - (oy * STRIDE_Y),
                    0, KERNEL_HEIGHT);
        const int iy = (oy * STRIDE_Y) - PADDING_Y;

#pragma omp parallel for collapse(2)
        for (int ox = 0; ox < OUTPUTS_WIDTH; ++ox) {
            for (int output = 0; output < NB_OUTPUTS; ++output) {
                // moved to inner loop for collapsing -->
                const int sxMin = (PADDING_X == 0) ? 0
                    : max(PADDING_X - (ox * STRIDE_X), 0);
                const int sxMax = (PADDING_X == 0
                        && OUTPUTS_WIDTH == OUTPUTS_WIDTH_NOPAD)
                    ? KERNEL_WIDTH
                    : clamp(CHANNELS_WIDTH + PADDING_X - (ox * STRIDE_X),
                            0, KERNEL_WIDTH);
                const int ix = (ox * STRIDE_X) - PADDING_X;
                const int oPos = (ox + OUTPUTS_WIDTH * oy);
                const int oOffset = NB_OUTPUTS * oPos;
                // <--
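
                // Depthwise mapping: each output feature reads exactly one input
                // channel (NB_OUTPUTS is a multiple of NB_CHANNELS, checked above).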
                const int channel = (output * NB_CHANNELS) / NB_OUTPUTS;

                Bias_T weightedSum = biases ? biases[output] : 0;

                for (int sy = 0; sy < KERNEL_HEIGHT; ++sy) {
                    if ((PADDING_Y != 0
                            || OUTPUTS_HEIGHT != OUTPUTS_HEIGHT_NOPAD)
                        && sy >= syMax - syMin)
                    {
                        break;
                    }

                    const int iPos = ((sxMin + ix)
                        + CHANNELS_WIDTH * (iy + syMin + sy));
                    int iOffset = NB_CHANNELS * iPos;

                    const int wOffset = (sxMin
                        + KERNEL_WIDTH * (syMin + sy + KERNEL_HEIGHT * output));

                    if ((PADDING_X == 0
                            && OUTPUTS_WIDTH == OUTPUTS_WIDTH_NOPAD)
                        || sxMax - sxMin == KERNEL_WIDTH)
                    {
                        macsOnRange<KERNEL_WIDTH, NB_CHANNELS>(
                            inputs + iOffset + channel,
                            weights + wOffset,
                            weightedSum);
                    }
                    else {
                        for (int sx = 0; sx < KERNEL_WIDTH; ++sx) {
                            if ((PADDING_X != 0
                                    || OUTPUTS_WIDTH != OUTPUTS_WIDTH_NOPAD)
                                && sx >= sxMax - sxMin)
                            {
                                break;
                            }

                            const int iOffsetInRange = iOffset
                                + sx * NB_CHANNELS;

                            weightedSum += inputs[iOffsetInRange + channel]
                                * weights[wOffset + sx];
                        }
                    }
                }

                outputs[oOffset + output] = activation_forward_value<Output_T>(
                    weightedSum, output, ACTIVATION, rescaling);
            }
        }
    }
}

#endif // __AIDGE_EXPORT_CPP_KERNELS_CONVOLUTION__
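
The new kernel keeps the HWC (channels-last) layout used by convolution_forward: channels are contiguous in memory (iOffset = NB_CHANNELS * iPos) and each depthwise output reads a single input channel through one KERNEL_HEIGHT x KERNEL_WIDTH weight plane. As a point of reference only, and not part of this commit, the standalone sketch below reproduces that computation for hypothetical dimensions, assuming stride 1, no padding, no dilation, no bias, no activation or rescaling, and a depth multiplier of 1; all names and values are illustrative.

// Illustrative, self-contained depthwise convolution in HWC layout.
// This mirrors the indexing of convolution_depthwise_forward but is not
// the export code itself; dimensions and fill values are arbitrary.
#include <array>
#include <cstdio>

int main() {
    constexpr int C = 3, H = 5, W = 5, K = 3;      // channels, input size, kernel size
    constexpr int OH = H - K + 1, OW = W - K + 1;  // output size without padding

    std::array<float, H * W * C> in;
    std::array<float, C * K * K> w;                // one K*K weight plane per channel
    std::array<float, OH * OW * C> out{};

    in.fill(1.0f);
    w.fill(0.5f);

    for (int oy = 0; oy < OH; ++oy) {
        for (int ox = 0; ox < OW; ++ox) {
            for (int c = 0; c < C; ++c) {          // one output per input channel
                float sum = 0.0f;                  // no bias in this sketch
                for (int sy = 0; sy < K; ++sy) {
                    for (int sx = 0; sx < K; ++sx) {
                        // HWC indexing: channel is the innermost dimension,
                        // as in inputs[iOffset + channel] with
                        // iOffset = NB_CHANNELS * iPos in the kernel above.
                        sum += in[((oy + sy) * W + (ox + sx)) * C + c]
                             * w[(c * K + sy) * K + sx];
                    }
                }
                out[(oy * OW + ox) * C + c] = sum; // no activation/rescaling here
            }
        }
    }

    // With an all-ones input and constant 0.5 weights, every output is 9 * 0.5 = 4.5.
    std::printf("out[0] = %g\n", out[0]);
    return 0;
}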
@@ -187,6 +187,39 @@ class PaddedConvCPP(ExportNodeCpp):
        _setup_conv2D(self)
@ExportLibCpp.register("ConvDepthWise2D", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.float32)))
class ConvCPP(ExportNodeCpp):
def __init__(self, node, mem_info):
super().__init__(node, mem_info)
self.attributes["depthwise"] = True
# No padding with Conv
# Use PaddedConv to add padding attribute
self.attributes["padding"] = [0, 0]
_setup_conv2D(self)
@ExportLibCpp.register_metaop("PaddedConvDepthWise2D", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.float32)))
class PaddedConvCPP(ExportNodeCpp):
def __init__(self, node, mem_info):
super().__init__(node, mem_info)
self.attributes["depthwise"] = True
# TODO find a way to retrive attr for meta op
for n in self.operator.get_micro_graph().get_nodes():
if n.type() == "Pad2D":
self.attributes["padding"] = n.get_operator(
).attr.begin_end_borders
if n.type() == "ConvDepthWise2D":
self.attributes["kernel_dims"] = n.get_operator(
).attr.kernel_dims
self.attributes["stride_dims"] = n.get_operator(
).attr.stride_dims
self.attributes["dilation_dims"] = n.get_operator(
).attr.dilation_dims
_setup_conv2D(self)

def _setup_elemwise_op(elemwise, op):
    """Common code (template and kernel setup) shared across all the different elementWise operators (Add, Sub, ...)."""
...
 {% filter indent(width=4, first=False) %}
 {% include "./_mem_offset.jinja" %}
-convolution_forward<{{ in_name[0]|upper }}_NB_CHANNELS,
+convolution{{ "_depthwise" if depthwise is defined else "" }}_forward<{{ in_name[0]|upper }}_NB_CHANNELS,
                     {{ in_name[0]|upper }}_IN_HEIGHT,
                     {{ in_name[0]|upper }}_IN_WIDTH,
                     {{ out_name[0]|upper }}_NB_OUTPUTS,
...
@@ -410,6 +410,14 @@ class test_operator_export(unittest.TestCase):
        self.unit_test_export(model, "Conv2D", [[1, 3, 12, 12]], False, False)

    def test_export_convDepthWise2D(self):
        print("ConvDepthWise2D")
        model = aidge_core.sequential([
            aidge_core.ConvDepthWise2D(nb_channels=3, kernel_dims=(3, 3), name="conv")
        ])

        self.unit_test_export(model, "ConvDepthWise2D", [[1, 3, 12, 12]], False, False)

    def test_export_max_pooling(self):
        print("MaxPooling2D")
        model = aidge_core.sequential([
...