Commit 490df27c authored by Olivier BICHLER

Added ConvDepthWise support

parent 07226da3
2 merge requests: !49 (0.3.1), !40 (Add ConvDepthWise support)
Pipeline #71606 passed
@@ -62,7 +62,7 @@ void convolution_forward(
            const int ix = (ox * STRIDE_X) - PADDING_X;
            const int oPos = (ox + OUTPUTS_WIDTH * oy);
-           int oOffset = NB_OUTPUTS * oPos;
+           const int oOffset = NB_OUTPUTS * oPos;
            // <--

            // Check if the biases are defined
@@ -77,7 +77,7 @@ void convolution_forward(
            }

            const int iPos = ix + CHANNELS_WIDTH * (iy + sy*DILATION_Y);
-           int iOffset = NB_CHANNELS * iPos;
+           const int iOffset = NB_CHANNELS * iPos;

            const int wOffset = (output*KERNEL_HEIGHT + sy) * KERNEL_WIDTH * NB_CHANNELS;
@@ -98,7 +98,7 @@ void convolution_forward(
                    continue;
                }

-               int iOffsetInRange = iOffset
+               const int iOffsetInRange = iOffset
                    + sx * DILATION_X * NB_CHANNELS;

                macsOnRange<NB_CHANNELS>(
@@ -157,4 +157,114 @@ void convolution_forward(
        (inputs, outputs, weights, b, rescaling);
}

template<int NB_CHANNELS,
         int CHANNELS_HEIGHT, int CHANNELS_WIDTH,
         int NB_OUTPUTS,
         int OUTPUTS_HEIGHT, int OUTPUTS_WIDTH,
         int PADDING_Y, int PADDING_X,
         int STRIDE_Y, int STRIDE_X,
         int DILATION_Y, int DILATION_X,
         int KERNEL_HEIGHT, int KERNEL_WIDTH,
         ActivationFunction_T ACTIVATION,
         typename Input_T, typename Output_T,
         typename Weight_T, typename Bias_T,
         typename Rescaling_T>
__attribute__((always_inline)) inline
void convolution_depthwise_forward(
    const Input_T* __restrict inputs,
    Output_T* __restrict outputs,
    const Weight_T* __restrict weights,
    const Bias_T* __restrict biases,
    const Rescaling_T& __restrict rescaling)
{
    static_assert(NB_OUTPUTS % NB_CHANNELS == 0,
        "NB_OUTPUTS should be a multiple of NB_CHANNELS.");
    static_assert(DILATION_Y == 1,
        "DILATION_Y != 1 not supported.");
    static_assert(DILATION_X == 1,
        "DILATION_X != 1 not supported.");

    constexpr int OUTPUTS_HEIGHT_NOPAD
        = (CHANNELS_HEIGHT - KERNEL_HEIGHT + STRIDE_Y) / STRIDE_Y;
    constexpr int OUTPUTS_WIDTH_NOPAD
        = (CHANNELS_WIDTH - KERNEL_WIDTH + STRIDE_X) / STRIDE_X;

    for (int oy = 0; oy < OUTPUTS_HEIGHT; ++oy) {
        const int syMin = (PADDING_Y == 0) ? 0
            : max(PADDING_Y - (oy * STRIDE_Y), 0);
        const int syMax = (PADDING_Y == 0
                && OUTPUTS_HEIGHT == OUTPUTS_HEIGHT_NOPAD) ? KERNEL_HEIGHT
            : clamp(CHANNELS_HEIGHT + PADDING_Y - (oy * STRIDE_Y),
                    0, KERNEL_HEIGHT);
        const int iy = (oy * STRIDE_Y) - PADDING_Y;

#pragma omp parallel for collapse(2)
        for (int ox = 0; ox < OUTPUTS_WIDTH; ++ox) {
            for (int output = 0; output < NB_OUTPUTS; ++output) {
                // moved to inner loop for collapsing -->
                const int sxMin = (PADDING_X == 0) ? 0
                    : max(PADDING_X - (ox * STRIDE_X), 0);
                const int sxMax = (PADDING_X == 0
                        && OUTPUTS_WIDTH == OUTPUTS_WIDTH_NOPAD)
                    ? KERNEL_WIDTH
                    : clamp(CHANNELS_WIDTH + PADDING_X - (ox * STRIDE_X),
                            0, KERNEL_WIDTH);
                const int ix = (ox * STRIDE_X) - PADDING_X;
                const int oPos = (ox + OUTPUTS_WIDTH * oy);
                const int oOffset = NB_OUTPUTS * oPos;
                // <--

                // Depthwise mapping: each output channel reads a single
                // input channel (NB_OUTPUTS / NB_CHANNELS is the multiplier).
                const int channel = (output * NB_CHANNELS) / NB_OUTPUTS;

                Bias_T weightedSum = biases ? biases[output] : 0;

                for (int sy = 0; sy < KERNEL_HEIGHT; ++sy) {
                    if ((PADDING_Y != 0
                            || OUTPUTS_HEIGHT != OUTPUTS_HEIGHT_NOPAD)
                        && sy >= syMax - syMin)
                    {
                        break;
                    }

                    const int iPos = ((sxMin + ix)
                        + CHANNELS_WIDTH * (iy + syMin + sy));
                    int iOffset = NB_CHANNELS * iPos;

                    const int wOffset = (sxMin
                        + KERNEL_WIDTH * (syMin + sy + KERNEL_HEIGHT * output));

                    if ((PADDING_X == 0
                            && OUTPUTS_WIDTH == OUTPUTS_WIDTH_NOPAD)
                        || sxMax - sxMin == KERNEL_WIDTH)
                    {
                        macsOnRange<KERNEL_WIDTH, NB_CHANNELS>(
                            inputs + iOffset + channel,
                            weights + wOffset,
                            weightedSum);
                    }
                    else {
                        for (int sx = 0; sx < KERNEL_WIDTH; ++sx) {
                            if ((PADDING_X != 0
                                    || OUTPUTS_WIDTH != OUTPUTS_WIDTH_NOPAD)
                                && sx >= sxMax - sxMin)
                            {
                                break;
                            }

                            const int iOffsetInRange = iOffset
                                + sx * NB_CHANNELS;

                            weightedSum += inputs[iOffsetInRange + channel]
                                * weights[wOffset + sx];
                        }
                    }
                }

                outputs[oOffset + output] = activation_forward_value<Output_T>(
                    weightedSum, output, ACTIVATION, rescaling);
            }
        }
    }
}
#endif // __AIDGE_EXPORT_CPP_KERNELS_CONVOLUTION__
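
For intuition, the new kernel can be cross-checked against a naive reference. The sketch below is not part of the commit: it is a plain NumPy model of the same computation, assuming the kernel's HWC (channels-last) layout, unit dilation, and `NB_OUTPUTS` a multiple of `NB_CHANNELS`, with output channel `o` reading input channel `(o * NB_CHANNELS) // NB_OUTPUTS` exactly as in the C++ loop.

```python
import numpy as np

def depthwise_conv2d_ref(inputs, weights, biases=None,
                         stride=(1, 1), padding=(0, 0)):
    """Naive depthwise convolution in HWC (channels-last) layout.

    inputs:  (H, W, C) array
    weights: (O, KH, KW) array, one 2D kernel per output channel
    biases:  optional (O,) array
    """
    H, W, C = inputs.shape
    O, KH, KW = weights.shape
    assert O % C == 0, "O must be a multiple of C"
    sy, sx = stride
    py, px = padding
    padded = np.pad(inputs, ((py, py), (px, px), (0, 0)))
    out_h = (H + 2 * py - KH) // sy + 1
    out_w = (W + 2 * px - KW) // sx + 1
    out = np.zeros((out_h, out_w, O))
    for oy in range(out_h):
        for ox in range(out_w):
            for o in range(O):
                # Same mapping as the C++ kernel:
                # channel = (output * NB_CHANNELS) / NB_OUTPUTS
                c = (o * C) // O
                patch = padded[oy*sy:oy*sy+KH, ox*sx:ox*sx+KW, c]
                out[oy, ox, o] = (patch * weights[o]).sum()
                if biases is not None:
                    out[oy, ox, o] += biases[o]
    return out
```

With `O == C` (channel multiplier 1) this reduces to the classic depthwise case exercised by the new `test_export_convDepthWise2D` unit test further down.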
@@ -187,6 +187,39 @@ class PaddedConvCPP(ExportNodeCpp):
        _setup_conv2D(self)

@ExportLibCpp.register("ConvDepthWise2D", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.float32)))
class ConvCPP(ExportNodeCpp):
    def __init__(self, node, mem_info):
        super().__init__(node, mem_info)
        self.attributes["depthwise"] = True
        # ConvDepthWise2D has no padding of its own;
        # use PaddedConvDepthWise2D to add a padding attribute.
        self.attributes["padding"] = [0, 0]
        _setup_conv2D(self)

@ExportLibCpp.register_metaop("PaddedConvDepthWise2D", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.float32)))
class PaddedConvCPP(ExportNodeCpp):
    def __init__(self, node, mem_info):
        super().__init__(node, mem_info)
        self.attributes["depthwise"] = True
        # TODO: find a way to retrieve attributes for a meta operator
        for n in self.operator.get_micro_graph().get_nodes():
            if n.type() == "Pad2D":
                self.attributes["padding"] = n.get_operator().attr.begin_end_borders
            if n.type() == "ConvDepthWise2D":
                self.attributes["kernel_dims"] = n.get_operator().attr.kernel_dims
                self.attributes["stride_dims"] = n.get_operator().attr.stride_dims
                self.attributes["dilation_dims"] = n.get_operator().attr.dilation_dims
        _setup_conv2D(self)
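
A usage sketch (not part of the commit) of the two registrations: a bare `ConvDepthWise2D` node is dispatched to the first class, while a padding-fused metaop is dispatched to the second. The `aidge_core.PaddedConvDepthWise2D` factory and its keyword names below are assumptions by analogy with the `PaddedConv2D` path earlier in this file; check them against the aidge_core API.

```python
import aidge_core

# Bare depthwise conv: matches the "ConvDepthWise2D" registration.
plain = aidge_core.sequential([
    aidge_core.ConvDepthWise2D(nb_channels=3, kernel_dims=(3, 3), name="dw")
])

# Fused Pad2D + ConvDepthWise2D metaop: matches "PaddedConvDepthWise2D".
# Factory name and padding_dims layout are assumed, not confirmed.
padded = aidge_core.sequential([
    aidge_core.PaddedConvDepthWise2D(nb_channels=3, kernel_dims=(3, 3),
                                     padding_dims=(1, 1, 1, 1), name="dw_pad")
])
```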
def _setup_elemwise_op(elemwise, op):
    """Common code (template and kernel setup) shared across the different element-wise operators (Add, Sub, ...)."""
......
{% filter indent(width=4, first=False) %}
{% include "./_mem_offset.jinja" %}
-    convolution_forward<{{ in_name[0]|upper }}_NB_CHANNELS,
+    convolution{{ "_depthwise" if depthwise is defined else "" }}_forward<{{ in_name[0]|upper }}_NB_CHANNELS,
                         {{ in_name[0]|upper }}_IN_HEIGHT,
                         {{ in_name[0]|upper }}_IN_WIDTH,
                         {{ out_name[0]|upper }}_NB_OUTPUTS,
......
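
The one-line template change can be sanity-checked outside the export pipeline. A minimal standalone snippet using the `jinja2` package (illustrative only, not part of the commit):

```python
from jinja2 import Template

t = Template('convolution{{ "_depthwise" if depthwise is defined else "" }}_forward')

print(t.render(depthwise=True))  # -> convolution_depthwise_forward
print(t.render())                # -> convolution_forward
```

Because only the depthwise export classes set `self.attributes["depthwise"]`, regular `Conv2D` nodes keep rendering the original `convolution_forward` call.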
@@ -410,6 +410,14 @@ class test_operator_export(unittest.TestCase):
        self.unit_test_export(model, "Conv2D", [[1, 3, 12, 12]], False, False)

    def test_export_convDepthWise2D(self):
        print("ConvDepthWise2D")
        model = aidge_core.sequential([
            aidge_core.ConvDepthWise2D(nb_channels=3, kernel_dims=(3, 3), name="conv")
        ])
        self.unit_test_export(model, "ConvDepthWise2D", [[1, 3, 12, 12]], False, False)

    def test_export_max_pooling(self):
        print("MaxPooling2D")
        model = aidge_core.sequential([
......