Commit af87b9e5 authored by Matthew Newson

added parameters groups

parent 9f86629b
Pipeline #73948 failed
@@ -6,159 +6,77 @@
 #include "network/utils.hpp"
 #include "network/macs.hpp"
 #include "network/activation_utils.hpp"
+#include <omp.h>
+// Weights index en NHWC
+constexpr int inds_pos(int n, int c, int h, int w, int N, int C, int H, int W) {
+    return n * (H * W * C) +
+           h * (W * C) +
+           w * C +
+           c;
+}
+// Image index in CHW
+constexpr int inds_pos(int c, int h, int w, int C, int H, int W) {
+    return c * (H * W) +
+           h * W +
+           w;
+}
 template<int NB_CHANNELS,
-         int CHANNELS_HEIGHT, int CHANNELS_WIDTH,
-         int NB_OUTPUTS,
-         int OUTPUTS_HEIGHT, int OUTPUTS_WIDTH,
+         int IN_HEIGHT, int IN_WIDTH,
+         int NB_OUTPUTS, int GROUPS,
+         int OUT_HEIGHT, int OUT_WIDTH,
          int PADDING_Y, int PADDING_X,
          int STRIDE_Y, int STRIDE_X,
          int DILATION_Y, int DILATION_X,
          int KERNEL_HEIGHT, int KERNEL_WIDTH,
         ActivationFunction_T ACTIVATION,
         typename Input_T, typename Output_T,
         typename Weight_T, typename Bias_T,
         typename Rescaling_T>
 __attribute__((always_inline)) inline
 void convolution_depthwise_forward(
     const Input_T* __restrict inputs,
     Output_T* __restrict outputs,
     const Weight_T* __restrict weights,
     const Bias_T* __restrict biases,
     const Rescaling_T& __restrict rescaling)
 {
-    static_assert(NB_OUTPUTS % NB_CHANNELS == 0,
-        "NB_OUTPUTS should be a multiple of NB_CHANNELS.");
-    constexpr int DILATED_KERNEL_HEIGHT
-            = KERNEL_HEIGHT + (DILATION_Y - 1) * (KERNEL_HEIGHT - 1);
-    constexpr int DILATED_KERNEL_WIDTH
-            = KERNEL_WIDTH + (DILATION_X - 1) * (KERNEL_WIDTH - 1);
-    constexpr int OUTPUTS_HEIGHT_NOPAD
-        = (CHANNELS_HEIGHT - DILATION_Y * (KERNEL_HEIGHT - 1) - 1 + STRIDE_Y) / STRIDE_Y;
-    constexpr int OUTPUTS_WIDTH_NOPAD
-        = (CHANNELS_WIDTH - DILATION_X * (KERNEL_WIDTH - 1) - 1 + STRIDE_X) / STRIDE_X;
-    for (int oy = 0; oy < OUTPUTS_HEIGHT; ++oy) {
-        const int syMin = (PADDING_Y == 0) ? 0
-            : max(PADDING_Y - (oy * STRIDE_Y), 0);
-        const int syMax = (PADDING_Y == 0
-                && OUTPUTS_HEIGHT == OUTPUTS_HEIGHT_NOPAD) ? DILATED_KERNEL_HEIGHT
-            : clamp(CHANNELS_HEIGHT + PADDING_Y - (oy * STRIDE_Y),
-                    0, DILATED_KERNEL_HEIGHT);
-        const int iy = (oy * STRIDE_Y) - PADDING_Y;
-#ifdef _OPENMP
-#pragma omp parallel for collapse(2)
-#endif
-        for (int ox = 0; ox < OUTPUTS_WIDTH; ++ox) {
-            for (int output = 0; output < NB_OUTPUTS; ++output) {
-                // moved to inner loop for collapsing -->
-                const int sxMin = (PADDING_X == 0) ? 0
-                    : max(PADDING_X - (ox * STRIDE_X), 0);
-                const int sxMax = (PADDING_X == 0
-                        && OUTPUTS_WIDTH == OUTPUTS_WIDTH_NOPAD)
-                            ? DILATED_KERNEL_WIDTH
-                    : clamp(CHANNELS_WIDTH + PADDING_X - (ox * STRIDE_X),
-                            0, DILATED_KERNEL_WIDTH);
-                const int ix = (ox * STRIDE_X) - PADDING_X;
-                const int oPos = (ox + OUTPUTS_WIDTH * oy);
-                const int oOffset = NB_OUTPUTS * oPos;
-                // <--
-                const int channel = (output * NB_CHANNELS) / NB_OUTPUTS;
-                Bias_T weightedSum = biases ? biases[output] : 0;
-                for (int sy = 0; sy < KERNEL_HEIGHT; ++sy) {
-                    if ((PADDING_Y != 0
-                            || OUTPUTS_HEIGHT != OUTPUTS_HEIGHT_NOPAD)
-                        && ((sy*DILATION_Y < syMin) || (sy*DILATION_Y >= syMax)))
-                    {
-                        continue;
-                    }
-                    const int iPos = ix + CHANNELS_WIDTH * (iy + sy*DILATION_Y);
-                    const int iOffset = NB_CHANNELS * iPos;
-                    const int wOffset = (output*KERNEL_HEIGHT + sy)
-                        * KERNEL_WIDTH;
-                    if (DILATION_X == 1 && ((PADDING_X == 0
-                        && OUTPUTS_WIDTH == OUTPUTS_WIDTH_NOPAD)
-                        || sxMax - sxMin == KERNEL_WIDTH))
-                    {
-                        macsOnRange<KERNEL_WIDTH, NB_CHANNELS>(
-                            inputs + iOffset + channel,
-                            weights + wOffset,
-                            weightedSum);
-                    }
-                    else {
-                        for (int sx = 0; sx < KERNEL_WIDTH; ++sx) {
-                            if ((PADDING_X != 0
-                                    || OUTPUTS_WIDTH != OUTPUTS_WIDTH_NOPAD)
-                                && ((sx*DILATION_X < sxMin) || (sx*DILATION_X >= sxMax)))
-                            {
-                                continue;
-                            }
-                            const int iOffsetInRange = iOffset
-                                + sx * DILATION_X * NB_CHANNELS;
-                            weightedSum += inputs[iOffsetInRange + channel]
-                                * weights[wOffset + sx];
-                        }
-                    }
-                }
-                outputs[oOffset + output] = activation_forward_value<Output_T>(weightedSum, output, ACTIVATION, rescaling);
-            }
-        }
+    int c_in_g = NB_CHANNELS / GROUPS;
+    int c_out_g = NB_OUTPUTS / GROUPS;
+#pragma omp parallel for
+    for (int oc = 0; oc < NB_OUTPUTS; oc++) {
+        int g_oc = oc / c_out_g;
+#pragma omp parallel for
+        for (int i = 0; i < OUT_HEIGHT; ++i) {
+#pragma omp parallel for
+            for (int j = 0; j < OUT_WIDTH; ++j) {
+                Output_T value = biases[oc];
+#pragma omp parallel for
+                for (int ic = g_oc * c_in_g; ic < (g_oc + 1) * c_in_g; ++ic) {
+#pragma omp parallel for
+                    for (int m = 0; m < KERNEL_HEIGHT; ++m) {
+#pragma omp parallel for
+                        for (int n = 0; n < KERNEL_WIDTH; ++n) {
+                            int i_p = i * STRIDE_X - PADDING_X + m * DILATION_X;
+                            int j_p = j * STRIDE_Y - PADDING_Y + n * DILATION_Y;
+                            if (i_p >= 0 && i_p < IN_HEIGHT && j_p >= 0 && j_p < IN_WIDTH) {
+                                value += weights[inds_pos(oc, ic % c_in_g, m, n, NB_OUTPUTS, c_in_g, KERNEL_HEIGHT, KERNEL_WIDTH)] *
+                                         inputs[inds_pos(ic, i_p, j_p, NB_CHANNELS, IN_HEIGHT, IN_WIDTH)];
+                            }
+                        }
+                    }
+                }
+                outputs[inds_pos(oc, i, j, NB_OUTPUTS, OUT_HEIGHT, OUT_WIDTH)] = activation_forward_value<Output_T>(value, oc, ACTIVATION, rescaling);
+            }
+        }
     }
 }
-// Template specialization when biases are not given to the convolution
-template<int NB_CHANNELS,
-         int CHANNELS_HEIGHT, int CHANNELS_WIDTH,
-         int NB_OUTPUTS,
-         int OUTPUTS_HEIGHT, int OUTPUTS_WIDTH,
-         int PADDING_Y, int PADDING_X,
-         int STRIDE_Y, int STRIDE_X,
-         int DILATION_Y, int DILATION_X,
-         int KERNEL_HEIGHT, int KERNEL_WIDTH,
-         ActivationFunction_T ACTIVATION,
-         typename Input_T, typename Output_T,
-         typename Weight_T,
-         typename Rescaling_T>
-__attribute__((always_inline)) inline
-void convolution_depthwise_forward(
-    const Input_T* __restrict inputs,
-    Output_T* __restrict outputs,
-    const Weight_T* __restrict weights,
-    std::nullptr_t __restrict,
-    const Rescaling_T& __restrict rescaling)
-{
-    const float* b = nullptr;
-    convolution_depthwise_forward<NB_CHANNELS,
-                                  CHANNELS_HEIGHT,
-                                  CHANNELS_WIDTH,
-                                  NB_OUTPUTS,
-                                  OUTPUTS_HEIGHT,
-                                  OUTPUTS_WIDTH,
-                                  PADDING_Y,
-                                  PADDING_X,
-                                  STRIDE_Y,
-                                  STRIDE_X,
-                                  DILATION_Y,
-                                  DILATION_X,
-                                  KERNEL_HEIGHT,
-                                  KERNEL_WIDTH,
-                                  ACTIVATION>
-        (inputs, outputs, weights, b, rescaling);
-}
 #endif // __AIDGE_EXPORT_CPP_KERNELS_CONVOLUTION_DEPTHWISE__
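The rewritten kernel replaces the old pointer-offset arithmetic with the two constexpr inds_pos helpers (NHWC for weights, CHW for activations) and partitions channels by group: output channel oc belongs to group g_oc = oc / c_out_g and only reads the c_in_g input channels of that group. The standalone sketch below is not part of the commit; it re-derives the same index formulas under different helper names and checks the two configurations the Python exports use, GROUPS == 1 (ordinary convolution, every input channel contributes) and GROUPS == number of channels (depthwise, one channel per group). All dimension values are made up for the example.

#include <cassert>
#include <iostream>

// Same index formulas as the committed inds_pos() helpers, renamed so the
// sketch stands alone: weights are laid out NHWC, activations CHW.
constexpr int w_pos(int n, int c, int h, int w, int C, int H, int W) {
    return n * (H * W * C) + h * (W * C) + w * C + c;
}
constexpr int i_pos(int c, int h, int w, int H, int W) {
    return c * (H * W) + h * W + w;
}

int main() {
    constexpr int NB_CHANNELS = 8, NB_OUTPUTS = 8;      // made-up layer size

    // Depthwise configuration (ConvDw export): one group per channel.
    {
        constexpr int GROUPS  = NB_CHANNELS;
        constexpr int c_in_g  = NB_CHANNELS / GROUPS;   // == 1
        constexpr int c_out_g = NB_OUTPUTS / GROUPS;    // == 1
        static_assert(c_in_g == 1 && c_out_g == 1, "depthwise: one channel per group");
        for (int oc = 0; oc < NB_OUTPUTS; ++oc) {
            const int g_oc = oc / c_out_g;              // group of this output channel
            assert(g_oc * c_in_g == oc);                // it reads only its own input channel
        }
    }

    // Standard configuration (Conv export): a single group.
    {
        constexpr int GROUPS = 1;
        static_assert(NB_CHANNELS / GROUPS == NB_CHANNELS,
                      "groups == 1: every input channel feeds every output channel");
    }

    // Index helpers on a 3x3 single-channel filter and a 5x5 CHW feature map.
    std::cout << w_pos(2, 0, 1, 1, /*C=*/1, /*H=*/3, /*W=*/3) << "\n";  // prints 22
    std::cout << i_pos(3, 4, 2, /*H=*/5, /*W=*/5) << "\n";              // prints 97
    return 0;
}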
@@ -16,6 +16,9 @@ class Conv(ExportNodeCpp):
         self.attributes["rescaling"] = "NoScaling"
         self.attributes["shift_value"] = 0
+        ## Groups
+        self.attributes["groups"] = 1
         # Browse the metaop to update kernel attributes
         ConvNode = get_node_from_metaop(node, "Conv2D")
         self.attributes["kernel_dims"] = ConvNode[0].get_operator().attr.kernel_dims
......
@@ -17,6 +17,9 @@ class ConvDw(ExportNodeCpp):
         self.attributes["rescaling"] = "NoScaling"
         self.attributes["shift_value"] = 0
+        ## Groups
+        self.attributes["groups"] = self.attributes["out_chan"][0]
         # Browse the metaop to update kernel attributes
         ConvDwNode = get_node_from_metaop(node, "ConvDepthWise2D")
         self.attributes["kernel_dims"] = ConvDwNode[0].get_operator().attr.kernel_dims
......
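For the depthwise export, groups is set to the number of output channels, so the kernel's c_in_g = NB_CHANNELS / GROUPS evaluates to 1 and the NHWC weight index collapses to one contiguous KERNEL_HEIGHT x KERNEL_WIDTH filter per output channel. A small standalone check of that collapse (not part of the export; the helper is renamed and the sizes are illustrative):

#include <cassert>

// NHWC weight-index formula from the kernel, renamed for this standalone check.
constexpr int w_pos(int n, int c, int h, int w, int C, int H, int W) {
    return n * (H * W * C) + h * (W * C) + w * C + c;
}

int main() {
    // Hypothetical depthwise layer: groups == out_chan, hence c_in_g == 1.
    constexpr int NB_OUTPUTS = 4, KH = 3, KW = 3, c_in_g = 1;
    for (int oc = 0; oc < NB_OUTPUTS; ++oc)
        for (int m = 0; m < KH; ++m)
            for (int n = 0; n < KW; ++n)
                // With a single input channel per group the index is simply one
                // contiguous KH x KW filter per output channel.
                assert(w_pos(oc, 0, m, n, c_in_g, KH, KW) == (oc * KH + m) * KW + n);
    return 0;
}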
@@ -5,6 +5,7 @@
 {# For layer configuration -#}
 {% include "./_def_io.jinja" %}
 {% include "./_meminfo.jinja" %}
+#define {{ name|upper }}_GROUPS {{ groups }}
 #define {{ name|upper }}_PADDING_Y {{ padding[0] }}
 #define {{ name|upper }}_PADDING_X {{ padding[1] }}
 #define {{ name|upper }}_STRIDE_Y {{ stride_dims[0] }}
......
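The configuration template now emits a _GROUPS define alongside the padding and stride defines, and the forward-call template that follows passes it as an additional template argument. As a rough illustration only, for a hypothetical layer named conv1 with groups = 1 and unit padding and stride, the visible part of the generated header might look like:

// Rough illustration only: possible generated configuration block for a
// hypothetical layer named "conv1". The real header also contains the I/O
// and memory defines emitted by _def_io.jinja and _meminfo.jinja.
#define CONV1_GROUPS    1
#define CONV1_PADDING_Y 1
#define CONV1_PADDING_X 1
#define CONV1_STRIDE_Y  1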
@@ -4,6 +4,7 @@ convolution{{ "_depthwise" if depthwise is defined else "" }}_forward<{{ in_name
     {{ in_name[0]|upper }}_IN_HEIGHT,
     {{ in_name[0]|upper }}_IN_WIDTH,
     {{ out_name[0]|upper }}_NB_OUTPUTS,
+    {{name|upper}}_GROUPS,
     {{ out_name[0]|upper }}_OUT_HEIGHT,
     {{ out_name[0]|upper }}_OUT_WIDTH,
     {{name|upper}}_PADDING_Y,
......