Commit 8eaf4766 authored by Axel Farrugia

[Refactor] Adapt ConvDw to the new export system

parent 573425cf
Pipeline #71885 failed
@@ -27,6 +27,12 @@ def cpp_fuse_to_metaops(graph_view: aidge_core.GraphView):
        "ConvAct": "(QConv|Conv2D)->ReLU",
        "PadConvAct": "PadConv->ReLU",

        # ConvDw
        "QConvDw": "ConvDepthWise2D->(Quantizer|QMul)",
        "ConvDwPad": "(QConvDw|ConvDepthWise2D)->Pad2D",
        "ConvDwAct": "(QConvDw|ConvDepthWise2D)->ReLU",
        "ConvDwActPad": "ConvDwAct->Pad2D",

        # Max Pooling
        "PadMaxPool": "MaxPooling2D<-Pad2D",
        "MaxPoolAct": "MaxPooling2D->ReLU",
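For reference, each entry above maps a MetaOperator name to an Aidge graph-matching query (`->` matches a child node, `<-` a parent, `(A|B)` an alternative). Below is a minimal sketch of how such a recipe table is typically applied, assuming aidge_core exposes fuse_to_metaops taking the graph view, the query, and the resulting MetaOperator type:

import aidge_core

def apply_fusion_recipes(graph_view: aidge_core.GraphView, recipes: dict) -> None:
    # Collapse every subgraph matching a query into a MetaOperator of that name
    for metaop_type, query in recipes.items():
        aidge_core.fuse_to_metaops(graph_view, query, metaop_type)
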
@@ -159,158 +159,4 @@ void convolution_forward(
        (inputs, outputs, weights, b, rescaling);
}
[removed lines: the convolution_depthwise_forward kernels, moved verbatim into the new convolution_depthwise.hpp below]
#endif // __AIDGE_EXPORT_CPP_KERNELS_CONVOLUTION__

#ifndef __AIDGE_EXPORT_CPP_KERNELS_CONVOLUTION_DEPTHWISE__
#define __AIDGE_EXPORT_CPP_KERNELS_CONVOLUTION_DEPTHWISE__

#include "network/typedefs.hpp"
#include "network/rescaling_utils.hpp"
#include "network/utils.hpp"
#include "network/macs.hpp"
#include "network/activation_utils.hpp"

template<int NB_CHANNELS,
         int CHANNELS_HEIGHT, int CHANNELS_WIDTH,
         int NB_OUTPUTS,
         int OUTPUTS_HEIGHT, int OUTPUTS_WIDTH,
         int PADDING_Y, int PADDING_X,
         int STRIDE_Y, int STRIDE_X,
         int DILATION_Y, int DILATION_X,
         int KERNEL_HEIGHT, int KERNEL_WIDTH,
         ActivationFunction_T ACTIVATION,
         typename Input_T, typename Output_T,
         typename Weight_T, typename Bias_T,
         typename Rescaling_T>
__attribute__((always_inline)) inline
void convolution_depthwise_forward(
    const Input_T* __restrict inputs,
    Output_T* __restrict outputs,
    const Weight_T* __restrict weights,
    const Bias_T* __restrict biases,
    const Rescaling_T& __restrict rescaling)
{
    static_assert(NB_OUTPUTS % NB_CHANNELS == 0,
        "NB_OUTPUTS should be a multiple of NB_CHANNELS.");

    constexpr int DILATED_KERNEL_HEIGHT
        = KERNEL_HEIGHT + (DILATION_Y - 1) * (KERNEL_HEIGHT - 1);
    constexpr int DILATED_KERNEL_WIDTH
        = KERNEL_WIDTH + (DILATION_X - 1) * (KERNEL_WIDTH - 1);

    constexpr int OUTPUTS_HEIGHT_NOPAD
        = (CHANNELS_HEIGHT - DILATION_Y * (KERNEL_HEIGHT - 1) - 1 + STRIDE_Y) / STRIDE_Y;
    constexpr int OUTPUTS_WIDTH_NOPAD
        = (CHANNELS_WIDTH - DILATION_X * (KERNEL_WIDTH - 1) - 1 + STRIDE_X) / STRIDE_X;

    for (int oy = 0; oy < OUTPUTS_HEIGHT; ++oy) {
        // Vertical kernel bounds, clipped against the input borders
        const int syMin = (PADDING_Y == 0) ? 0
            : max(PADDING_Y - (oy * STRIDE_Y), 0);
        const int syMax = (PADDING_Y == 0
                && OUTPUTS_HEIGHT == OUTPUTS_HEIGHT_NOPAD) ? DILATED_KERNEL_HEIGHT
            : clamp(CHANNELS_HEIGHT + PADDING_Y - (oy * STRIDE_Y),
                    0, DILATED_KERNEL_HEIGHT);
        const int iy = (oy * STRIDE_Y) - PADDING_Y;

#ifdef _OPENMP
#pragma omp parallel for collapse(2)
#endif
        for (int ox = 0; ox < OUTPUTS_WIDTH; ++ox) {
            for (int output = 0; output < NB_OUTPUTS; ++output) {
                // moved to inner loop for collapsing -->
                const int sxMin = (PADDING_X == 0) ? 0
                    : max(PADDING_X - (ox * STRIDE_X), 0);
                const int sxMax = (PADDING_X == 0
                        && OUTPUTS_WIDTH == OUTPUTS_WIDTH_NOPAD)
                    ? DILATED_KERNEL_WIDTH
                    : clamp(CHANNELS_WIDTH + PADDING_X - (ox * STRIDE_X),
                            0, DILATED_KERNEL_WIDTH);
                const int ix = (ox * STRIDE_X) - PADDING_X;

                const int oPos = (ox + OUTPUTS_WIDTH * oy);
                const int oOffset = NB_OUTPUTS * oPos;
                // <--

                // Depthwise mapping: each output reads a single input channel
                // (NB_OUTPUTS may be a multiple of NB_CHANNELS, i.e. a depth multiplier)
                const int channel = (output * NB_CHANNELS) / NB_OUTPUTS;

                Bias_T weightedSum = biases ? biases[output] : 0;

                for (int sy = 0; sy < KERNEL_HEIGHT; ++sy) {
                    if ((PADDING_Y != 0
                            || OUTPUTS_HEIGHT != OUTPUTS_HEIGHT_NOPAD)
                        && ((sy*DILATION_Y < syMin) || (sy*DILATION_Y >= syMax)))
                    {
                        continue;
                    }

                    const int iPos = ix + CHANNELS_WIDTH * (iy + sy*DILATION_Y);
                    const int iOffset = NB_CHANNELS * iPos;

                    const int wOffset = (output*KERNEL_HEIGHT + sy)
                                        * KERNEL_WIDTH;

                    // Fast path: the whole kernel row lies inside the input,
                    // so the MACs can run over a contiguous range
                    if (DILATION_X == 1 && ((PADDING_X == 0
                            && OUTPUTS_WIDTH == OUTPUTS_WIDTH_NOPAD)
                        || sxMax - sxMin == KERNEL_WIDTH))
                    {
                        macsOnRange<KERNEL_WIDTH, NB_CHANNELS>(
                            inputs + iOffset + channel,
                            weights + wOffset,
                            weightedSum);
                    }
                    else {
                        for (int sx = 0; sx < KERNEL_WIDTH; ++sx) {
                            if ((PADDING_X != 0
                                    || OUTPUTS_WIDTH != OUTPUTS_WIDTH_NOPAD)
                                && ((sx*DILATION_X < sxMin) || (sx*DILATION_X >= sxMax)))
                            {
                                continue;
                            }

                            const int iOffsetInRange = iOffset
                                + sx * DILATION_X * NB_CHANNELS;

                            weightedSum += inputs[iOffsetInRange + channel]
                                * weights[wOffset + sx];
                        }
                    }
                }

                outputs[oOffset + output]
                    = activation_forward_value<Output_T>(weightedSum, output, ACTIVATION, rescaling);
            }
        }
    }
}

// Template specialization when biases are not given to the convolution
template<int NB_CHANNELS,
         int CHANNELS_HEIGHT, int CHANNELS_WIDTH,
         int NB_OUTPUTS,
         int OUTPUTS_HEIGHT, int OUTPUTS_WIDTH,
         int PADDING_Y, int PADDING_X,
         int STRIDE_Y, int STRIDE_X,
         int DILATION_Y, int DILATION_X,
         int KERNEL_HEIGHT, int KERNEL_WIDTH,
         ActivationFunction_T ACTIVATION,
         typename Input_T, typename Output_T,
         typename Weight_T,
         typename Rescaling_T>
__attribute__((always_inline)) inline
void convolution_depthwise_forward(
    const Input_T* __restrict inputs,
    Output_T* __restrict outputs,
    const Weight_T* __restrict weights,
    std::nullptr_t __restrict,
    const Rescaling_T& __restrict rescaling)
{
    const float* b = nullptr;

    convolution_depthwise_forward<NB_CHANNELS,
                                  CHANNELS_HEIGHT,
                                  CHANNELS_WIDTH,
                                  NB_OUTPUTS,
                                  OUTPUTS_HEIGHT,
                                  OUTPUTS_WIDTH,
                                  PADDING_Y,
                                  PADDING_X,
                                  STRIDE_Y,
                                  STRIDE_X,
                                  DILATION_Y,
                                  DILATION_X,
                                  KERNEL_HEIGHT,
                                  KERNEL_WIDTH,
                                  ACTIVATION>
        (inputs, outputs, weights, b, rescaling);
}

#endif  // __AIDGE_EXPORT_CPP_KERNELS_CONVOLUTION_DEPTHWISE__
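As a cross-check of the HWC indexing used by this kernel, the following NumPy reference (illustrative only, not part of the export) computes the same depthwise forward pass for the common NB_OUTPUTS == NB_CHANNELS case, with stride and padding but without dilation:

import numpy as np

def depthwise_forward_ref(inputs, weights, biases=None, stride=(1, 1), padding=(0, 0)):
    # inputs:  (H, W, C) channels-last, matching the kernel's HWC layout
    # weights: (C, KH, KW), one filter per channel
    # biases:  (C,) or None, as in the std::nullptr_t specialization
    h, w, c = inputs.shape
    _, kh, kw = weights.shape
    sy, sx = stride
    py, px = padding
    out_h = (h + 2 * py - kh) // sy + 1
    out_w = (w + 2 * px - kw) // sx + 1
    padded = np.pad(inputs, ((py, py), (px, px), (0, 0)))
    outputs = np.zeros((out_h, out_w, c), dtype=np.float32)
    for oy in range(out_h):
        for ox in range(out_w):
            for ch in range(c):  # channel == output when the depth multiplier is 1
                window = padded[oy * sy:oy * sy + kh, ox * sx:ox * sx + kw, ch]
                acc = np.sum(window * weights[ch])
                if biases is not None:
                    acc += biases[ch]
                outputs[oy, ox, ch] = acc
    return outputs
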
import aidge_core
from aidge_core.export_utils import ExportNodeCpp, get_node_from_metaop
from aidge_export_cpp import ROOT, ExportLibCpp, set_scaling_attributes

@ExportLibCpp.register("ConvDw2D", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.any)))
class ConvDw(ExportNodeCpp):
    def __init__(self, node, mem_info):
        super().__init__(node, mem_info)

        # Initialize kernel attributes
        self.attributes["padding"] = [0, 0, 0, 0]
        self.attributes["activation"] = "Linear"
        self.attributes["depthwise"] = True
        self.attributes["aidge_cmp"] = node.attributes().has_attr("aidge_cmp")

        ## Scaling
        self.attributes["rescaling"] = "NoScaling"
        self.attributes["shift_value"] = 0

        # Browse the metaop to update kernel attributes
        ConvDwNode = get_node_from_metaop(node, "ConvDw2D")
        self.attributes["kernel_dims"] = ConvDwNode[0].get_operator().attr.kernel_dims
        self.attributes["stride_dims"] = ConvDwNode[0].get_operator().attr.stride_dims
        self.attributes["dilation_dims"] = ConvDwNode[0].get_operator().attr.dilation_dims

        # Template for layer configuration file generation
        self.config_template = str(ROOT / "templates" / "configuration" / "convolution_config.jinja")

        # Template for the layer call generated within the forward file
        self.forward_template = str(ROOT / "templates" / "kernel_forward" / "convolution_forward.jinja")

        # Files to include within the generated forward.cpp file
        self.include_list = []

        # Path to the kernel(s) files to copy
        self.add_kernel_to_copy(ROOT / "kernels" / "convolution_depthwise.hpp")
        self.add_kernel_to_copy(ROOT / "static" / "macs.hpp", "include/network", fwd_include=False)

        # Include aidge outputs within the fwd file
        if self.attributes["aidge_cmp"]:
            self.include_list.append("network/utils.hpp")  # aidge_cmp function
            self.include_list.append("data/aidge_outputs/" + node.name() + ".hpp")

@ExportLibCpp.register_metaop("QConvDw", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.any)))
class QConvDw(ConvDw):
    def __init__(self, node, mem_info):
        super().__init__(node, mem_info)

        # Look for the Quantizer node and set the shift and coef export node attributes
        set_scaling_attributes(self, node)

        ## Set the scaling type
        if self.attributes["shift_value"] != 0:
            self.attributes["rescaling"] = "SingleShiftScaling"

@ExportLibCpp.register_metaop("PadConvDw", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.any)))
class PadConvDw(QConvDw):
    def __init__(self, node, mem_info):
        super().__init__(node, mem_info)

        # Browse the metaop to update kernel attributes
        PadNode = get_node_from_metaop(node, "Pad2D")
        self.attributes["padding"] = PadNode[0].get_operator().attr.begin_end_borders

@ExportLibCpp.register_metaop("ConvDwAct", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.any)))
class ConvDwAct(QConvDw):
    def __init__(self, node, mem_info):
        super().__init__(node, mem_info)

        # Browse the metaop to update kernel attributes
        if get_node_from_metaop(node, "ReLU"):
            self.attributes["activation"] = "Rectifier"
        else:
            aidge_core.Log.error(f"{node.type()} activation is not yet supported.")

@ExportLibCpp.register_metaop("PadConvDwAct", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.any)))
class PadConvDwAct(PadConvDw, ConvDwAct):
    def __init__(self, node, mem_info):
        super().__init__(node, mem_info)
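Note that PadConvDwAct deliberately inherits from both PadConvDw and ConvDwAct: Python's cooperative super().__init__ chain visits each parent once, so a single constructor call sets both the padding and the activation attributes. A self-contained sketch of the pattern (hypothetical class names, not from the commit):

class Base:
    def __init__(self):
        self.attributes = {}

class WithPad(Base):
    def __init__(self):
        super().__init__()  # continues along the MRO, not straight to Base
        self.attributes["padding"] = [1, 1, 1, 1]

class WithAct(Base):
    def __init__(self):
        super().__init__()
        self.attributes["activation"] = "Rectifier"

class WithPadAct(WithPad, WithAct):
    pass  # MRO: WithPadAct -> WithPad -> WithAct -> Base

print(WithPadAct().attributes)
# {'activation': 'Rectifier', 'padding': [1, 1, 1, 1]}
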
@@ -17,7 +17,8 @@
{% include "./_rescaling.jinja" %}
{#- Calculate sizes #}
-{%- set weights_size = out_chan[0] * in_chan[0] * kernel_dims[1] * kernel_dims[0] %}
+{%- set weights_size = out_chan[0] * kernel_dims[1] * kernel_dims[0] if depthwise is defined
+    else out_chan[0] * in_chan[0] * kernel_dims[1] * kernel_dims[0] %}
#define {{ name|upper }}_WEIGHTS_SIZE {{ weights_size }}
#define {{ name|upper }}_BIASES_SIZE {{ out_chan[0] }}
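As a worked example of the new conditional (illustrative values, not taken from the commit), a 3x3 depthwise layer drops the in_chan factor from its weight count:

out_chan, in_chan, kh, kw = 32, 32, 3, 3  # hypothetical layer dimensions

depthwise_weights = out_chan * kh * kw            # 288: one KH*KW filter per channel
standard_weights = out_chan * in_chan * kh * kw   # 9216: the dense Conv2D formula

print(depthwise_weights, standard_weights)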