Commit 8eaf4766 authored by Axel Farrugia

[Refactor] Adapt ConvDw to the new export system

parent 573425cf
Pipeline #71885 failed
@@ -27,6 +27,12 @@ def cpp_fuse_to_metaops(graph_view: aidge_core.GraphView):
        "ConvAct": "(QConv|Conv2D)->ReLU",
        "PadConvAct": "PadConv->ReLU",

        # ConvDw
        "QConvDw": "ConvDepthWise2D->(Quantizer|QMul)",
        "ConvDwPad": "(QConvDw|ConvDepthWise2D)->Pad2D",
        "ConvDwAct": "(QConvDw|ConvDepthWise2D)->ReLU",
        "ConvDwActPad": "ConvDwAct->Pad2D",

        # Max Pooling
        "PadMaxPool": "MaxPooling2D<-Pad2D",
        "MaxPoolAct": "MaxPooling2D->ReLU",
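For reference, each entry above maps a MetaOperator name to an Aidge graph-matching query (`->` matches a child node, `<-` a parent, `(A|B)` an alternative). Below is a minimal sketch of how such a recipe table is typically applied, assuming aidge_core exposes fuse_to_metaops taking the graph view, the query, and the resulting MetaOperator type:

import aidge_core

def apply_fusion_recipes(graph_view: aidge_core.GraphView, recipes: dict) -> None:
    # Collapse every subgraph matching a query into a MetaOperator of that name
    for metaop_type, query in recipes.items():
        aidge_core.fuse_to_metaops(graph_view, query, metaop_type)
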
@@ -159,158 +159,4 @@ void convolution_forward(
        (inputs, outputs, weights, b, rescaling);
}
[removed lines: the convolution_depthwise_forward kernels, moved verbatim into the new convolution_depthwise.hpp below]
#endif // __AIDGE_EXPORT_CPP_KERNELS_CONVOLUTION__

#ifndef __AIDGE_EXPORT_CPP_KERNELS_CONVOLUTION_DEPTHWISE__
#define __AIDGE_EXPORT_CPP_KERNELS_CONVOLUTION_DEPTHWISE__

#include "network/typedefs.hpp"
#include "network/rescaling_utils.hpp"
#include "network/utils.hpp"
#include "network/macs.hpp"
#include "network/activation_utils.hpp"

template<int NB_CHANNELS,
         int CHANNELS_HEIGHT, int CHANNELS_WIDTH,
         int NB_OUTPUTS,
         int OUTPUTS_HEIGHT, int OUTPUTS_WIDTH,
         int PADDING_Y, int PADDING_X,
         int STRIDE_Y, int STRIDE_X,
         int DILATION_Y, int DILATION_X,
         int KERNEL_HEIGHT, int KERNEL_WIDTH,
         ActivationFunction_T ACTIVATION,
         typename Input_T, typename Output_T,
         typename Weight_T, typename Bias_T,
         typename Rescaling_T>
__attribute__((always_inline)) inline
void convolution_depthwise_forward(
    const Input_T* __restrict inputs,
    Output_T* __restrict outputs,
    const Weight_T* __restrict weights,
    const Bias_T* __restrict biases,
    const Rescaling_T& __restrict rescaling)
{
    static_assert(NB_OUTPUTS % NB_CHANNELS == 0,
        "NB_OUTPUTS should be a multiple of NB_CHANNELS.");

    constexpr int DILATED_KERNEL_HEIGHT
        = KERNEL_HEIGHT + (DILATION_Y - 1) * (KERNEL_HEIGHT - 1);
    constexpr int DILATED_KERNEL_WIDTH
        = KERNEL_WIDTH + (DILATION_X - 1) * (KERNEL_WIDTH - 1);

    constexpr int OUTPUTS_HEIGHT_NOPAD
        = (CHANNELS_HEIGHT - DILATION_Y * (KERNEL_HEIGHT - 1) - 1 + STRIDE_Y) / STRIDE_Y;
    constexpr int OUTPUTS_WIDTH_NOPAD
        = (CHANNELS_WIDTH - DILATION_X * (KERNEL_WIDTH - 1) - 1 + STRIDE_X) / STRIDE_X;

    for (int oy = 0; oy < OUTPUTS_HEIGHT; ++oy) {
        // Vertical kernel bounds, clipped against the input borders
        const int syMin = (PADDING_Y == 0) ? 0
            : max(PADDING_Y - (oy * STRIDE_Y), 0);
        const int syMax = (PADDING_Y == 0
                && OUTPUTS_HEIGHT == OUTPUTS_HEIGHT_NOPAD) ? DILATED_KERNEL_HEIGHT
            : clamp(CHANNELS_HEIGHT + PADDING_Y - (oy * STRIDE_Y),
                    0, DILATED_KERNEL_HEIGHT);
        const int iy = (oy * STRIDE_Y) - PADDING_Y;

#ifdef _OPENMP
#pragma omp parallel for collapse(2)
#endif
        for (int ox = 0; ox < OUTPUTS_WIDTH; ++ox) {
            for (int output = 0; output < NB_OUTPUTS; ++output) {
                // moved to inner loop for collapsing -->
                const int sxMin = (PADDING_X == 0) ? 0
                    : max(PADDING_X - (ox * STRIDE_X), 0);
                const int sxMax = (PADDING_X == 0
                        && OUTPUTS_WIDTH == OUTPUTS_WIDTH_NOPAD)
                    ? DILATED_KERNEL_WIDTH
                    : clamp(CHANNELS_WIDTH + PADDING_X - (ox * STRIDE_X),
                            0, DILATED_KERNEL_WIDTH);
                const int ix = (ox * STRIDE_X) - PADDING_X;

                const int oPos = (ox + OUTPUTS_WIDTH * oy);
                const int oOffset = NB_OUTPUTS * oPos;
                // <--

                // Depthwise mapping: each output reads a single input channel
                // (NB_OUTPUTS may be a multiple of NB_CHANNELS, i.e. a depth multiplier)
                const int channel = (output * NB_CHANNELS) / NB_OUTPUTS;

                Bias_T weightedSum = biases ? biases[output] : 0;

                for (int sy = 0; sy < KERNEL_HEIGHT; ++sy) {
                    if ((PADDING_Y != 0
                            || OUTPUTS_HEIGHT != OUTPUTS_HEIGHT_NOPAD)
                        && ((sy*DILATION_Y < syMin) || (sy*DILATION_Y >= syMax)))
                    {
                        continue;
                    }

                    const int iPos = ix + CHANNELS_WIDTH * (iy + sy*DILATION_Y);
                    const int iOffset = NB_CHANNELS * iPos;

                    const int wOffset = (output*KERNEL_HEIGHT + sy)
                                        * KERNEL_WIDTH;

                    // Fast path: the whole kernel row lies inside the input,
                    // so the MACs can run over a contiguous range
                    if (DILATION_X == 1 && ((PADDING_X == 0
                            && OUTPUTS_WIDTH == OUTPUTS_WIDTH_NOPAD)
                        || sxMax - sxMin == KERNEL_WIDTH))
                    {
                        macsOnRange<KERNEL_WIDTH, NB_CHANNELS>(
                            inputs + iOffset + channel,
                            weights + wOffset,
                            weightedSum);
                    }
                    else {
                        for (int sx = 0; sx < KERNEL_WIDTH; ++sx) {
                            if ((PADDING_X != 0
                                    || OUTPUTS_WIDTH != OUTPUTS_WIDTH_NOPAD)
                                && ((sx*DILATION_X < sxMin) || (sx*DILATION_X >= sxMax)))
                            {
                                continue;
                            }

                            const int iOffsetInRange = iOffset
                                + sx * DILATION_X * NB_CHANNELS;

                            weightedSum += inputs[iOffsetInRange + channel]
                                * weights[wOffset + sx];
                        }
                    }
                }

                outputs[oOffset + output]
                    = activation_forward_value<Output_T>(weightedSum, output, ACTIVATION, rescaling);
            }
        }
    }
}

// Template specialization when biases are not given to the convolution
template<int NB_CHANNELS,
         int CHANNELS_HEIGHT, int CHANNELS_WIDTH,
         int NB_OUTPUTS,
         int OUTPUTS_HEIGHT, int OUTPUTS_WIDTH,
         int PADDING_Y, int PADDING_X,
         int STRIDE_Y, int STRIDE_X,
         int DILATION_Y, int DILATION_X,
         int KERNEL_HEIGHT, int KERNEL_WIDTH,
         ActivationFunction_T ACTIVATION,
         typename Input_T, typename Output_T,
         typename Weight_T,
         typename Rescaling_T>
__attribute__((always_inline)) inline
void convolution_depthwise_forward(
    const Input_T* __restrict inputs,
    Output_T* __restrict outputs,
    const Weight_T* __restrict weights,
    std::nullptr_t __restrict,
    const Rescaling_T& __restrict rescaling)
{
    const float* b = nullptr;

    convolution_depthwise_forward<NB_CHANNELS,
                                  CHANNELS_HEIGHT,
                                  CHANNELS_WIDTH,
                                  NB_OUTPUTS,
                                  OUTPUTS_HEIGHT,
                                  OUTPUTS_WIDTH,
                                  PADDING_Y,
                                  PADDING_X,
                                  STRIDE_Y,
                                  STRIDE_X,
                                  DILATION_Y,
                                  DILATION_X,
                                  KERNEL_HEIGHT,
                                  KERNEL_WIDTH,
                                  ACTIVATION>
        (inputs, outputs, weights, b, rescaling);
}

#endif  // __AIDGE_EXPORT_CPP_KERNELS_CONVOLUTION_DEPTHWISE__
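As a cross-check of the HWC indexing used by this kernel, the following NumPy reference (illustrative only, not part of the export) computes the same depthwise forward pass for the common NB_OUTPUTS == NB_CHANNELS case, with stride and padding but without dilation:

import numpy as np

def depthwise_forward_ref(inputs, weights, biases=None, stride=(1, 1), padding=(0, 0)):
    # inputs:  (H, W, C) channels-last, matching the kernel's HWC layout
    # weights: (C, KH, KW), one filter per channel
    # biases:  (C,) or None, as in the std::nullptr_t specialization
    h, w, c = inputs.shape
    _, kh, kw = weights.shape
    sy, sx = stride
    py, px = padding
    out_h = (h + 2 * py - kh) // sy + 1
    out_w = (w + 2 * px - kw) // sx + 1
    padded = np.pad(inputs, ((py, py), (px, px), (0, 0)))
    outputs = np.zeros((out_h, out_w, c), dtype=np.float32)
    for oy in range(out_h):
        for ox in range(out_w):
            for ch in range(c):  # channel == output when the depth multiplier is 1
                window = padded[oy * sy:oy * sy + kh, ox * sx:ox * sx + kw, ch]
                acc = np.sum(window * weights[ch])
                if biases is not None:
                    acc += biases[ch]
                outputs[oy, ox, ch] = acc
    return outputs
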
import aidge_core
from aidge_core.export_utils import ExportNodeCpp, get_node_from_metaop
from aidge_export_cpp import ROOT, ExportLibCpp, set_scaling_attributes

@ExportLibCpp.register("ConvDw2D", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.any)))
class ConvDw(ExportNodeCpp):
    def __init__(self, node, mem_info):
        super().__init__(node, mem_info)

        # Initialize kernel attributes
        self.attributes["padding"] = [0, 0, 0, 0]
        self.attributes["activation"] = "Linear"
        self.attributes["depthwise"] = True
        self.attributes["aidge_cmp"] = node.attributes().has_attr("aidge_cmp")

        ## Scaling
        self.attributes["rescaling"] = "NoScaling"
        self.attributes["shift_value"] = 0

        # Browse the metaop to update kernel attributes
        ConvDwNode = get_node_from_metaop(node, "ConvDw2D")
        self.attributes["kernel_dims"] = ConvDwNode[0].get_operator().attr.kernel_dims
        self.attributes["stride_dims"] = ConvDwNode[0].get_operator().attr.stride_dims
        self.attributes["dilation_dims"] = ConvDwNode[0].get_operator().attr.dilation_dims

        # Template for layer configuration file generation
        self.config_template = str(ROOT / "templates" / "configuration" / "convolution_config.jinja")

        # Template for the layer call generated within the forward file
        self.forward_template = str(ROOT / "templates" / "kernel_forward" / "convolution_forward.jinja")

        # Files to include within the generated forward.cpp file
        self.include_list = []

        # Path to the kernel(s) files to copy
        self.add_kernel_to_copy(ROOT / "kernels" / "convolution_depthwise.hpp")
        self.add_kernel_to_copy(ROOT / "static" / "macs.hpp", "include/network", fwd_include=False)

        # Include aidge outputs within the fwd file
        if self.attributes["aidge_cmp"]:
            self.include_list.append("network/utils.hpp")  # aidge_cmp function
            self.include_list.append("data/aidge_outputs/" + node.name() + ".hpp")

@ExportLibCpp.register_metaop("QConvDw", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.any)))
class QConvDw(ConvDw):
    def __init__(self, node, mem_info):
        super().__init__(node, mem_info)

        # Look for the Quantizer node and set the shift and coef export node attributes
        set_scaling_attributes(self, node)

        ## Set the scaling type
        if self.attributes["shift_value"] != 0:
            self.attributes["rescaling"] = "SingleShiftScaling"

@ExportLibCpp.register_metaop("PadConvDw", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.any)))
class PadConvDw(QConvDw):
    def __init__(self, node, mem_info):
        super().__init__(node, mem_info)

        # Browse the metaop to update kernel attributes
        PadNode = get_node_from_metaop(node, "Pad2D")
        self.attributes["padding"] = PadNode[0].get_operator().attr.begin_end_borders

@ExportLibCpp.register_metaop("ConvDwAct", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.any)))
class ConvDwAct(QConvDw):
    def __init__(self, node, mem_info):
        super().__init__(node, mem_info)

        # Browse the metaop to update kernel attributes
        if get_node_from_metaop(node, "ReLU"):
            self.attributes["activation"] = "Rectifier"
        else:
            aidge_core.Log.error(f"{node.type()} activation is not yet supported.")

@ExportLibCpp.register_metaop("PadConvDwAct", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.any)))
class PadConvDwAct(PadConvDw, ConvDwAct):
    def __init__(self, node, mem_info):
        super().__init__(node, mem_info)
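Note that PadConvDwAct deliberately inherits from both PadConvDw and ConvDwAct: Python's cooperative super().__init__ chain visits each parent once, so a single constructor call sets both the padding and the activation attributes. A self-contained sketch of the pattern (hypothetical class names, not from the commit):

class Base:
    def __init__(self):
        self.attributes = {}

class WithPad(Base):
    def __init__(self):
        super().__init__()  # continues along the MRO, not straight to Base
        self.attributes["padding"] = [1, 1, 1, 1]

class WithAct(Base):
    def __init__(self):
        super().__init__()
        self.attributes["activation"] = "Rectifier"

class WithPadAct(WithPad, WithAct):
    pass  # MRO: WithPadAct -> WithPad -> WithAct -> Base

print(WithPadAct().attributes)
# {'activation': 'Rectifier', 'padding': [1, 1, 1, 1]}
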
@@ -17,7 +17,8 @@
{% include "./_rescaling.jinja" %}
{#- Calculate sizes #}
-{%- set weights_size = out_chan[0] * in_chan[0] * kernel_dims[1] * kernel_dims[0] %}
+{%- set weights_size = out_chan[0] * kernel_dims[1] * kernel_dims[0] if depthwise is defined
+    else out_chan[0] * in_chan[0] * kernel_dims[1] * kernel_dims[0] %}
#define {{ name|upper }}_WEIGHTS_SIZE {{ weights_size }}
#define {{ name|upper }}_BIASES_SIZE {{ out_chan[0] }}
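As a worked example of the new conditional (illustrative values, not taken from the commit), a 3x3 depthwise layer drops the in_chan factor from its weight count:

out_chan, in_chan, kh, kw = 32, 32, 3, 3  # hypothetical layer dimensions

depthwise_weights = out_chan * kh * kw            # 288: one KH*KW filter per channel
standard_weights = out_chan * in_chan * kh * kw   # 9216: the dense Conv2D formula

print(depthwise_weights, standard_weights)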