Commit 10868393 authored by Cyril Moineau

Add ConvDW and Softmax to export.

parent 2ae66838
3 merge requests: !17 v0.1.0, !12 v0.4.0, !11 Export refactor
/*
(C) Copyright 2017 CEA LIST. All Rights Reserved.
Contributor(s): N2D2 Team
This software is governed by the CeCILL-C license under French law and
abiding by the rules of distribution of free software. You can use,
modify and/ or redistribute the software under the terms of the CeCILL-C
license as circulated by CEA, CNRS and INRIA at the following URL
"http://www.cecill.info".
As a counterpart to the access to the source code and rights to copy,
modify and redistribute granted by the license, users are provided only
with a limited warranty and the software's author, the holder of the
economic rights, and the successive licensors have only limited
liability.
The fact that you are presently reading this means that you have had
knowledge of the CeCILL-C license and that you accept its terms.
*/
#ifndef __N2D2_EXPORT_CPP_CONV_DW_HPP__
#define __N2D2_EXPORT_CPP_CONV_DW_HPP__
#include "typedefs.h"
#include "assert.h"
#include "utils.hpp"
#include "kernels/Macs.hpp"
namespace N2D2_Export {
template<int NB_CHANNELS,
         int CHANNELS_HEIGHT, int CHANNELS_WIDTH,
         int NB_OUTPUTS,
         int OUTPUTS_HEIGHT, int OUTPUTS_WIDTH,
         int PADDING_Y, int PADDING_X,
         int STRIDE_Y, int STRIDE_X,
         int KERNEL_HEIGHT, int KERNEL_WIDTH,
         ActivationFunction_T ACTIVATION,
         // Memory mapping: inputs
         int INPUT_MEM_CONT_OFFSET,
         int INPUT_MEM_CONT_SIZE,
         int INPUT_MEM_WRAP_OFFSET,
         int INPUT_MEM_WRAP_SIZE,
         int INPUT_MEM_STRIDE,
         // Memory mapping: outputs
         int OUTPUT_MEM_CONT_OFFSET,
         int OUTPUT_MEM_CONT_SIZE,
         int OUTPUT_MEM_WRAP_OFFSET,
         int OUTPUT_MEM_WRAP_SIZE,
         int OUTPUT_MEM_STRIDE,
         typename Input_T, typename Output_T,
         typename Weight_T, typename Bias_T,
         typename Rescaling_T>
__attribute__((always_inline)) inline void convcellDWPropagate(
    const Input_T* __restrict inputs,
    Output_T* __restrict outputs,
    const Bias_T* __restrict biasses,
    const Weight_T* __restrict weights,
    const Rescaling_T& __restrict rescaling)
{
    static_assert(NB_OUTPUTS % NB_CHANNELS == 0,
        "NB_OUTPUTS should be a multiple of NB_CHANNELS.");

    constexpr int OUTPUTS_HEIGHT_NOPAD
        = (CHANNELS_HEIGHT - KERNEL_HEIGHT + STRIDE_Y) / STRIDE_Y;
    constexpr int OUTPUTS_WIDTH_NOPAD
        = (CHANNELS_WIDTH - KERNEL_WIDTH + STRIDE_X) / STRIDE_X;

    for (int oy = 0; oy < OUTPUTS_HEIGHT; ++oy) {
        const int syMin = (PADDING_Y == 0) ? 0
            : max(PADDING_Y - (oy * STRIDE_Y), 0);
        const int syMax = (PADDING_Y == 0
                && OUTPUTS_HEIGHT == OUTPUTS_HEIGHT_NOPAD) ? KERNEL_HEIGHT
            : clamp(CHANNELS_HEIGHT + PADDING_Y - (oy * STRIDE_Y),
                    0, KERNEL_HEIGHT);
        const int iy = (oy * STRIDE_Y) - PADDING_Y;

        for (int ox = 0; ox < OUTPUTS_WIDTH; ++ox) {
            const int sxMin = (PADDING_X == 0) ? 0
                : max(PADDING_X - (ox * STRIDE_X), 0);
            const int sxMax = (PADDING_X == 0
                    && OUTPUTS_WIDTH == OUTPUTS_WIDTH_NOPAD)
                        ? KERNEL_WIDTH
                : clamp(CHANNELS_WIDTH + PADDING_X - (ox * STRIDE_X),
                        0, KERNEL_WIDTH);
            const int ix = (ox * STRIDE_X) - PADDING_X;

            const int oPos = (ox + OUTPUTS_WIDTH * oy);
            int oOffset = OUTPUT_MEM_STRIDE * oPos;

            if (OUTPUT_MEM_WRAP_SIZE > 0 && oOffset >= OUTPUT_MEM_CONT_SIZE) {
                oOffset += OUTPUT_MEM_WRAP_OFFSET - OUTPUT_MEM_CONT_OFFSET
                            - OUTPUT_MEM_CONT_SIZE;
            }

            for (int output = 0; output < NB_OUTPUTS; ++output) {
                const int channel = (output * NB_CHANNELS) / NB_OUTPUTS;

                SUM_T weightedSum = biasses[output];

                for (int sy = 0; sy < KERNEL_HEIGHT; ++sy) {
                    if ((PADDING_Y != 0
                            || OUTPUTS_HEIGHT != OUTPUTS_HEIGHT_NOPAD)
                        && sy >= syMax - syMin)
                    {
                        break;
                    }

                    const int iPos = ((sxMin + ix)
                        + CHANNELS_WIDTH * (iy + syMin + sy));
                    int iOffset = INPUT_MEM_STRIDE * iPos;

                    // Wrapping cannot occur in the middle of a line, except if
                    // there is only one line (1D)!
                    bool wrapInRange = false;

                    if (INPUT_MEM_WRAP_SIZE > 0
                        && iOffset >= INPUT_MEM_CONT_SIZE)
                    {
                        iOffset += INPUT_MEM_WRAP_OFFSET - INPUT_MEM_CONT_OFFSET
                                    - INPUT_MEM_CONT_SIZE;
                    }
                    else if (INPUT_MEM_WRAP_SIZE > 0 && KERNEL_WIDTH > 1
                        && CHANNELS_HEIGHT == 1 // single line (1D)!
                        && iOffset + KERNEL_WIDTH * INPUT_MEM_STRIDE
                            > INPUT_MEM_CONT_SIZE)
                    {
                        wrapInRange = true;
                    }

                    const int wOffset = (sxMin
                        + KERNEL_WIDTH * (syMin + sy + KERNEL_HEIGHT * output));

                    if (!wrapInRange && ((PADDING_X == 0
                            && OUTPUTS_WIDTH == OUTPUTS_WIDTH_NOPAD)
                        || sxMax - sxMin == KERNEL_WIDTH))
                    {
                        macsOnRange<KERNEL_WIDTH, INPUT_MEM_STRIDE>(
                            inputs + iOffset + channel,
                            weights + wOffset,
                            weightedSum);
                    }
                    else {
                        for (int sx = 0; sx < KERNEL_WIDTH; ++sx) {
                            if ((PADDING_X != 0
                                    || OUTPUTS_WIDTH != OUTPUTS_WIDTH_NOPAD)
                                && sx >= sxMax - sxMin)
                            {
                                break;
                            }

                            int iOffsetInRange = iOffset
                                + sx * INPUT_MEM_STRIDE;

                            if (wrapInRange &&
                                iOffsetInRange >= INPUT_MEM_CONT_SIZE)
                            {
                                iOffsetInRange += INPUT_MEM_WRAP_OFFSET
                                            - INPUT_MEM_CONT_OFFSET
                                            - INPUT_MEM_CONT_SIZE;
                            }

                            weightedSum += inputs[channel + iOffsetInRange]
                                * weights[wOffset + sx];
                        }
                    }
                }

                outputs[output + oOffset]
                    = sat<Output_T>(weightedSum, output, ACTIVATION, rescaling);
            }
        }
    }
}
} // N2D2_Export
#endif // __N2D2_EXPORT_CPP_CONV_DW_HPP__
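
For reference, a minimal, hypothetical instantiation sketch of the kernel above (not part of the commit): a 3x3 depthwise convolution, stride 1, no padding, 8 channels, 16x16 input, packed HWC buffers (memory stride equal to the channel count) and no memory wrapping. The buffer names, the `Linear` activation value and the `NoScaling_T` rescaling type are assumptions about typedefs.h and utils.hpp, not verified against the export headers.

// Hypothetical usage sketch, assuming typedefs.h provides a Linear
// ActivationFunction_T value and utils.hpp provides a pass-through
// NoScaling_T rescaling type.
#include "ConvDW.hpp"

using namespace N2D2_Export;

static float inputs[16 * 16 * 8];    // HWC layout, INPUT_MEM_STRIDE = 8
static float outputs[14 * 14 * 8];   // HWC layout, OUTPUT_MEM_STRIDE = 8
static float weights[8 * 3 * 3];     // one 3x3 kernel per channel
static float biasses[8];

void forward_convdw_example()
{
    const NoScaling_T rescaling;  // assumed pass-through rescaling type

    convcellDWPropagate<
        8, 16, 16,       // NB_CHANNELS, CHANNELS_HEIGHT, CHANNELS_WIDTH
        8, 14, 14,       // NB_OUTPUTS, OUTPUTS_HEIGHT, OUTPUTS_WIDTH
        0, 0,            // PADDING_Y, PADDING_X
        1, 1,            // STRIDE_Y, STRIDE_X
        3, 3,            // KERNEL_HEIGHT, KERNEL_WIDTH
        Linear,          // assumed ActivationFunction_T value
        0, 16 * 16 * 8, 0, 0, 8,   // input mapping: contiguous, no wrap
        0, 14 * 14 * 8, 0, 0, 8    // output mapping: contiguous, no wrap
        >(inputs, outputs, biasses, weights, rescaling);
}
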
{#- For name header -#}
#ifndef {{ name|upper }}_LAYER_H
#define {{ name|upper }}_LAYER_H
{# For layer configuration -#}
#define {{ name|upper }}_INPUTS_SIZE {{ in_size[0] }}
#define {{ name|upper }}_OUTPUTS_SIZE {{ out_size[0] }}
#define {{ name|upper }}_DIMS {{ in_dims[0] }}
#define {{ name|upper }}_AXIS {{ axis }}
#define {{ name|upper }}_INPUT_DIMS_SIZE {{ in_dims[0]|length }}
#endif /* {{ name|upper }}_LAYER_H */
{% filter indent(width=4, first=False) %}
{% include "./_mem_offset.jinja" %}
aidge_softmax_chw_float32({{in_name[0]}}, {{out_name[0]}}, {{name|upper}}_DIMS, {{name|upper}}_AXIS, {{name|upper}}_INPUT_DIMS_SIZE, {{name|upper}}_OUTPUTS_SIZE);
{% endfilter %}
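
To illustrate how these two Softmax templates are used, here is a rough, hypothetical render (not taken from the commit) for a layer named softmax0 with in_size[0] = 10, out_size[0] = 10, in_dims[0] = [1, 10] and axis = 1. The buffer names softmax0_input / softmax0_output are placeholders, and the output of _mem_offset.jinja is elided since it is not shown in this commit.

/* Configuration template render (hypothetical values) */
#ifndef SOFTMAX0_LAYER_H
#define SOFTMAX0_LAYER_H
#define SOFTMAX0_INPUTS_SIZE 10
#define SOFTMAX0_OUTPUTS_SIZE 10
#define SOFTMAX0_DIMS [1, 10]   /* in_dims[0] rendered verbatim; actual formatting depends on the export */
#define SOFTMAX0_AXIS 1
#define SOFTMAX0_INPUT_DIMS_SIZE 2
#endif /* SOFTMAX0_LAYER_H */

/* Forward-call template render (hypothetical names) */
    /* ...memory offset definitions emitted by _mem_offset.jinja... */
    aidge_softmax_chw_float32(softmax0_input, softmax0_output, SOFTMAX0_DIMS, SOFTMAX0_AXIS, SOFTMAX0_INPUT_DIMS_SIZE, SOFTMAX0_OUTPUTS_SIZE);
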
@@ -224,6 +224,49 @@ class Conv_ARMCortexM(ExportNodeCpp):
             str(ROOT / "_Aidge_Arm" / "kernels" / "Convolution" / "Conv.hpp")
         ]
@ExportLibAidgeARM.register("ConvDepthWise2D", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.float32)))
class ConvDW_ARMCortexM(ExportNodeCpp):
def __init__(self, node, mem_info):
super().__init__(node, mem_info)
self.attributes["activation"] = "Linear"
self.attributes.update(Scaling()("no_scaling"))
# No padding with Conv
# Use PaddedConv to add padding attribute
self.attributes["padding"] = [0, 0]
self.config_template = str(ROOT / "_Aidge_Arm" / "templates" / "configuration" / "conv_config.jinja")
self.forward_template = str(ROOT / "_Aidge_Arm" / "templates" / "forward_call" / "conv_kernel.jinja")
self.include_list = []
self.kernels_to_copy = [
str(ROOT / "_Aidge_Arm" / "kernels" / "Convolution" / "ConvDW.hpp")
]
@ExportLibAidgeARM.register_metaop("PaddedConvDepthWise2D", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.float32)))
class PaddedConvDW_ARMCortexM(ExportNodeCpp):
def __init__(self, node, mem_info):
super().__init__(node, mem_info)
self.attributes["activation"] = "Linear"
self.attributes.update(Scaling()("no_scaling"))
for n in self.operator.get_micro_graph().get_nodes():
if n.type() == "Pad2D":
self.attributes["padding"] = n.get_operator(
).attr.begin_end_borders
if n.type() == "ConvDepthWise2D":
self.attributes["kernel_dims"] = n.get_operator(
).attr.kernel_dims
self.attributes["stride_dims"] = n.get_operator(
).attr.stride_dims
self.attributes["dilation_dims"] = n.get_operator(
).attr.dilation_dims
self.config_template = str(ROOT / "_Aidge_Arm" / "templates" / "configuration" / "conv_config.jinja")
self.forward_template = str(ROOT / "_Aidge_Arm" / "templates" / "forward_call" / "conv_kernel.jinja")
self.include_list = []
self.kernels_to_copy = [
str(ROOT / "_Aidge_Arm" / "kernels" / "Convolution" / "ConvDW.hpp")
]
@ExportLibAidgeARM.register_metaop("PaddedConv2D", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.float32))) @ExportLibAidgeARM.register_metaop("PaddedConv2D", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.float32)))
class PaddedConv_ARMCortexM(ExportNodeCpp): class PaddedConv_ARMCortexM(ExportNodeCpp):
...@@ -231,7 +274,6 @@ class PaddedConv_ARMCortexM(ExportNodeCpp): ...@@ -231,7 +274,6 @@ class PaddedConv_ARMCortexM(ExportNodeCpp):
super().__init__(node, mem_info) super().__init__(node, mem_info)
self.attributes["activation"] = "Linear" self.attributes["activation"] = "Linear"
self.attributes.update(Scaling()("no_scaling")) self.attributes.update(Scaling()("no_scaling"))
print(self.attributes)
for n in self.operator.get_micro_graph().get_nodes(): for n in self.operator.get_micro_graph().get_nodes():
if n.type() == "Pad2D": if n.type() == "Pad2D":
self.attributes["padding"] = n.get_operator( self.attributes["padding"] = n.get_operator(

@@ -361,10 +403,8 @@ class Mul_ARMCortexM(ExportNodeCpp):
 class Softmax_ARMCortexM(ExportNodeCpp):
     def __init__(self, node, mem_info):
         super().__init__(node, mem_info)
-        self.attributes["activation_type"] = "\"SOFTMAX\""
-        self.config_template = str(ROOT / "_Aidge_Arm" / "templates" / "configuration" / "activation.jinja")
-        self.forward_template = str(ROOT / "_Aidge_Arm" / "templates" / "forward_call" / "activation_chw.jinja")
+        self.config_template = str(ROOT / "_Aidge_Arm" / "templates" / "configuration" / "softmax.jinja")
+        self.forward_template = str(ROOT / "_Aidge_Arm" / "templates" / "forward_call" / "softmax.jinja")
         self.include_list = []
         self.kernels_to_copy = [
             str(ROOT / "_Aidge_Arm" / "kernels" / "Softmax" / "aidge_softmax_chw_float32.h"),
...