Commit 2a06f059 authored by Matthew Newson

Add new parameter groups for convolution

parent 81d728ac
Merge request !33: Add and modify operators to run the ConvNeXt onnx model
#ifndef __AIDGE_EXPORT_CPP_KERNELS_CONVOLUTION__
#define __AIDGE_EXPORT_CPP_KERNELS_CONVOLUTION__

#include "network/typedefs.hpp"
#include "kernels/rescaling.hpp"
#include "network/utils.hpp"
#include "kernels/macs.hpp"
#include "kernels/activation.hpp"

#include <omp.h>
#include <stdexcept>
#include <iostream>

// Weights index in NHWC layout
constexpr int inds_pos(int n, int c, int h, int w, int N, int C, int H, int W) {
    return n * (H * W * C) +
           h * (W * C) +
           w * C +
           c;
}

// Image index in CHW layout
constexpr int inds_pos(int c, int h, int w, int C, int H, int W) {
    return c * (H * W) +
           h * W +
           w;
}
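// Example (illustrative values only): with a 3x3 kernel and 4 weight channels per
// group, the weight for output channel 2, group-local channel 1, kernel position
// (h=0, w=2) sits at inds_pos(2, 1, 0, 2, N, 4, 3, 3) = 2*36 + 0*12 + 2*4 + 1 = 81,
// i.e. the channel index varies fastest, then kernel width, then kernel height.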

template<int NB_CHANNELS,
         int IN_HEIGHT, int IN_WIDTH,
         int NB_OUTPUTS, int GROUPS,
         int OUT_HEIGHT, int OUT_WIDTH,
         int PADDING_Y, int PADDING_X,
         int STRIDE_Y, int STRIDE_X,
         int DILATION_Y, int DILATION_X,
         int KERNEL_HEIGHT, int KERNEL_WIDTH,
         ActivationFunction_T ACTIVATION,
         typename Input_T, typename Output_T,
         typename Weight_T, typename Bias_T,
         typename Rescaling_T>
__attribute__((always_inline)) inline
void convolution_forward(
    const Input_T* __restrict inputs,
    Output_T* __restrict outputs,
    const Weight_T* __restrict weights,
    const Bias_T* __restrict biases,
    const Rescaling_T& __restrict rescaling)
{
    if (NB_CHANNELS % GROUPS != 0 || NB_OUTPUTS % GROUPS != 0) {
        throw std::invalid_argument("Groups must be a divisor of both NB_CHANNELS and NB_OUTPUTS!");
    }

    // Number of input/output channels handled by each group
    constexpr int c_in_g = NB_CHANNELS / GROUPS;
    constexpr int c_out_g = NB_OUTPUTS / GROUPS;

    // Parallelise over output channels only: the inner loops accumulate into
    // `value`, so they must remain sequential to avoid data races.
#pragma omp parallel for
    for (int oc = 0; oc < NB_OUTPUTS; oc++) {
        const int g_oc = oc / c_out_g;  // group this output channel belongs to

        for (int i = 0; i < OUT_HEIGHT; ++i) {
            for (int j = 0; j < OUT_WIDTH; ++j) {
                Output_T value = biases[oc];

                // Only input channels of the same group contribute to this output channel
                for (int ic = g_oc * c_in_g; ic < (g_oc + 1) * c_in_g; ++ic) {
                    for (int m = 0; m < KERNEL_HEIGHT; ++m) {
                        for (int n = 0; n < KERNEL_WIDTH; ++n) {
                            const int i_p = i * STRIDE_X - PADDING_X + m * DILATION_X;
                            const int j_p = j * STRIDE_Y - PADDING_Y + n * DILATION_Y;

                            if (i_p >= 0 && i_p < IN_HEIGHT && j_p >= 0 && j_p < IN_WIDTH) {
                                value += weights[inds_pos(oc, ic % c_in_g, m, n, NB_OUTPUTS, c_in_g, KERNEL_HEIGHT, KERNEL_WIDTH)]
                                       * inputs[inds_pos(ic, i_p, j_p, NB_CHANNELS, IN_HEIGHT, IN_WIDTH)];
                            }
                        }
                    }
                }

                outputs[inds_pos(oc, i, j, NB_OUTPUTS, OUT_HEIGHT, OUT_WIDTH)] =
                    activation_forward_value<Output_T>(value, oc, ACTIVATION, rescaling);
            }
        }
    }
}
#endif // __AIDGE_EXPORT_CPP_KERNELS_CONVOLUTION__
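For reference, here is a minimal usage sketch of the kernel with the new GROUPS parameter. The function name, sizes and tensor names are hypothetical (in a real export the template parameters come from the macros generated by the configuration template); Linear and NoScaling are the activation and rescaling types the Python side plugs in via kernels/activation.hpp and kernels/rescaling.hpp. It shows a depthwise 3x3 convolution, where GROUPS equals the channel count so each output channel reads a single input channel.

// Hypothetical instantiation (not generated code): 8-channel depthwise 3x3
// convolution on a 16x16 map, stride 1, padding 1, no dilation.
// GROUPS == NB_CHANNELS == NB_OUTPUTS, hence c_in_g == c_out_g == 1.
inline void depthwise_example(const float* in,   // [8][16][16], CHW
                              float* out,        // [8][16][16], CHW
                              const float* w,    // [8][3][3][1], NHWC
                              const float* b)    // [8]
{
    static const NoScaling rescaling = {};       // mirrors the generated *_RESCALING constant
    convolution_forward<8, 16, 16,   // NB_CHANNELS, IN_HEIGHT, IN_WIDTH
                        8, 8,        // NB_OUTPUTS, GROUPS (depthwise)
                        16, 16,      // OUT_HEIGHT, OUT_WIDTH
                        1, 1,        // PADDING_Y, PADDING_X
                        1, 1,        // STRIDE_Y, STRIDE_X
                        1, 1,        // DILATION_Y, DILATION_X
                        3, 3,        // KERNEL_HEIGHT, KERNEL_WIDTH
                        Linear>      // ACTIVATION
        (in, out, w, b, rescaling);
}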
@@ -4,14 +4,27 @@ from pathlib import Path
import aidge_core
from aidge_core.export_utils import ExportNode, ExportNodeCpp, generate_file
from aidge_export_cpp.utils import ROOT
from aidge_export_cpp import ExportLibCpp
##############################################
############## Export functions ##############
##############################################

def numpy_dtype2ctype(dtype):
    if dtype == np.int8:
        return "int8_t"
    elif dtype == np.int16:
        return "int16_t"
    elif dtype == np.int32:
        return "int32_t"
    elif dtype == np.int64:
        return "int64_t"
    elif dtype == np.float32:
        return "float"
    elif dtype == np.float64:
        return "double"
    # Add more dtype mappings as needed
    else:
        raise ValueError(f"Unsupported {dtype} dtype")

def export_params(name: str,
                  array: np.ndarray,
@@ -43,7 +56,7 @@ class ProducerCPP(ExportNode):
        super().__init__(node, mem_info)
        self.values = np.array(self.operator.get_output(0))

        if len(self.values.shape) == 4:  # Note: export in HWC
            self.values = np.transpose(self.values, (0, 2, 3, 1))

    def export(self, export_folder: Path):
@@ -130,6 +143,24 @@ def _setup_conv2D(conv):
        str(ROOT / "kernels" / "rescaling.hpp")
    ]

def _setup_elemwise_op(elemwise, op):
    """Common code (template and kernel setup) shared across the different elementwise operators (Add, Sub, ...)."""
    elemwise.attributes["elemwise_op"] = op
    elemwise.attributes["activation"] = "Linear"
    elemwise.attributes["rescaling"] = "NoScaling"
    elemwise.config_template = str(
        ROOT / "templates" / "configuration" / "elemwise_config.jinja")
    elemwise.forward_template = str(
        ROOT / "templates" / "kernel_forward" / "elemwise_forward.jinja")
    elemwise.include_list = []
    elemwise.kernels_to_copy = [
        str(ROOT / "kernels" / "elemwise.hpp"),
        str(ROOT / "kernels" / "activation.hpp"),
        str(ROOT / "kernels" / "rescaling.hpp")
    ]

@ExportLibCpp.register("Conv2D", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.float32)))
class ConvCPP(ExportNodeCpp):
    def __init__(self, node, mem_info):
@@ -137,7 +168,7 @@ class ConvCPP(ExportNodeCpp):
        # No padding with Conv
        # Use PaddedConv to add padding attribute
        self.attributes["padding"] = [0, 0]
        self.attributes["groups"] = 1
        _setup_conv2D(self)

@ExportLibCpp.register_metaop("PaddedConv2D", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.float32)))
@@ -156,25 +187,28 @@ class PaddedConvCPP(ExportNodeCpp):
                ).attr.stride_dims
                self.attributes["dilation_dims"] = n.get_operator(
                ).attr.dilation_dims

        self.attributes["groups"] = 1
        _setup_conv2D(self)

@ExportLibCpp.register_metaop("PaddedConvDepthWise2D", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.float32)))
class PaddedConvDepthWiseCPP(ExportNodeCpp):
    def __init__(self, node, mem_info):
        super().__init__(node, mem_info)
        # TODO: find a way to retrieve attributes for a meta operator
        for n in self.operator.get_micro_graph().get_nodes():
            if n.type() == "Pad2D":
                self.attributes["padding"] = n.get_operator(
                ).attr.begin_end_borders
            if n.type() == "ConvDepthWise2D":
                self.attributes["kernel_dims"] = n.get_operator(
                ).attr.kernel_dims
                self.attributes["stride_dims"] = n.get_operator(
                ).attr.stride_dims
                self.attributes["dilation_dims"] = n.get_operator(
                ).attr.dilation_dims

        # Depthwise convolution: one group per output channel
        self.attributes["groups"] = self.attributes["out_chan"][0]
        _setup_conv2D(self)

@ExportLibCpp.register("Add", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.float32)))
class AddCPP(ExportNodeCpp):
@@ -197,6 +231,14 @@ class MulCPP(ExportNodeCpp):
        _setup_elemwise_op(self, "Mul")

@ExportLibCpp.register("Div", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.float32)))
class DivCPP(ExportNodeCpp):
    def __init__(self, node, mem_info):
        super().__init__(node, mem_info)
        _setup_elemwise_op(self, "Div")

def _setup_pooling(pooling):
    """Common code (template and kernel setup) shared across the different pooling operators."""
@@ -211,25 +253,6 @@ def _setup_pooling(pooling):
        str(ROOT / "kernels" / "rescaling.hpp")
    ]

@ExportLibCpp.register("Div", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.float32)))
class DivCPP(ExportNodeCpp):
def __init__(self, node, mem_info):
super().__init__(node, mem_info)
self.attributes["div_op"] = "Div"
self.attributes["activation"] = "Linear"
self.attributes["rescaling"] = "NoScaling"
self.config_template = str(
ROOT / "templates" / "configuration" / "div_config.jinja")
self.forward_template = str(
ROOT / "templates" / "kernel_forward" / "div_forward.jinja")
self.include_list = []
self.kernels_to_copy = [
str(ROOT / "kernels" / "div.hpp"),
str(ROOT / "kernels" / "activation.hpp"),
str(ROOT / "kernels" / "rescaling.hpp")
]
@ExportLibCpp.register("MaxPooling2D", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.float32)))
class MaxPoolCPP(ExportNodeCpp):
    def __init__(self, node, mem_info):
@@ -297,23 +320,20 @@ class FcCPP(ExportNodeCpp):
            str(ROOT / "kernels" / "rescaling.hpp")
        ]

@ExportLibCpp.register("MatMul", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.float32)))
class MatMulCPP(ExportNodeCpp):
@ExportLibCpp.register("Transpose", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.any)))
class TransposeCPP(ExportNodeCpp):
def __init__(self, node, mem_info):
super().__init__(node, mem_info)
self.attributes["activation"] = "Linear"
self.attributes["rescaling"] = "NoScaling"
self.config_template = str(
ROOT / "templates" / "configuration" / "matmul_config.jinja")
ROOT / "templates" / "configuration" / "transpose_ND_config.jinja")
self.forward_template = str(
ROOT / "templates" / "kernel_forward" / "matmul_forward.jinja")
ROOT / "templates" / "kernel_forward" / "transpose_ND_forward.jinja")
self.include_list = []
self.kernels_to_copy = [
str(ROOT / "kernels" / "matmul.hpp"),
str(ROOT / "kernels" / "activation.hpp"),
str(ROOT / "kernels" / "rescaling.hpp")
str(ROOT / "kernels" / "transpose.hpp")
]
@ExportLibCpp.register("Erf", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.float32)))
class ErfCPP(ExportNodeCpp):
    def __init__(self, node, mem_info):
@@ -331,26 +351,6 @@ class ErfCPP(ExportNodeCpp):
            str(ROOT / "kernels" / "rescaling.hpp")
        ]

@ExportLibCpp.register("Transpose", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.float32)))
class TransposeCPP(ExportNodeCpp):
def __init__(self, node, mem_info):
super().__init__(node, mem_info)
# Get parameter permutation from transpose
self.attributes["output_dims_order"] = self.operator.attr.get_attr("output_dims_order")
self.attributes["activation"] = "Linear"
self.attributes["rescaling"] = "NoScaling"
self.config_template = str(
ROOT / "templates" / "configuration" / "transpose_config.jinja")
self.forward_template = str(
ROOT / "templates" / "kernel_forward" / "transpose_forward.jinja")
self.include_list = []
self.kernels_to_copy = [
str(ROOT / "kernels" / "transpose.hpp"),
str(ROOT / "kernels" / "activation.hpp"),
str(ROOT / "kernels" / "rescaling.hpp")
]
@ExportLibCpp.register("BatchNorm2D", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.float32)))
class BatchNorm2DCPP(ExportNodeCpp):
    def __init__(self, node, mem_info):
@@ -366,16 +366,4 @@ class BatchNorm2DCPP(ExportNodeCpp):
            str(ROOT / "kernels" / "batchnorm.hpp"),
            str(ROOT / "kernels" / "activation.hpp"),
            str(ROOT / "kernels" / "rescaling.hpp")
        ]

@ExportLibCpp.register("Transpose", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.any)))
class TransposeCPP(ExportNodeCpp):
def __init__(self, node, mem_info):
super().__init__(node, mem_info)
self.config_template = str(
ROOT / "templates" / "configuration" / "transpose_ND_config.jinja")
self.forward_template = str(
ROOT / "templates" / "kernel_forward" / "transpose_ND_forward.jinja")
self.include_list = []
self.kernels_to_copy = [
str(ROOT / "kernels" / "transpose.hpp")
]
@@ -23,5 +23,4 @@ static const {{ rescaling }} {{ name|upper }}_RESCALING = {};
#define {{ name|upper }}_WEIGHTS_SIZE {{ weights_size }}
#define {{ name|upper }}_BIASES_SIZE {{ out_chan[0] }}
#endif /* {{ name|upper }}_LAYER_H */
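For orientation, the tail of a rendered configuration header, say for a hypothetical 128-channel depthwise 3x3 layer named conv1, would look roughly like this (names and values are illustrative, and only the portion of the template shown above is rendered):

// Hypothetical rendering of the excerpt above (layer "conv1", 128-ch depthwise 3x3):
static const NoScaling CONV1_RESCALING = {};
#define CONV1_WEIGHTS_SIZE 1152   // {{ weights_size }}: 128 * 1 * 3 * 3
#define CONV1_BIASES_SIZE 128     // {{ out_chan[0] }}
#endif /* CONV1_LAYER_H */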
@@ -19,4 +19,4 @@ convolution_forward<{{ in_name[0]|upper }}_NB_CHANNELS,
({{in_name[0]}}, {{out_name[0]}}, {{in_name[1]}}, {{in_name[2]}}, {{name|upper}}_RESCALING);
{% include "./_save_outputs.jinja" %}
{% endfilter %}
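Rendered for a concrete layer, the forward template emits a call of roughly this shape (identifiers are invented for the illustration; the real names are substituted from name, in_name and out_name, and only the first and last lines of the template are shown above, so the middle template arguments are elided):

// Hypothetical rendering of the generated forward call:
convolution_forward<CONV1_INPUT_NB_CHANNELS /* , ...remaining generated parameters... */>
    (conv1_input, conv1_output, conv1_weights, conv1_biases, CONV1_RESCALING);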