Commit c9e1336b authored by Cyril Moineau

Merge branch 'dev' into 'main'

0.1.1

See merge request !9
parents ed0ceba3 ab2a3549
Showing with 965 additions and 452 deletions
r"""
Aidge Export for CPP standalone projects
"""
import os
import re
import shutil
from collections import defaultdict

import aidge_core

from . import register
from . import operators
from .register import *
from .operators import *
from aidge_export_cpp.utils import ROOT

__version__ = open(ROOT / "version.txt", "r").read().strip()


def get_functions_from_c_file(file_path):
    functions = []
    pattern = r'\w+\s+(\w+)\s*\(([^)]*)\)\s*{'
    with open(file_path, 'r') as file:
        file_content = file.read()

    matches = re.findall(pattern, file_content)
    for match in matches:
        function_name = match[0]
        arguments = match[1].split(',')
        arguments = [arg.strip() for arg in arguments]

        return_type = get_return_type(file_content, function_name)
        function_string = f"{return_type} {function_name}({', '.join(arguments)});"
        functions.append(function_string)

    return functions


def get_return_type(file_content, function_name):
    pattern = rf'\w+\s+{function_name}\s*\([^)]*\)\s*{{'
    return_type = re.search(pattern, file_content).group()
    return_type = return_type.split()[0].strip()
    return return_type
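
# Quick illustration of the regex-based prototype extraction above; the C snippet and
# the temporary file are made up for the example.
import tempfile

_sample = 'void forward(const float* inputs, float* outputs) {\n    // ...\n}\n'
with tempfile.NamedTemporaryFile("w", suffix=".cpp", delete=False) as _tmp:
    _tmp.write(_sample)

print(get_functions_from_c_file(_tmp.name))
# Expected: ['void forward(const float* inputs, float* outputs);']
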

def export(export_folder, graphview, scheduler):

    os.makedirs(export_folder, exist_ok=True)
    dnn_folder = export_folder + "/dnn"
    os.makedirs(dnn_folder, exist_ok=True)

    list_actions = []
    list_configs = []
    list_forward_nodes = [i for i in scheduler.get_static_scheduling() if i.type() != "Producer"]
    list_op = {}

    for node in graphview.get_nodes():
        if node.type() in supported_operators():
            op = EXPORT_CPP_REGISTRY[node.type()](node)
            list_op[node.name()] = op
        else:
            continue
        list_configs = op.export(dnn_folder, list_configs)

    for node in list_forward_nodes:
        list_actions = list_op[node.name()].forward(list_actions)

    # Memory management
    mem_offsets = []
    mem_size = 0
    for i, node in enumerate(list_forward_nodes):
        if i != len(list_forward_nodes) - 1:
            mem_offsets.append(f"{node.name().upper()}_OFFSET {mem_size}")
            dims = node.get_operator().get_output(0).dims()
            mem = 1
            for dim in dims:
                mem *= dim
            mem_size += mem

    # Generate the memory file
    generate_file(
        f"{dnn_folder}/memory/mem_info.h",
        dirpath + "/templates/memory/mem_info.jinja",
        mem_size=mem_size,
        offsets=mem_offsets
    )
    list_configs.append("memory/mem_info.h")

    generate_file(
        f"{dnn_folder}/src/forward.cpp",
        dirpath + "/templates/network/network_forward.jinja",
        headers=list_configs,
        actions=list_actions,
        input_t="float",
        inputs=(list_forward_nodes[0].name() + "_input" if list_forward_nodes[0].get_parents()[0] is None else list_forward_nodes[0].get_parents()[0].name()),
        output_t="float",
        outputs=list_forward_nodes[-1].name()
    )

    generate_file(
        f"{dnn_folder}/include/dnn.hpp",
        dirpath + "/templates/network/dnn_header.jinja",
        libraries=[],
        functions=get_functions_from_c_file(f"{dnn_folder}/src/forward.cpp"),
    )

    # Copy all static files into the export
    shutil.copy(dirpath + "/static/main.cpp", export_folder)
    shutil.copy(dirpath + "/static/Makefile", export_folder)
    shutil.copytree(dirpath + "/static/include", dnn_folder + "/include/", dirs_exist_ok=True)
from .export import *
import re
import os
from pathlib import Path
import shutil
import numpy as np
from typing import List, Union
from jinja2 import Environment, FileSystemLoader
import aidge_core
from aidge_core.export.code_generation import *
from aidge_export_cpp.utils import (ROOT, OPERATORS_REGISTRY, supported_operators)
from aidge_export_cpp.utils.converter import aidge_datatype2ctype, numpy_dtype2ctype
import aidge_export_cpp.operators
from aidge_export_cpp.utils.generation import *
from aidge_export_cpp.memory import *
def generate_input_file(export_folder: str,
                        array_name: str,
                        array: np.ndarray):
    # If the directory doesn't exist, create it
    if not os.path.exists(export_folder):
        os.makedirs(export_folder)

    generate_file(
        file_path=f"{export_folder}/{array_name}.h",
        template_path=str(ROOT / "templates" / "data" / "inputs.jinja"),
        dims=array.shape,
        data_t=numpy_dtype2ctype(array.dtype),
        name=array_name,
        values=array.tolist()
    )
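
# Minimal usage sketch of generate_input_file(); the folder and array name below are
# made up for the example.
dummy_input = np.random.rand(1, 3, 32, 32).astype(np.float32)
generate_input_file(export_folder="export/dnn/inputs",
                    array_name="conv1_input",
                    array=dummy_input)
# -> writes export/dnn/inputs/conv1_input.h holding the values as a C array
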

def export(export_folder_name, graphview, scheduler):

    export_folder = Path().absolute() / export_folder_name
    os.makedirs(str(export_folder), exist_ok=True)

    dnn_folder = export_folder / "dnn"
    os.makedirs(str(dnn_folder), exist_ok=True)

    list_actions = []
    list_configs = []
    list_forward_nodes = scheduler.get_static_scheduling()

    for node in list_forward_nodes:
        if node.type() in supported_operators():
            op = OPERATORS_REGISTRY[node.type()](node)

            # For configuration files
            list_configs = op.export(dnn_folder, list_configs)

            # For forward file
            list_actions = op.forward(list_actions)

    # Memory management
    mem_size, mem_info = compute_default_mem_info(scheduler)

    # Generate the memory file
    generate_file(
        str(dnn_folder / "memory" / "mem_info.h"),
        str(ROOT / "templates" / "memory" / "mem_info.jinja"),
        mem_size=mem_size,
        mem_info_legends=MEMORY_INFO_TEMPLATE,
        mem_info=mem_info
    )
    list_configs.append("memory/mem_info.h")

    # Get entry nodes
    # This assumes the entry nodes are Producers with constant=False
    # Store their datatype & name
    list_inputs_name = []
    for node in graphview.get_nodes():
        if node.type() == "Producer":
            if not node.get_operator().get_attr("Constant"):
                export_type = aidge_datatype2ctype(node.get_operator().get_output(0).dtype())
                list_inputs_name.append((export_type, node.name()))

    # Get output nodes
    # Store their datatype & name, like the entry nodes
    list_outputs_name = []
    for node in graphview.get_nodes():
        if len(node.get_children()) == 0:
            export_type = aidge_datatype2ctype(node.get_operator().get_output(0).dtype())
            list_outputs_name.append((export_type, node.name()))

    # Generate forward file
    generate_file(
        str(dnn_folder / "src" / "forward.cpp"),
        str(ROOT / "templates" / "network" / "network_forward.jinja"),
        headers=list_configs,
        actions=list_actions,
        inputs=list_inputs_name,
        outputs=list_outputs_name
    )

    # Generate dnn API
    generate_file(
        str(dnn_folder / "include" / "dnn.hpp"),
        str(ROOT / "templates" / "network" / "dnn_header.jinja"),
        libraries=[],
        functions=get_functions_from_c_file(str(dnn_folder / "src" / "forward.cpp")),
    )

    # Copy all static files into the export
    shutil.copy(str(ROOT / "static" / "main.cpp"), str(export_folder))
    shutil.copy(str(ROOT / "static" / "Makefile"), str(export_folder))
    shutil.copytree(str(ROOT / "static" / "include"), str(dnn_folder / "include"), dirs_exist_ok=True)
\ No newline at end of file
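
# Typical way to drive the export() entry point above. This is a sketch under
# assumptions: the ONNX loader, scheduler API names and model file are illustrative
# and may differ between aidge versions.
import aidge_core
import aidge_onnx
import aidge_export_cpp

model = aidge_onnx.load_onnx("model.onnx")          # hypothetical model file
model.forward_dims()                                # dims must be propagated first
scheduler = aidge_core.SequentialScheduler(model)
scheduler.generate_scheduling()
aidge_export_cpp.export("my_export", model, scheduler)
# -> my_export/ contains dnn/, main.cpp and a Makefile ready to build
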
......@@ -9,7 +9,7 @@
template<typename Output_T, typename T,
typename std::enable_if<std::is_floating_point<T>::value>::type* = nullptr>
__attribute__((always_inline)) inline
Output_T saturate (T value, int32_t sat)
Output_T saturate (T value, int32_t /*sat*/)
{
return value;
}
......@@ -49,6 +49,10 @@ Output_T activation_forward_value (Sum_T weightedSum,
break;
}
// The value is fixed here for now, but it should be generated by
// the export module or derived from the type of Output_T.
// For now this only works for int8_t and uint8_t.
const uint32_t NB_BITS = 8;
return saturate<Output_T>(rescaling(weightedSum, output), NB_BITS);
}
......
......@@ -6,8 +6,7 @@
// Generic function for two inputs
template<int NB_OUTPUTS,
int OUTPUTS_HEIGHT, int OUTPUTS_WIDTH,
template<int NB_ELTS,
ElemWise_T ELEM_OP,
ActivationFunction_T ACTIVATION,
typename Input_T, typename Output_T,
......@@ -19,23 +18,28 @@ void elemwise_forward (
const Input_T* __restrict inputs1,
const Input_T* __restrict inputs2)
{
const int size = NB_OUTPUTS*OUTPUTS_WIDTH*OUTPUTS_HEIGHT;
if (std::is_floating_point<Input_T>::value)
{
Input_T val = 0;
switch (ELEM_OP) {
case Sum: {
for (int i = 0; i < size; ++i) {
case Add: {
for (int i = 0; i < NB_ELTS; ++i) {
val = inputs1[i] + inputs2[i];
outputs[i] = activation_forward_value<Output_T>(val, i, ACTIVATION, rescaling);
}
break;
}
case Sub: {
for (int i = 0; i < NB_ELTS; ++i) {
val = inputs1[i] - inputs2[i];
outputs[i] = activation_forward_value<Output_T>(val, i, ACTIVATION, rescaling);
}
break;
}
case Mul: {
for (int i = 0; i < size; ++i) {
for (int i = 0; i < NB_ELTS; ++i) {
val = inputs1[i] * inputs2[i];
outputs[i] = activation_forward_value<Output_T>(val, i, ACTIVATION, rescaling);
}
......@@ -43,7 +47,7 @@ void elemwise_forward (
}
default: {
// Copy inputs1 in outputs for default case
for (int i = 0; i < size; ++i) {
for (int i = 0; i < NB_ELTS; ++i) {
val = inputs1[i];
outputs[i] = activation_forward_value<Output_T>(val, i, ACTIVATION, rescaling);
}
......@@ -56,15 +60,22 @@ void elemwise_forward (
int32_t val = 0;
switch (ELEM_OP) {
case Sum: {
for (int i = 0; i < size; ++i) {
case Add: {
for (int i = 0; i < NB_ELTS; ++i) {
val = inputs1[i] + inputs2[i];
outputs[i] = activation_forward_value<Output_T>(val, i, ACTIVATION, rescaling);
}
break;
}
case Sub: {
for (int i = 0; i < NB_ELTS; ++i) {
val = inputs1[i] - inputs2[i];
outputs[i] = activation_forward_value<Output_T>(val, i, ACTIVATION, rescaling);
}
break;
}
case Mul: {
for (int i = 0; i < size; ++i) {
for (int i = 0; i < NB_ELTS; ++i) {
val = inputs1[i] * inputs2[i];
outputs[i] = activation_forward_value<Output_T>(val, i, ACTIVATION, rescaling);
}
......@@ -72,7 +83,7 @@ void elemwise_forward (
}
default: {
// Copy inputs1 in outputs for default case
for (int i = 0; i < size; ++i) {
for (int i = 0; i < NB_ELTS; ++i) {
val = inputs1[i];
outputs[i] = activation_forward_value<Output_T>(val, i, ACTIVATION, rescaling);
}
......@@ -114,7 +125,7 @@ void elemwise_forward (
// template<// For all inputs
// int NB_CHANNELS,
// int CHANNELS_HEIGHT, int CHANNELS_WIDTH,
// int NB_OUTPUTS,
// int NB_ELTS,
// int OUTPUTS_HEIGHT, int OUTPUTS_WIDTH,
// ElemWise_T ELEM_OP,
// ActivationFunction_T ACTIVATION,
......@@ -134,15 +145,15 @@ void elemwise_forward (
// for (int oy = 0; oy < OUTPUTS_HEIGHT; oy++) {
// for (int ox = 0; ox < OUTPUTS_WIDTH; ox++) {
// const int pos = (ox + OUTPUTS_WIDTH * oy);
// int oOffset = NB_OUTPUTS * pos;
// int oOffset = NB_ELTS * pos;
// for (int ch = 0; ch < NB_OUTPUTS; ++ch) {
// const SUM_T val = elemWise<ELEM_OP,
// for (int ch = 0; ch < NB_ELTS; ++ch) {
// const Add_T val = elemWise<ELEM_OP,
// INPUT_NB_CHANNELS,
// INPUT_MEM_CONT_OFFSET,
// INPUT_MEM_CONT_SIZE,
// INPUT_MEM_CONT_NB_ELTS,
// INPUT_MEM_WRAP_OFFSET,
// INPUT_MEM_WRAP_SIZE,
// INPUT_MEM_WRAP_NB_ELTS,
// INPUT_MEM_STRIDE,
// ARGS...>(pos, ch, firstInputs, inputs...);
......@@ -157,4 +168,4 @@ void elemwise_forward (
#endif // __AIDGE_EXPORT_CPP_KERNELS_ELEMWISE__
\ No newline at end of file
#endif // __AIDGE_EXPORT_CPP_KERNELS_ELEMWISE__
......@@ -23,7 +23,28 @@ void fullyconnected_forward (
const Bias_T* __restrict biases,
const Rescaling_T& __restrict rescaling)
{
// Warning: there is a trick here!
// To use this kernel, the inputs have to be in NHWC while the weights are in NCHW.
// This is only an issue if the FC comes right after a Flatten layer;
// otherwise CHANNELS_WIDTH = CHANNELS_HEIGHT = 1 and the two layouts coincide.
// Solution: add a system to check the data format.
for (int och = 0; och < NB_OUTPUTS; och++) {
Bias_T weightedSum = biases[och];
for (int iy = 0; iy < CHANNELS_HEIGHT; ++iy) {
for (int ix = 0; ix < CHANNELS_WIDTH; ++ix) {
for (int ch = 0; ch < NB_CHANNELS; ++ch) {
weightedSum += inputs[CHANNELS_WIDTH*NB_CHANNELS*iy + NB_CHANNELS*ix + ch]
* weights[CHANNELS_HEIGHT*CHANNELS_WIDTH*NB_CHANNELS*och + CHANNELS_HEIGHT*CHANNELS_WIDTH*ch + CHANNELS_HEIGHT*iy + ix];
}
}
}
outputs[och] = activation_forward_value<Output_T>(weightedSum, och, ACTIVATION, rescaling);
}
/*
Here the kernel to use with inputs in NHWC and weights in NHWC
#pragma omp parallel for
for (int och = 0; och < NB_OUTPUTS; och++) {
......@@ -44,6 +65,7 @@ void fullyconnected_forward (
outputs[och] = activation_forward_value<Output_T>(weightedSum, och, ACTIVATION, rescaling);
}
*/
}
......
......@@ -86,7 +86,7 @@ void pooling_forward(
outputs[oOffset + output] = maxVal;
}
else if (POOLING_TYPE == Average) {
SUM_T sum = 0;
int32_t sum = 0;
for (int sy = 0; sy < POOL_HEIGHT; ++sy) {
if ((PADDING_Y != 0
......
import aidge_core
import aidge_backend_cpu
from typing import List
# For each layer, name: [size, offset start]
# Example:
#define ENV_MEM_SIZE 3
#define ENV_OFFSET 0
MEMORY_INFO_TEMPLATE = ["layer_name", "size", "offset"]


# Default memory management, which can be used for development
def compute_default_mem_info(scheduler: aidge_core.Scheduler):

    list_forward_nodes = scheduler.get_static_scheduling()
    mem_info = []
    mem_size = 0

    # Exclude Producers and the last layers (their results are stored outside the export)
    for i, node in enumerate(list_forward_nodes):
        if node.type() != "Producer":
            if len(node.get_children()) != 0:
                dims = node.get_operator().get_output(0).dims()
                mem = 1
                for dim in dims:
                    mem *= dim

                # Add memory info
                mem_info.append([node.name(), mem, mem_size])

                # Increment offset for the next layer
                mem_size += mem

    return mem_size, mem_info
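
# Toy sketch of the same accumulation logic; layer names and dims are made up for
# illustration (the last layer writes its result outside the export, so it gets no slot).
fake_schedule = [("conv1", (1, 8, 4, 4)), ("relu1", (1, 8, 4, 4)), ("fc1", (1, 10))]
_mem_info, _mem_size = [], 0
for _name, _dims in fake_schedule[:-1]:
    _nb_elts = 1
    for _d in _dims:
        _nb_elts *= _d
    _mem_info.append([_name, _nb_elts, _mem_size])
    _mem_size += _nb_elts
print(_mem_size, _mem_info)
# 256 [['conv1', 128, 0], ['relu1', 128, 128]]
# which mem_info.jinja then renders as #define CONV1_SIZE / #define CONV1_OFFSET, etc.
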

def generate_optimized_memory_info(scheduler: aidge_core.Scheduler,
                                   wrapping: bool = False):
    # The forward dims have to be computed outside of this function
    # Generate the memory manager
    mem_manager = scheduler.generate_memory(inc_producers=False, wrap_around_buffer=wrapping)

    mem_size = 0
    mem_info = []
    return mem_size, mem_info
\ No newline at end of file
from aidge_core import ExportNode
import numpy as np
from jinja2 import Environment, FileSystemLoader
import os
import shutil
from aidge_export_cpp.register import export_cpp_register
dirpath = os.path.dirname(__file__)
class KERNELS:
    ACTIVATION = dirpath + "/kernels/activation.hpp"
    BATCHNORM = dirpath + "/kernels/batchnorm.hpp"
    CONV = dirpath + "/kernels/convolution.hpp"
    ADD = dirpath + "/kernels/elemwise.hpp"
    FC = dirpath + "/kernels/fullyconnected.hpp"
    POOLING = dirpath + "/kernels/pooling.hpp"
    LEAKYRELU = dirpath + "/kernels/leakyrelu.hpp"


class KERNELS_FORWARD:
    ACTIVATION = dirpath + "/templates/kernel_forward/activation_forward.jinja"
    BATCHNORM = dirpath + "/templates/kernel_forward/batchnorm_forward.jinja"
    CONV = dirpath + "/templates/kernel_forward/convolution_forward.jinja"
    ADD = dirpath + "/templates/kernel_forward/elemwise_forward.jinja"
    FC = dirpath + "/templates/kernel_forward/fullyconnected_forward.jinja"
    POOLING = dirpath + "/templates/kernel_forward/pooling_forward.jinja"
    LEAKYRELU = dirpath + "/templates/kernel_forward/leakyrelu_forward.jinja"


class CONFIGURATIONS:
    ACTIVATION = dirpath + "/templates/configuration/activation_config.jinja"
    BATCHNORM = dirpath + "/templates/configuration/batchnorm_config.jinja"
    CONV = dirpath + "/templates/configuration/convolution_config.jinja"
    ADD = dirpath + "/templates/configuration/elemwise_config.jinja"
    FC = dirpath + "/templates/configuration/fullyconnected_config.jinja"
    POOLING = dirpath + "/templates/configuration/pooling_config.jinja"
    LEAKYRELU = dirpath + "/templates/configuration/leakyrelu_config.jinja"
##############################################
############## Export functions ##############
##############################################
def generate_file(filename, templatename, **kwargs):

    # Get directory name of the file
    dirname = os.path.dirname(filename)

    # If directory doesn't exist, create it
    if not os.path.exists(dirname):
        os.makedirs(dirname)

    # Get directory name and name of the template
    template_dir = os.path.dirname(templatename)
    template_name = os.path.basename(templatename)

    # Select template
    template = Environment(loader=FileSystemLoader(template_dir)).get_template(template_name)

    # Generate file
    content = template.render(kwargs)
    with open(filename, mode="w", encoding="utf-8") as message:
        message.write(content)
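
# Minimal usage sketch of generate_file() with a throwaway template; the paths and
# template content below are made up for the example.
with open("/tmp/hello.jinja", "w") as f:
    f.write("#define {{ name|upper }}_NB_ELTS {{ nb_elts }}\n")

generate_file("/tmp/out/hello.h", "/tmp/hello.jinja", name="conv1", nb_elts=128)
# /tmp/out/hello.h now contains: #define CONV1_NB_ELTS 128
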

def generate_action(template_path, **kwargs):
    dirname = os.path.dirname(template_path)
    filename = os.path.basename(template_path)
    template = Environment(loader=FileSystemLoader(dirname)).get_template(filename)
    return template.render(kwargs)


def copyfile(filename, dst_folder):

    # If directory doesn't exist, create it
    if not os.path.exists(dst_folder):
        os.makedirs(dst_folder)

    shutil.copy(filename, dst_folder)


def export_to_static(name, array, filepath):

    # Get directory name of the file
    dirname = os.path.dirname(filepath)

    # If directory doesn't exist, create it
    if not os.path.exists(dirname):
        os.makedirs(dirname)

    generate_file(
        filepath,
        dirpath + "/templates/data/data_static.jinja",
        dims=array.shape,
        data_t="float",
        name=name,
        values=array.tolist()
    )
##############################################
################### Utils ####################
##############################################
def get_node_parents(node):
    parents = []
    for parent in node.get_parents():
        if parent.type() != "Producer":
            parents.append(parent)
    return parents


def get_producer_parents(node):
    parents = []
    for parent in node.get_parents():
        if parent.type() == "Producer":
            parents.append(parent)
    return parents
##############################################
################### Actions ##################
##############################################
def set_up_output(name, datatype):
    return f"{datatype}* {name} = ({datatype}*) mem + {name.upper()}_OFFSET;"
##############################################
############## Operators helper ##############
##############################################
@export_cpp_register("Conv")
class ConvCPP(ExportNode):
def __init__(self, node):
super().__init__(node)
self.kernel = node.get_operator().get_attr("KernelDims")
self.stride = node.get_operator().get_attr("StrideDims")
# Not working anymore because Padding is a standalone operator
# self.padding = node.get_operator().get_attr("PaddingDims")
self.padding = [1, 1]
self.dilation = node.get_operator().get_attr("DilationDims")
self.nb_channels = node.get_operator().get_attr("InChannels")
self.nb_outputs = node.get_operator().get_attr("OutChannels")
def export(self, export_folder:str, list_configs:list):
copyfile(KERNELS.CONV, f"{export_folder}/include/kernels/")
copyfile(dirpath + "/kernels/macs.hpp", f"{export_folder}/include/kernels/")
copyfile(dirpath + "/kernels/activation.hpp", f"{export_folder}/include/kernels/")
list_configs.append("kernels/convolution.hpp")
list_configs.append(f"layers/{self.name}.h")
generate_file(
f"{export_folder}/layers/{self.name}.h",
CONFIGURATIONS.CONV,
name=self.name,
input_dims=self.inputs_dims[0][1:],
output_dims=self.outputs_dims[0][1:],
kernel=self.kernel,
stride=self.stride,
padding=self.padding,
dilation=self.dilation,
activation="Linear",
rescaling="NoScaling")
return list_configs
def forward(self, list_actions:list):
if not self.is_last:
list_actions.append(set_up_output(self.name, "float"))
list_actions.append(generate_action(
KERNELS_FORWARD.CONV,
name=self.name,
input_name=self.inputs[0].name(),
output_name=self.name,
weights_name=self.inputs[1].name(),
biases_name=self.inputs[2].name()
))
return list_actions
class BatchNormCPP:
def __init__(self, node):
self.name = node.name()
self.epsilon = node.get_operator().get_attr("Epsilon")
self.producers = get_producer_parents(node)
self.scales = np.array(self.producers[0].get_operator().get_output(0)).reshape(-1).tolist()
self.biases = np.array(self.producers[1].get_operator().get_output(0)).reshape(-1).tolist()
self.means = np.array(self.producers[2].get_operator().get_output(0)).reshape(-1).tolist()
self.vars = np.array(self.producers[3].get_operator().get_output(0)).reshape(-1).tolist()
parents = get_node_parents(node)
if len(parents) == 0:
self.input_name = "in"
else :
self.input_name = parents[0].name()
def export(self, export_folder:str, list_configs:list):
list_configs.append(f"layers/{self.name}.h")
generate_file(
f"{export_folder}/layers/{self.name}.h",
"tensorrt/templates/configuration/batchnorm_config.jinja",
name=self.name,
input_dims=[0, 0, 0],
output_dims=[0, 0, 0],
activation="Linear",
epsilon=self.epsilon)
# export the batchnorm parameters
return list_configs
def forward(self, list_actions:list):
list_actions.append(set_up_output(self.name, "float"))
list_actions.append(generate_action(
"cpp/templates/kernel_forward/batchnorm_forward.jinja",
name=self.name,
input_name=self.input_name,
output_name=self.name,
biases_name=self.producers[0].name(),
variances_name=self.producers[1].name(),
means_name=self.producers[2].name(),
scales_name=self.producers[3].name()
))
return list_actions
@export_cpp_register("ReLU")
class ReLUCPP(ExportNode):
def __init__(self, node):
super().__init__(node)
self.nb_data = 1
for i in self.inputs_dims[0]:
self.nb_data *= i
def export(self, export_folder:str, list_configs:list):
copyfile(KERNELS.ACTIVATION, f"{export_folder}/include/kernels/")
list_configs.append("kernels/activation.hpp")
list_configs.append(f"layers/{self.name}.h")
generate_file(
f"{export_folder}/layers/{self.name}.h",
CONFIGURATIONS.ACTIVATION,
name=self.name,
nb_data=self.nb_data,
activation="Rectifier",
rescaling="NoScaling")
return list_configs
def forward(self, list_actions:list):
if not self.is_last:
list_actions.append(set_up_output(self.name, "float"))
list_actions.append(generate_action(
KERNELS_FORWARD.ACTIVATION,
name=self.name,
input_name=self.inputs[0].name(),
output_name=self.name
))
return list_actions
@export_cpp_register("LeakyReLU")
class LeakyReLUCPP(ExportNode):
def __init__(self, node):
super().__init__(node)
self.alpha = node.get_operator().get_attr("NegativeSlope")
self.nb_data = 1
for i in self.inputs_dims[0]:
self.nb_data *= i
def export(self, export_folder:str, list_configs:list):
copyfile(KERNELS.LEAKYRELU, f"{export_folder}/include/kernels/")
list_configs.append("kernels/activation.hpp")
list_configs.append(f"layers/{self.name}.h")
generate_file(
f"{export_folder}/layers/{self.name}.h",
CONFIGURATIONS.LEAKYRELU,
name=self.name,
nb_data=self.nb_data,
alpha = self.alpha)
return list_configs
def forward(self, list_actions:list):
if not self.is_last:
list_actions.append(set_up_output(self.name, "float"))
list_actions.append(generate_action(
KERNELS_FORWARD.LEAKYRELU,
name=self.name,
input_name=self.inputs[0].name(),
output_name=self.name
))
return list_actions
class AddCPP:
def __init__(self, node):
self.name = node.name()
self.parents = get_node_parents(node)
def export(self, export_folder:str, list_configs:list):
list_configs.append(f"layers/{self.name}.h")
generate_file(
f"{export_folder}/layers/{self.name}.h",
CONFIGURATIONS.ADD,
name=self.name,
input_dims=[0, 0, 0],
output_dims=[0, 0, 0],
activation="Linear",
elemwise_op="Sum")
return list_configs
def forward(self, list_actions:list):
list_actions.append(set_up_output(self.name, "float"))
list_actions.append(generate_action(
"cpp/templates/kernel_forward/elemwise_forward.jinja",
name=self.name,
input1_name=self.parents[0].name(),
input2_name=self.parents[1].name(),
output_name=self.name
))
return list_actions
@export_cpp_register("MaxPooling")
class MaxPoolCPP(ExportNode):
def __init__(self, node):
super().__init__(node)
self.kernel = node.get_operator().get_attr("KernelDims")
self.stride = node.get_operator().get_attr("StrideDims")
# Not supported by the core...
# self.padding = node.get_operator().get_attr("PaddingDims")
self.padding = [0, 0]
def export(self, export_folder:str, list_configs:list):
copyfile(KERNELS.POOLING, f"{export_folder}/include/kernels/")
list_configs.append("kernels/pooling.hpp")
list_configs.append(f"layers/{self.name}.h")
generate_file(
f"{export_folder}/layers/{self.name}.h",
CONFIGURATIONS.POOLING,
name=self.name,
input_dims=self.inputs_dims[0],
output_dims=self.outputs_dims[0],
kernel=self.kernel,
stride=self.stride,
padding=self.padding,
pool_type="Max",
activation="Linear")
return list_configs
def forward(self, list_actions:list):
if not self.is_last:
list_actions.append(set_up_output(self.name, "float"))
list_actions.append(generate_action(
KERNELS_FORWARD.POOLING,
name=self.name,
input_name=self.inputs[0].name(),
output_name=self.name
))
return list_actions
class GlobalAvgPoolCPP:
def __init__(self, node):
# node.get_operator().set_compute_output_dims(lambda x: [[x[0][0], x[0][1], 1, 1]])
pass
def export(self, export_folder:str, list_configs:list):
return list_configs
def forward(self, list_actions:list):
list_actions.append(set_up_output(self.name, "float"))
list_actions.append(generate_action(
"cpp/templates/kernel_forward/pooling_forward.jinja",
name=self.name,
input_name=self.input_name,
output_name=self.name
))
return list_actions
@export_cpp_register("FC")
class FcCPP(ExportNode):
def __init__(self, node):
super().__init__(node)
if len(self.inputs_dims[0]) == 2:
self.inputs_dims[0] = [self.inputs_dims[0][1], 1, 1]
elif len(self.inputs_dims[0]) == 4:
self.inputs_dims[0] = self.inputs_dims[0][1:]
if len(self.outputs_dims[0]) == 2:
self.outputs_dims[0] = [self.outputs_dims[0][1], 1, 1]
def export(self, export_folder:str, list_configs:list):
copyfile(KERNELS.FC, f"{export_folder}/include/kernels/")
copyfile(dirpath + "/kernels/macs.hpp", f"{export_folder}/include/kernels/")
copyfile(dirpath + "/kernels/activation.hpp", f"{export_folder}/include/kernels/")
list_configs.append("kernels/fullyconnected.hpp")
list_configs.append(f"layers/{self.name}.h")
generate_file(
f"{export_folder}/layers/{self.name}.h",
CONFIGURATIONS.FC,
name=self.name,
input_dims=self.inputs_dims[0],
output_dims=self.outputs_dims[0],
activation="Linear",
rescaling="NoScaling")
return list_configs
def forward(self, list_actions:list):
if not self.is_last:
list_actions.append(set_up_output(self.name, "float"))
list_actions.append(generate_action(
KERNELS_FORWARD.FC,
name=self.name,
input_name=self.inputs[0].name() if self.inputs[0] else self.name + "_input",
output_name=self.name,
weights_name=self.inputs[1].name(),
biases_name=self.inputs[2].name()
))
return list_actions
@export_cpp_register("Producer")
class ProducerCPP(ExportNode):
def __init__(self, node):
super().__init__(node)
self.values = np.array(self.operator.get_output(0))
def export(self, export_folder:str, list_configs:list):
list_configs.append(f"parameters/{self.name}.h")
export_to_static(self.name,
self.values.reshape(-1),
f"{export_folder}/parameters/{self.name}.h")
return list_configs
def forward(self, list_actions:list):
return list_actions
"""
Copyright (c) 2023 CEA-List
This program and the accompanying materials are made available under the
terms of the Eclipse Public License 2.0 which is available at
http://www.eclipse.org/legal/epl-2.0.
SPDX-License-Identifier: EPL-2.0
"""
EXPORT_CPP_REGISTRY = {}

def export_cpp_register(*args):
    key_list = [arg for arg in args]

    def decorator(operator):
        def wrapper(*args, **kwargs):
            return operator(*args, **kwargs)

        for key in key_list:
            EXPORT_CPP_REGISTRY[key] = operator

        return wrapper
    return decorator


def supported_operators():
    return list(EXPORT_CPP_REGISTRY.keys())
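
# Usage sketch: registering a hypothetical operator with the decorator above.
# Any node whose type() matches a registered key is handled by the export
# (the real operators in operators.py subclass aidge_core.ExportNode).
@export_cpp_register("MyCustomOp")
class MyCustomOpCPP:
    def __init__(self, node):
        self.name = node.name()

    def export(self, export_folder, list_configs):
        return list_configs

    def forward(self, list_actions):
        return list_actions

print(supported_operators())   # [..., 'MyCustomOp']
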
CC := g++
CCFLAGS := ${CCFLAGS} -O2 -Wall -Wextra -MMD
CCFLAGS := ${CCFLAGS} -O2 -Wall -Wextra -MMD -fopenmp
LDFLAGS := ${LDFLAGS} -fopenmp
OBJDIR := build
DNNDIR := dnn
BINDIR := bin
......
#ifndef __AIDGE_EXPORT_CPP_NETWORK_RESCALING__
#define __AIDGE_EXPORT_CPP_NETWORK_RESCALING__
// For this demo
#define SUM_T float
#define NB_BITS -32
struct NoScaling {
SUM_T operator()(SUM_T weightedSum, unsigned int /*output*/) const {
template<typename Sum_T>
Sum_T operator()(Sum_T weightedSum, unsigned int /*output*/) const
{
return weightedSum;
}
};
......
......@@ -17,7 +17,8 @@ typedef enum {
} Pooling_T;
typedef enum {
Sum,
Add,
Sub,
Mul
} ElemWise_T;
......
......@@ -5,6 +5,8 @@
int main()
{
// Example for MNIST dataset
// Feel free to change this file for your own projects
const unsigned int nb_classes = 10;
float results[nb_classes];
......
......@@ -3,13 +3,8 @@
#define {{ name|upper }}_LAYER_H
{# For layer configuration -#}
#define {{ name|upper }}_NB_CHANNELS {{ input_dims[0] }}
#define {{ name|upper }}_CHANNELS_HEIGHT {{ input_dims[1] }}
#define {{ name|upper }}_CHANNELS_WIDTH {{ input_dims[2] }}
#define {{ name|upper }}_NB_OUTPUTS {{ output_dims[0] }}
#define {{ name|upper }}_OUTPUTS_HEIGHT {{ output_dims[1] }}
#define {{ name|upper }}_OUTPUTS_WIDTH {{ output_dims[2] }}
#define {{ name|upper }}_NB_ELTS {{ nb_elts }}
#define {{ name|upper }}_ACTIVATION {{ activation }}
#define {{ name|upper }}_ELEM_OP {{ elemwise_op }}
static const {{ rescaling }} {{ name|upper }}_RESCALING = {};
#endif /* {{ name|upper }}_LAYER_H */
......@@ -23,7 +23,7 @@ static const {{ data_t }} {{ name }}{{ dims_str }} __attribute__((section(".nn_d
{
{%- for j in range(dims[1]) -%}
{% for k in range(dims[2]) -%}
{%- if (j*(range(dims[2])|int) + k) > 5 -%}
{%- if (j*(range(dims[2])|int) + k) % 10 == 0 -%}
{{ values[i][j][k]|string + ",\n " }}
{%- else -%}
{{ values[i][j][k]|string + ", " }}
......@@ -37,7 +37,7 @@ static const {{ data_t }} {{ name }}{{ dims_str }} __attribute__((section(".nn_d
{%- if dims|length == 2 %}
{%- for i in range(dims[0]) %}
{%- for j in range(dims[1]) -%}
{%- if (i*(range(dims[1])|int) + j +1) % 5 == 0 -%}
{%- if (i*(range(dims[1])|int) + j +1) % 10 == 0 -%}
{{ values[i][j]|string + ",\n " }}
{%- else -%}
{{ values[i][j]|string + ", " }}
......@@ -48,7 +48,7 @@ static const {{ data_t }} {{ name }}{{ dims_str }} __attribute__((section(".nn_d
{%- if dims|length == 1 %}
{%- for i in range(dims[0]) %}
{%- if (i+1) % 5 == 0 -%}
{%- if (i+1) % 10 == 0 -%}
{{ values[i]|string + ",\n " }}
{%- else -%}
{{ values[i]|string + ", " }}
......
{#- For libraries -#}
#include <stdint.h>
{# Design header of the array -#}
static const {{ data_t }} {{ name }}[{{ values|length }}] __attribute__((section(".nn_data"))) =
{
{# For loop to add new elements -#}
{%- for i in range(values|length) %}
{#- Last value -#}
{%- if (i+1) == values|length -%}
{{ values[i]|string }}
{%- else -%}
{%- if (i+1) % 5 == 0 -%}
{{ values[i]|string + ",\n\t" }}
{%- else -%}
{{ values[i]|string + ", " }}
{%- endif -%}
{%- endif -%}
{%- endfor %}
};
elemwise_forward<{{name|upper}}_NB_OUTPUTS,
{{name|upper}}_OUTPUTS_HEIGHT,
{{name|upper}}_OUTPUTS_WIDTH,
elemwise_forward<{{name|upper}}_NB_ELTS,
{{name|upper}}_ELEM_OP,
{{name|upper}}_ACTIVATION>
({{output_name}}, {{name|upper}}_RESCALING, {{inputs1_name}}, {{inputs2_name}});
\ No newline at end of file
({{output_name}}, {{name|upper}}_RESCALING, {{inputs1_name}}, {{inputs2_name}});
......@@ -5,4 +5,4 @@ fullyconnected_forward<{{name|upper}}_NB_CHANNELS,
{{name|upper}}_OUTPUTS_HEIGHT,
{{name|upper}}_OUTPUTS_WIDTH,
{{name|upper}}_ACTIVATION>
({{input_name}}, {{output_name}}, {{weights_name}}, {{biases_name}}, {{name|upper}}_RESCALING);
\ No newline at end of file
({{inputs_name}}, {{outputs_name}}, {{weights_name}}, {{biases_name}}, {{name|upper}}_RESCALING);
\ No newline at end of file
......@@ -4,9 +4,13 @@
#define MEMORY_SIZE {{ mem_size }}
{% for offset in offsets %}
#define {{ offset }}
{% for i in range(mem_info|length) -%}
{%- set layer_name = mem_info[i][0] %}
/* {{layer_name}} memory */
{% for j in range(1, mem_info[i]|length) %}
#define {{ layer_name|upper }}_{{ mem_info_legends[j]|upper }} {{ mem_info[i][j] }}
{%- endfor %}
{% endfor %}
#endif /* MEM_INFO_H */
\ No newline at end of file