Commit fd867582 authored by Maxence Naud

Merge branch 'dev' into 'main'

v0.2.0

See merge request !27
parents 0d5c0581 05400e59
Tags v0.2.0
Showing 552 additions and 1179 deletions
# Version 0.2.0 (December 6, 2024)
# Version 0.0.1 (January 23, 2024)
Initial release
@@ -2,6 +2,7 @@ r"""
Aidge Export for CPP standalone projects
"""
from .export_registry import ExportLibCpp
from .operators import *
from collections import defaultdict
......
import re
import os
from pathlib import Path
import shutil
import numpy as np
from typing import List, Union
from jinja2 import Environment, FileSystemLoader
import aidge_core
from aidge_core.export_utils.code_generation import *
from aidge_export_cpp.utils import (ROOT, OPERATORS_REGISTRY, supported_operators)
from aidge_export_cpp.utils.converter import aidge_datatype2ctype, numpy_dtype2ctype
import aidge_export_cpp.operators
from aidge_core.mem_info import compute_default_mem_info
from aidge_export_cpp.utils import ROOT
from aidge_export_cpp.utils.converter import numpy_dtype2ctype
from aidge_export_cpp import ExportLibCpp
from aidge_export_cpp.utils.generation import *
from aidge_export_cpp.memory import *
# from aidge_export_cpp.memory import *
def generate_input_file(export_folder:str,
@@ -33,86 +32,100 @@ def generate_input_file(export_folder:str,
)
def export(export_folder_name, graphview, scheduler):
export_folder = Path().absolute() / export_folder_name
os.makedirs(str(export_folder), exist_ok=True)
dnn_folder = export_folder / "dnn"
os.makedirs(str(dnn_folder), exist_ok=True)
list_actions = []
list_configs = []
list_forward_nodes = scheduler.get_static_scheduling()
for node in list_forward_nodes:
if node.type() in supported_operators():
op = OPERATORS_REGISTRY[node.type()](node)
# For configuration files
list_configs = op.export(dnn_folder, list_configs)
# For forward file
list_actions = op.forward(list_actions)
# Memory management
mem_size, mem_info = compute_default_mem_info(scheduler)
# Generate the memory file
generate_file(
str(dnn_folder / "memory" / "mem_info.h"),
str(ROOT / "templates" / "memory" / "mem_info.jinja"),
mem_size = mem_size,
mem_info_legends = MEMORY_INFO_TEMPLATE,
mem_info = mem_info
)
list_configs.append("memory/mem_info.h")
# Get entry nodes
# It assumes the entry nodes are Producers with constant=false
# Store the datatype & name
list_inputs_name = []
for node in graphview.get_nodes():
if node.type() == "Producer":
if not node.get_operator().attr.constant:
export_type = aidge_datatype2ctype(node.get_operator().get_output(0).dtype())
list_inputs_name.append((export_type, node.name()))
# Get output nodes
# Store the datatype & name, like entry nodes
list_outputs_name = []
for node in graphview.get_nodes():
if len(node.get_children()) == 0:
export_type = aidge_datatype2ctype(node.get_operator().get_output(0).dtype())
list_outputs_name.append((export_type, node.name()))
# Generate forward file
# TODO: for now the same mem type is used for all intermediate results; this should change.
# Note that we may have all inputs constants, hence select output type
assert len(list_outputs_name) >= 1, f"TODO: requires some output to determine mem type"
mem_ctype = list_outputs_name[0][0]
generate_file(
str(dnn_folder / "src" / "forward.cpp"),
str(ROOT / "templates" / "network" / "network_forward.jinja"),
headers=list_configs,
actions=list_actions,
inputs= list_inputs_name,
outputs=list_outputs_name,
mem_ctype=mem_ctype,
)
# Generate dnn API
generate_file(
str(dnn_folder / "include" / "dnn.hpp"),
str(ROOT / "templates" / "network" / "dnn_header.jinja"),
libraries=[],
functions=get_functions_from_c_file(str(dnn_folder / "src" / "forward.cpp")),
def export(export_folder_name, graphview, scheduler, mem_wrapping=False):
aidge_core.export_utils.scheduler_export(
scheduler,
export_folder_name,
ExportLibCpp,
memory_manager=compute_default_mem_info
)
# Copy all static files in the export
shutil.copy(str(ROOT / "static" / "main.cpp"), str(export_folder))
shutil.copy(str(ROOT / "static" / "Makefile"), str(export_folder))
shutil.copytree(str(ROOT / "static" / "include"), str(dnn_folder / "include"), dirs_exist_ok=True)
# export_folder = Path().absolute() / export_folder_name
# os.makedirs(str(export_folder), exist_ok=True)
# dnn_folder = export_folder / "dnn"
# os.makedirs(str(dnn_folder), exist_ok=True)
# list_actions = []
# list_configs = []
# peak_mem, mem_info = compute_default_mem_info(scheduler)
# list_forward_nodes = scheduler.get_static_scheduling()
# for node in list_forward_nodes:
# if ExportLibCpp.exportable(node):
# op = ExportLibCpp.get_export_node(node)(node, mem_info[node])
# # For configuration files
# list_configs = op.export(dnn_folder, list_configs)
# # For forward file
# list_actions = op.forward(list_actions)
# else:
# raise RuntimeError(f"Operator not supported: {node.type()} !")
# # Memory management
# # stats_folder = export_folder / "statistics"
# # os.makedirs(str(stats_folder), exist_ok=True)
# # mem_size, mem_info = generate_optimized_memory_info(stats_folder, scheduler, mem_wrapping)
# # peak_mem, mem_info = compute_default_mem_info(scheduler)
# # Generate the memory file
# # generate_file(
# # str(dnn_folder / "memory" / "mem_info.h"),
# # str(ROOT / "templates" / "memory" / "mem_info.jinja"),
# # mem_size = mem_size,
# # mem_info_legends = MEMORY_INFO_TEMPLATE,
# # mem_info = mem_info
# # )
# # list_configs.append("memory/mem_info.h")
# # Get entry nodes
# # Store the datatype & name
# list_inputs_name = []
# for node in graphview.get_input_nodes():
# for idx, node_input_tuple in enumerate(node.inputs()):
# node_input, _ = node_input_tuple
# if node_input is None:
# export_type = aidge2c(node.get_operator().get_output(0).dtype())
# list_inputs_name.append((export_type, f"{node.name()}_input_{idx}"))
# elif node_input not in graphview.get_nodes():
# export_type = aidge2c(node_input.get_operator().get_output(0).dtype())
# list_inputs_name.append((export_type, node_input.name()))
# # Get output nodes
# # Store the datatype & name, like entry nodes
# list_outputs_name = []
# for node in graphview.get_nodes():
# if len(node.get_children()) == 0:
# export_type = aidge2c(node.get_operator().get_output(0).dtype())
# list_outputs_name.append((export_type, f"{node.name()}_output_0"))
# # Generate forward file
# # TODO: for now the mem type is bound for all intermediate results, should change.
# # Note that we may have all inputs constants, hence select output type
# assert len(list_outputs_name) >= 1, f"TODO: requires some output to determine mem type"
# mem_ctype = list_outputs_name[0][0]
# generate_file(
# str(dnn_folder / "src" / "forward.cpp"),
# str(ROOT / "templates" / "network" / "network_forward.jinja"),
# headers=set(list_configs),
# actions=list_actions,
# inputs= list_inputs_name,
# outputs=list_outputs_name,
# mem_ctype=mem_ctype,
# peak_mem=peak_mem
# )
# # Generate dnn API
# generate_file(
# str(dnn_folder / "include" / "dnn.hpp"),
# str(ROOT / "templates" / "network" / "dnn_header.jinja"),
# libraries=[],
# functions=get_functions_from_c_file(str(dnn_folder / "src" / "forward.cpp")),
# )
# # Copy all static files in the export
# shutil.copy(str(ROOT / "static" / "main.cpp"), str(export_folder))
# shutil.copy(str(ROOT / "static" / "Makefile"), str(export_folder))
# shutil.copytree(str(ROOT / "static" / "include"), str(dnn_folder / "include"), dirs_exist_ok=True)
from aidge_core.export_utils import ExportLib
from aidge_export_cpp.utils import ROOT
class ExportLibCpp(ExportLib):
_name="export_cpp"
static_files={
str(ROOT / "static" / "Makefile"): "",
str(ROOT / "static" / "include" / "network" / "typedefs.hpp"): "dnn/include/network",
str(ROOT / "static" / "include" / "network" / "utils.hpp"): "dnn/include/network",
}
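For context, a minimal usage sketch of the new export flow built on ExportLibCpp and aidge_core.export_utils.scheduler_export. The model, its dimensions, and the import path are illustrative assumptions, not part of this merge request:

# Hypothetical end-to-end usage of the refactored export (assumes the usual
# aidge_core graph-building API; names and shapes are made up).
import aidge_core
import aidge_backend_cpu  # registers the "cpu" backend used by compile()
from aidge_export_cpp import export  # assumed to be re-exported at package level

model = aidge_core.sequential([
    aidge_core.FC(32, 10, name="fc1"),
    aidge_core.ReLU(name="relu1"),
])
model.compile("cpu", aidge_core.dtype.float32, dims=[[1, 32]])

scheduler = aidge_core.SequentialScheduler(model)
scheduler.generate_scheduling()

export("export_fc", model, scheduler)  # writes dnn/ sources, Makefile and static headers

The heavy lifting (memory layout, file generation, static files) is delegated to scheduler_export and to the static_files mapping declared above.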
@@ -4,36 +4,36 @@
#include <type_traits>
#include "network/typedefs.hpp"
#include "network/utils.hpp"
#include "network/rescaling.hpp"
#include "kernels/rescaling.hpp"
template<typename Output_T, typename T,
template<typename Output_T, typename T,
typename std::enable_if<std::is_floating_point<T>::value>::type* = nullptr>
__attribute__((always_inline)) inline
Output_T saturate (T value, int32_t /*sat*/)
Output_T saturate (T value, int32_t /*sat*/)
{
return value;
}
template<typename Output_T, typename T,
template<typename Output_T, typename T,
typename std::enable_if<!std::is_floating_point<T>::value>::type* = nullptr>
__attribute__((always_inline)) inline
Output_T saturate (T value, uint32_t sat)
Output_T saturate (T value, uint32_t sat)
{
if (std::is_unsigned<Output_T>::value) {
return clamp(value, T(0), (T(1) << sat) - 1);
} else {
return clamp(value, -(T(1) << (sat - 1)), (T(1) << (sat - 1)) - 1);
}
}
}
template<typename Output_T,
typename Sum_T,
template<typename Output_T,
typename Sum_T,
typename Rescaling_T>
__attribute__((always_inline)) inline
Output_T activation_forward_value (Sum_T weightedSum,
int output,
ActivationFunction_T func,
const Rescaling_T& __restrict rescaling)
__attribute__((always_inline)) inline
Output_T activation_forward_value (Sum_T weightedSum,
int output,
ActivationFunction_T func,
const Rescaling_T& __restrict rescaling)
{
switch(func) {
case Linear:
@@ -49,7 +49,7 @@ Output_T activation_forward_value (Sum_T weightedSum,
break;
}
// Value fixed here for now but it should be generated by
// Value fixed here for now but it should be generated by
// the export module or determined by the type of Output_T
// For now only works for int8_t and uint8_t
const uint32_t NB_BITS = 8;
@@ -60,7 +60,7 @@ Output_T activation_forward_value (Sum_T weightedSum,
template<int NB_DATA,
ActivationFunction_T ACTIVATION,
typename Input_T, typename Output_T, typename Rescaling_T>
__attribute__((always_inline)) inline
__attribute__((always_inline)) inline
void activation_forward (
const Input_T* __restrict inputs,
Output_T* __restrict outputs,
......
@@ -2,17 +2,17 @@
#define __AIDGE_EXPORT_CPP_KERNELS_BATCHNORM__
#include "network/typedefs.hpp"
#include "network/rescaling.hpp"
#include "kernels/rescaling.hpp"
#include <math.h>
// WARNING: this kernel only works for 32-bits floating point values
template<int NB_OUTPUTS,
template<int NB_OUTPUTS,
int OUTPUTS_HEIGHT, int OUTPUTS_WIDTH,
ActivationFunction_T ACTIVATION,
typename Input_T, typename Output_T,
typename Input_T, typename Output_T,
typename Param_T>
__attribute__((always_inline)) inline
__attribute__((always_inline)) inline
void batchnorm_forward (
const Input_T* __restrict inputs,
Output_T* __restrict outputs,
......
@@ -2,13 +2,13 @@
#define __AIDGE_EXPORT_CPP_KERNELS_CONVOLUTION__
#include "network/typedefs.hpp"
#include "network/rescaling.hpp"
#include "kernels/rescaling.hpp"
#include "network/utils.hpp"
#include "kernels/macs.hpp"
#include "kernels/activation.hpp"
template<int NB_CHANNELS,
template<int NB_CHANNELS,
int CHANNELS_HEIGHT, int CHANNELS_WIDTH,
int NB_OUTPUTS,
int OUTPUTS_HEIGHT, int OUTPUTS_WIDTH,
@@ -17,10 +17,10 @@ template<int NB_CHANNELS,
int DILATION_Y, int DILATION_X,
int KERNEL_HEIGHT, int KERNEL_WIDTH,
ActivationFunction_T ACTIVATION,
typename Input_T, typename Output_T,
typename Input_T, typename Output_T,
typename Weight_T, typename Bias_T,
typename Rescaling_T>
__attribute__((always_inline)) inline
__attribute__((always_inline)) inline
void convolution_forward(
const Input_T* __restrict inputs,
Output_T* __restrict outputs,
@@ -28,10 +28,10 @@ void convolution_forward(
const Bias_T* __restrict biases,
const Rescaling_T& __restrict rescaling)
{
constexpr int DILATED_KERNEL_HEIGHT
constexpr int DILATED_KERNEL_HEIGHT
= KERNEL_HEIGHT + (DILATION_Y - 1) * (KERNEL_HEIGHT - 1);
constexpr int DILATED_KERNEL_WIDTH
constexpr int DILATED_KERNEL_WIDTH
= KERNEL_WIDTH + (DILATION_X - 1) * (KERNEL_WIDTH - 1);
constexpr int OUTPUTS_HEIGHT_NOPAD
@@ -44,7 +44,7 @@ void convolution_forward(
: max(PADDING_Y - (oy * STRIDE_Y), 0);
const int syMax = (PADDING_Y == 0
&& OUTPUTS_HEIGHT == OUTPUTS_HEIGHT_NOPAD) ? DILATED_KERNEL_HEIGHT
: clamp(CHANNELS_HEIGHT + PADDING_Y - (oy * STRIDE_Y),
: clamp(CHANNELS_HEIGHT + PADDING_Y - (oy * STRIDE_Y),
0, DILATED_KERNEL_HEIGHT);
const int iy = (oy * STRIDE_Y) - PADDING_Y;
@@ -57,7 +57,7 @@ void convolution_forward(
const int sxMax = (PADDING_X == 0
&& OUTPUTS_WIDTH == OUTPUTS_WIDTH_NOPAD)
? DILATED_KERNEL_WIDTH
: clamp(CHANNELS_WIDTH + PADDING_X - (ox * STRIDE_X),
: clamp(CHANNELS_WIDTH + PADDING_X - (ox * STRIDE_X),
0, DILATED_KERNEL_WIDTH);
const int ix = (ox * STRIDE_X) - PADDING_X;
@@ -85,8 +85,8 @@ void convolution_forward(
|| sxMax - sxMin == KERNEL_WIDTH))
{
macsOnRange<KERNEL_WIDTH * NB_CHANNELS>(
inputs + iOffset,
weights + wOffset,
inputs + iOffset,
weights + wOffset,
weightedSum);
}
else {
@@ -100,11 +100,11 @@ void convolution_forward(
int iOffsetInRange = iOffset
+ sx * DILATION_X * NB_CHANNELS;
macsOnRange<NB_CHANNELS>(
// same input line so no wrapping can occur
inputs + iOffsetInRange,
weights + wOffset + sx * NB_CHANNELS,
inputs + iOffsetInRange,
weights + wOffset + sx * NB_CHANNELS,
weightedSum);
}
}
......
@@ -2,20 +2,20 @@
#define __AIDGE_EXPORT_CPP_KERNELS_FULLYCONNECTED__
#include "network/typedefs.hpp"
#include "network/rescaling.hpp"
#include "kernels/rescaling.hpp"
#include "network/utils.hpp"
#include "kernels/macs.hpp"
#include "kernels/activation.hpp"
template<int NB_CHANNELS,
template<int NB_CHANNELS,
int CHANNELS_HEIGHT, int CHANNELS_WIDTH,
int NB_OUTPUTS,
int OUTPUTS_HEIGHT, int OUTPUTS_WIDTH,
ActivationFunction_T ACTIVATION,
typename Input_T, typename Output_T,
typename Input_T, typename Output_T,
typename Weight_T, typename Bias_T,
typename Rescaling_T>
__attribute__((always_inline)) inline
__attribute__((always_inline)) inline
void fullyconnected_forward (
const Input_T* __restrict inputs,
Output_T* __restrict outputs,
@@ -35,7 +35,7 @@ void fullyconnected_forward (
for (int iy = 0; iy < CHANNELS_HEIGHT; ++iy) {
for (int ix = 0; ix < CHANNELS_WIDTH; ++ix) {
for (int ch = 0; ch < NB_CHANNELS; ++ch) {
weightedSum += inputs[CHANNELS_WIDTH*NB_CHANNELS*iy + NB_CHANNELS*ix + ch]
weightedSum += inputs[CHANNELS_WIDTH*NB_CHANNELS*iy + NB_CHANNELS*ix + ch]
* weights[CHANNELS_HEIGHT*CHANNELS_WIDTH*NB_CHANNELS*och + CHANNELS_HEIGHT*CHANNELS_WIDTH*ch + CHANNELS_HEIGHT*iy + ix];
}
}
@@ -58,8 +58,8 @@ Here the kernel to use with inputs in NHWC and weights in NHWC
* (iy + CHANNELS_HEIGHT * och);
macsOnRange<NB_CHANNELS * CHANNELS_WIDTH>(
inputs + iOffset,
weights + wOffset,
inputs + iOffset,
weights + wOffset,
weightedSum);
}
@@ -69,4 +69,4 @@ Here the kernel to use with inputs in NHWC and weights in NHWC
}
#endif // __AIDGE_EXPORT_CPP_KERNELS_FULLYCONNECTED__
\ No newline at end of file
#endif // __AIDGE_EXPORT_CPP_KERNELS_FULLYCONNECTED__
import aidge_core
import aidge_backend_cpu
from typing import List
# for each layer, name: [size, offset start]
# Example:
#define ENV_MEM_SIZE 3
#define ENV_OFFSET 0
MEMORY_INFO_TEMPLATE = ["layer_name", "size", "offset"]
# Default memory management, which can be used for development
def compute_default_mem_info(scheduler: aidge_core.Scheduler):
list_forward_nodes = scheduler.get_static_scheduling()
mem_info = []
mem_size = 0
# Exclude Producers and the last layers (because the results are stored outside the export)
for i, node in enumerate(list_forward_nodes):
if node.type() != "Producer":
if len(node.get_children()) != 0:
dims = node.get_operator().get_output(0).dims()
mem = 1
for dim in dims:
mem *= dim
# Add memory info
mem_info.append([node.name(), mem, mem_size])
# Increment offset for the next layer
mem_size += mem
return mem_size, mem_info
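The allocation above is a simple linear packing: every kept node gets a buffer placed right after the previous one, and the running offset becomes the total scratch size. A standalone sketch of the same idea, with made-up layer names and sizes (not tied to any aidge_core graph):

# Illustration of the back-to-back allocation performed by compute_default_mem_info
# (layer names and sizes are hypothetical).
def linear_offsets(layer_sizes):
    """Return (total_size, [[name, size, offset], ...]) with buffers packed contiguously."""
    mem_info, offset = [], 0
    for name, size in layer_sizes:
        mem_info.append([name, size, offset])
        offset += size
    return offset, mem_info

total, info = linear_offsets([("conv1", 16 * 32 * 32), ("relu1", 16 * 32 * 32)])
# total == 32768
# info  == [['conv1', 16384, 0], ['relu1', 16384, 16384]]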
def generate_optimized_memory_info(scheduler: aidge_core.Scheduler,
wrapping:bool = False):
# The forward dims have to be computed outside this function
# Generate the memory manager
mem_manager = scheduler.generate_memory(inc_producers=False, wrap_around_buffer=wrapping)
mem_size = 0
mem_info = []
return mem_size, mem_info
\ No newline at end of file
This diff is collapsed.
from aidge_core import ExportNode
import numpy as np
from jinja2 import Environment, FileSystemLoader
import os
import shutil
from aidge_export_cpp.register import export_cpp_register
dirpath = os.path.dirname(__file__)
class KERNELS:
ACTIVATION = dirpath + "/kernels/activation.hpp"
BATCHNORM = dirpath + "/kernels/batchnorm.hpp"
CONV = dirpath + "/kernels/convolution.hpp"
ADD = dirpath + "/kernels/elemwise.hpp"
FC = dirpath + "/kernels/fullyconnected.hpp"
POOLING = dirpath + "/kernels/pooling.hpp"
LEAKYRELU = dirpath + "/kernels/leakyrelu.hpp"
class KERNELS_FORWARD:
ACTIVATION = dirpath + "/templates/kernel_forward/activation_forward.jinja"
BATCHNORM = dirpath + "/templates/kernel_forward/batchnorm_forward.jinja"
CONV = dirpath + "/templates/kernel_forward/convolution_forward.jinja"
ADD = dirpath + "/templates/kernel_forward/elemwise_forward.jinja"
FC = dirpath + "/templates/kernel_forward/fullyconnected_forward.jinja"
POOLING = dirpath + "/templates/kernel_forward/pooling_forward.jinja"
LEAKYRELU = dirpath + "/templates/kernel_forward/leakyrelu_forward.jinja"
class CONFIGURATIONS:
ACTIVATION = dirpath + "/templates/configuration/activation_config.jinja"
BATCHNORM = dirpath + "/templates/configuration/batchnorm_config.jinja"
CONV = dirpath + "/templates/configuration/convolution_config.jinja"
ADD = dirpath + "/templates/configuration/elemwise_config.jinja"
FC = dirpath + "/templates/configuration/fullyconnected_config.jinja"
POOLING = dirpath + "/templates/configuration/pooling_config.jinja"
LEAKYRELU = dirpath + "/templates/configuration/leakyrelu_config.jinja"
##############################################
############## Export functions ##############
##############################################
def generate_file(filename, templatename, **kwargs):
# Get directory name of the file
dirname = os.path.dirname(filename)
# If directory doesn't exist, create it
if not os.path.exists(dirname):
os.makedirs(dirname)
# Get directory name and name of the template
template_dir = os.path.dirname(templatename)
template_name = os.path.basename(templatename)
# Select template
template = Environment(loader=FileSystemLoader(template_dir)).get_template(template_name)
# Generate file
content = template.render(kwargs)
with open(filename, mode="w", encoding="utf-8") as message:
message.write(content)
def generate_action(template_path, **kwargs):
dirname = os.path.dirname(template_path)
filename = os.path.basename(template_path)
template = Environment(loader=FileSystemLoader(dirname)).get_template(filename)
return template.render(kwargs)
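Both helpers are thin wrappers around jinja2: generate_file renders a template to disk, generate_action returns the rendered string for the forward file. A minimal sketch of the underlying rendering step, using an inline string template instead of a .jinja file (purely illustrative):

# Inline-template equivalent of the rendering done by generate_file/generate_action
# (the real helpers load .jinja files from the templates/ directory).
from jinja2 import Template

line = Template("#define {{ name|upper }}_NB_DATA {{ nb_data }}").render(
    name="relu1", nb_data=1024)
print(line)  # -> #define RELU1_NB_DATA 1024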
def copyfile(filename, dst_folder):
# If directory doesn't exist, create it
if not os.path.exists(dst_folder):
os.makedirs(dst_folder)
shutil.copy(filename, dst_folder)
def export_to_static(name, array, filepath):
# Get directory name of the file
dirname = os.path.dirname(filepath)
# If directory doesn't exist, create it
if not os.path.exists(dirname):
os.makedirs(dirname)
generate_file(
filepath,
dirpath + "/templates/data/data_static.jinja",
dims = array.shape,
data_t = "float",
name = name,
values = array.tolist()
)
##############################################
################### Utils ####################
##############################################
def get_node_parents(node):
parents = []
for parent in node.get_parents():
if parent.type() != "Producer":
parents.append(parent)
return parents
def get_producer_parents(node):
parents = []
for parent in node.get_parents():
if parent.type() == "Producer":
parents.append(parent)
return parents
##############################################
################### Actions ##################
##############################################
def set_up_output(name, datatype):
return f"{datatype}* {name} = ({datatype}*) mem + {name.upper()}_OFFSET;"
##############################################
############## Operators helper ##############
##############################################
@export_cpp_register("Conv")
class ConvCPP(ExportNode):
def __init__(self, node):
super().__init__(node)
self.kernel = node.get_operator().get_attr("KernelDims")
self.stride = node.get_operator().get_attr("StrideDims")
# Not working anymore because Padding is a standalone operator
# self.padding = node.get_operator().get_attr("PaddingDims")
self.padding = [1, 1]
self.dilation = node.get_operator().get_attr("DilationDims")
self.nb_channels = node.get_operator().get_attr("InChannels")
self.nb_outputs = node.get_operator().get_attr("OutChannels")
def export(self, export_folder:str, list_configs:list):
copyfile(KERNELS.CONV, f"{export_folder}/include/kernels/")
copyfile(dirpath + "/kernels/macs.hpp", f"{export_folder}/include/kernels/")
copyfile(dirpath + "/kernels/activation.hpp", f"{export_folder}/include/kernels/")
list_configs.append("kernels/convolution.hpp")
list_configs.append(f"layers/{self.name}.h")
generate_file(
f"{export_folder}/layers/{self.name}.h",
CONFIGURATIONS.CONV,
name=self.name,
input_dims=self.inputs_dims[0][1:],
output_dims=self.outputs_dims[0][1:],
kernel=self.kernel,
stride=self.stride,
padding=self.padding,
dilation=self.dilation,
activation="Linear",
rescaling="NoScaling")
return list_configs
def forward(self, list_actions:list):
if not self.is_last:
list_actions.append(set_up_output(self.name, "float"))
list_actions.append(generate_action(
KERNELS_FORWARD.CONV,
name=self.name,
input_name=self.inputs[0].name(),
output_name=self.name,
weights_name=self.inputs[1].name(),
biases_name=self.inputs[2].name()
))
return list_actions
class BatchNormCPP:
def __init__(self, node):
self.name = node.name()
self.epsilon = node.get_operator().get_attr("Epsilon")
self.producers = get_producer_parents(node)
self.scales = np.array(self.producers[0].get_operator().get_output(0)).reshape(-1).tolist()
self.biases = np.array(self.producers[1].get_operator().get_output(0)).reshape(-1).tolist()
self.means = np.array(self.producers[2].get_operator().get_output(0)).reshape(-1).tolist()
self.vars = np.array(self.producers[3].get_operator().get_output(0)).reshape(-1).tolist()
parents = get_node_parents(node)
if len(parents) == 0:
self.input_name = "in"
else :
self.input_name = parents[0].name()
def export(self, export_folder:str, list_configs:list):
list_configs.append(f"layers/{self.name}.h")
generate_file(
f"{export_folder}/layers/{self.name}.h",
"tensorrt/templates/configuration/batchnorm_config.jinja",
name=self.name,
input_dims=[0, 0, 0],
output_dims=[0, 0, 0],
activation="Linear",
epsilon=self.epsilon)
# export the batchnorm parameters
return list_configs
def forward(self, list_actions:list):
list_actions.append(set_up_output(self.name, "float"))
list_actions.append(generate_action(
"cpp/templates/kernel_forward/batchnorm_forward.jinja",
name=self.name,
input_name=self.input_name,
output_name=self.name,
biases_name=self.producers[0].name(),
variances_name=self.producers[1].name(),
means_name=self.producers[2].name(),
scales_name=self.producers[3].name()
))
return list_actions
@export_cpp_register("ReLU")
class ReLUCPP(ExportNode):
def __init__(self, node):
super().__init__(node)
self.nb_data = 1
for i in self.inputs_dims[0]:
self.nb_data *= i
def export(self, export_folder:str, list_configs:list):
copyfile(KERNELS.ACTIVATION, f"{export_folder}/include/kernels/")
list_configs.append("kernels/activation.hpp")
list_configs.append(f"layers/{self.name}.h")
generate_file(
f"{export_folder}/layers/{self.name}.h",
CONFIGURATIONS.ACTIVATION,
name=self.name,
nb_data=self.nb_data,
activation="Rectifier",
rescaling="NoScaling")
return list_configs
def forward(self, list_actions:list):
if not self.is_last:
list_actions.append(set_up_output(self.name, "float"))
list_actions.append(generate_action(
KERNELS_FORWARD.ACTIVATION,
name=self.name,
input_name=self.inputs[0].name(),
output_name=self.name
))
return list_actions
@export_cpp_register("LeakyReLU")
class LeakyReLUCPP(ExportNode):
def __init__(self, node):
super().__init__(node)
self.alpha = node.get_operator().get_attr("NegativeSlope")
self.nb_data = 1
for i in self.inputs_dims[0]:
self.nb_data *= i
def export(self, export_folder:str, list_configs:list):
copyfile(KERNELS.LEAKYRELU, f"{export_folder}/include/kernels/")
list_configs.append("kernels/activation.hpp")
list_configs.append(f"layers/{self.name}.h")
generate_file(
f"{export_folder}/layers/{self.name}.h",
CONFIGURATIONS.LEAKYRELU,
name=self.name,
nb_data=self.nb_data,
alpha = self.alpha)
return list_configs
def forward(self, list_actions:list):
if not self.is_last:
list_actions.append(set_up_output(self.name, "float"))
list_actions.append(generate_action(
KERNELS_FORWARD.LEAKYRELU,
name=self.name,
input_name=self.inputs[0].name(),
output_name=self.name
))
return list_actions
class AddCPP:
def __init__(self, node):
self.name = node.name()
self.parents = get_node_parents(node)
def export(self, export_folder:str, list_configs:list):
list_configs.append(f"layers/{self.name}.h")
generate_file(
f"{export_folder}/layers/{self.name}.h",
CONFIGURATIONS.ADD,
name=self.name,
input_dims=[0, 0, 0],
output_dims=[0, 0, 0],
activation="Linear",
elemwise_op="Sum")
return list_configs
def forward(self, list_actions:list):
list_actions.append(set_up_output(self.name, "float"))
list_actions.append(generate_action(
"cpp/templates/kernel_forward/elemwise_forward.jinja",
name=self.name,
input1_name=self.parents[0].name(),
input2_name=self.parents[1].name(),
output_name=self.name
))
return list_actions
@export_cpp_register("MaxPooling")
class MaxPoolCPP(ExportNode):
def __init__(self, node):
super().__init__(node)
self.kernel = node.get_operator().get_attr("KernelDims")
self.stride = node.get_operator().get_attr("StrideDims")
# Not supported by the core...
# self.padding = node.get_operator().get_attr("PaddingDims")
self.padding = [0, 0]
def export(self, export_folder:str, list_configs:list):
copyfile(KERNELS.POOLING, f"{export_folder}/include/kernels/")
list_configs.append("kernels/pooling.hpp")
list_configs.append(f"layers/{self.name}.h")
generate_file(
f"{export_folder}/layers/{self.name}.h",
CONFIGURATIONS.POOLING,
name=self.name,
input_dims=self.inputs_dims[0],
output_dims=self.outputs_dims[0],
kernel=self.kernel,
stride=self.stride,
padding=self.padding,
pool_type="Max",
activation="Linear")
return list_configs
def forward(self, list_actions:list):
if not self.is_last:
list_actions.append(set_up_output(self.name, "float"))
list_actions.append(generate_action(
KERNELS_FORWARD.POOLING,
name=self.name,
input_name=self.inputs[0].name(),
output_name=self.name
))
return list_actions
class GlobalAvgPoolCPP:
def __init__(self, node):
# node.get_operator().set_compute_output_dims(lambda x: [[x[0][0], x[0][1], 1, 1]])
pass
def export(self, export_folder:str, list_configs:list):
return list_configs
def forward(self, list_actions:list):
list_actions.append(set_up_output(self.name, "float"))
list_actions.append(generate_action(
"cpp/templates/kernel_forward/pooling_forward.jinja",
name=self.name,
input_name=self.input_name,
output_name=self.name
))
return list_actions
@export_cpp_register("FC")
class FcCPP(ExportNode):
def __init__(self, node):
super().__init__(node)
if len(self.inputs_dims[0]) == 2:
self.inputs_dims[0] = [self.inputs_dims[0][1], 1, 1]
elif len(self.inputs_dims[0]) == 4:
self.inputs_dims[0] = self.inputs_dims[0][1:]
if len(self.outputs_dims[0]) == 2:
self.outputs_dims[0] = [self.outputs_dims[0][1], 1, 1]
def export(self, export_folder:str, list_configs:list):
copyfile(KERNELS.FC, f"{export_folder}/include/kernels/")
copyfile(dirpath + "/kernels/macs.hpp", f"{export_folder}/include/kernels/")
copyfile(dirpath + "/kernels/activation.hpp", f"{export_folder}/include/kernels/")
list_configs.append("kernels/fullyconnected.hpp")
list_configs.append(f"layers/{self.name}.h")
generate_file(
f"{export_folder}/layers/{self.name}.h",
CONFIGURATIONS.FC,
name=self.name,
input_dims=self.inputs_dims[0],
output_dims=self.outputs_dims[0],
activation="Linear",
rescaling="NoScaling")
return list_configs
def forward(self, list_actions:list):
if not self.is_last:
list_actions.append(set_up_output(self.name, "float"))
list_actions.append(generate_action(
KERNELS_FORWARD.FC,
name=self.name,
input_name=self.inputs[0].name() if self.inputs[0] else self.name + "_input",
output_name=self.name,
weights_name=self.inputs[1].name(),
biases_name=self.inputs[2].name()
))
return list_actions
@export_cpp_register("Producer")
class ProducerCPP(ExportNode):
def __init__(self, node):
super().__init__(node)
self.values = np.array(self.operator.get_output(0))
def export(self, export_folder:str, list_configs:list):
list_configs.append(f"parameters/{self.name}.h")
export_to_static(self.name,
self.values.reshape(-1),
f"{export_folder}/parameters/{self.name}.h")
return list_configs
def forward(self, list_actions:list):
return list_actions
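Supporting another operator follows the same pattern as the classes above: register an ExportNode subclass, emit its configuration header in export(), and append its forward call in forward(). A minimal skeleton, assuming a hypothetical "Softmax" kernel and templates that are not part of this repository:

# Hypothetical skeleton for a new operator export; the kernel and template
# files referenced here are placeholders, not files shipped with the export.
@export_cpp_register("Softmax")
class SoftmaxCPP(ExportNode):
    def __init__(self, node):
        super().__init__(node)
        self.nb_data = 1
        for dim in self.inputs_dims[0]:
            self.nb_data *= dim

    def export(self, export_folder: str, list_configs: list):
        # copyfile(dirpath + "/kernels/softmax.hpp", f"{export_folder}/include/kernels/")  # hypothetical kernel
        list_configs.append(f"layers/{self.name}.h")
        return list_configs

    def forward(self, list_actions: list):
        if not self.is_last:
            list_actions.append(set_up_output(self.name, "float"))
        # list_actions.append(generate_action(<softmax forward template>, ...))  # hypothetical template
        return list_actions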
#ifndef __AIDGE_EXPORT_CPP_NETWORK_UTILS__
#define __AIDGE_EXPORT_CPP_NETWORK_UTILS__
#ifdef SAVE_OUTPUTS
#include <sys/types.h>
#include <sys/stat.h>
#include <cstdio> // fprintf
#include <type_traits> // std::is_floating_point
#endif
/**
* @brief Integer clamping
* @param[in] v Value to be clamped
* @param[in] lo Saturating lower bound
* @param[in] hi Saturating higher bound
* @returns Value clamped between lo and hi
*
*
*/
__attribute__((always_inline)) static inline
int clamp (int v, int lo, int hi)
int clamp (int v, int lo, int hi)
{
if(v < lo) {
return lo;
@@ -27,7 +34,7 @@ int clamp (int v, int lo, int hi)
* @brief Maximum of two integer values
*/
__attribute__((always_inline)) static inline
int max (int lhs, int rhs)
int max (int lhs, int rhs)
{
return (lhs >= rhs) ? lhs : rhs;
}
@@ -36,9 +43,107 @@ int max (int lhs, int rhs)
* @brief Minimum of two integer values
*/
__attribute__((always_inline)) static inline
int min (int lhs, int rhs)
int min (int lhs, int rhs)
{
return (lhs <= rhs) ? lhs : rhs;
}
#endif // __AIDGE_EXPORT_CPP_NETWORK_UTILS__
#ifdef SAVE_OUTPUTS
enum class Format {
Default,
NCHW,
NHWC,
CHWN,
NCDHW,
NDHWC,
CDHWN
};
template<typename Output_T>
inline void saveOutputs(
int NB_OUTPUTS,
int OUTPUTS_HEIGHT, int OUTPUTS_WIDTH,
int OUTPUT_MEM_CONT_OFFSET,
int OUTPUT_MEM_CONT_SIZE,
int OUTPUT_MEM_WRAP_OFFSET,
int OUTPUT_MEM_WRAP_SIZE,
int OUTPUT_MEM_STRIDE,
const Output_T* __restrict outputs,
FILE* pFile,
Format format)
{
// default memory layout is NHWC !
if (format == Format::NHWC) {
fprintf(pFile, "(");
for(int oy = 0; oy < OUTPUTS_HEIGHT; oy++) {
fprintf(pFile, "(");
for(int ox = 0; ox < OUTPUTS_WIDTH; ox++) {
fprintf(pFile, "(");
const int oPos = (ox + OUTPUTS_WIDTH * oy);
int oOffset = OUTPUT_MEM_STRIDE * oPos;
if (OUTPUT_MEM_WRAP_SIZE > 0
&& oOffset >= OUTPUT_MEM_CONT_SIZE)
{
oOffset += OUTPUT_MEM_WRAP_OFFSET - OUTPUT_MEM_CONT_OFFSET
- OUTPUT_MEM_CONT_SIZE;
}
for (int output = 0; output < NB_OUTPUTS; output++) {
if (std::is_floating_point<Output_T>::value)
fprintf(pFile, "%f", static_cast<float>(outputs[oOffset + output]));
else
fprintf(pFile, "%d", static_cast<int>(outputs[oOffset + output]));
fprintf(pFile, ", ");
}
fprintf(pFile, "), \n");
}
fprintf(pFile, "), \n");
}
fprintf(pFile, ")\n");
}
else if (format == Format::NCHW || format == Format::Default) {
for(int output = 0; output < NB_OUTPUTS; output++) {
fprintf(pFile, "%d:\n", output);
for(int oy = 0; oy < OUTPUTS_HEIGHT; oy++) {
for(int ox = 0; ox < OUTPUTS_WIDTH; ox++) {
const int oPos = (ox + OUTPUTS_WIDTH * oy);
int oOffset = OUTPUT_MEM_STRIDE * oPos;
if (OUTPUT_MEM_WRAP_SIZE > 0
&& oOffset >= OUTPUT_MEM_CONT_SIZE)
{
oOffset += OUTPUT_MEM_WRAP_OFFSET
- OUTPUT_MEM_CONT_OFFSET - OUTPUT_MEM_CONT_SIZE;
}
if (std::is_floating_point<Output_T>::value)
fprintf(pFile, "%f", static_cast<float>(outputs[oOffset + output]));
else
fprintf(pFile, "%d", static_cast<int>(outputs[oOffset + output]));
fprintf(pFile, " ");
}
fprintf(pFile, "\n");
}
fprintf(pFile, "\n");
}
fprintf(pFile, "\n");
}
else {
printf("Warning unsupported dataformat.\n");
}
}
#endif // SAVE_OUTPUTS
#endif // __AIDGE_EXPORT_CPP_NETWORK_UTILS__
#include <iostream>
#include "dnn.hpp"
#include "inputs.h"
int main()
{
// Example for MNIST dataset
// Feel free to change this file for your own projects
const unsigned int nb_classes = 10;
float results[nb_classes];
model_forward(inputs, results);
for (unsigned int i = 0; i < nb_classes; ++i)
{
std::cout << i << ": " << results[i] << std::endl;
}
return 0;
}
\ No newline at end of file
{# NOTE: Suppose input is first #}
// INPUT CONF
{% for inidx in range(nb_in) -%}
#define {{ in_name[inidx]|upper }}_NB_CHANNELS {{ in_chan[inidx] }}
#define {{ in_name[inidx]|upper }}_IN_HEIGHT {{ in_height[inidx] }}
#define {{ in_name[inidx]|upper }}_IN_WIDTH {{ in_width[inidx] }}
{% endfor %}
// OUTPUT CONF
{% for outidx in range(nb_out) -%}
#define {{ out_name[outidx]|upper }}_NB_OUTPUTS {{ out_chan[outidx] }}
#define {{ out_name[outidx]|upper }}_OUT_HEIGHT {{ out_height[outidx] }}
#define {{ out_name[outidx]|upper }}_OUT_WIDTH {{ out_width[outidx] }}
{% endfor %}
// MEMINFO CONF
{% for outidx in range(nb_out) -%}
#define {{ out_name[outidx]|upper }}_SIZE {{ mem_info_size[outidx]}}
#define {{ out_name[outidx]|upper }}_OFFSET {{ mem_info_offset[outidx]}}
#define {{ out_name[outidx]|upper }}_STRIDE {{ mem_info_stride[outidx]}}
#define {{ out_name[outidx]|upper }}_LENGTH {{ mem_info_length[outidx]}}
#define {{ out_name[outidx]|upper }}_CONT_SIZE {{ mem_info_cont_size[outidx]}}
#define {{ out_name[outidx]|upper }}_CONT_OFFSET {{ mem_info_cont_offset[outidx]}}
#define {{ out_name[outidx]|upper }}_WRAP_OFFSET {{ mem_info_wrap_offset[outidx]}}
#define {{ out_name[outidx]|upper }}_WRAP_SIZE {{ mem_info_wrap_size[outidx]}}
{% endfor %}
{#- For name header -#}
#ifndef {{ name|upper }}_LAYER_H
#define {{ name|upper }}_LAYER_H
#include "kernels/rescaling.hpp"
{# For layer configuration -#}
{%- set nb_data = in_chan[0] * in_height[0] * in_width[0] %}
#define {{ name|upper }}_NB_DATA {{ nb_data }}
#define {{ name|upper }}_ACTIVATION {{ activation }}
{% include "./_def_io.jinja" %}
{% include "./_meminfo.jinja" %}
static const {{ rescaling }} {{ name|upper }}_RESCALING = {};
#endif /* {{ name|upper }}_LAYER_H */
@@ -3,12 +3,8 @@
#define {{ name|upper }}_LAYER_H
{# For layer configuration -#}
#define {{ name|upper }}_NB_CHANNELS {{ input_dims[0] }}
#define {{ name|upper }}_CHANNELS_HEIGHT {{ input_dims[1] }}
#define {{ name|upper }}_CHANNELS_WIDTH {{ input_dims[2] }}
#define {{ name|upper }}_NB_OUTPUTS {{ output_dims[0] }}
#define {{ name|upper }}_OUTPUTS_HEIGHT {{ output_dims[1] }}
#define {{ name|upper }}_OUTPUTS_WIDTH {{ output_dims[2] }}
{% include "./_def_io.jinja" %}
{% include "./_meminfo.jinja" %}
#define {{ name|upper }}_ACTIVATION {{ activation }}
#define {{ name|upper }}_EPSILON {{ epsilon }}
......
{#- For name header -#}
#ifndef {{ name|upper }}_LAYER_H
#define {{ name|upper }}_LAYER_H
#include "kernels/rescaling.hpp"
{# For layer configuration -#}
#define {{ name|upper }}_NB_CHANNELS {{ input_dims[0] }}
#define {{ name|upper }}_CHANNELS_HEIGHT {{ input_dims[1] }}
#define {{ name|upper }}_CHANNELS_WIDTH {{ input_dims[2] }}
#define {{ name|upper }}_NB_OUTPUTS {{ output_dims[0] }}
#define {{ name|upper }}_OUTPUTS_HEIGHT {{ output_dims[1] }}
#define {{ name|upper }}_OUTPUTS_WIDTH {{ output_dims[2] }}
{% include "./_def_io.jinja" %}
{% include "./_meminfo.jinja" %}
#define {{ name|upper }}_PADDING_Y {{ padding[1] }}
#define {{ name|upper }}_PADDING_X {{ padding[0] }}
#define {{ name|upper }}_STRIDE_Y {{ stride[1] }}
#define {{ name|upper }}_STRIDE_X {{ stride[0] }}
#define {{ name|upper }}_DILATION_Y {{ dilation[1] }}
#define {{ name|upper }}_DILATION_X {{ dilation[0] }}
#define {{ name|upper }}_KERNEL_HEIGHT {{ kernel[1] }}
#define {{ name|upper }}_KERNEL_WIDTH {{ kernel[0] }}
#define {{ name|upper }}_STRIDE_Y {{ stride_dims[1] }}
#define {{ name|upper }}_STRIDE_X {{ stride_dims[0] }}
#define {{ name|upper }}_DILATION_Y {{ dilation_dims[1] }}
#define {{ name|upper }}_DILATION_X {{ dilation_dims[0] }}
#define {{ name|upper }}_KERNEL_HEIGHT {{ kernel_dims[1] }}
#define {{ name|upper }}_KERNEL_WIDTH {{ kernel_dims[0] }}
#define {{ name|upper }}_ACTIVATION {{ activation }}
static const {{ rescaling }} {{ name|upper }}_RESCALING = {};
{#- Calculate sizes #}
{%- set weights_size = output_dims[0] * input_dims[0] * kernel[1] * kernel[0] %}
{%- set weights_size = out_chan[0] * in_chan[0] * kernel_dims[1] * kernel_dims[0] %}
#define {{ name|upper }}_WEIGHTS_SIZE {{ weights_size }}
#define {{ name|upper }}_BIASES_SIZE {{ output_dims[0] }}
#define {{ name|upper }}_BIASES_SIZE {{ out_chan[0] }}
#endif /* {{ name|upper }}_LAYER_H */
{#- For name header -#}
#ifndef {{ name|upper }}_LAYER_H
#define {{ name|upper }}_LAYER_H
#include "kernels/rescaling.hpp"
{% include "./_def_io.jinja" %}
{% include "./_meminfo.jinja" %}
{# For layer configuration -#}
#define {{ name|upper }}_NB_ELTS {{ nb_elts }}
#define {{ name|upper }}_NB_ELTS {{ in_dims[0]|join('*') }}
#define {{ name|upper }}_ACTIVATION {{ activation }}
#define {{ name|upper }}_ELEM_OP {{ elemwise_op }}
static const {{ rescaling }} {{ name|upper }}_RESCALING = {};
......