Commit 8bef8f8d authored by Wissam Boussella

[New] Add aidge_quantization compatibility

[Wip] Export and forward do not always have the same parameters and dimensions
parent 12087967
Merge request !1: Add templates for CMSIS-NN
@@ -23,16 +23,16 @@
 #define {{ name|upper }}_STRIDE_X {{ stride[0] }}
 {# Dilation : #}
-#define {{ name|upper }}_DILATION_Y {{ dilation[1] }}
-#define {{ name|upper }}_DILATION_X {{ dilation[0] }}
+#define {{ name|upper }}_DILATION_Y {{ dilation_y }}
+#define {{ name|upper }}_DILATION_X {{ dilation_x }}
 {# Kernel : #}
 #define {{ name|upper }}_KERNEL_HEIGHT {{ kernel[1] }}
 #define {{ name|upper }}_KERNEL_WIDTH {{ kernel[0] }}
 {# Activation : #}
-#define {{ name|upper }}_ACTIVATION_MIN {{ activation[0] }}
-#define {{ name|upper }}_ACTIVATION_MAX {{ activation[1] }}
+#define {{ name|upper }}_ACTIVATION_MIN {{ activation_min }}
+#define {{ name|upper }}_ACTIVATION_MAX {{ activation_max }}
 {# Offset #}
 #define {{ name|upper }}_INPUT_OFFSET {{ input_offset }}
@@ -4,15 +4,16 @@
 {# For layer configuration -#}
 {# Batches : #}
-#define {{ name|upper }}_INPUT_BATCHES {{ input_batches }}
+#define {{ name|upper }}_INPUT_BATCHES {{ input_dims[0] }}
 {# Inputs : #}
 {# {% if gemm %} #}
-#define {{ name|upper }}_INPUT_NB_CHANNELS {{ input_dims }}
-#define {{ name|upper }}_INPUT_HEIGHT 1
-#define {{ name|upper }}_INPUT_WIDTH 1
+#define {{ name|upper }}_INPUT_NB_CHANNELS {{ input_dims[1] }}
+#define {{ name|upper }}_INPUT_HEIGHT {{ input_dims[2] }}
+#define {{ name|upper }}_INPUT_WIDTH {{ input_dims[3] }}
-#define {{ name|upper }}_ACCUMULATION_DEPTH {{ input_dims }}
+{% set accumulation_depth = input_dims[1] * input_dims[2] * input_dims[3] %}
+#define {{ name|upper }}_ACCUMULATION_DEPTH {{ accumulation_depth }}
 #define {{ name|upper }}_OUTPUT_CH {{ output_channels }}
 #define {{ name|upper }}_ACTIVATION_MAX {{ activation_max }}
@@ -27,7 +27,7 @@
 #define {{ name|upper }}_KERNEL_WIDTH {{ kernel[0] }}
 {# Activation : #}
-#define {{ name|upper }}_ACTIVATION_MIN {{ activation[0] }}
-#define {{ name|upper }}_ACTIVATION_MAX {{ activation[1] }}
+#define {{ name|upper }}_ACTIVATION_MIN {{ activation_min }}
+#define {{ name|upper }}_ACTIVATION_MAX {{ activation_max }}
 #endif /* {{ name|upper }}_LAYER_H */
\ No newline at end of file
arm_relu6_s8({{ input_name }}, {{ name|upper }}_SIZE);
{{ output_name }} = {{ input_name }};
{# TODO:Wissam: don't copy #}
\ No newline at end of file
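Note: arm_relu6_s8 clamps its buffer in place, so the generated assignment above only aliases the output name to the input buffer rather than copying data; the TODO presumably refers to eliminating even that aliasing.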
cmsis_nn_context ctx;
/* arm_max_pool_* takes cmsis_nn_pool_params (stride, padding, activation) */
cmsis_nn_pool_params pool_params_{{ name|lower }};
cmsis_nn_dims input_dims_{{ name|lower }};
cmsis_nn_dims filter_dims_{{ name|lower }};
cmsis_nn_dims output_dims_{{ name|lower }};

input_dims_{{ name|lower }}.n = {{ name|upper }}_INPUT_BATCHES;
input_dims_{{ name|lower }}.w = {{ name|upper }}_INPUT_W;
input_dims_{{ name|lower }}.h = {{ name|upper }}_INPUT_H;
input_dims_{{ name|lower }}.c = {{ name|upper }}_INPUT_CH;

filter_dims_{{ name|lower }}.w = {{ name|upper }}_FILTER_X;
filter_dims_{{ name|lower }}.h = {{ name|upper }}_FILTER_Y;

output_dims_{{ name|lower }}.w = {{ name|upper }}_OUTPUTS_WIDTH;
output_dims_{{ name|lower }}.h = {{ name|upper }}_OUTPUTS_HEIGHT;
output_dims_{{ name|lower }}.c = {{ name|upper }}_OUT_CH;

pool_params_{{ name|lower }}.padding.w = {{ name|upper }}_PAD_X;
pool_params_{{ name|lower }}.padding.h = {{ name|upper }}_PAD_Y;
pool_params_{{ name|lower }}.stride.w = {{ name|upper }}_STRIDE_X;
pool_params_{{ name|lower }}.stride.h = {{ name|upper }}_STRIDE_Y;
pool_params_{{ name|lower }}.activation.min = {{ name|upper }}_ACTIVATION_MIN;
pool_params_{{ name|lower }}.activation.max = {{ name|upper }}_ACTIVATION_MAX;

int32_t buf_size_{{ name|lower }} = arm_convolve_{{ format }}_get_buffer_size(&input_dims_{{ name|lower }}, &filter_dims_{{ name|lower }});
ctx.buf = malloc(buf_size_{{ name|lower }});
ctx.size = 0;

{# TODO:Wissam: Find a way to choose avg or maxpooling #}
arm_max_pool_{{ format }}(&ctx,
                          &pool_params_{{ name|lower }},
                          &input_dims_{{ name|lower }},
                          {{ input_name }},
                          &filter_dims_{{ name|lower }},
                          &output_dims_{{ name|lower }},
                          {{ output_name }});
\ No newline at end of file
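For reference, a minimal sketch of rendering such a kernel template in isolation with jinja2 (already a dependency of the export scripts); the layer and buffer names below are hypothetical, and the real export supplies these parameters through generate_action:

from jinja2 import Template

# Hypothetical one-line excerpt of the kernel template above.
snippet = (
    "arm_max_pool_{{ format }}(&ctx, &pool_params_{{ name|lower }}, "
    "&input_dims_{{ name|lower }}, {{ input_name }}, "
    "&filter_dims_{{ name|lower }}, &output_dims_{{ name|lower }}, {{ output_name }});"
)

# Renders the C call on one line:
# arm_max_pool_s8(&ctx, &pool_params_pool1, &input_dims_pool1, conv1_out,
#                 &filter_dims_pool1, &output_dims_pool1, pool1_out);
print(Template(snippet).render(name="Pool1", format="s8",
                               input_name="conv1_out", output_name="pool1_out"))
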
{{ output_name }} = {{ input_name }};
\ No newline at end of file
@@ -46,12 +46,13 @@ def export(export_folder,
     list_actions = []
     list_configs = []
+    for node in list_forward_nodes:
+        print(node.type())
     # Export layer configurations
     for node in list_forward_nodes:
-        if node.type() in supported_operators():
         # Avoid special characters in node names
         node.set_name(str(re.sub(r'\W+', '_', node.name())))
+        if node.type() in supported_operators() and node.type() != "Scaling":
             op = OPERATORS_REGISTRY[node.type()](node, board, dataformat=format, library="aidge")
             # Export the configuration
@@ -66,7 +67,8 @@ def export(export_folder,
     mem_offsets = []
     mem_size = 0
     for i, node in enumerate(list_forward_nodes):
-        if node.type() != "Producer":
+        if node.type() != "Producer" and node.type() != "Scaling":
             if i != len(list_forward_nodes) - 1:
                 mem_offsets.append(f"{node.name().upper()}_OFFSET {mem_size}")
                 dims = node.get_operator().get_output(0).dims()
@@ -75,10 +77,7 @@ def export(export_folder,
                     mem *= dim
                 mem_size += mem
-<<<<<<< HEAD
-=======
->>>>>>> 7dbab60 (Add export function)
     # Generate memory file
     aidge_export_arm_cortexm.operators.generate_file(
         f"{dnn_folder}/memory/mem_info.h",
import os
import shutil
import math
import numpy as np
from pathlib import Path
from jinja2 import Environment, FileSystemLoader
from aidge_core import ExportNode
from aidge_export_arm_cortexm.utils import ROOT, operator_register
@@ -85,13 +85,44 @@ def get_producer_parents(node):
         parents.append(parent)
     return parents

+def normalize_to_range(arr, new_min, new_max):
+    min_value = np.min(arr)
+    max_value = np.max(arr)
+    normalized_array = ((arr - min_value) * (new_max - new_min)) / (max_value - min_value) + new_min
+    normalized_array = np.trunc(normalized_array)
+    normalized_array = np.int_(normalized_array)
+    return normalized_array
+
+def calculate_shift_and_multiplier(scaling_factor, quant_bits, max_shift):
+    # Absolute value of the scaling factor
+    abs_scaling_factor = abs(scaling_factor)
+    # Number of bits needed to represent the scaling factor as a fixed-point number
+    scaling_factor_bits = int(np.ceil(np.log2(abs_scaling_factor)))
+    # Number of fractional bits for the fixed-point representation
+    frac_bits = quant_bits - scaling_factor_bits - 1
+    shift = -frac_bits
+    multiplier = int(np.round(2**frac_bits / abs_scaling_factor))
+    shift = min(shift, max_shift)
+    return shift, multiplier
+
+def get_scaling(node):
+    if len(node.get_children()) != 0:
+        scaling_node = node.get_children().pop()
+    else:
+        return 0, 1
+    if scaling_node.type() != "Scaling":
+        return 0, 1
+    scaling_factor = scaling_node.get_operator().get_attr("scalingFactor")
+    quantized_number_bits = scaling_node.get_operator().get_attr("quantizedNbBits")
+    shift, multiplier = calculate_shift_and_multiplier(scaling_factor, quantized_number_bits, 31)
+    return shift, multiplier
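A quick sanity check of the helper above, with hypothetical values. Note that, as written, the pair satisfies multiplier ~= 2**(-shift) / scaling_factor, so (x * multiplier) >> -shift approximates x divided by the scaling factor rather than multiplied by it:

# Hypothetical values: a scaling factor of 0.0042 quantized on 8 bits.
shift, multiplier = calculate_shift_and_multiplier(0.0042, 8, 31)
print(shift, multiplier)             # -> -14 3900952

x = 1000                             # example int32 accumulator value
approx = (x * multiplier) >> -shift  # shift is negative, so -shift right-shifts by frac_bits
print(approx, x / 0.0042)            # -> 238095 vs 238095.23..., close agreement
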
@@ -582,12 +613,13 @@ class Producer(ExportNode):
 # TODO:Wissam: For now I use the name Gemm, find a better way to organise that
 # TODO:Wissam: Use a loop in the .jinja to factorize the code
 # TODO:Wissam: Use the adapted format
-@operator_register("FC")
+@operator_register("FC", "FCOp")
 class FC(ExportNode):
     def __init__(self, node, board, dataformat, library):
         w_dims = node.get_operator().get_input(1).dims()
-        node.get_operator().get_output(0).resize([w_dims[1]])
+        node.get_operator().get_output(0).resize([w_dims[1]], [1])
         super().__init__(node)
@@ -597,6 +629,9 @@ class FC(ExportNode):
         self.batch = 1  # TODO:Wissam: let the user choose the batch size?
-        activation_min = -128
-        activation_min = 127
+        self.shift, self.multiplier = get_scaling(node)
+        if self.__format__ == "int8_t":
+            activation_min = -128
+            activation_max = 127
@@ -617,24 +652,43 @@ class FC(ExportNode):
         copyfile(str(ROOT / "_CMSIS-NN" / "CMSIS-NN" / "Source" / "NNSupportFunctions" / "arm_nn_vec_mat_mult_t_s8.c"),
                  str(Path(export_folder) / "src" / "kernels"))

         # Add the layer configuration to the list of includes
         list_configs.append(f"layers/{self.name}.h")

         # Export the configuration file
-        generate_file(
+        if len(self.inputs_dims[0]) != 4:
+            generate_file(
                 f"{export_folder}/layers/{self.name}.h",
                 str(ROOT / "_CMSIS-NN" / "templates" / "configuration" / "arm_fully_connected.jinja"),
                 name=self.name,
                 input_batches=self.batch,
-                input_dims=self.inputs_dims[1][1],
+                input_dims=[1, self.inputs_dims[0][1], 1, 1],
                 output_channels=self.inputs_dims[2][0],
                 activation_min=-128,
                 activation_max=127,
-                input_offset=-128,
-                output_offset=-128,
-                multiplier=1,
-                shift=0
+                input_offset=0,
+                output_offset=0,
+                multiplier=self.multiplier,
+                shift=self.shift
             )
+        else:
+            generate_file(
+                f"{export_folder}/layers/{self.name}.h",
+                str(ROOT / "_CMSIS-NN" / "templates" / "configuration" / "arm_fully_connected.jinja"),
+                name=self.name,
+                input_batches=self.batch,
+                input_dims=self.inputs_dims[0],
+                output_channels=self.outputs_dims[0][0],
+                activation_min=-128,
+                activation_max=127,
+                input_offset=0,
+                output_offset=0,
+                multiplier=self.multiplier,
+                shift=self.shift
+            )
         return list_configs
@@ -683,7 +737,7 @@ class ReLU(ExportNode):
str(ROOT /"_CMSIS-NN"/"templates"/"configuration"/"arm_nn_activation.jinja"),
name=self.name,
format="s8",
size=self.inputs_dims[0][0],
size=self.inputs_dims[0][1],
)
return list_configs
@@ -712,15 +766,7 @@ class Conv(ExportNode):
         node.get_operator().get_output(0).resize([w_dims[1]])
         super().__init__(node)

-        # if node.op_type == "PaddedConv" :
-        #     self.kernel = node.get_operator().get_attr("KernelDims")
-        #     self.stride = node.get_operator().get_attr("StrideDims")
-        #     # Not working anymore because Padding is a standalone operator
-        #     self.padding = node.get_operator().get_attr("PaddingDims")
-        #     self.dilation = node.get_operator().get_attr("DilationDims")
-        #     self.nb_channels = node.get_operator().get_attr("InChannels")
-        #     self.nb_outputs = node.get_operator().get_attr("OutChannels")
         self.board = board
         self.library = library
@@ -792,7 +838,6 @@ class PaddedConv(ExportNode):
         node.get_operator().get_output(0).resize([w_dims[1]])
         super().__init__(node)

-        parent = node.get_parent(0)
         self.board = board
         self.library = library
@@ -801,8 +846,7 @@ class PaddedConv(ExportNode):
         # get_micro_graph: where is the scheduler??
         # PaddedConv is composed of one Padding node and one Conv node
-        for op in node.get_operator().get_micro_graph().get_nodes():
         for op in node.get_operator().get_micro_graph().get_nodes():
             if op.type() == "Conv":
                 self.kernel = op.get_operator().get_attr("KernelDims")
                 self.stride = op.get_operator().get_attr("StrideDims")
@@ -814,6 +858,11 @@ class PaddedConv(ExportNode):
             self.border_value = op.get_operator().get_attr("BorderValue")
             self.begin_end_borders = op.get_operator().get_attr("BeginEndBorders")

+        self.activation_min = -128
+        self.activation_max = 127
+        if self.__format__ == "int8_t":
+            self.activation_min = -128
+            self.activation_max = 127

     def export(self, export_folder:str, list_configs:list):
@@ -839,13 +888,16 @@ class PaddedConv(ExportNode):
             input_batches=self.batch,
             # TODO:Wissam: why is the output wrong for LeNet?
-            input_dims=self.inputs_dims[0][1:],
+            input_dims=self.inputs_dims[0],
             output_dims=self.outputs_dims[0],
-            kernel=self.inputs_dims[1][2:],
+            kernel=self.kernel,
             stride=self.stride,
             padding=self.begin_end_borders,
-            dilation=self.dilation,
-        )
+            dilation_y=self.dilation[1],
+            dilation_x=self.dilation[0],
+            activation_min=self.activation_min,
+            activation_max=self.activation_max
+        )
         return list_configs

     def forward(self, list_actions:list):
@@ -869,10 +921,6 @@ class PaddedConv(ExportNode):
@operator_register("MaxPooling")
class MaxPooling(ExportNode):
def __init__(self, node, board, dataformat, library):
w_dims = node.get_operator().get_input(1).dims()
node.get_operator().get_output(0).resize([w_dims[1]])
super().__init__(node)
self.board = board
@@ -882,10 +930,15 @@ class MaxPooling(ExportNode):
         self.kernel = node.get_operator().get_attr("KernelDims")
         self.stride = node.get_operator().get_attr("StrideDims")
         # Padding is not supported by the core...
         # self.padding = node.get_operator().get_attr("PaddingDims")
         self.padding = [0, 0]

+        self.activation_min = -128
+        self.activation_max = 127
+        # if self.__format__ == "int8_t":
+        #     self.activation_min = -128
+        #     self.activation_max = 127

     def export(self, export_folder:str, list_configs:list):
         # Copy the kernel into the export
@@ -893,7 +946,7 @@ class MaxPooling(ExportNode):
         if self.library == "aidge":
             if True:  # if self.dataformat == "int8_t":
-                copyfile(str(ROOT / "_CMSIS-NN" / "CMSIS-NN" / "Source" / "MaxPooling" / "arm_max_pool_s8.c"),
+                copyfile(str(ROOT / "_CMSIS-NN" / "CMSIS-NN" / "Source" / "PoolingFunctions" / "arm_max_pool_s8.c"),
                          str(Path(export_folder) / "src" / "kernels"))
                 copyfile(str(ROOT / "_CMSIS-NN" / "CMSIS-NN" / "Include" / "arm_nnfunctions.h"),
                          str(Path(export_folder) / "Include"))
@@ -904,7 +957,7 @@ class MaxPooling(ExportNode):
         # Export the configuration file
         generate_file(
             f"{export_folder}/layers/{self.name}.h",
-            str(ROOT / "_CMSIS-NN" / "templates" / "configuration" / "arm_convolve.jinja"),
+            str(ROOT / "_CMSIS-NN" / "templates" / "configuration" / "max_pooling.jinja"),
             name=self.name,
             format="s8",
             input_batches=self.batch,
@@ -913,6 +966,8 @@ class MaxPooling(ExportNode):
             kernel=self.kernel,
             stride=self.stride,
             padding=self.padding,
+            activation_min=self.activation_min,
+            activation_max=self.activation_max
         )
         return list_configs
@@ -921,13 +976,11 @@ class MaxPooling(ExportNode):
         list_actions.append(set_up_output(self.name, "int8_t"))
         list_actions.append(generate_action(
-            str(ROOT / "_CMSIS-NN" / "templates" / "kernel" / "arm_convolve.jinja"),
+            str(ROOT / "_CMSIS-NN" / "templates" / "kernel" / "max_pooling.jinja"),
             name=self.name,
-            format="s8",
+            dataformat="int8_t",
             input_name=self.inputs[0].name(),
-            weight_name=self.inputs[1].name(),
-            bias_name=self.inputs[2].name(),
             output_name=self.name
         ))
         return list_actions
@@ -984,6 +1037,32 @@ class Reshape(ExportNode):
     def forward(self, list_actions:list):
         if not self.is_last:
             list_actions.append(set_up_output(self.name, "int8_t"))
         return list_actions

+@operator_register("Scaling")
+class Scaling(ExportNode):
+    def __init__(self, node, board, dataformat, library):
+        super().__init__(node)  # populate self.name / self.inputs from the graph node
+        self.board = board
+        self.library = library
+        self.dataformat = dataformat
+        self.batch = 1  # TODO:Wissam: let the user choose the batch size?
+
+    def export(self, export_folder:str, list_configs:list):
+        return list_configs
+
+    def forward(self, list_actions:list):
+        list_actions.append(generate_action(
+            str(ROOT / "_CMSIS-NN" / "templates" / "kernel" / "scaling.jinja"),
+            format="s8",
+            dataformat="int8_t",
+            input_name=self.inputs[0].name(),
+            output_name=self.name
+        ))
+        return list_actions
\ No newline at end of file
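Taken together with get_scaling() above, the Scaling operator is effectively folded away at export time: the preceding FC node absorbs the (shift, multiplier) pair, both the export loop and the memory-offset loop skip nodes of type "Scaling", and the scaling.jinja kernel reduces to the bare passthrough assignment shown earlier.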