diff --git a/aidge_export_arm_cortexm/_CMSIS-NN/templates/configuration/arm_convolve.jinja b/aidge_export_arm_cortexm/_CMSIS-NN/templates/configuration/arm_convolve.jinja
index c1ea5a5687aeb83a220e167ead361b9205e8d0f9..5ba322fb4b7e9ccc6e12a7c2e64aeb912c20d9a5 100644
--- a/aidge_export_arm_cortexm/_CMSIS-NN/templates/configuration/arm_convolve.jinja
+++ b/aidge_export_arm_cortexm/_CMSIS-NN/templates/configuration/arm_convolve.jinja
@@ -23,16 +23,16 @@
 #define {{ name|upper }}_STRIDE_X {{ stride[0] }}
 
 {# Dilation : #}
-#define {{ name|upper }}_DILATION_Y {{ dilation[1] }}
-#define {{ name|upper }}_DILATION_X {{ dilation[0] }}
+#define {{ name|upper }}_DILATION_Y {{ dilation_y }}
+#define {{ name|upper }}_DILATION_X {{ dilation_x }}
 
 {# Kernel : #}
 #define {{ name|upper }}_KERNEL_HEIGHT {{ kernel[1] }}
 #define {{ name|upper }}_KERNEL_WIDTH {{ kernel[0] }}
 
 {# Activation : #}
-#define {{ name|upper }}_ACTIVATION_MIN {{ activation[0] }}
-#define {{ name|upper }}_ACTIVATION_MAX {{ activation[1] }}
+#define {{ name|upper }}_ACTIVATION_MIN {{ activation_min }}
+#define {{ name|upper }}_ACTIVATION_MAX {{ activation_max }}
 
 {# Offset #}
 #define {{ name|upper }}_INPUT_OFFSET {{ input_offset}}
diff --git a/aidge_export_arm_cortexm/_CMSIS-NN/templates/configuration/arm_fully_connected.jinja b/aidge_export_arm_cortexm/_CMSIS-NN/templates/configuration/arm_fully_connected.jinja
index b975ccdec5ff41dbabf7c4cdd25de8a9fecaace0..ac6c2583977d0e799b5d316a2dcef0ebdeb84cc0 100644
--- a/aidge_export_arm_cortexm/_CMSIS-NN/templates/configuration/arm_fully_connected.jinja
+++ b/aidge_export_arm_cortexm/_CMSIS-NN/templates/configuration/arm_fully_connected.jinja
@@ -4,15 +4,16 @@
 {# For layer configuration -#}
 
 {# Batches : #}
-#define {{ name|upper }}_INPUT_BATCHES {{ input_batches }}
+#define {{ name|upper }}_INPUT_BATCHES {{ input_dims[0] }}
 
 {# Inputs : #}
 {# {% if gemm %} #}
-#define {{ name|upper }}_INPUT_NB_CHANNELS {{ input_dims }}
-#define {{ name|upper }}_INPUT_HEIGHT 1
-#define {{ name|upper }}_INPUT_WIDTH 1
+#define {{ name|upper }}_INPUT_NB_CHANNELS {{ input_dims[1] }}
+#define {{ name|upper }}_INPUT_HEIGHT {{ input_dims[2] }}
+#define {{ name|upper }}_INPUT_WIDTH {{ input_dims[3] }}
 
-#define {{ name|upper }}_ACCUMULATION_DEPTH {{ input_dims }}
+{% set accumulation_depth = input_dims[1] * input_dims[2] * input_dims[3] %}
+#define {{ name|upper }}_ACCUMULATION_DEPTH {{ accumulation_depth }}
 
 #define {{ name|upper }}_OUTPUT_CH {{ output_channels }}
 #define {{ name|upper }}_ACTIVATION_MAX {{ activation_max }}
diff --git a/aidge_export_arm_cortexm/_CMSIS-NN/templates/configuration/max_pooling.jinja b/aidge_export_arm_cortexm/_CMSIS-NN/templates/configuration/max_pooling.jinja
index 9830271b091be82e1c86c82e8c1f2af6307b9b39..2efe8132deca828dd7458a3666c7414e15fd320f 100644
--- a/aidge_export_arm_cortexm/_CMSIS-NN/templates/configuration/max_pooling.jinja
+++ b/aidge_export_arm_cortexm/_CMSIS-NN/templates/configuration/max_pooling.jinja
@@ -27,7 +27,7 @@
 #define {{ name|upper }}_KERNEL_WIDTH {{ kernel[0] }}
 
 {# Activation : #}
-#define {{ name|upper }}_ACTIVATION_MIN {{ activation[0] }}
-#define {{ name|upper }}_ACTIVATION_MAX {{ activation[1] }}
+#define {{ name|upper }}_ACTIVATION_MIN {{ activation_min }}
+#define {{ name|upper }}_ACTIVATION_MAX {{ activation_max }}
 
 #endif /* {{ name|upper }}_LAYER_H */
\ No newline at end of file
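# --- Note on the configuration templates above ---
# A minimal sketch of what the reworked arm_fully_connected.jinja macros
# resolve to for a 4D NCHW input such as [1, 16, 4, 4]. The template string is
# inlined and abbreviated here for illustration only; the real template lives
# in _CMSIS-NN/templates/configuration/arm_fully_connected.jinja.
from jinja2 import Template

SKETCH = (
    "#define {{ name|upper }}_INPUT_BATCHES {{ input_dims[0] }}\n"
    "#define {{ name|upper }}_INPUT_NB_CHANNELS {{ input_dims[1] }}\n"
    "#define {{ name|upper }}_INPUT_HEIGHT {{ input_dims[2] }}\n"
    "#define {{ name|upper }}_INPUT_WIDTH {{ input_dims[3] }}\n"
    "{% set accumulation_depth = input_dims[1] * input_dims[2] * input_dims[3] %}"
    "#define {{ name|upper }}_ACCUMULATION_DEPTH {{ accumulation_depth }}\n"
)
print(Template(SKETCH).render(name="fc1", input_dims=[1, 16, 4, 4]))
# -> FC1_INPUT_BATCHES 1, FC1_INPUT_NB_CHANNELS 16, FC1_INPUT_HEIGHT 4,
#    FC1_INPUT_WIDTH 4, FC1_ACCUMULATION_DEPTH 256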
diff --git a/aidge_export_arm_cortexm/_CMSIS-NN/templates/kernel/arm_nn_activation.jinja b/aidge_export_arm_cortexm/_CMSIS-NN/templates/kernel/arm_nn_activation.jinja
index 583166d34e6880c004b23684d1a9b0083b70dcd5..4a4459f7e5346aa03e2857580fba45ccd114de12 100644
--- a/aidge_export_arm_cortexm/_CMSIS-NN/templates/kernel/arm_nn_activation.jinja
+++ b/aidge_export_arm_cortexm/_CMSIS-NN/templates/kernel/arm_nn_activation.jinja
@@ -1,3 +1,3 @@
 arm_relu6_s8({{ input_name }}, {{ name|upper }}_SIZE);
-    {{ output_name }} = {{ input_name }};
+    {{ output_name }} = {{ input_name }}; {# TODO:Wissam: don't copy #}
\ No newline at end of file
diff --git a/aidge_export_arm_cortexm/_CMSIS-NN/templates/kernel/max_pooling.jinja b/aidge_export_arm_cortexm/_CMSIS-NN/templates/kernel/max_pooling.jinja
index 546abc4e15d254004c5cd79ab8fcebaeaedd8bed..fc1c91679008bb0e98eabb6ccf2511c428b6c21c 100644
--- a/aidge_export_arm_cortexm/_CMSIS-NN/templates/kernel/max_pooling.jinja
+++ b/aidge_export_arm_cortexm/_CMSIS-NN/templates/kernel/max_pooling.jinja
@@ -1,37 +1,37 @@
-cmsis_nn_context ctx;
-cmsis_nn_conv_params pool_params_{{ name|lower }};
-cmsis_nn_dims input_dims_{{ name|lower }};
-cmsis_nn_dims filter_dims_{{ name|lower }};
-cmsis_nn_dims output_dims_{{ name|lower }};
+    cmsis_nn_context ctx;
+    cmsis_nn_pool_params pool_params_{{ name|lower }};
+    cmsis_nn_dims input_dims_{{ name|lower }};
+    cmsis_nn_dims filter_dims_{{ name|lower }};
+    cmsis_nn_dims output_dims_{{ name|lower }};
 
-input_dims_{{ name|lower }}.n = {{ name|upper }}_INPUT_BATCHES;
-input_dims_{{ name|lower }}.w = {{ name|upper }}_INPUT_W;
-input_dims_{{ name|lower }}.h = {{ name|upper }}_INPUT_H;
-input_dims_{{ name|lower }}.c = {{ name|upper }}_INPUT_CH;
+    input_dims_{{ name|lower }}.n = {{ name|upper }}_INPUT_BATCHES;
+    input_dims_{{ name|lower }}.w = {{ name|upper }}_INPUT_W;
+    input_dims_{{ name|lower }}.h = {{ name|upper }}_INPUT_H;
+    input_dims_{{ name|lower }}.c = {{ name|upper }}_INPUT_CH;
 
-filter_dims_{{ name|lower }}.w = {{ name|upper }}_FILTER_X;
-filter_dims_{{ name|lower }}.h = {{ name|upper }}_FILTER_Y;
+    filter_dims_{{ name|lower }}.w = {{ name|upper }}_FILTER_X;
+    filter_dims_{{ name|lower }}.h = {{ name|upper }}_FILTER_Y;
 
-output_dims_{{ name|lower }}.w = {{ name|upper }}_OUTPUTS_WIDTH;
-output_dims_{{ name|lower }}.h = {{ name|upper }}_OUTPUTS_HEIGHT;
-output_dims_{{ name|lower }}.c = {{ name|upper }}_OUT_CH;
+    output_dims_{{ name|lower }}.w = {{ name|upper }}_OUTPUTS_WIDTH;
+    output_dims_{{ name|lower }}.h = {{ name|upper }}_OUTPUTS_HEIGHT;
+    output_dims_{{ name|lower }}.c = {{ name|upper }}_OUT_CH;
 
-pool_params_{{ name|lower }}.padding.w = {{ name|upper }}_PAD_X;
-pool_params_{{ name|lower }}.padding.h = {{ name|upper }}_PAD_Y;
-pool_params_{{ name|lower }}.stride.w = {{ name|upper }}_STRIDE_X;
-pool_params_{{ name|lower }}.stride.h = {{ name|upper }}_STRIDE_Y;
-pool_params_{{ name|lower }}.activation.min = {{ name|upper }}_ACTIVATION_MIN;
-pool_params_{{ name|lower }}.activation.max = {{ name|upper }}_ACTIVATION_MAX;
+    pool_params_{{ name|lower }}.padding.w = {{ name|upper }}_PAD_X;
+    pool_params_{{ name|lower }}.padding.h = {{ name|upper }}_PAD_Y;
+    pool_params_{{ name|lower }}.stride.w = {{ name|upper }}_STRIDE_X;
+    pool_params_{{ name|lower }}.stride.h = {{ name|upper }}_STRIDE_Y;
+    pool_params_{{ name|lower }}.activation.min = {{ name|upper }}_ACTIVATION_MIN;
+    pool_params_{{ name|lower }}.activation.max = {{ name|upper }}_ACTIVATION_MAX;
 
-int32_t buf_size_{{ name|lower }} = arm_convolve_{{ format }}_get_buffer_size(&input_dims_{{ name|upper }}, &filter_dims_{{ name|upper }});
-ctx.buf = malloc(buf_size_{{ name|lower }});
-ctx.size = 0;
+    int32_t buf_size_{{ name|lower }} = arm_convolve_{{ format }}_get_buffer_size(&input_dims_{{ name|lower }}, &filter_dims_{{ name|lower }});
+    ctx.buf = malloc(buf_size_{{ name|lower }});
+    ctx.size = 0;
 
-{# TODO:Wissam: Find a way to choose avg or maxpooling #}
-arm_max_pool_{{ format }} ( &ctx,
-                            &pool_params_{{ name|lower }},
-                            &input_dims_{{ name|lower }},
-                            {{ input_name }},
-                            &filter_dims_{{ name|lower }},
-                            &output_dims_{{ name|lower }},
-                            {{ output_name }});
\ No newline at end of file
+    {# TODO:Wissam: Find a way to choose avg or max pooling #}
+    arm_max_pool_{{ format }}(&ctx,
+                              &pool_params_{{ name|lower }},
+                              &input_dims_{{ name|lower }},
+                              {{ input_name }},
+                              &filter_dims_{{ name|lower }},
+                              &output_dims_{{ name|lower }},
+                              {{ output_name }});
\ No newline at end of file
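# --- Note on the pooling kernel template above ---
# The OUTPUTS_WIDTH / OUTPUTS_HEIGHT macros it consumes follow the usual
# pooling shape arithmetic. A minimal standalone sketch (the plain
# floor-division formula, not code taken from the export itself):
def pool_output_size(in_size: int, kernel: int, stride: int, pad: int = 0) -> int:
    # Number of valid kernel positions along one spatial axis
    return (in_size + 2 * pad - kernel) // stride + 1

assert pool_output_size(28, kernel=2, stride=2) == 14  # e.g. LeNet-style 2x2/2 pooling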
diff --git a/aidge_export_arm_cortexm/_CMSIS-NN/templates/kernel/scaling.jinja b/aidge_export_arm_cortexm/_CMSIS-NN/templates/kernel/scaling.jinja
new file mode 100644
index 0000000000000000000000000000000000000000..1a68b0b1664003101721b6d0782037ab0e2ff343
--- /dev/null
+++ b/aidge_export_arm_cortexm/_CMSIS-NN/templates/kernel/scaling.jinja
@@ -0,0 +1 @@
+    {{ output_name }} = {{ input_name }};
\ No newline at end of file
diff --git a/aidge_export_arm_cortexm/export.py b/aidge_export_arm_cortexm/export.py
index 8cd92a71d809092544836d8f5eb9f4ae1ae1bb57..a5b85ed60db9792323c9ae248a354184690e9871 100644
--- a/aidge_export_arm_cortexm/export.py
+++ b/aidge_export_arm_cortexm/export.py
@@ -46,12 +46,13 @@ def export(export_folder,
     list_actions = []
     list_configs = []
-    for node in list_forward_nodes:
-        print(node.type())
+
+    # Export layer configurations
     for node in list_forward_nodes:
-        nt = node.type()
-        if node.type() in supported_operators():
+        # Avoid special characters in node names
+        node.set_name(str(re.sub(r'\W+', '_', node.name())))
+        if node.type() in supported_operators() and node.type() != "Scaling":
             op = OPERATORS_REGISTRY[node.type()](node,
                                                  board,
                                                  dataformat=format,
                                                  library="aidge")
 
             # Export the configuration
@@ -66,7 +67,7 @@ def export(export_folder,
     mem_offsets = []
     mem_size = 0
     for i, node in enumerate(list_forward_nodes):
-        if node.type() != "Producer":
+        if node.type() != "Producer" and node.type() != "Scaling":
             if i != len(list_forward_nodes) - 1:
                 mem_offsets.append(f"{node.name().upper()}_OFFSET {mem_size}")
                 dims = node.get_operator().get_output(0).dims()
@@ -75,10 +77,7 @@ def export(export_folder,
                 mem *= dim
             mem_size += mem
 
-<<<<<<< HEAD
-=======
->>>>>>> 7dbab60 (Add export function)
 
     # Generate memory file
     aidge_export_arm_cortexm.operators.generate_file(
         f"{dnn_folder}/memory/mem_info.h",
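# --- Note on the mem_info layout above ---
# Each retained layer's output is packed back to back into one static arena;
# a layer's offset is the running total of the output sizes before it. A
# standalone sketch of that layout logic (illustrative names, not the export
# API):
from functools import reduce

def assign_offsets(layer_output_dims):
    """Map layer name -> byte offset for int8 outputs packed contiguously."""
    offsets, cursor = {}, 0
    for layer_name, dims in layer_output_dims.items():
        offsets[layer_name] = cursor
        cursor += reduce(lambda a, b: a * b, dims, 1)  # one byte per int8_t element
    return offsets

print(assign_offsets({"conv1": [1, 8, 24, 24], "pool1": [1, 8, 12, 12]}))
# {'conv1': 0, 'pool1': 4608}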
diff --git a/aidge_export_arm_cortexm/operators.py b/aidge_export_arm_cortexm/operators.py
index 83ba492e01a76b9ae0c7889e317b672bce72d387..d2b037e4890d54c6cd20f2119a686ceb41820584 100644
--- a/aidge_export_arm_cortexm/operators.py
+++ b/aidge_export_arm_cortexm/operators.py
@@ -1,9 +1,9 @@
 import os
 import shutil
+import math
 import numpy as np
 from pathlib import Path
 from jinja2 import Environment, FileSystemLoader
-
 from aidge_core import ExportNode
 
 from aidge_export_arm_cortexm.utils import ROOT, operator_register
@@ -85,13 +85,44 @@ def get_producer_parents(node):
         parents.append(parent)
     return parents
 
-def normalize_to_range(arr, new_min, new_max):
-    min_value = np.min(arr)
-    max_value = np.max(arr)
-    normalized_array = ((arr - min_value) * (new_max - new_min)) / (max_value - min_value) + new_min
-    normalized_array = np.trunc(normalized_array)
-    normalized_array = np.int_(normalized_array)
-    return normalized_array
+
+def calculate_shift_and_multiplier(scaling_factor, quant_bits, max_shift):
+
+    # Absolute value of the scaling factor
+    abs_scaling_factor = abs(scaling_factor)
+
+    # Number of bits required to represent the scaling factor as a fixed-point number
+    scaling_factor_bits = int(np.ceil(np.log2(abs_scaling_factor)))
+
+    # Number of fractional bits available in the fixed-point representation
+    frac_bits = quant_bits - scaling_factor_bits - 1
+
+    # Shift value (negative, i.e. a right shift by frac_bits)
+    shift = -frac_bits
+
+    # Integer multiplier value
+    multiplier = int(np.round(2**frac_bits / abs_scaling_factor))
+
+    shift = min(shift, max_shift)
+    return shift, multiplier
+
+def get_scaling(node):
+    # The Scaling node, if any, is the direct child of the current node
+    children = node.get_children()
+    if len(children) != 0:
+        scaling_node = children.pop()
+    else:
+        return 0, 1
+
+    if scaling_node.type() != "Scaling":
+        return 0, 1
+
+    scaling_op = scaling_node.get_operator()
+    scaling_factor = scaling_op.get_attr("scalingFactor")
+    quantized_nb_bits = scaling_op.get_attr("quantizedNbBits")
+    shift, multiplier = calculate_shift_and_multiplier(scaling_factor, quantized_nb_bits, 31)
+    return shift, multiplier
+
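# --- Worked example for calculate_shift_and_multiplier above ---
# (explanatory only, values checked by hand):
#   scaling_factor = 0.0125, quant_bits = 8
#   scaling_factor_bits = ceil(log2(0.0125)) = -6
#   frac_bits = 8 - (-6) - 1 = 13   ->   shift = -13
#   multiplier = round(2**13 / 0.0125) = 655360
# The returned pair then satisfies multiplier * 2**shift ~= 1 / scaling_factor:
assert 655360 * 2**-13 == 80.0  # == 1 / 0.0125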
@@ -582,12 +613,13 @@ class Producer(ExportNode):
 
 #TODO:Wissam : For now i use the name Gemm, find a better way to organise that
 #TODO:Wissam : Use loop in .jinja to factorize the code
 #TODO:Wissam : Use adapted format
-@operator_register("FC")
+@operator_register("FC", "FCOp")
 class FC(ExportNode):
     def __init__(self, node, board, dataformat, library):
 
         w_dims = node.get_operator().get_input(1).dims()
-        node.get_operator().get_output(0).resize([w_dims[1]])
+        output = node.get_operator().get_output(0)
+        output.resize([w_dims[1]], [1])
         super().__init__(node)
@@ -597,6 +629,9 @@ class FC(ExportNode):
         self.batch = 1 #TODO:Wissam: let the user choose the batch size ?
         activation_min = -128
         activation_min = 127
+
+        self.shift, self.multiplier = get_scaling(node)
+
         if(self.__format__ == "int8_t"):
             activation_min = -128
             activation_min = 127
@@ -617,24 +652,43 @@ class FC(ExportNode):
             copyfile(str(ROOT / "_CMSIS-NN" / "CMSIS-NN" / "Source" / "NNSupportFunctions" / "arm_nn_vec_mat_mult_t_s8.c"),
                      str(Path(export_folder) / "src" / "kernels"))
+
+
+        # Add this layer's configuration header to the config list
         list_configs.append(f"layers/{self.name}.h")
-
-        # Export configuration file
-        generate_file(
+        if len(self.inputs_dims[0]) != 4:
+            generate_file(
             f"{export_folder}/layers/{self.name}.h",
             str(ROOT /"_CMSIS-NN"/"templates"/"configuration"/"arm_fully_connected.jinja"),
             name=self.name,
             input_batches=self.batch,
-            input_dims=self.inputs_dims[1][1],
+            input_dims=[1, self.inputs_dims[0][1], 1, 1],
             output_channels=self.inputs_dims[2][0],
             activation_min = -128,
             activation_max = 127,
-            input_offset = -128,
-            output_offset = -128,
-            multiplier = 1,
-            shift = 0
+            input_offset = 0,
+            output_offset = 0,
+            multiplier = self.multiplier,
+            shift = self.shift
             )
+        else:
+            generate_file(
+                f"{export_folder}/layers/{self.name}.h",
+                str(ROOT /"_CMSIS-NN"/"templates"/"configuration"/"arm_fully_connected.jinja"),
+                name=self.name,
+                input_batches=self.batch,
+                input_dims=self.inputs_dims[0],
+                output_channels=self.outputs_dims[0][0],
+                activation_min = -128,
+                activation_max = 127,
+                input_offset = 0,
+                output_offset = 0,
+                multiplier = self.multiplier,
+                shift = self.shift
+            )
+
+        # Export configuration file
+
         return list_configs
@@ -683,7 +737,7 @@ class ReLU(ExportNode):
             str(ROOT /"_CMSIS-NN"/"templates"/"configuration"/"arm_nn_activation.jinja"),
             name=self.name,
             format="s8",
-            size=self.inputs_dims[0][0],
+            size=self.inputs_dims[0][1],
         )
 
         return list_configs
@@ -712,15 +766,7 @@ class Conv(ExportNode):
         node.get_operator().get_output(0).resize([w_dims[1]])
         super().__init__(node)
 
-        # if node.op_type == "PaddedConv" :
-        #     self.kernel = node.get_operator().get_attr("KernelDims")
-        #     self.stride = node.get_operator().get_attr("StrideDims")
-        #     # Not working anymore because Padding is a standalone operator
-        #     self.padding = node.get_operator().get_attr("PaddingDims")
-        #     self.dilation = node.get_operator().get_attr("DilationDims")
-        #     self.nb_channels = node.get_operator().get_attr("InChannels")
-        #     self.nb_outputs = node.get_operator().get_attr("OutChannels")
-
+
         self.board = board
         self.library = library
@@ -792,7 +838,6 @@ class PaddedConv(ExportNode):
         node.get_operator().get_output(0).resize([w_dims[1]])
         super().__init__(node)
-        parent = node.get_parent(0)
 
         self.board = board
         self.library = library
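# --- Note on the two FC configuration branches above ---
# Both branches feed the same template; the first one just promotes a flat
# input to an NCHW quadruple so that input_dims[1..3] are always defined.
# Equivalent standalone logic (illustrative only):
def fc_input_dims(dims):
    """Return [N, C, H, W] as expected by arm_fully_connected.jinja."""
    if len(dims) != 4:
        return [1, dims[1], 1, 1]  # flat input: treat as 1x1 spatial extent
    return dims

assert fc_input_dims([1, 256]) == [1, 256, 1, 1]
assert fc_input_dims([1, 16, 4, 4]) == [1, 16, 4, 4]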
@@ -801,8 +846,7 @@
         #get_micro_graph where is the scheduler ??
         # PaddedConv is separated by one node Padding and one node Conv
-
-        for op in node.get_operator().get_micro_graph().get_nodes():
+        for op in node.get_operator().get_micro_graph().get_nodes():
             if op.type() == "Conv" :
                 self.kernel = op.get_operator().get_attr("KernelDims")
                 self.stride = op.get_operator().get_attr("StrideDims")
@@ -814,6 +858,11 @@
                 self.border_value = op.get_operator().get_attr("BorderValue")
                 self.begin_end_borders = op.get_operator().get_attr("BeginEndBorders")
 
+        self.activation_min = -128
+        self.activation_max = 127
+        if self.dataformat == "int8_t":
+            self.activation_min = -128
+            self.activation_max = 127
 
     def export(self, export_folder:str, list_configs:list):
@@ -839,13 +888,16 @@ class PaddedConv(ExportNode):
             input_batches=self.batch,
             #TODO:Wissam: why for lenet the output is not good ?
-            input_dims=self.inputs_dims[0][1:],
+            input_dims=self.inputs_dims[0],
             output_dims=self.outputs_dims[0],
-            kernel=self.inputs_dims[1][2:],
+            kernel=self.kernel,
             stride=self.stride,
             padding=self.begin_end_borders,
-            dilation=self.dilation,
-        )
+            dilation_y=self.dilation[1],
+            dilation_x=self.dilation[0],
+            activation_min=self.activation_min,
+            activation_max=self.activation_max
+        )
 
         return list_configs
 
     def forward(self, list_actions:list):
@@ -869,10 +921,6 @@
 @operator_register("MaxPooling")
 class MaxPooling(ExportNode):
     def __init__(self, node, board, dataformat, library):
-
-        w_dims = node.get_operator().get_input(1).dims()
-        node.get_operator().get_output(0).resize([w_dims[1]])
-
         super().__init__(node)
 
         self.board = board
@@ -882,10 +930,15 @@
         self.kernel = node.get_operator().get_attr("KernelDims")
         self.stride = node.get_operator().get_attr("StrideDims")
-        # Not supported by the core...
         # self.padding = node.get_operator().get_attr("PaddingDims")
         self.padding = [0, 0]
+
+        self.activation_min = -128
+        self.activation_max = 127
+        # if self.dataformat == "int8_t":
+        #     self.activation_min = -128
+        #     self.activation_max = 127
 
     def export(self, export_folder:str, list_configs:list):
         # Copying kernel into export
@@ -893,7 +946,7 @@ class MaxPooling(ExportNode):
         if self.library == "aidge":
             if True :#if self.dataformat == "int8_t":
-                copyfile(str(ROOT / "_CMSIS-NN" / "CMSIS-NN" / "Source" / "MaxPooling" / "arm_max_pool_s8.c"),
+                copyfile(str(ROOT / "_CMSIS-NN" / "CMSIS-NN" / "Source" / "PoolingFunctions" / "arm_max_pool_s8.c"),
                          str(Path(export_folder) / "src" / "kernels"))
                 copyfile(str(ROOT / "_CMSIS-NN" / "CMSIS-NN" / "Include" / "arm_nnfunctions.h"),
                          str(Path(export_folder) / "Include" ))
@@ -904,7 +957,7 @@ class MaxPooling(ExportNode):
         # Export configuration file
         generate_file(
             f"{export_folder}/layers/{self.name}.h",
-            str(ROOT /"_CMSIS-NN"/"templates"/"configuration"/ "arm_convolve.jinja"),
+            str(ROOT /"_CMSIS-NN"/"templates"/"configuration"/ "max_pooling.jinja"),
             name=self.name,
             format="s8",
             input_batches=self.batch,
@@ -913,6 +966,8 @@ class MaxPooling(ExportNode):
             kernel=self.kernel,
             stride=self.stride,
             padding=self.padding,
+            activation_min=self.activation_min,
+            activation_max=self.activation_max
         )
 
         return list_configs
@@ -921,13 +976,11 @@ class MaxPooling(ExportNode):
         list_actions.append(set_up_output(self.name, "int8_t"))
 
         list_actions.append(generate_action(
-            str(ROOT /"_CMSIS-NN"/"templates"/"kernel"/"arm_convolve.jinja"),
+            str(ROOT /"_CMSIS-NN"/"templates"/"kernel"/"max_pooling.jinja"),
            name=self.name,
            format="s8",
            dataformat = "int8_t",
            input_name=self.inputs[0].name(),
-           weight_name=self.inputs[1].name(),
-           bias_name=self.inputs[2].name(),
            output_name=self.name
        ))
         return list_actions
@@ -984,6 +1037,32 @@ class Reshape(ExportNode):
     def forward(self, list_actions:list):
         if not self.is_last:
             list_actions.append(set_up_output(self.name, "int8_t"))
+        return list_actions
+
+@operator_register("Scaling")
+class Scaling(ExportNode):
+    def __init__(self, node, board, dataformat, library):
+        super().__init__(node)
+        self.board = board
+        self.library = library
+        self.dataformat = dataformat
+        self.batch = 1 #TODO:Wissam: let the user choose the batch size ?
+
+    def export(self, export_folder:str, list_configs:list):
+
+        return list_configs
+
+    def forward(self, list_actions:list):
+
+        list_actions.append(generate_action(
+            str(ROOT /"_CMSIS-NN"/"templates"/"kernel"/"scaling.jinja"),
+            format="s8",
+            dataformat = "int8_t",
+            input_name=self.inputs[0].name(),
+            output_name=self.name
+        ))
         return list_actions
+
\ No newline at end of file
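# --- Note on the Scaling passthrough above ---
# The Scaling node now renders as a plain copy (scaling.jinja) and is skipped
# during configuration export; the actual rescaling is carried by the
# multiplier/shift pair that get_scaling() folds into the preceding layer.
# Illustrative integer simulation of that folded requantization, with
# shift = -frac_bits as produced by calculate_shift_and_multiplier (not the
# export code itself):
import numpy as np

def requantize(acc, multiplier, shift):
    # (acc * multiplier) >> frac_bits  ~=  acc / scaling_factor
    return (acc.astype(np.int64) * multiplier) >> -shift

acc = np.array([1000, -2000, 4096])
print(requantize(acc, multiplier=655360, shift=-13))
# [  80000 -160000  327680]  ~  acc / 0.0125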