Review notes -- free-form text placed ahead of the first "diff --git" header.

Purpose of the patch
  * .gitignore: ignore uni_tests/fully_connected and drop a stray "s" line.
  * arm_convolve.jinja / arm_fully_connected.jinja: append a "{% if debug %}"
    section that printf-dumps the layer parameters and the input/output buffers.
  * export.py: export() gains a "debug" keyword (default True). The visible hunk
    does not show where it is consumed.
  * operators.py: adds convert_to_int8() and uses it in export_to_static();
    replaces the body of calculate_shift_and_multiplier(); FC stores
    self.debug = True, forwards it to the kernel template and reads the
    input / output-channel sizes from different inputs_dims entries.
  * uni_tests/: adds a binary sample archive and operator_unitest.py.

Fixed in this revision (added "+" lines only)
  1. arm_fully_connected.jinja: printf("\r Input Data : \n") had no terminating
     ';', so the rendered C could not compile whenever debug was set (and FC
     sets self.debug = True unconditionally).
  2. operator_unitest.py:
     - the result of np.reshape() in the calibration loop was discarded; it is
       now assigned, matching what propagate() already does;
     - range(100) / np.random.rand(10) now use NB_SAMPLES / input_size (same
       values for the only caller);
     - the four absolute /home/... paths are derived from the script location
       and the output directory is created before it is written to;
     - torch.manual_seed() is called before the model is built so the layer
       weights are reproducible as well;
     - main() is guarded by __name__ and the stray "a = 0" is gone;
     - French comments are translated to English.
     The hunk header is updated from +1,151 to +1,158 accordingly.

Open review points (left unchanged on purpose)
  * arm_convolve.jinja prints "Output dims" and sizes the output dump loop with
    the INPUT_W / INPUT_H macros. NOTE(review): only right if the output
    spatial size equals the input one -- confirm, otherwise the loop reads past
    the output buffer.
  * calculate_shift_and_multiplier() is flagged by its author as not working;
    quant_bits and max_shift are no longer used by the new body.
  * FC context lines assign activation_min twice (127 then -128/127);
    presumably one of each pair was meant to be activation_max -- verify.
  * debug defaults to True in export() and is hard-coded to True in FC.
  * convert_to_int8() casts with astype(np.int8) and does no clipping.
  * quantize_network() declares the producer as [1, 1, input_size, 1] but feeds
    tensors reshaped to (1, input_size, 1, 1) -- confirm which is intended.

Reconstruction caveat
  The source of this patch had all line breaks and indentation collapsed.
  Leading indentation and the position of whitespace-only context lines were
  rebuilt by best guess so that every hunk matches its recorded line counts.
  "index" blob ids are kept as recorded and no longer describe the edited
  files. Regenerate the patch from a working tree before relying on it.

diff --git a/.gitignore b/.gitignore
index df7ad736e7f8ccf30ed205c8eba8640950a47b03..4eb918445c6113ef073d726fe7fef0534e2ffd3f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -35,8 +35,8 @@ xml*/
 
 # Test Export
 test*/
+uni_tests/fully_connected
 
 # CMSIS-NN
 aidge_export_arm_cortexm/_CMSIS-NN/CMSIS-NN*/
 
-s
\ No newline at end of file
diff --git a/aidge_export_arm_cortexm/_CMSIS-NN/templates/kernel/arm_convolve.jinja b/aidge_export_arm_cortexm/_CMSIS-NN/templates/kernel/arm_convolve.jinja
index b1ea59914e09bd09cc9f96c4f0d447d2f995a381..d8a52dc03393f747f529c64ee874cbb85a4ab4ac 100644
--- a/aidge_export_arm_cortexm/_CMSIS-NN/templates/kernel/arm_convolve.jinja
+++ b/aidge_export_arm_cortexm/_CMSIS-NN/templates/kernel/arm_convolve.jinja
@@ -50,3 +50,49 @@
                 {{ biases_name }},
                 &output_dims_{{ name|lower }},
                 {{ output_name }});
+
+    {% if debug %}
+    printf("\r\n");
+    printf("\r ------------------ {{ name|upper }} ---------------------- \n");
+    printf("\r Input offset : %d \n",{{ name|upper }}_INPUT_OFFSET);
+    printf("\r Output offset : %d \n",{{ name|upper }}_OUTPUT_OFFSET);
+    printf("\r Input dims : n = %d, w = %d, h = %d, c = %d \n",
+           {{ name|upper }}_INPUT_BATCHES, {{ name|upper }}_INPUT_W,
+           {{ name|upper }}_INPUT_H, {{ name|upper }}_INPUT_CH);
+    printf("\r Filter dims : w = %d, h = %d \n",
+           {{ name|upper }}_FILTER_X, {{ name|upper }}_FILTER_Y);
+    printf("\r Output dims : w = %d, h = %d, c = %d \n",
+           {{ name|upper }}_INPUT_W, {{ name|upper }}_INPUT_H, {{ name|upper }}_OUT_CH);
+    printf("\r Padding : w = %d, h = %d \n",
+           {{ name|upper }}_PAD_X, {{ name|upper }}_PAD_Y);
+    printf("\r Stride : w = %d, h = %d \n",
+           {{ name|upper }}_STRIDE_X, {{ name|upper }}_STRIDE_Y);
+    printf("\r Dilation : w = %d, h = %d \n",
+           {{ name|upper }}_DILATION_X, {{ name|upper }}_DILATION_Y);
+    printf("\r Activation : min = %d, max = %d \n",
+           {{ name|upper }}_ACTIVATION_MIN, {{ name|upper }}_ACTIVATION_MAX);
+    printf("\r Quantization params : multiplier = %d, shift = %d \n",
+           {{ name|upper }}_OUTPUT_MULT, {{ name|upper }}_OUTPUT_SHIFT);
+    printf("\r Buffer size : %d \n", buf_size_{{ name|lower }});
+
+    printf("\r Input Data : \n");
+    printf("\r [");
+    for(int idx_input_{{ name|lower }} = 0; idx_input_{{ name|lower }} < {{ name|upper }}_INPUT_BATCHES * {{ name|upper }}_INPUT_W * {{ name|upper }}_INPUT_H * {{ name|upper }}_INPUT_CH; ++idx_input_{{ name|lower }}) {
+        if(idx_input_{{ name|lower }} % 10 == 0 && idx_input_{{ name|lower }} != 0) {
+            printf("\r \n");
+        }
+        printf(" %d ,", {{ input_name }}[idx_input_{{ name|lower }}]);
+    }
+    printf("]\n");
+
+    printf("\r Output Data : \n");
+    printf("\r [");
+    for(int idx_output_{{ name|lower }} = 0; idx_output_{{ name|lower }} < {{ name|upper }}_INPUT_BATCHES * {{ name|upper }}_INPUT_W * {{ name|upper }}_INPUT_H * {{ name|upper }}_OUT_CH; ++idx_output_{{ name|lower }}) {
+        if(idx_output_{{ name|lower }} % 10 == 0 && idx_output_{{ name|lower }} != 0) {
+            printf("\r \n");
+        }
+        printf(" %d ,", {{ output_name }}[idx_output_{{ name|lower }}]);
+    }
+    printf("]\n");
+
+    {% endif %}
diff --git a/aidge_export_arm_cortexm/_CMSIS-NN/templates/kernel/arm_fully_connected.jinja b/aidge_export_arm_cortexm/_CMSIS-NN/templates/kernel/arm_fully_connected.jinja
index dd61acf515ae8858b4082bd6f5f8d5855c255737..86d2a21b10d5b6c21ea805c538a6bb88a5e6af28 100644
--- a/aidge_export_arm_cortexm/_CMSIS-NN/templates/kernel/arm_fully_connected.jinja
+++ b/aidge_export_arm_cortexm/_CMSIS-NN/templates/kernel/arm_fully_connected.jinja
@@ -48,4 +48,43 @@
         {{output_name}}
     );
 
-    if(ctx_{{name|lower}}.buf) free(ctx_{{name|lower}}.buf);
\ No newline at end of file
+    if(ctx_{{name|lower}}.buf) free(ctx_{{name|lower}}.buf);
+
+    {% if debug %}
+    printf("\r \n");
+    printf("\r ------------------ {{ name|upper }} ---------------------- \n");
+    printf("\r Input offset : %d \n",{{ name|upper }}_INPUT_OFFSET);
+    printf("\r Output offset : %d \n",{{ name|upper }}_OUTPUT_OFFSET);
+    printf("\r Input dims : n = %d, w = %d, h = %d, c = %d \n",
+           {{ name|upper }}_INPUT_BATCHES, {{ name|upper }}_INPUT_WIDTH,
+           {{ name|upper }}_INPUT_HEIGHT, {{ name|upper }}_INPUT_NB_CHANNELS);
+    printf("\r Filter dims : n = %d, c = %d \n",
+           {{ name|upper }}_ACCUMULATION_DEPTH, {{ name|upper }}_OUTPUT_CH);
+    printf("\r Output dims : n = %d, c = %d \n",
+           {{ name|upper }}_INPUT_BATCHES, {{ name|upper }}_OUTPUT_CH);
+    printf("\r Bias dims : c = %d \n", {{ name|upper }}_OUTPUT_CH);
+    printf("\r Quantization params : multiplier = %d, shift = %d \n",
+           {{ name|upper }}_MULTIPLIER, {{ name|upper }}_SHIFT);
+    printf("\r Buffer size : %d \n", buf_size_{{name|lower}});
+
+    printf("\r Input Data : \n");
+    printf("\r [");
+    for(int idx_input_{{name|lower}}=0 ; idx_input_{{name|lower}}<{{ name|upper }}_ACCUMULATION_DEPTH ; ++idx_input_{{name|lower}}){
+        if(idx_input_{{name|lower}}%10 == 0 && idx_input_{{name|lower}}!=0){
+            printf("\r \n");
+        }
+        printf(" %d ,",{{ input_name }}[idx_input_{{name|lower}}] );
+    }
+    printf("]\n");
+
+    printf("\r Output Data : \n");
+    printf("\r[");
+    for(int idx_output_{{name|lower}}=0 ; idx_output_{{name|lower}}< {{ name|upper }}_OUTPUT_CH ; ++idx_output_{{name|lower}}){
+        if(idx_output_{{name|lower}}%10 == 0 && idx_output_{{name|lower}}!=0){
+            printf("\r \n");
+        }
+        printf(" %d ,",{{ output_name }}[idx_output_{{name|lower}}] );
+    }
+    printf("]\n");
+
+    {% endif %}
\ No newline at end of file
diff --git a/aidge_export_arm_cortexm/export.py b/aidge_export_arm_cortexm/export.py
index a5b85ed60db9792323c9ae248a354184690e9871..284cb88e4fa7bc0c98548dbe738b3eefb40545c4 100644
--- a/aidge_export_arm_cortexm/export.py
+++ b/aidge_export_arm_cortexm/export.py
@@ -13,7 +13,8 @@ def export(export_folder,
            graphview,
            scheduler: list = None,
            board: str ="stm32h7",
-           format: str = "int8_t"):
+           format: str = "int8_t",
+           debug: bool = True):
 
     # Create export directory
     os.makedirs(export_folder, exist_ok=True)
diff --git a/aidge_export_arm_cortexm/operators.py b/aidge_export_arm_cortexm/operators.py
index d2b037e4890d54c6cd20f2119a686ceb41820584..f481a950d324470f8d1861884620ce0f6e3c3886 100644
--- a/aidge_export_arm_cortexm/operators.py
+++ b/aidge_export_arm_cortexm/operators.py
@@ -48,6 +48,11 @@ def copyfile(filename, dst_folder):
 
     shutil.copy(filename, dst_folder)
 
+def convert_to_int8(array: np.ndarray) -> np.ndarray:
+
+    int8_array = array.astype(np.int8)
+
+    return int8_array
 
 def export_to_static(name, array, filepath):
 
@@ -64,7 +69,7 @@ def export_to_static(name, array, filepath):
         dims = array.shape,
         data_t = "int8_t",
         name = name,
-        values = array.tolist()
+        values = convert_to_int8(array).tolist()
     )
 
 ##############################################
@@ -87,23 +92,11 @@ def get_producer_parents(node):
 
 
 def calculate_shift_and_multiplier(scaling_factor, quant_bits,max_shift):
-
-    # Calculate the absolute value of the scaling factor
-    abs_scaling_factor = abs(scaling_factor)
-
-    # Calculate the number of bits required to represent the scaling factor as a fixed-point number
-    scaling_factor_bits = int(np.ceil(np.log2(abs_scaling_factor)))
-
-    # Calculate the number of fractional bits required for the fixed-point representation
-    frac_bits = quant_bits - scaling_factor_bits - 1
-
-    # Calculate the shift value
-    shift = -frac_bits
-
-    # Calculate the multiplier value
-    multiplier = int(np.round(2**frac_bits / abs_scaling_factor))
+    #TODO:Wissam: this function doesn't work
+
+    shift = 31 - int(np.log2(scaling_factor))
+    multiplier = 2**shift
 
-    shift = min(shift,max_shift)
     return shift, multiplier
 
 def get_scaling(node):
@@ -631,7 +624,7 @@ class FC(ExportNode):
             activation_min = 127
 
         self.shift,self.multiplier = get_scaling(node)
-
+        self.debug = True
         if(self.__format__ == "int8_t"):
             activation_min = -128
             activation_min = 127
@@ -662,7 +655,7 @@ class FC(ExportNode):
                 str(ROOT /"_CMSIS-NN"/"templates"/"configuration"/"arm_fully_connected.jinja"),
                 name=self.name,
                 input_batches=self.batch,
-                input_dims=[1, self.inputs_dims[0][1], 1, 1],
+                input_dims=[1, self.inputs_dims[1][1], 1, 1],
                 output_channels=self.inputs_dims[2][0],
                 activation_min = -128,
                 activation_max = 127,
@@ -678,7 +671,7 @@ class FC(ExportNode):
                 name=self.name,
                 input_batches=self.batch,
                 input_dims=self.inputs_dims[0],
-                output_channels=self.outputs_dims[0][0],
+                output_channels=self.inputs_dims[2][0],
                 activation_min = -128,
                 activation_max = 127,
                 input_offset = 0,
@@ -704,7 +697,8 @@ class FC(ExportNode):
                 input_name=self.inputs[0].name(),
                 weight_name=self.inputs[1].name(),
                 bias_name=self.inputs[2].name(),
-                output_name=self.name
+                output_name=self.name,
+                debug = self.debug
             ))
 
         return list_actions
diff --git a/uni_tests/mnist_samples.npy.gz b/uni_tests/mnist_samples.npy.gz
new file mode 100644
index 0000000000000000000000000000000000000000..08ade345cd98d61f297c6c3e787d395b3af51056
Binary files /dev/null and b/uni_tests/mnist_samples.npy.gz differ
diff --git a/uni_tests/operator_unitest.py b/uni_tests/operator_unitest.py
new file mode 100644
index 0000000000000000000000000000000000000000..3036175d0c56bba0fa98265469f9382d115a845e
--- /dev/null
+++ b/uni_tests/operator_unitest.py
@@ -0,0 +1,158 @@
+from pathlib import Path
+
+import aidge_core
+import aidge_onnx
+import aidge_backend_cpu
+import aidge_export_arm_cortexm
+import aidge_quantization
+import numpy as np
+import aidge_export_cpp
+
+import torch
+import torch.nn as nn
+import torch.onnx
+
+# Generated artefacts are written next to this script rather than to an absolute, machine-specific path.
+EXPORT_DIR = Path(__file__).resolve().parent / "fully_connected"
+
+def normalize_and_convert_to_int8(array,min_value,max_value):
+
+    array_min = np.min(array)
+    array_max = np.max(array)
+    normalized_array = ((array - array_min) / (array_max - array_min)) * (max_value - min_value) + min_value
+
+    int8_array = normalized_array.astype(np.int8)
+
+    return int8_array
+
+def quantize_network(model_name, input_size):
+
+    NB_SAMPLES = 100
+    NB_BITS = 8
+
+    # --------------------------------------------------------------
+    # LOAD THE MODEL
+    # --------------------------------------------------------------
+
+    aidge_model = aidge_onnx.load_onnx(model_name, verbose=False)
+    aidge_core.remove_flatten(aidge_model)
+
+    # --------------------------------------------------------------
+    # SET UP THE AIDGE SCHEDULER
+    # --------------------------------------------------------------
+
+    # Insert the input producer
+    input_node = aidge_core.Producer([1, 1, input_size, 1], "XXX")
+    input_node.add_child(aidge_model)
+    aidge_model.add(input_node)
+
+    # Set up the backend
+    aidge_model.set_datatype(aidge_core.DataType.Float32)
+    aidge_model.set_backend("cpu")
+
+    # Create the Scheduler
+    scheduler = aidge_core.SequentialScheduler(aidge_model)
+
+    # --------------------------------------------------------------
+    # RUN SOME EXAMPLES INFERENCES
+    # --------------------------------------------------------------
+    np.random.seed(123)
+
+    def propagate(model, scheduler, sample):
+        # Setup the input
+        sample = np.reshape(sample, (1, input_size, 1, 1))
+        input_tensor = aidge_core.Tensor(sample)
+        # XXX HERE XXX
+        input_tensor.set_backend("cpu")
+        input_tensor.set_datatype(aidge_core.DataType.Float32)
+        input_node.get_operator().set_output(0, input_tensor)
+        # Run the inference
+        scheduler.forward()#verbose=False
+        # Gather the results
+        output_node = model.get_output_nodes().pop()
+        output_tensor = output_node.get_operator().get_output(0)
+        return np.array(output_tensor)
+
+    tensors = []
+    for _ in range(NB_SAMPLES):
+        samples = np.random.rand(input_size)
+        samples = np.reshape(samples, (1, input_size, 1, 1))  # keep the result: np.reshape does not modify its argument
+        tensor = aidge_core.Tensor(samples)
+        # XXX HERE XXX
+        tensor.set_backend("cpu")
+        tensor.set_datatype(aidge_core.DataType.Float32)
+        tensors.append(tensor)
+
+    aidge_quantization.check_architecture(aidge_model)
+    aidge_quantization.quantize_network(aidge_model, NB_BITS, tensors)
+    scheduler = aidge_core.SequentialScheduler(aidge_model)
+
+
+
+    samples = np.random.rand(input_size)
+    samples = normalize_and_convert_to_int8(samples,-128,127)
+    aidge_export_cpp.export_to_static("inputs", samples, str(EXPORT_DIR / "inputs.h"))
+
+    print("Input Values : ", samples)
+
+    input_array = np.reshape(samples, (1, input_size,1 , 1))
+    output_array = propagate(aidge_model, scheduler, input_array)
+    print("Output values : ", np.round(output_array, 2))
+
+    return aidge_model
+
+
+def uni_test_fully_connected():
+
+    class SimpleModel(nn.Module):
+        def __init__(self, input_size, output_size):
+            super(SimpleModel, self).__init__()
+            self.fc = nn.Linear(input_size, output_size)
+
+        def forward(self, x):
+            output = self.fc(x)
+            return output
+
+    # Define the input and output sizes
+    input_size = 10
+    output_size = 5
+
+    # Seed before creating the model so that its initial weights are reproducible too
+    torch.manual_seed(123)
+    model = SimpleModel(input_size, output_size)
+
+    example_input = torch.randn(1, input_size) # Shape of the example input
+
+    # Save the model as an ONNX file
+    EXPORT_DIR.mkdir(parents=True, exist_ok=True)  # the directory is git-ignored, so it may not exist yet
+    onnx_path = str(EXPORT_DIR / "simple_model.onnx")
+    torch.onnx.export(model,                  # model to export
+                      example_input,          # example input
+                      onnx_path,              # output file name
+                      input_names=['input'],  # name of the input tensor
+                      output_names=['output'],  # name of the output tensor
+                      dynamic_axes={'input' : {0 : 'batch_size'},  # input tensor with a variable batch axis
+                                    'output' : {0 : 'batch_size'}})  # output tensor with a variable batch axis
+
+    model = quantize_network(onnx_path,input_size)
+    aidge_export_arm_cortexm.export(str(EXPORT_DIR), model, board="stm32f7", debug=True)
+
+
+
+    return
+
+def uni_test_relu():
+    return
+
+def uni_test_conv():
+    return
+
+def uni_test_forward():
+    return
+
+def main():
+    uni_test_fully_connected()
+    return
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file