""" resnet.py This file allows the generation of a resnet18 CPP export. In order for this file to work properly, you should first download the imagenet dataset (search for "ILSVRC2012"). """ import random import numpy as np import os import shutil from PIL import Image # Aidge Modules import aidge_core import aidge_onnx import aidge_backend_cpu import aidge_quantization import aidge_export_cpp from aidge_export_cpp.export_utils import ( cpp_fuse_to_metaops, exclude_unwanted_producers, set_nodes_names, set_nodes_datatypes, normalize) from aidge_core.export_utils import remove_optional_inputs # Torch (Dataset) import torch import torch.nn.functional as F from torch import nn from torchvision import transforms, datasets # Arguments import argparse supported_types = ["float32", "int8"] parser = argparse.ArgumentParser(description="Export the ResNet18 model with the aidge_export_cpp module.") parser.add_argument("--dev", action="store_true", help="Export in dev mode") parser.add_argument("--no_cuda", action="store_true", help="Disable USE_CUDA usage to perform inferences and training.") parser.add_argument("--dtype", type=str, choices=supported_types, default="float32", help="Specify the targeted datatype : [int8, float32]") parser.add_argument("--aidge_cmp", action="store_true", help="Use aidge tensor results as reference.") parser.add_argument( '-v', '--verbose', action='count', default=0, help = ( "Set the verbosity level of the console output." "Use -v to increase verbosity, with the following levels in ascending ordern" "default WARN - Only warnings and higher (WARN, ERROR, FATAL) are displayed.n" "-v NOTICE - Notices and higher (NOTICE, WARN, ERROR, FATAL) are displayed.n" "-vv INFO - Informational messages and higher (INFO, NOTICE, WARN, ERROR, FATAL) are displayed.n" "-vvv DEBUG - All messages, including debug information, are displayed.n" "Available levels in descending order of severityn" "DEBUG < INFO < NOTICE < WARN < ERROR < FATAL." ) ) args = parser.parse_args() USE_CUDA = not args.no_cuda # Setting Aidge verbose level if args.verbose == 0: aidge_core.Log.set_console_level(aidge_core.Level.Error) elif args.verbose == 1: aidge_core.Log.set_console_level(aidge_core.Level.Notice) elif args.verbose == 2: aidge_core.Log.set_console_level(aidge_core.Level.Info) elif args.verbose >= 3: aidge_core.Log.set_console_level(aidge_core.Level.Debug) if USE_CUDA: import aidge_backend_cuda # ------------------------------------------------------------ # EXPORT CONFIG # ------------------------------------------------------------ """ Export configuration details : - RNG_SEED : Fix a random seed for torch to always get the same images from the dataset, therefore always getting the same output. - NB_TEST : Number of example inferences to perform (used to get an accuracy approximation). - NB_CALIB : Number of samples used for the calibration step of quantization. - MODEL_NAME : Should be the same name as the onnx file you want to load and export. - DO_EXAMPLES : Perform example inferences (and allow to get accuracy approximation) - NB_BITS : Quantization output precision. Should be 8 to work with this export. - TARGET_TYPE : The aidge datatype for tensors to be casted after the quantization step. - OPTIM_SIGN : Quantization optional optimization based on data sign. - SINGLE_SHIFT : Quantization option specifying if inserted scaling nodes should be single shift or floating point. - ROUNDING : Apply rounding on the data after the single shift step. - NO_QUANTIZATION : Skip the quantization step. Should be set to False. - CLIPPING : Clipping method during quantization. - FOLD_GRAPH : The quantization step adds cast nodes to cast the graph into the given TARGET_TYPE. Enabling the FOLD_GRAPH will automatically fold these nodes into the following ones at the end of quantization step. - USE_CUDA : Determine if the quantization step uses the GPU. It is generally recommended to enable this option if you have access to GPUs as the quantization step may take a while to complete. - DEV_MODE : The dev mode allows to identify errors more easily export the model with symbolic links enabling to modify the source files directly in the generated export (make sure you installed the export plugin running `pip install -e .`). Enabled running this python file, adding the --test argument. - AIDGE_MODE : Saves and export the outputs generated by the aidge inferences in order to compare it with the export outputs. Enabled running this python file, adding the --aidge_cmp argument. """ print(" Available backends : ", aidge_core.Tensor.get_available_backends()) quantize_model = False NB_BITS = 32 TARGET_TYPE = aidge_core.dtype.float32 if args.dtype == "float32": quantize_model = False elif args.dtype == "int8": quantize_model = True NB_BITS = 8 TARGET_TYPE = aidge_core.dtype.int32 # int8 not yet available else: print(f"[ERROR] Datatype '{args.dtype}' not supported.") print(f"[ERROR] Supported datatypes : {supported_types}.") exit(1) RNG_SEED = 1234 NB_TEST = 20 # Test set NB_CALIB = 20 # Calibration set MODEL_NAME = 'resnet18' EXPORT_FOLDER = f"export_{MODEL_NAME}_int8" DO_EXAMPLES = True # Quantization params OPTIM_SIGN = False SINGLE_SHIFT = True ROUNDING = True NO_QUANTIZATION = False CLIPPING = aidge_quantization.Clipping.MSE # 'MAX' FOLD_GRAPH = True # Export modes DEV_MODE = args.dev AIDGE_CMP = args.aidge_cmp ### Add your paths here ### IMAGENET_PATH = "/database/ILSVRC2012/val" # Look for ILSVRC2012/val VAL_PATH = "/database/ILSVRC2012/val.txt" # File containing labels of image of val folder (Look for val.txt) ########################### def print_cfg(): print('\n RNG_SEED = ', RNG_SEED) print(' MODEL_NAME = ', MODEL_NAME) print(' NB_TEST = ', NB_TEST) print(' NB_CALIB = ', NB_CALIB) print(' NB_BITS = ', NB_BITS) print(' OPTIM_SIGN = ', OPTIM_SIGN) print(' NO_QUANTIZATION = ', NO_QUANTIZATION) print(' CLIPPING = ', CLIPPING) print(' SINGLE_SHIFT = ', SINGLE_SHIFT) print(' TARGET_TYPE = ', TARGET_TYPE) print(' FOLD_GRAPH = ', FOLD_GRAPH) print(' USE_CUDA = ', USE_CUDA) print(' DEV_MODE = ', DEV_MODE) print(' ROUNDING = ', ROUNDING) print_cfg() torch.manual_seed(RNG_SEED) random.seed(RNG_SEED) np.random.seed(RNG_SEED) backend = "cuda" if USE_CUDA else "cpu" image_label_pairs = [] with open(VAL_PATH, 'r') as f: for line in f: parts = line.strip().split() if len(parts) == 2: image_name, label = parts image_label_pairs.append((image_name, int(label))) np.random.seed(RNG_SEED) NB_SELECT = max(NB_TEST, NB_CALIB) # Check that NB_TEST and NB_CALIB are fixed selected_pairs = image_label_pairs[:NB_SELECT] # -------------------------------------------------------------- # CREATE THE SAMPLES # -------------------------------------------------------------- transform_val = transforms.Compose([transforms.Resize(256), transforms.CenterCrop(224), transforms.ToTensor(), transforms.Normalize(mean=[0.485,0.456, 0.406], std=[0.229, 0.224, 0.225]) ]) tensors = [] labels = [] paths = [] index = 0 for image_name, label in selected_pairs: image_path = os.path.join(IMAGENET_PATH, image_name) if os.path.exists(image_path): try: image = Image.open(image_path) if image.mode != 'RGB': image = image.convert('RGB') tensor = transform_val(image) tensors.append(tensor) labels.append(label) paths.append(image_path) except Exception as e: print(f"Error with image {image_path}: {e}") backend = "cuda" if USE_CUDA else "cpu" aidge_tensors = [] for tensor in tensors: array = tensor.numpy() array = np.reshape(array, (1, 3, 224, 224)) array = normalize(array) aidge_tensor = aidge_core.Tensor(array) aidge_tensor.set_backend(backend) aidge_tensor.set_datatype(aidge_core.dtype.float32) aidge_tensors.append(aidge_tensor) # -------------------------------------------------------------- # LOAD THE MODEL # -------------------------------------------------------------- """ Load the .onnx model and perform some usual graph modifications : - Remove the flatten nodes; - Fuse the batchnorm nodes into the biases producers. """ model = aidge_onnx.load_onnx(MODEL_NAME + ".onnx", verbose=False) model.save("imported_model") aidge_core.remove_flatten(model) aidge_core.fuse_batchnorm(model) model.save("imported_model_fused_bn") # -------------------------------------------------------------- # SET UP THE AIDGE SCHEDULER # -------------------------------------------------------------- """ The scheduler is an ordered version of the model, allowing to schedule nodes to be able to run inferences, for instance. """ # Set up the backend model.set_datatype(aidge_core.dtype.float32) model.set_backend(backend) # Create the Scheduler scheduler = aidge_core.SequentialScheduler(model) # -------------------------------------------------------------- # RUN SOME EXAMPLES INFERENCES # -------------------------------------------------------------- def propagate(model, scheduler, aidge_tensor): """ Propagate the given tensor into the model """ # Run the inference scheduler.forward(True, [aidge_tensor]) # Gather the results output_node = model.get_output_nodes().pop() output_tensor = output_node.get_operator().get_output(0).clone() output_tensor.set_backend("cpu") return np.array(output_tensor) accuracy = 0 if (DO_EXAMPLES): print('\n EXAMPLE INFERENCES :') nb_valid = 0 base_values = [] for i in range(NB_TEST): output_array = propagate(model, scheduler, aidge_tensors[i]) print(labels[i], ' VS ', np.argmax(output_array), ' -> ', np.max(output_array)) base_values.append(np.max(output_array)) if (labels[i] == np.argmax(output_array)): nb_valid += 1 accuracy = nb_valid / NB_TEST print('\n MODEL ACCURACY = ', accuracy * 100, '%') #-------------------------------------------------------------- # PERFORM THE QUANTIZATION # -------------------------------------------------------------- if quantize_model: aidge_quantization.quantize_network( network = model, nb_bits = NB_BITS, input_dataset = aidge_tensors[0:NB_CALIB], clipping_mode = CLIPPING, target_type = TARGET_TYPE, no_quantization = NO_QUANTIZATION, optimize_signs = OPTIM_SIGN, single_shift = SINGLE_SHIFT, use_cuda = USE_CUDA, fold_graph = FOLD_GRAPH, bitshift_rounding = ROUNDING) # -------------------------------------------------------------- # RESCALE THE INPUT SAMPLES # -------------------------------------------------------------- """ Once the quantization is done, the graph now only accepts integer inputs. So we need to rescale the dataset for the data to be within [0, 255]. Also, tensors should be casted to be the same type as TARGET_TYPE. """ if quantize_model: rescaling = 2**(NB_BITS-1)-1 for i in range(max(NB_TEST, NB_CALIB)): array = np.array(aidge_tensors[i]) * rescaling aidge_tensors[i] = aidge_core.Tensor(array) aidge_tensors[i].set_datatype(TARGET_TYPE) # -------------------------------------------------------------- # GENERATE NEW SCHEDULER # -------------------------------------------------------------- """ Each time the graph has been change, it has to be reset. Here some Quantizer and Cast nodes have been added. """ if quantize_model: scheduler.reset_scheduling() # -------------------------------------------------------------- # PERFORM THE EXAMPLE INFERENCES AGAIN # -------------------------------------------------------------- model.save("post_ptq") if (DO_EXAMPLES and quantize_model): print('\n QUANTIZED EXAMPLE INFERENCES :') nb_valid = 0 post_values = [] for i in range(NB_TEST): output_array = propagate(model, scheduler, aidge_tensors[i]) print(labels[i], ' VS ', np.argmax(output_array), ' -> ', np.max(output_array)) post_values.append(np.max(output_array)) if (labels[i] == np.argmax(output_array)): nb_valid += 1 quant_accuracy = nb_valid / NB_TEST print('\n MODEL ACCURACY = ', accuracy * 100, '%') print('\n QUANTIZED ACCURACY = ', quant_accuracy * 100, '%') if USE_CUDA: model.set_backend("cpu") for aidge_tensor in aidge_tensors: aidge_tensor.set_backend("cpu") # -------------------------------------------------------------- # FUSE NODES INTO METAOPS # -------------------------------------------------------------- """ Here is made the link between the Aidge model and the CPP kernels implementation. In aidge, all the nodes calculations are performed separately (Pad -> Conv -> Quantizer -> ReLU -> ...). However within the CPP export, some core operators are merged in meta operators. For instance, the padding, scaling and ReLU are performed within the Conv kernel. In this step, we use graph regex techniques to find the desired patterns within the graph in order to match the export implementation of the kernels. """ # Expand meta ops """ We first need to expand the graph to break all the metaops that may already exist. For instance, PaddedConv will become Pad -> Conv. """ aidge_core.expand_metaops(model) model.save("after_expand") # Exclude unwanted producers """ Before fusing the nodes, we set a tag on the Producers in order to exclude from the export the ones holding coefficients, as they are directly handled within the layers parameters. """ exclude_unwanted_producers(model) # Fuse nodes cpp_fuse_to_metaops(model) # Remove optional inputs """ Some optional inputs may be added by the quantization step (for instance with the clipping nodes). Here we make sure that they will not be considered as actual graph inputs by the export, by excluding them from the ordered_inputs list of the model. """ remove_optional_inputs(model) # Reset scheduler to apply graph modifications """ The scheduler always needs to be reset after graph manipulation. """ scheduler.reset_scheduling() # Name newly created MetaOps """ As names are optional in Aidge, the fuse_to_metaops function will not automatically give a name to the newly created metaOps. However, in an export context, we need our operators to be named, as this will be used to name the corresponding files. """ scheduler.generate_scheduling() # Scheduler needs to be generated as it has just been reset set_nodes_names(scheduler) # -------------------------------------------------------------- # LOG OUTPUTS FOR THE LAST IMAGE OF THE TEST DATASET # -------------------------------------------------------------- """ Here a final inference is made on the input we want to export and run. This will ensure that all the feature maps tensors (between the layers) hold the data corresponding to this specific input. Then, the "log_outputs()" function (called later) will store these tensors into log files that may be exported as well for comparison purpose. """ output_array = propagate(model, scheduler, aidge_tensors[0]) print("### Exported Sample ###") print("Aidge prediction after quantization :", np.argmax(output_array), "(" + str(np.max(output_array)) + ")") print("Label :", labels[0]) # -------------------------------------------------------------- # HANDLING DATATYPE # -------------------------------------------------------------- """ Now, despite the quantization stage, all the tensors of the model are still "virtually" in Int32. Before exporting the model, we have to set tensors' datatypes to Int8, except for biases which should remain in Int32. """ if quantize_model: set_nodes_datatypes(model) # Store tensors values into log files """ Once the tensors has been casted, the log_outputs() function can be called to store their values into log files. """ if os.path.isdir("log_outputs"): shutil.rmtree("log_outputs") model.log_outputs("log_outputs") # -------------------------------------------------------------- # TEST MODE # -------------------------------------------------------------- """ The test mode is mainly used for validation and benchmark. The model will be exported in a way that each layer's result will be compared with the CPU implementation. The timings for each layer will be displayed. In case of error, you will be able to enter debug mode, showing in-layer data or changing the inputs of the layer, to isolate the source of the issue. """ for node in model.get_nodes(): node.attributes().dev_mode = DEV_MODE # -------------------------------------------------------------- # AIDGE CMP # -------------------------------------------------------------- """ If the --aidge_cmp option is enabled, the feature maps generated by aidge with the backend cpu will be exported in the generated export. It will be used as reference to verify that the results with the optimized kernels are correct for the exported model. This option has to be passed to each node in order to be used within the Export Nodes. (JConv, JPad, ...) that you can find in the "export_gen/operator_export" folder. """ for node in model.get_nodes(): node.attributes().aidge_cmp = AIDGE_CMP # -------------------------------------------------------------- # EXPORT THE MODEL # -------------------------------------------------------------- model.save("exported_model") inputs_tensor = aidge_core.Tensor(np.array(aidge_tensors[0])) inputs_tensor.set_data_format(aidge_core.dformat.nchw) # Init the dataformat (default -> nchw) inputs_tensor.set_data_format(aidge_core.dformat.nhwc) # Transpose the data (nchw -> nhwc) if args.dtype == "int8": inputs_tensor.set_datatype(aidge_core.dtype.int8) aidge_export_cpp.export(EXPORT_FOLDER, model, scheduler, labels = aidge_core.Tensor(labels[0]), inputs_tensor=inputs_tensor, dev_mode = DEV_MODE, aidge_cmp = AIDGE_CMP)