Commit 6754e159 authored by Cyril Moineau

Fix LeNet int8 export

* Add code to download model from Hugging Face
* Fix set_backend that was wrongly placed
* Update code to compile and execute the export inside the script
"""
create_lenet.py
This file creates a simple LeNet network using the MNIST dataset.
It is meant to be used by the lenet.py file.
"""
import random
import torch
from torchvision import datasets, transforms
import torch.nn as nn
import torch.nn.functional as F
# Download the MNIST Dataset
def get_mnist_dataset():
    transform = transforms.ToTensor()
    train_set = datasets.MNIST(root='./data', train=True, transform=transform, download=True)
    test_set = datasets.MNIST(root='./data', train=False, transform=transform, download=True)
    return train_set, test_set
# Create the lenet model
class Classifier(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.network = nn.Sequential(
            nn.Conv2d(1, 32, 5),    # 28 -> 24
            nn.ReLU(),
            nn.MaxPool2d(2, 2),     # 24 -> 12
            nn.Conv2d(32, 32, 5),   # 12 -> 8
            nn.ReLU(),
            nn.MaxPool2d(2, 2),     # 8 -> 4
            nn.Flatten(),
            nn.Linear(32*4*4, 100),
            nn.ReLU(),
            nn.Linear(100, 100),
            nn.ReLU(),
            nn.Linear(100, 10)
        )

    def forward(self, x):
        return self.network(x)
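
# Quick shape sanity check of the architecture above (a sketch, not part of the
# original file): a 1x1x28x28 MNIST image should yield 10 logits.
#   assert Classifier()(torch.zeros(1, 1, 28, 28)).shape == (1, 10)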
# Compute accuracy function
def compute_accuracy(model, data_set, nb_samples):
    nb_valid = 0
    for it in range(nb_samples):
        # get a sample
        sample_idx = torch.randint(len(data_set), size=(1,)).item()
        img, label = data_set[sample_idx]
        # compute the output
        x = torch.reshape(img, (1, 1, 28, 28))
        y_h = model.forward(x)
        pred_label = torch.argmax(y_h).item()
        if label == pred_label:
            nb_valid = nb_valid + 1
    return nb_valid / nb_samples
# Train the model
def train_model(NB_ITERATION, CHECK_PERIOD, train_set, test_set, classifier):
    accuracy_history = []
    for it in range(NB_ITERATION):
        sample_idx = random.randint(0, len(train_set)-1)
        img, label = train_set[sample_idx]
        x = torch.flatten(img)
        x = torch.reshape(x, (1, 1, 28, 28))
        y = torch.zeros(1, 10)
        y[0][label] = 1
        y_h = classifier.forward(x)
        loss = F.mse_loss(y_h, y)
        loss.backward()
        # Plain SGD update on the parameters
        for p in classifier.parameters():
            with torch.no_grad():
                p -= 0.01 * p.grad
                p.grad.zero_()
        if it % CHECK_PERIOD == 0:
            accuracy = compute_accuracy(classifier, test_set, CHECK_PERIOD)
            accuracy_history.append(accuracy)
            print(f'it {it}: accuracy = {accuracy:.8f} ')
def create_lenet():
    # Get Dataset
    train_set, test_set = get_mnist_dataset()
    # Create model
    classifier = Classifier()
    # Train model
    NB_ITERATION = 50000
    CHECK_PERIOD = 3000
    print("NB_ITERATIONS = ", NB_ITERATION)
    print("CHECK_PERIOD = ", CHECK_PERIOD)
    print("\nTraining LeNet...")
    train_model(NB_ITERATION, CHECK_PERIOD, train_set, test_set, classifier)
    # Export as ONNX
    x = torch.Tensor(1, 1, 28, 28)
    torch.onnx.export(classifier.network, x, 'lenet.onnx', verbose=False,
                      input_names=["input"], output_names=["output"])
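
# Optional convenience entry point (an addition, not in the original file):
# running this module directly trains the network and writes ./lenet.onnx.
if __name__ == "__main__":
    create_lenet()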
"""
lenet.py
Run this file to export a LeNet using the Aidge CPP Export module.
"""
import os
import shutil
import random
import aidge_core.utils
import numpy as np
import subprocess
# Aidge Modules
import aidge_core
@@ -67,7 +69,7 @@ elif args.verbose == 1:
elif args.verbose == 2:
    aidge_core.Log.set_console_level(aidge_core.Level.Info)
elif args.verbose >= 3:
    aidge_core.Log.set_console_level(aidge_core.Level.Debug)

if USE_CUDA:
    import aidge_backend_cuda
@@ -79,39 +81,39 @@ if USE_CUDA:
"""
Export configuration details :
- RNG_SEED : Fix a random seed for torch to always get the same images from the dataset,
therefore always getting the same output.
therefore always getting the same output.
- NB_TEST : Number of example inferences to perform (used to get an accuracy approximation).
- NB_CALIB : Number of samples used for the calibration step of quantization.
- MODEL_NAME : Should be the same name as the onnx file you want to load and export.
- NB_CALIB : Number of samples used for the calibration step of quantization.
- MODEL_NAME : Should be the same name as the onnx file you want to load and export.
- DO_EXAMPLES : Perform example inferences (and allow to get accuracy approximation)
- NB_BITS : Quantization output precision. Should be 8 to work with this export.
- NB_BITS : Quantization output precision. Should be 8 to work with this export.
- TARGET_TYPE : The aidge datatype for tensors to be casted after the quantization step [float64, float32, int32].
- OPTIM_SIGN : Quantization optional optimization based on data sign.
- OPTIM_SIGN : Quantization optional optimization based on data sign.
- SINGLE_SHIFT : Quantization option specifying if inserted scaling nodes should be
single shift or floating point.
- NO_QUANT : Skip the quantization step.
- CLIPPING : Clipping method during quantization.
- CLIPPING : Clipping method during quantization.
- FOLD_GRAPH : The quantization step adds cast nodes to cast the graph into the given TARGET_TYPE.
Enabling the FOLD_GRAPH will automatically fold these nodes into the following
ones at the end of quantization step.
ones at the end of quantization step.
- USE_CUDA : Determine if the quantization step uses the GPU. It is generally recommended
to enable this option if you have access to GPUs as the quantization step
may take a while to complete.
- DEV_MODE : The dev mode allows to identify errors more easily exporting the model with
may take a while to complete.
- DEV_MODE : The dev mode allows to identify errors more easily exporting the model with
symbolic links enabling to modify the source files directly in the
generated export (make sure you installed the export plugin running
`pip install -e .`).
Enabled running this python file, adding the --dev argument.
`pip install -e .`).
Enabled running this python file, adding the --dev argument.
- AIDGE_CMP : Saves and export the outputs generated by the aidge inferences in order
to compare it with the export outputs.
Enabled running this python file, adding the --aidge_cmp argument.
to compare it with the export outputs.
Enabled running this python file, adding the --aidge_cmp argument.
"""
print(" Available backends : ", aidge_core.Tensor.get_available_backends())
quantize_model = False
NB_BITS = 32
TARGET_TYPE = aidge_core.dtype.float32

if args.dtype == "float32":
    quantize_model = False
@@ -124,7 +126,7 @@ else:
print(f"[ERROR] Supported datatypes : {supported_types}.")
exit(1)
RNG_SEED = 1234
RNG_SEED = 1234
NB_TEST = 10 # Example inferences
NB_CALIB = 20 # Calibration set
MODEL_NAME = 'lenet'
@@ -135,7 +137,7 @@ DO_EXAMPLES = True
OPTIM_SIGN = False
SINGLE_SHIFT = True
ROUNDING = True
NO_QUANT = False
CLIPPING = aidge_quantization.Clipping.MSE # 'MAX'
FOLD_GRAPH = True
@@ -164,16 +166,14 @@ backend = "cuda" if USE_CUDA else "cpu"
# GET THE LENET MODEL
# ------------------------------------------------------------
"""
The LeNet model is created and trained using the create_lenet file.
If a lenet.onnx file is already present in the current folder, this step will be skiped.
The generated network is not yet quantized.
The LeNet model is created and trained using the create_lenet file.
If a lenet.onnx file is already present in the current folder, this step will be skiped.
The generated network is not yet quantized.
"""
from create_lenet import create_lenet
if not os.path.isfile("./lenet.onnx"):
print("\nTraining LeNet...")
create_lenet()
# Define the target path and filename
file_url = "https://huggingface.co/EclipseAidge/LeNet/resolve/main/lenet_mnist.onnx?download=true"
file_path = MODEL_NAME + "_mnist.onnx"
aidge_core.utils.download_file(file_path, file_url)
# --------------------------------------------------------------
# CREATE THE SAMPLES
@@ -185,8 +185,8 @@ test_set = datasets.MNIST(root='./data', train=False, transform=transform, down
tensors = []
labels = []
index = 0
for in_tensor, label in test_set:
    array = np.array(in_tensor)
    array = np.reshape(array, (1, 1, 28, 28))
    tensor = aidge_core.Tensor(array)
    tensor.set_backend(backend)
@@ -204,11 +204,11 @@ for in_tensor, label in test_set:
"""
Load the .onnx model and perform some usual graph modifications :
- Remove the flatten nodes;
- Fuse the batchnorm nodes into the biases producers.
- Expand the metaOperators to perform the desired fusions.
"""
model = aidge_onnx.load_onnx(file_path, verbose=False)
aidge_core.remove_flatten(model)
aidge_core.fuse_batchnorm(model)
aidge_core.expand_metaops(model)
@@ -220,14 +220,14 @@ model.save("imported_model")
"""
The scheduler is an ordered version of the model, allowing the nodes to be
scheduled so that inferences can be run.
"""
# Set up the backend
model.set_datatype(aidge_core.dtype.float32)
model.set_backend(backend)
# Create the Scheduler
scheduler = aidge_core.SequentialScheduler(model)
# --------------------------------------------------------------
@@ -235,11 +235,12 @@ scheduler = aidge_core.SequentialScheduler(model)
# --------------------------------------------------------------
def propagate(model, scheduler, tensor):
    """
    Propagate the given tensor into the model and return the
    output tensor.
    """
    print(f"Propagate: {tensor.backend()}")
    # Run the inference
    scheduler.forward(True, [tensor])
    # Gather the results
    output_node = model.get_output_nodes().pop()
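    # [Assumed completion] The rest of propagate() is collapsed in this view; it
    # presumably reads the output tensor of that node and returns it as a numpy
    # array, roughly as below (exact calls in the real file may differ).
    output_tensor = output_node.get_operator().get_output(0)
    return np.array(output_tensor)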
@@ -253,6 +254,7 @@ if (DO_EXAMPLES):
    nb_valid = 0
    base_values = []
    for i in range(NB_TEST):
        print(f"Inference: {tensors[i].backend()}")
        output_array = propagate(model, scheduler, tensors[i])
        print(labels[i], ' VS ', np.argmax(output_array), ' -> ', np.max(output_array))
        base_values.append(np.max(output_array))
@@ -263,18 +265,18 @@ if (DO_EXAMPLES):
# --------------------------------------------------------------
# PERFORM THE QUANTIZATION
# --------------------------------------------------------------
if quantize_model:
    aidge_quantization.quantize_network(
        network = model,
        nb_bits = NB_BITS,
        calibration_set = tensors[0:NB_CALIB],
        clipping_mode = CLIPPING,
        target_type = TARGET_TYPE,
        no_quant = NO_QUANT,
        optimize_signs = OPTIM_SIGN,
        single_shift = SINGLE_SHIFT,
        use_cuda = USE_CUDA,
        fold_graph = FOLD_GRAPH)
@@ -293,31 +295,36 @@ model.save("post_ptq_model")
# --------------------------------------------------------------
"""
Once the quantization is done, the graph only accepts integer inputs, so the
dataset has to be rescaled into the quantized input range (with NB_BITS = 8,
rescaling = 2**(8-1) - 1 = 127, so a normalized pixel value of 1.0 becomes 127).
Also, the tensors should be cast to the same type as TARGET_TYPE.
"""
if quantize_model:
    rescaling = 2**(NB_BITS-1)-1
    for i in range(NB_TEST):
        tensors[i].set_backend("cpu")
        array = np.array(tensors[i]) * rescaling
        array = np.round(array).astype(int)
        tensors[i] = aidge_core.Tensor(array)
        tensors[i].set_datatype(TARGET_TYPE)
        tensors[i].set_backend("cpu")

# Setting model to CPU for export
model.set_backend("cpu")
# --------------------------------------------------------------
# GENERATE NEW SCHEDULER
# --------------------------------------------------------------
"""
Each time the graph has been changed, the scheduler has to be reset.
Here some Quantizer and Cast nodes have been added.
"""
""" [Issue]
We first need to manually add an input tensor with the correct datatype,
as this is not automatically done in PTQ.
"""
if quantize_model:
    input_node = model.get_ordered_inputs()[0]
@@ -329,10 +336,11 @@ if quantize_model:
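
# [Assumed sketch] The collapsed lines above presumably finish this step: they give
# the first graph input a placeholder tensor of the right datatype and rebuild the
# scheduler so that the newly inserted Quantizer/Cast nodes are scheduled. The exact
# calls in the real file may differ from this sketch.
#
#   in_node, in_idx = input_node
#   dummy = aidge_core.Tensor(np.zeros((1, 1, 28, 28), dtype=np.int32))
#   dummy.set_datatype(TARGET_TYPE)
#   in_node.get_operator().set_input(in_idx, dummy)
#   scheduler = aidge_core.SequentialScheduler(model)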
# --------------------------------------------------------------
if (DO_EXAMPLES and quantize_model):
    print('\n QUANTIZED EXAMPLE INFERENCES:')
    nb_valid = 0
    post_values = []
    for i in range(NB_TEST):
        print(f"QEI: {tensors[i].backend()}")
        output_array = propagate(model, scheduler, tensors[i])
        print(labels[i], ' VS ', np.argmax(output_array), ' -> ', np.max(output_array))
        post_values.append(np.max(output_array))
@@ -343,12 +351,6 @@ if (DO_EXAMPLES and quantize_model):
    print('\n MODEL ACCURACY = ', accuracy * 100, '%')
    print('\n QUANTIZED ACCURACY = ', quant_accuracy * 100, '%')
# --------------------------------------------------------------
# FUSE NODES INTO METAOPS
@@ -361,28 +363,28 @@ are performed separately (Pad -> Conv -> Quantizer -> ReLU -> ...).
However within the CPP export, some core operators are merged
in meta operators. For instance, the padding, scaling and ReLU are
performed within the Conv kernel.
In this step, we use graph regex techniques to find the desired patterns
within the graph in order to match the export implementation of the kernels.
"""
# Exclude unwanted producers
"""
Before fusing the nodes, we set a tag on the Producers in order to exclude
from the export the ones holding coefficients, as they are directly handled
within the layers' parameters.
"""
exclude_unwanted_producers(model)
# Fuse nodes
cpp_fuse_to_metaops(model)
# Remove optional inputs
"""
Some optional inputs may be added by the quantization step (for instance with the clipping nodes).
Here we make sure that they will not be considered as actual graph inputs by the export, by
excluding them from the ordered_inputs list of the model.
"""
remove_optional_inputs(model)
@@ -395,7 +397,7 @@ scheduler.reset_scheduling()
# Name newly created MetaOps
"""
As names are optional in Aidge, the fuse_to_metaops function will not automatically
give a name to the newly created metaOps. However, in an export context, we need
our operators to be named, as this will be used to name the corresponding files.
"""
@@ -407,11 +409,11 @@ set_nodes_names(scheduler)
# --------------------------------------------------------------
"""
Here a final inference is made on the input we want to export and run.
This will ensure that all the feature map tensors (between the layers)
hold the data corresponding to this specific input.
Then, the "log_outputs()" function (called later) will store these tensors
into log files that may be exported as well for comparison purposes.
"""
output_array = propagate(model, scheduler, tensors[0])
@@ -436,7 +438,7 @@ if quantize_model:
# Store tensors values into log files
"""
Once the tensors have been cast, the log_outputs() function can be
called to store their values into log files.
"""
if os.path.isdir("log_outputs"):
@@ -448,11 +450,11 @@ model.log_outputs("log_outputs")
# --------------------------------------------------------------
"""
The test mode is mainly used for validation and benchmarking. The model will be
exported in such a way that each layer's result is compared with the CPU implementation.
The timings for each layer will be displayed.
In case of error, you will be able to enter debug mode, showing in-layer data or
changing the inputs of the layer, to isolate the source of the issue.
"""
for node in model.get_nodes():
@@ -463,12 +465,12 @@ for node in model.get_nodes():
# --------------------------------------------------------------
"""
If the --aidge_cmp option is enabled, the feature maps generated by aidge with the
cpu backend will be exported in the generated export. They will be used as a reference
to verify that the results of the optimized kernels are correct for the exported
model.
This option has to be passed to each node in order to be used within the Export Nodes
(JConv, JPad, ...) that you can find in the "operators" folder.
"""
if AIDGE_CMP:
@@ -481,10 +483,22 @@ if AIDGE_CMP:
model.save("exported_model")
aidge_export_cpp.export(EXPORT_FOLDER,
                        model,
                        scheduler,
                        # tensors[0],
                        labels = aidge_core.Tensor(labels[0]),
                        dev_mode = DEV_MODE,
                        aidge_cmp = AIDGE_CMP)
print("\n### Compiling the export ###")
try:
    for std_line in aidge_core.utils.run_command(["make"], cwd=EXPORT_FOLDER):
        print(std_line, end="")
except subprocess.CalledProcessError as e:
    raise RuntimeError("An error occurred, failed to build export.") from e
print("\n### Running the export ###")
try:
    for std_line in aidge_core.utils.run_command(["./bin/run_export"], cwd=EXPORT_FOLDER):
        print(std_line, end="")
except subprocess.CalledProcessError as e:
    raise RuntimeError("An error occurred, failed to run export.") from e
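
# Typical invocations (a usage sketch; the flag names follow the configuration
# docstring above, and the actual parser sits in a collapsed part of the file):
#   python lenet.py --dtype int8
#   python lenet.py --dtype int8 --dev -v
#   python lenet.py --dtype int8 --aidge_cmp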