"""
lenet.py

Run this file to export a LeNet using the Aidge CPP Export module.
"""

import os
import shutil
import random
import aidge_core.utils
import numpy as np
import subprocess

# Aidge Modules
import aidge_core
import aidge_onnx
import aidge_backend_cpu
import aidge_quantization
import aidge_export_cpp

from aidge_export_cpp.export_utils import *

from aidge_core.export_utils import remove_optional_inputs, get_node_from_metaop

# Torch (Dataset)
import torch
import torch.nn.functional as F
from torch import nn
from torchvision import transforms, datasets

# Arguments
import argparse

supported_types = ["float32", "int8"]

parser = argparse.ArgumentParser(description="Export the LeNet model with the aidge_export_cpp module.")
parser.add_argument("--dev", action="store_true", help="Export in dev mode")
parser.add_argument("--mem_wrap", action="store_true", help="Use memory wrapping")
parser.add_argument("--no_cuda", action="store_true", help="Disable USE_CUDA usage to perform inferences and training.")
parser.add_argument("--dtype", type=str, choices=supported_types, default="float32", help="Specify the targeted datatype : [int8, float32]")
parser.add_argument("--aidge_cmp", action="store_true", help="Use aidge tensor results as reference.")
parser.add_argument(
    '-v', '--verbose',
    action='count',
    default=0,
    help=(
        "Set the verbosity level of the console output.\n"
        "Use -v to increase verbosity, with the following levels in ascending order:\n"
        "default WARN - Only warnings and higher (WARN, ERROR, FATAL) are displayed.\n"
        "-v NOTICE - Notices and higher (NOTICE, WARN, ERROR, FATAL) are displayed.\n"
        "-vv INFO - Informational messages and higher (INFO, NOTICE, WARN, ERROR, FATAL) are displayed.\n"
        "-vvv DEBUG - All messages, including debug information, are displayed.\n"
        "Available levels in ascending order of severity:\n"
        "DEBUG < INFO < NOTICE < WARN < ERROR < FATAL."
    )
)
args = parser.parse_args()

USE_CUDA = not args.no_cuda

# Setting Aidge verbose level
if args.verbose == 0:
    aidge_core.Log.set_console_level(aidge_core.Level.Error)
elif args.verbose == 1:
    aidge_core.Log.set_console_level(aidge_core.Level.Notice)
elif args.verbose == 2:
    aidge_core.Log.set_console_level(aidge_core.Level.Info)
elif args.verbose >= 3:
    aidge_core.Log.set_console_level(aidge_core.Level.Debug)

if USE_CUDA:
    import aidge_backend_cuda

# ------------------------------------------------------------
# EXPORT CONFIG
# ------------------------------------------------------------

"""
Export configuration details :
- RNG_SEED :        Fix the torch random seed so the same images are always drawn from the dataset,
                        and therefore the same outputs are always produced.
- NB_TEST :         Number of example inferences to perform (used to get an accuracy approximation).
- NB_CALIB :        Number of samples used for the calibration step of quantization.
- MODEL_NAME :      Should be the same name as the onnx file you want to load and export.
- DO_EXAMPLES :     Perform example inferences (and get an accuracy approximation).
- NB_BITS :         Quantization output precision. Should be 8 to work with this export.
- TARGET_TYPE :     The aidge datatype to which tensors are cast after the quantization step [float64, float32, int32].
- OPTIM_SIGN :      Quantization optional optimization based on data sign.
- SINGLE_SHIFT :    Quantization option specifying if inserted scaling nodes should be
                        single shift or floating point.
- NO_QUANT :        Skip the quantization step.
- CLIPPING :        Clipping method during quantization.
- FOLD_GRAPH :      The quantization step adds Cast nodes to cast the graph into the given TARGET_TYPE.
                        Enabling FOLD_GRAPH will automatically fold these nodes into the following
                        ones at the end of the quantization step.
- USE_CUDA :        Determines whether the quantization step uses the GPU. It is generally recommended
                        to enable this option if you have access to GPUs, as the quantization step
                        may take a while to complete.
- DEV_MODE :        Dev mode makes errors easier to track down by exporting the model with
                        symbolic links, so the source files can be modified directly in the
                        generated export (make sure the export plugin was installed with
                        `pip install -e .`).
                        Enabled by running this script with the --dev argument.
- AIDGE_CMP :       Saves and exports the outputs generated by the Aidge inferences so they
                        can be compared with the export outputs.
                        Enabled by running this script with the --aidge_cmp argument.
"""

print(" Available backends : ", aidge_core.Tensor.get_available_backends())

quantize_model = False
NB_BITS = 32
TARGET_TYPE = aidge_core.dtype.float32

if args.dtype == "float32":
    quantize_model = False
elif args.dtype == "int8":
    quantize_model = True
    NB_BITS = 8
    TARGET_TYPE = aidge_core.dtype.int32    # int8 not yet available
else:
    print(f"[ERROR] Datatype '{args.dtype}' not supported.")
    print(f"[ERROR] Supported datatypes : {supported_types}.")
    exit(1)

RNG_SEED      = 1234
NB_TEST       = 10 # Example inferences
NB_CALIB      = 20 # Calibration set
MODEL_NAME    = 'lenet'
EXPORT_FOLDER = f"export_{MODEL_NAME}_{args.dtype}"
DO_EXAMPLES   = True

# Quantization params
OPTIM_SIGN      = False
SINGLE_SHIFT    = True
ROUNDING        = True
NO_QUANT        = False
CLIPPING        = aidge_quantization.Clipping.MSE  # 'MAX'
FOLD_GRAPH      = True

# Export modes
DEV_MODE      = args.dev
AIDGE_CMP     = args.aidge_cmp

print('\n RNG_SEED         = ', RNG_SEED)
print(' MODEL_NAME       = ', MODEL_NAME)
print(' NB_TEST          = ', NB_TEST)
print(' NB_CALIB         = ', NB_CALIB)
print(' NB_BITS          = ', NB_BITS)
print(' OPTIM_SIGN       = ', OPTIM_SIGN)
print(' NO_QUANT         = ', NO_QUANT)
print(' CLIPPING         = ', CLIPPING)
print(' SINGLE_SHIFT     = ', SINGLE_SHIFT)
print(' USE_CUDA         = ', USE_CUDA)
print(' DEV_MODE         = ', DEV_MODE)

torch.manual_seed(RNG_SEED)
random.seed(RNG_SEED)

backend = "cuda" if USE_CUDA else "cpu"

# ------------------------------------------------------------
# CREATE THE LENET MODEL
# ------------------------------------------------------------
"""
The LeNet model is created and trained using the create_lenet file.
If a lenet.onnx file is already present in the current folder, this step will be skiped.
The generated network is not yet quantized.
"""
# Define the target path and filename
file_url = "https://huggingface.co/EclipseAidge/LeNet/resolve/main/lenet_mnist.onnx?download=true"
file_path = MODEL_NAME + "_mnist.onnx"
aidge_core.utils.download_file(file_path, file_url)

# --------------------------------------------------------------
# CREATE THE SAMPLES
# --------------------------------------------------------------

transform = transforms.ToTensor()
test_set  = datasets.MNIST(root='./data', train=False, transform=transform, download=True)

tensors = []
labels  = []
index = 0
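# Each MNIST sample is a 28x28 grayscale image: reshape it to the NCHW layout
# (1, 1, 28, 28) expected by the network and wrap it into an Aidge tensor on
# the selected backend.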
for in_tensor, label in test_set:
    array = np.array(in_tensor)
    array = np.reshape(array, (1, 1, 28, 28))
    tensor = aidge_core.Tensor(array)
    tensor.set_backend(backend)
    tensor.set_datatype(aidge_core.dtype.float32)
    tensors.append(tensor)
    labels.append(label)
    index += 1
    if (index == max(NB_TEST, NB_CALIB)):
        break

# --------------------------------------------------------------
# LOAD THE MODEL
# --------------------------------------------------------------

"""
Load the .onnx model and perform some usual graph modifications :
    - Remove the flatten nodes;
    - Fuse the batchnorm nodes into the biases producers.
    - Expand the metaOperators to perform the desired fusions.
"""

model = aidge_onnx.load_onnx(file_path, verbose=False)
aidge_core.remove_flatten(model)
aidge_core.fuse_batchnorm(model)
aidge_core.expand_metaops(model)
model.save("imported_model")

# --------------------------------------------------------------
# SET UP THE AIDGE SCHEDULER
# --------------------------------------------------------------

"""
The scheduler is an ordered version of the model, allowing to schedule
nodes to be able to run inferences, for instance.
"""

# Set up the backend
model.set_datatype(aidge_core.dtype.float32)
model.set_backend(backend)

# Create the Scheduler
scheduler = aidge_core.SequentialScheduler(model)

# --------------------------------------------------------------
# RUN SOME EXAMPLES INFERENCES
# --------------------------------------------------------------

def propagate(model, scheduler, tensor):
    """
    Propagate the given tensor into the model and return the
    output tensor.
    """
    print(f"Propagate: {tensor.backend()}")
    # Run the inference
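    # (The first argument asks the scheduler to also propagate the tensor dimensions
    # through the graph before computing, assuming the usual
    # SequentialScheduler.forward(forward_dims, data) signature.)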
    scheduler.forward(True, [tensor])
    # Gather the results
    output_node = model.get_ordered_outputs()[0][0]
    output_tensor = output_node.get_operator().get_output(0).clone()
    output_tensor.set_backend("cpu")
    return np.array(output_tensor)

accuracy = 0
if (DO_EXAMPLES):
    print('\n EXAMPLE INFERENCES :')
    nb_valid = 0
    base_values = []
    for i in range(NB_TEST):
        print(f"Inférence: {tensors[i].backend()}")
        output_array = propagate(model, scheduler, tensors[i])
        print(labels[i], ' VS ', np.argmax(output_array), ' -> ', np.max(output_array))
        base_values.append(np.max(output_array))
        if (labels[i] == np.argmax(output_array)):
            nb_valid += 1
    accuracy = nb_valid / NB_TEST
    print('\n MODEL ACCURACY = ', accuracy * 100, '%')

# --------------------------------------------------------------
# PERFORM THE QUANTIZATION
# --------------------------------------------------------------

if quantize_model:
    aidge_quantization.quantize_network(
        network = model,
        nb_bits = NB_BITS,
        calibration_set = tensors[0:NB_CALIB],
        clipping_mode = CLIPPING,
        target_type = TARGET_TYPE,
        no_quant = NO_QUANT,
        optimize_signs = OPTIM_SIGN,
        single_shift = SINGLE_SHIFT,
        use_cuda = USE_CUDA,
        fold_graph = FOLD_GRAPH)

# Tag the scaling producers
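# For each Quantizer metaOp, the producer feeding input 1 of the BitShift node holds
# the shift amount, and the producer feeding input 1 of the Mul node holds the scaling
# coefficient. Tagging them (presumably) lets the export handle these values as layer
# parameters rather than as standalone producers.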
for node in model.get_nodes():
    if node.type() == "Quantizer":
        for SNode in get_node_from_metaop(node, "BitShift"):
            SNode.get_parent(1).attributes().shift_prod = True
        for CNode in get_node_from_metaop(node, "Mul"):
            CNode.get_parent(1).attributes().coef_prod = True

model.save("post_ptq_model")

# --------------------------------------------------------------
# RESCALE THE INPUT SAMPLES
# --------------------------------------------------------------

"""
Once the quantization is done, the graph now only accepts integer inputs.
So we need to rescale the dataset for the data to be within [0, 255].
Also, tensors should be casted to be the same type as TARGET_TYPE.
"""

if quantize_model:
    rescaling = 2**(NB_BITS-1)-1
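    # Worked example: with NB_BITS = 8, rescaling = 2**7 - 1 = 127, so a normalized
    # pixel in [0, 1] is mapped to an integer in [0, 127].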
    for i in range(NB_TEST):
        tensors[i].set_backend("cpu")
        array = np.array(tensors[i]) * rescaling
        array = np.round(array).astype(int)
        tensors[i] = aidge_core.Tensor(array)
        tensors[i].set_datatype(TARGET_TYPE)
        tensors[i].set_backend("cpu")
    # Set the model backend to CPU for the export
    model.set_backend("cpu")


# --------------------------------------------------------------
# GENERATE NEW SCHEDULER
# --------------------------------------------------------------

"""
Each time the graph has been change, it has to be reset.
Here some Quantizer and Cast nodes have been added.
"""

""" [Issue]
We need first to manually add an input tensor with the correct datatype,
as it is not automatically done in PTQ.
"""
if quantize_model:
    input_node = model.get_ordered_inputs()[0]
    input_node[0].get_operator().set_input(0, tensors[0])
    scheduler.reset_scheduling()

# --------------------------------------------------------------
# PERFORM THE EXAMPLE INFERENCES AGAIN
# --------------------------------------------------------------

if (DO_EXAMPLES and quantize_model):
    print('\n QUANTIZED EXAMPLE INFERENCES:')
    nb_valid = 0
    post_values = []
    for i in range(NB_TEST):
        print(f"QEI: {tensors[i].backend()}")
        output_array = propagate(model, scheduler, tensors[i])
        print(labels[i], ' VS ', np.argmax(output_array), ' -> ', np.max(output_array))
        post_values.append(np.max(output_array))
        if (labels[i] == np.argmax(output_array)):
            nb_valid += 1

    quant_accuracy = nb_valid / NB_TEST
    print('\n MODEL ACCURACY = ', accuracy * 100, '%')
    print('\n QUANTIZED ACCURACY = ', quant_accuracy * 100, '%')


# --------------------------------------------------------------
# FUSE NODES INTO METAOPS
# --------------------------------------------------------------

"""
Here is made the link between the Aidge model and the CPP
kernels implementation. In aidge, all the nodes calculations
are performed separately (Pad -> Conv -> Quantizer -> ReLU -> ...).

However within the CPP export, some core operators are merged
in meta operators. For instance, the padding, scaling and ReLU are
performed within the Conv kernel.

In this step, we use graph regex techniques to find the desired patterns
within the graph in order to match the export implementation of the kernels.
"""

# Exclude unwanted producers
"""
Before fusing the nodes, we set a tag on the Producers in order to exclude
from the export the ones holding coefficients, as they are directly handled
within the layers parameters.
"""
exclude_unwanted_producers(model)

# Fuse nodes
cpp_fuse_to_metaops(model)
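
# (Optional, illustrative) To check which fused operators were created, the node names
# and types can simply be listed, using only the Node API already relied on elsewhere
# in this script (names may still be empty until set_nodes_names() is called below):
if args.verbose >= 2:
    for fused_node in model.get_nodes():
        print(f"Post-fusion node: {fused_node.name()} ({fused_node.type()})")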

# Remove optional inputs
"""
Some optional inputs may be added by the quantization step (for instance with the clipping nodes).
Here we make sure that they will not be considered as actual graph inputs by the export, by
excluding them from the ordered_inputs list of the model.
"""
remove_optional_inputs(model)

# Reset scheduler to apply graph modifications
"""
The scheduler always needs to be reset after graph manipulation.
"""
scheduler.reset_scheduling()

# Name newly created MetaOps
"""
As names are optional in Aidge, the fuse_to_metaops function will not automatically
give a name to the newly created metaOps. However, in an export context, we need
our operators to be named, as this will be used to name the corresponding files.
"""

scheduler.generate_scheduling() # Scheduler needs to be generated as it has just been reset
set_nodes_names(scheduler)

# --------------------------------------------------------------
# LOG OUTPUTS FOR THE FIRST IMAGE OF THE TEST DATASET
# --------------------------------------------------------------

"""
Here a final inference is made on the input we want to export and run.
This will ensure that all the feature maps tensors (between the layers)
hold the data corresponding to this specific input.
Then, the "log_outputs()" function (called later) will store these tensors
into log files that may be exported as well for comparison purpose.
"""

output_array = propagate(model, scheduler, tensors[0])

print("### Exported Sample ###")
print("Aidge prediction :", np.argmax(output_array), "(" + str(np.max(output_array)) + ")")
print("Label :", labels[0])

# --------------------------------------------------------------
# HANDLING DATATYPE
# --------------------------------------------------------------

"""
Now, despite the quantization stage, all the tensors of the model are
still "virtually" in Int32. Before exporting the model, we have to set
tensors' datatypes to Int8, except for biases which should remain in Int32.
"""

if quantize_model:
    set_nodes_datatypes(model)

# Store tensor values into log files
"""
Once the tensors have been cast, their values can be stored into log files
so they can later be compared with the export outputs (see AIDGE_CMP above).
"""

if AIDGE_CMP:
    generate_aidge_ifmaps(model)

# --------------------------------------------------------------
# TEST MODE
# --------------------------------------------------------------

"""
The test mode is mainly used for validation and benchmark. The model will be
exported in a way that each layer's result will be compared with the CPU implementation.
The timings for each layer will be displayed.
In case of error, you will be able to enter debug mode, showing in-layer data or
changing the inputs of the layer, to isolate the source of the issue.
"""

for node in model.get_nodes():
    node.attributes().dev_mode = DEV_MODE

# --------------------------------------------------------------
# EXPORT THE MODEL
# --------------------------------------------------------------

aidge_export_cpp.export(EXPORT_FOLDER,
                        model,
                        scheduler,
                        # tensors[0],
                        labels = aidge_core.Tensor(labels[0]),
                        dev_mode = DEV_MODE,
                        aidge_cmp = AIDGE_CMP,
                        memory_manager_args = {"wrapping": True} if args.mem_wrap else {})
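
# The generated export is a standalone C++ project: `make` builds it inside the export
# folder and `./bin/run_export` runs the exported network on the sample saved above.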
print("\n### Compiling the export ###")
try:
    for std_line in aidge_core.utils.run_command(["make"], cwd=EXPORT_FOLDER):
        print(std_line, end="")
except subprocess.CalledProcessError as e:
            raise RuntimeError(0, f"An error occurred, failed to build export.") from e
print("\n### Running the export ###")
try:
    for std_line in aidge_core.utils.run_command(["./bin/run_export"], cwd=EXPORT_FOLDER):
        print(std_line, end="")
except subprocess.CalledProcessError as e:
    raise RuntimeError(0, f"An error occurred, failed to run export.") from e