Commit 6754e159 authored by Cyril Moineau

Fix LeNet int8 export

* Add code to download model from Hugging Face
* Fix set_backend that was wrongly placed
* Update code to compile and execute the export inside the script
"""
create_lenet.py
This file creates a simple LeNet network using the MNIST dataset.
It is meant to be used by the lenet.py file.
"""
import random
import torch
from torchvision import datasets, transforms
import torch.nn as nn
import torch.nn.functional as F
# Download the MNIST Dataset
def get_mnist_dataset():
    transform = transforms.ToTensor()
    train_set = datasets.MNIST(root='./data', train=True, transform=transform, download=True)
    test_set = datasets.MNIST(root='./data', train=False, transform=transform, download=True)
    return train_set, test_set
# Create the lenet model
class Classifier(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.network = nn.Sequential(
            nn.Conv2d(1, 32, 5),    # 28 -> 24
            nn.ReLU(),
            nn.MaxPool2d(2, 2),     # 24 -> 12
            nn.Conv2d(32, 32, 5),   # 12 -> 8
            nn.ReLU(),
            nn.MaxPool2d(2, 2),     # 8 -> 4
            nn.Flatten(),
            nn.Linear(32*4*4, 100),
            nn.ReLU(),
            nn.Linear(100, 100),
            nn.ReLU(),
            nn.Linear(100, 10)
        )

    def forward(self, x):
        return self.network(x)
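
# Quick shape sanity check of the architecture above (a sketch, not part of the
# original file): a 1x1x28x28 MNIST image should yield 10 logits.
#   assert Classifier()(torch.zeros(1, 1, 28, 28)).shape == (1, 10)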
# Compute accuracy function
def compute_accuracy(model, data_set, nb_samples):
    nb_valid = 0
    for it in range(nb_samples):
        # get a sample
        sample_idx = torch.randint(len(data_set), size=(1,)).item()
        img, label = data_set[sample_idx]
        # compute the output
        x = torch.reshape(img, (1, 1, 28, 28))
        y_h = model.forward(x)
        pred_label = torch.argmax(y_h).item()
        if label == pred_label:
            nb_valid = nb_valid + 1
    return nb_valid / nb_samples
# Train the model
def train_model(NB_ITERATION, CHECK_PERIOD, train_set, test_set, classifier):
    accuracy_history = []
    for it in range(NB_ITERATION):
        sample_idx = random.randint(0, len(train_set)-1)
        img, label = train_set[sample_idx]
        x = torch.flatten(img)
        x = torch.reshape(x, (1, 1, 28, 28))
        y = torch.zeros(1, 10)
        y[0][label] = 1
        y_h = classifier.forward(x)
        loss = F.mse_loss(y_h, y)
        loss.backward()
        # Plain SGD update on the parameters
        for p in classifier.parameters():
            with torch.no_grad():
                p -= 0.01 * p.grad
                p.grad.zero_()
        if it % CHECK_PERIOD == 0:
            accuracy = compute_accuracy(classifier, test_set, CHECK_PERIOD)
            accuracy_history.append(accuracy)
            print(f'it {it}: accuracy = {accuracy:.8f} ')
def create_lenet():
    # Get Dataset
    train_set, test_set = get_mnist_dataset()
    # Create model
    classifier = Classifier()
    # Train model
    NB_ITERATION = 50000
    CHECK_PERIOD = 3000
    print("NB_ITERATIONS = ", NB_ITERATION)
    print("CHECK_PERIOD = ", CHECK_PERIOD)
    print("\nTraining LeNet...")
    train_model(NB_ITERATION, CHECK_PERIOD, train_set, test_set, classifier)
    # Export as ONNX
    x = torch.Tensor(1, 1, 28, 28)
    torch.onnx.export(classifier.network, x, 'lenet.onnx', verbose=False,
                      input_names=["input"], output_names=["output"])
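
# Optional convenience entry point (an addition, not in the original file):
# running this module directly trains the network and writes ./lenet.onnx.
if __name__ == "__main__":
    create_lenet()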
"""
lenet.py
Run this file to export a LeNet using the Aidge CPP Export module.
"""
import os
import shutil
import random
import aidge_core.utils
import numpy as np
import subprocess
# Aidge Modules
import aidge_core
@@ -67,7 +69,7 @@ elif args.verbose == 1:
elif args.verbose == 2:
    aidge_core.Log.set_console_level(aidge_core.Level.Info)
elif args.verbose >= 3:
    aidge_core.Log.set_console_level(aidge_core.Level.Debug)

if USE_CUDA:
    import aidge_backend_cuda
@@ -79,39 +81,39 @@ if USE_CUDA:
"""
Export configuration details :
- RNG_SEED : Fix a random seed for torch to always get the same images from the dataset,
therefore always getting the same output.
therefore always getting the same output.
- NB_TEST : Number of example inferences to perform (used to get an accuracy approximation).
- NB_CALIB : Number of samples used for the calibration step of quantization.
- MODEL_NAME : Should be the same name as the onnx file you want to load and export.
- NB_CALIB : Number of samples used for the calibration step of quantization.
- MODEL_NAME : Should be the same name as the onnx file you want to load and export.
- DO_EXAMPLES : Perform example inferences (and allow to get accuracy approximation)
- NB_BITS : Quantization output precision. Should be 8 to work with this export.
- NB_BITS : Quantization output precision. Should be 8 to work with this export.
- TARGET_TYPE : The aidge datatype for tensors to be casted after the quantization step [float64, float32, int32].
- OPTIM_SIGN : Quantization optional optimization based on data sign.
- OPTIM_SIGN : Quantization optional optimization based on data sign.
- SINGLE_SHIFT : Quantization option specifying if inserted scaling nodes should be
single shift or floating point.
- NO_QUANT : Skip the quantization step.
- CLIPPING : Clipping method during quantization.
- CLIPPING : Clipping method during quantization.
- FOLD_GRAPH : The quantization step adds cast nodes to cast the graph into the given TARGET_TYPE.
Enabling the FOLD_GRAPH will automatically fold these nodes into the following
ones at the end of quantization step.
ones at the end of quantization step.
- USE_CUDA : Determine if the quantization step uses the GPU. It is generally recommended
to enable this option if you have access to GPUs as the quantization step
may take a while to complete.
- DEV_MODE : The dev mode allows to identify errors more easily exporting the model with
may take a while to complete.
- DEV_MODE : The dev mode allows to identify errors more easily exporting the model with
symbolic links enabling to modify the source files directly in the
generated export (make sure you installed the export plugin running
`pip install -e .`).
Enabled running this python file, adding the --dev argument.
`pip install -e .`).
Enabled running this python file, adding the --dev argument.
- AIDGE_CMP : Saves and export the outputs generated by the aidge inferences in order
to compare it with the export outputs.
Enabled running this python file, adding the --aidge_cmp argument.
to compare it with the export outputs.
Enabled running this python file, adding the --aidge_cmp argument.
"""
print(" Available backends : ", aidge_core.Tensor.get_available_backends())
quantize_model = False
NB_BITS = 32
TARGET_TYPE = aidge_core.dtype.float32

if args.dtype == "float32":
    quantize_model = False
@@ -124,7 +126,7 @@ else:
print(f"[ERROR] Supported datatypes : {supported_types}.")
exit(1)
RNG_SEED = 1234
RNG_SEED = 1234
NB_TEST = 10 # Example inferences
NB_CALIB = 20 # Calibration set
MODEL_NAME = 'lenet'
@@ -135,7 +137,7 @@ DO_EXAMPLES = True
OPTIM_SIGN = False
SINGLE_SHIFT = True
ROUNDING = True
NO_QUANT = False
CLIPPING = aidge_quantization.Clipping.MSE # 'MAX'
FOLD_GRAPH = True
@@ -164,16 +166,14 @@ backend = "cuda" if USE_CUDA else "cpu"
# GET THE LENET MODEL
# ------------------------------------------------------------
"""
The LeNet model is created and trained using the create_lenet file.
If a lenet.onnx file is already present in the current folder, this step will be skiped.
The generated network is not yet quantized.
The LeNet model is created and trained using the create_lenet file.
If a lenet.onnx file is already present in the current folder, this step will be skiped.
The generated network is not yet quantized.
"""
from create_lenet import create_lenet
if not os.path.isfile("./lenet.onnx"):
print("\nTraining LeNet...")
create_lenet()
# Define the target path and filename
file_url = "https://huggingface.co/EclipseAidge/LeNet/resolve/main/lenet_mnist.onnx?download=true"
file_path = MODEL_NAME + "_mnist.onnx"
aidge_core.utils.download_file(file_path, file_url)
# --------------------------------------------------------------
# CREATE THE SAMPLES
@@ -185,8 +185,8 @@ test_set = datasets.MNIST(root='./data', train=False, transform=transform, down
tensors = []
labels = []
index = 0
for in_tensor, label in test_set:
    array = np.array(in_tensor)
    array = np.reshape(array, (1, 1, 28, 28))
    tensor = aidge_core.Tensor(array)
    tensor.set_backend(backend)
@@ -204,11 +204,11 @@ for in_tensor, label in test_set:
"""
Load the .onnx model and perform some usual graph modifications :
- Remove the flatten nodes;
- Fuse the batchnorm nodes into the biases producers.
- Expand the metaOperators to perform the desired fusions.
"""
model = aidge_onnx.load_onnx(file_path, verbose=False)
aidge_core.remove_flatten(model)
aidge_core.fuse_batchnorm(model)
aidge_core.expand_metaops(model)
@@ -220,14 +220,14 @@ model.save("imported_model")
"""
The scheduler is an ordered version of the model, allowing the nodes to be
scheduled so that inferences can be run.
"""
# Set up the backend
model.set_datatype(aidge_core.dtype.float32)
model.set_backend(backend)
# Create the Scheduler
scheduler = aidge_core.SequentialScheduler(model)
# --------------------------------------------------------------
@@ -235,11 +235,12 @@ scheduler = aidge_core.SequentialScheduler(model)
# --------------------------------------------------------------
def propagate(model, scheduler, tensor):
    """
    Propagate the given tensor into the model and return the
    output tensor.
    """
    print(f"Propagate: {tensor.backend()}")
    # Run the inference
    scheduler.forward(True, [tensor])
    # Gather the results
    output_node = model.get_output_nodes().pop()
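    # [Assumed completion] The rest of propagate() is collapsed in this view; it
    # presumably reads the output tensor of that node and returns it as a numpy
    # array, roughly as below (exact calls in the real file may differ).
    output_tensor = output_node.get_operator().get_output(0)
    return np.array(output_tensor)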
@@ -253,6 +254,7 @@ if (DO_EXAMPLES):
    nb_valid = 0
    base_values = []
    for i in range(NB_TEST):
        print(f"Inference: {tensors[i].backend()}")
        output_array = propagate(model, scheduler, tensors[i])
        print(labels[i], ' VS ', np.argmax(output_array), ' -> ', np.max(output_array))
        base_values.append(np.max(output_array))
@@ -263,18 +265,18 @@ if (DO_EXAMPLES):
# --------------------------------------------------------------
# PERFORM THE QUANTIZATION
# --------------------------------------------------------------
if quantize_model:
    aidge_quantization.quantize_network(
        network = model,
        nb_bits = NB_BITS,
        calibration_set = tensors[0:NB_CALIB],
        clipping_mode = CLIPPING,
        target_type = TARGET_TYPE,
        no_quant = NO_QUANT,
        optimize_signs = OPTIM_SIGN,
        single_shift = SINGLE_SHIFT,
        use_cuda = USE_CUDA,
        fold_graph = FOLD_GRAPH)
@@ -293,31 +295,36 @@ model.save("post_ptq_model")
# --------------------------------------------------------------
"""
Once the quantization is done, the graph only accepts integer inputs, so the
dataset has to be rescaled into the quantized input range (with NB_BITS = 8,
rescaling = 2**(8-1) - 1 = 127, so a normalized pixel value of 1.0 becomes 127).
Also, the tensors should be cast to the same type as TARGET_TYPE.
"""
if quantize_model:
    rescaling = 2**(NB_BITS-1)-1
    for i in range(NB_TEST):
        tensors[i].set_backend("cpu")
        array = np.array(tensors[i]) * rescaling
        array = np.round(array).astype(int)
        tensors[i] = aidge_core.Tensor(array)
        tensors[i].set_datatype(TARGET_TYPE)
        tensors[i].set_backend("cpu")

# Setting model to CPU for export
model.set_backend("cpu")
# --------------------------------------------------------------
# GENERATE NEW SCHEDULER
# --------------------------------------------------------------
"""
Each time the graph has been changed, the scheduler has to be reset.
Here some Quantizer and Cast nodes have been added.
"""
""" [Issue]
We first need to manually add an input tensor with the correct datatype,
as this is not automatically done in PTQ.
"""
if quantize_model:
    input_node = model.get_ordered_inputs()[0]
@@ -329,10 +336,11 @@ if quantize_model:
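
# [Assumed sketch] The collapsed lines above presumably finish this step: they give
# the first graph input a placeholder tensor of the right datatype and rebuild the
# scheduler so that the newly inserted Quantizer/Cast nodes are scheduled. The exact
# calls in the real file may differ from this sketch.
#
#   in_node, in_idx = input_node
#   dummy = aidge_core.Tensor(np.zeros((1, 1, 28, 28), dtype=np.int32))
#   dummy.set_datatype(TARGET_TYPE)
#   in_node.get_operator().set_input(in_idx, dummy)
#   scheduler = aidge_core.SequentialScheduler(model)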
# --------------------------------------------------------------
if (DO_EXAMPLES and quantize_model):
    print('\n QUANTIZED EXAMPLE INFERENCES:')
    nb_valid = 0
    post_values = []
    for i in range(NB_TEST):
        print(f"QEI: {tensors[i].backend()}")
        output_array = propagate(model, scheduler, tensors[i])
        print(labels[i], ' VS ', np.argmax(output_array), ' -> ', np.max(output_array))
        post_values.append(np.max(output_array))
@@ -343,12 +351,6 @@ if (DO_EXAMPLES and quantize_model):
    print('\n MODEL ACCURACY = ', accuracy * 100, '%')
    print('\n QUANTIZED ACCURACY = ', quant_accuracy * 100, '%')
# --------------------------------------------------------------
# FUSE NODES INTO METAOPS
@@ -361,28 +363,28 @@ are performed separately (Pad -> Conv -> Quantizer -> ReLU -> ...).
However within the CPP export, some core operators are merged
in meta operators. For instance, the padding, scaling and ReLU are
performed within the Conv kernel.
In this step, we use graph regex techniques to find the desired patterns
within the graph in order to match the export implementation of the kernels.
"""
# Exclude unwanted producers
"""
Before fusing the nodes, we set a tag on the Producers in order to exclude
from the export the ones holding coefficients, as they are directly handled
within the layers' parameters.
"""
exclude_unwanted_producers(model)
# Fuse nodes
cpp_fuse_to_metaops(model)
# Remove optional inputs
"""
Some optional inputs may be added by the quantization step (for instance with the clipping nodes).
Here we make sure that they will not be considered as actual graph inputs by the export, by
excluding them from the ordered_inputs list of the model.
"""
remove_optional_inputs(model)
@@ -395,7 +397,7 @@ scheduler.reset_scheduling()
# Name newly created MetaOps
"""
As names are optional in Aidge, the fuse_to_metaops function will not automatically
give a name to the newly created metaOps. However, in an export context, we need
our operators to be named, as this will be used to name the corresponding files.
"""
@@ -407,11 +409,11 @@ set_nodes_names(scheduler)
# --------------------------------------------------------------
"""
Here a final inference is made on the input we want to export and run.
This will ensure that all the feature map tensors (between the layers)
hold the data corresponding to this specific input.
Then, the "log_outputs()" function (called later) will store these tensors
into log files that may be exported as well for comparison purposes.
"""
output_array = propagate(model, scheduler, tensors[0])
@@ -436,7 +438,7 @@ if quantize_model:
# Store tensors values into log files
"""
Once the tensors have been cast, the log_outputs() function can be
called to store their values into log files.
"""
if os.path.isdir("log_outputs"):
@@ -448,11 +450,11 @@ model.log_outputs("log_outputs")
# --------------------------------------------------------------
"""
The test mode is mainly used for validation and benchmarking. The model will be
exported in such a way that each layer's result is compared with the CPU implementation.
The timings for each layer will be displayed.
In case of error, you will be able to enter debug mode, showing in-layer data or
changing the inputs of the layer, to isolate the source of the issue.
"""
for node in model.get_nodes():
@@ -463,12 +465,12 @@ for node in model.get_nodes():
# --------------------------------------------------------------
"""
If the --aidge_cmp option is enabled, the feature maps generated by aidge with the
cpu backend will be exported in the generated export. They will be used as a reference
to verify that the results of the optimized kernels are correct for the exported
model.
This option has to be passed to each node in order to be used within the Export Nodes
(JConv, JPad, ...) that you can find in the "operators" folder.
"""
if AIDGE_CMP:
@@ -481,10 +483,22 @@ if AIDGE_CMP:
model.save("exported_model")
aidge_export_cpp.export(EXPORT_FOLDER,
                        model,
                        scheduler,
                        # tensors[0],
                        labels = aidge_core.Tensor(labels[0]),
                        dev_mode = DEV_MODE,
                        aidge_cmp = AIDGE_CMP)
print("\n### Compiling the export ###")
try:
    for std_line in aidge_core.utils.run_command(["make"], cwd=EXPORT_FOLDER):
        print(std_line, end="")
except subprocess.CalledProcessError as e:
    raise RuntimeError("An error occurred, failed to build export.") from e
print("\n### Running the export ###")
try:
    for std_line in aidge_core.utils.run_command(["./bin/run_export"], cwd=EXPORT_FOLDER):
        print(std_line, end="")
except subprocess.CalledProcessError as e:
    raise RuntimeError("An error occurred, failed to run export.") from e
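
# Typical invocations (a usage sketch; the flag names follow the configuration
# docstring above, and the actual parser sits in a collapsed part of the file):
#   python lenet.py --dtype int8
#   python lenet.py --dtype int8 --dev -v
#   python lenet.py --dtype int8 --aidge_cmp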