Skip to content
Snippets Groups Projects

Real quantization cast for PTQ

Merged Noam Zerah requested to merge noamzerah/aidge_quantization:quantization_cast into dev
4 files
+ 93
222
Compare changes
  • Side-by-side
  • Inline
Files
4
import unittest
import gzip
import numpy as np
from pathlib import Path
import gzip
import aidge_core
import aidge_backend_cpu
import aidge_onnx
import aidge_backend_cpu
import aidge_quantization
import sys
from pathlib import Path
from aidge_core import Log, Level
"""
Unit test for the PTQ pipeline:
This script is designed to test and validate the accuracy of five small model topologies on the MNIST dataset:
["MiniResNet", "ConvNet", "BranchNetV4", "TestNet", "MLP"]
It compares the results for three configurations: the baseline, quantization, and quantization with single shift.
The value of sigma represents the tolerance for the tests.
"""
aidge_core.Log.set_console_level(aidge_core.Level.Error)  # Reduce useless logs

# --------------------------------------------------------------
# CONFIGURATION
# --------------------------------------------------------------

# Number of MNIST samples used for calibration and evaluation.
NB_SAMPLES = 1000  # max : 1000
# Shape every sample is reshaped to before being wrapped in a Tensor.
SAMPLE_SHAPE = (1, 1, 28, 28)
# Bit width passed to the quantization routine.
NB_BITS = 4
CLIPPING = aidge_quantization.Clipping.MSE

# Single-model settings read by the `test_ptq` test case.
MODEL_NAME = 'MiniResNet.onnx'  # 'ConvNet.onnx'
# (baseline accuracy, quantized accuracy), in percent.
ACCURACIES = (95.4, 94.4)  # (97.9, 97.7)

# Per-model expected accuracies, in percent:
# (baseline, quantized, quantized with single shift).
EXPECTED_RESULTS = {
    "MiniResNet.onnx": (95.4, 94.5, 94.7),
    "ConvNet.onnx": (97.9, 97.7, 97.4),
    "BranchNetV4.onnx": (93.8, 93.2, 93.7),
    "TestNet.onnx": (95.5, 94.2, 94.2),
    "MLP.onnx": (94.7, 94.2, 93.3),
}

# Tolerance (in accuracy percentage points) used as the `delta` of the
# accuracy assertions.
SIGMA = 0.05
# --------------------------------------------------------------
# UTILS
# --------------------------------------------------------------
def propagate(model, scheduler, sample):
    """Run one forward pass on *sample* and return the output as a NumPy array.

    The sample is reshaped to ``SAMPLE_SHAPE``, wrapped in an
    ``aidge_core.Tensor`` and fed to ``scheduler.forward``. The result is read
    from output 0 of one output node of *model*.

    Args:
        model: graph whose output node is read after the forward pass.
        scheduler: scheduler used to execute the forward pass.
        sample: array-like that can be reshaped to ``SAMPLE_SHAPE``.

    Returns:
        ``numpy.ndarray`` built from the output tensor.
    """
    batch = aidge_core.Tensor(np.reshape(sample, SAMPLE_SHAPE))
    # NOTE(review): the meaning of the leading `True` flag is defined by the
    # scheduler API and is not visible here -- confirm against aidge_core.
    scheduler.forward(True, [batch])
    # pop() takes a single node from the returned collection of output nodes.
    last_node = model.get_output_nodes().pop()
    return np.array(last_node.get_operator().get_output(0))
def prepare_sample(sample):
    """Return *sample* reshaped to ``SAMPLE_SHAPE`` and cast to float32."""
    reshaped = np.reshape(sample, SAMPLE_SHAPE)
    as_float32 = reshaped.astype('float32')
    return as_float32
def compute_accuracy(model, samples, labels):
    """Return the fraction of *samples* that *model* classifies correctly.

    Each sample is propagated through the model with a single
    ``SequentialScheduler``; the prediction is the arg-max of the output and
    is compared with the label at the same position.

    Args:
        model: graph to evaluate.
        samples: sequence of input samples; its length is the denominator.
        labels: sequence indexed by sample position. It may be longer than
            *samples*; only the first ``len(samples)`` entries are read.

    Returns:
        Accuracy as a ratio in ``[0, 1]`` (not a percentage).

    Raises:
        ZeroDivisionError: if *samples* is empty.
    """
    # One scheduler is enough: the previous version built two and ran the
    # whole dataset twice, discarding the first count.
    scheduler = aidge_core.SequentialScheduler(model)
    hits = sum(
        labels[i] == np.argmax(propagate(model, scheduler, sample))
        for i, sample in enumerate(samples)
    )
    return hits / len(samples)
# --------------------------------------------------------------
# TEST CLASS
# --------------------------------------------------------------
def _load_npy_gz(path):
    """Load a NumPy array from the gzip-compressed ``.npy`` file at *path*."""
    # The context manager closes the gzip stream once the array has been read.
    with gzip.open(path, "rb") as stream:
        return np.load(stream)


class test_ptq(unittest.TestCase):
    """Single-model PTQ check: ``MODEL_NAME`` is compared with ``ACCURACIES``.

    NOTE(review): in the original listing this class was interleaved line by
    line with ``TestQuantization`` (it looks like the two sides of a diff).
    It is kept so that no existing test disappears; confirm whether it is
    still wanted next to ``TestQuantization``.
    """

    def setUp(self):
        # load the samples / labels (numpy)
        curr_file_dir = Path(__file__).parent.resolve()
        self.samples = _load_npy_gz(curr_file_dir / 'assets/mnist_samples.npy.gz')
        self.labels = _load_npy_gz(curr_file_dir / 'assets/mnist_labels.npy.gz')
        # load the model in AIDGE
        self.model = aidge_onnx.load_onnx(curr_file_dir / "assets/" / MODEL_NAME, verbose=False)
        aidge_core.remove_flatten(self.model)
        self.model.set_datatype(aidge_core.dtype.float32)
        self.model.set_backend("cpu")

    def tearDown(self):
        pass

    def test_model(self):
        """Check the un-quantized accuracy against ``ACCURACIES[0]``."""
        Log.set_console_level(Level.Info)
        # compute the base accuracy
        accuracy = compute_accuracy(self.model, self.samples[0:NB_SAMPLES], self.labels)
        self.assertAlmostEqual(accuracy * 100, ACCURACIES[0], msg='base accuracy does not meet the baseline !', delta=0.1)

    def test_quant_model(self):
        """Quantize the model and check its accuracy against ``ACCURACIES[1]``."""
        Log.set_console_level(Level.Debug)
        # create the calibration dataset
        tensors = [
            aidge_core.Tensor(prepare_sample(sample))
            for sample in self.samples[0:NB_SAMPLES]
        ]
        # quantize the model
        aidge_quantization.quantize_network(
            self.model,
            NB_BITS,
            tensors,
            clipping_mode=aidge_quantization.Clipping.MSE,
            no_quantization=False,
            optimize_signs=True,
            single_shift=False
        )
        # rescale the inputs
        scaling = 2**(NB_BITS-1)-1
        scaled_samples = self.samples[0:NB_SAMPLES] * scaling  # XXX np.round ???
        # compute the quantized accuracy on the rescaled samples only, so that
        # no un-scaled sample is evaluated when NB_SAMPLES is below the
        # dataset size
        accuracy = compute_accuracy(self.model, scaled_samples, self.labels)
        self.assertAlmostEqual(accuracy * 100, ACCURACIES[1], msg='quantized accuracy does not meet the baseline !', delta=0.1)


class TestQuantization(unittest.TestCase):
    """PTQ accuracy checks for every model listed in ``EXPECTED_RESULTS``.

    For each model three accuracies are compared with their expected values,
    within ``SIGMA``: baseline, quantized, and quantized with single shift.
    """

    def setUp(self):
        curr_file_dir = Path(__file__).parent.resolve()
        self.samples = _load_npy_gz(curr_file_dir / 'assets/mnist_samples.npy.gz')
        self.labels = _load_npy_gz(curr_file_dir / 'assets/mnist_labels.npy.gz')
        # Inputs for the quantized models: the samples multiplied by
        # 2**(NB_BITS - 1) - 1. Computed from the array already in memory
        # instead of reading the file a second time.
        self.quantized_sample = self.samples * ((1 << (NB_BITS - 1)) - 1)

    @staticmethod
    def _load_model(model_path):
        """Load the ONNX model at *model_path* as a float64 CPU graph."""
        model = aidge_onnx.load_onnx(model_path, verbose=False)
        aidge_core.remove_flatten(model)
        model.set_datatype(aidge_core.dtype.float64)
        model.set_backend("cpu")
        return model

    @staticmethod
    def _quantize(model, tensors, single_shift):
        """Quantize *model* in place, calibrating on *tensors*."""
        aidge_quantization.quantize_network(network = model,
                                            nb_bits = NB_BITS,
                                            input_dataset = tensors,
                                            clipping_mode = CLIPPING,
                                            target_type = aidge_core.dtype.float64,
                                            no_quantization = False,
                                            optimize_signs = True,
                                            single_shift = single_shift,
                                            use_cuda = False,
                                            fold_graph = True,
                                            bitshift_rounding = False,
                                            verbose = False)

    def run_model_test(self, model_name):
        """Check baseline, quantized and single-shift accuracy of one model.

        Args:
            model_name: key of ``EXPECTED_RESULTS`` and file name of the model
                inside the ``assets`` directory next to this file.
        """
        model_path = Path(__file__).parent / "assets" / model_name
        model = self._load_model(model_path)

        expected_base, expected_quant, expected_quant_ss = EXPECTED_RESULTS[model_name]

        # Baseline Accuracy
        base_accuracy = compute_accuracy(model, self.samples[:NB_SAMPLES], self.labels)
        self.assertAlmostEqual(base_accuracy * 100, expected_base, delta=SIGMA, msg=f"[X] Baseline accuracy mismatch for {model_name}. Expected accuracy was: {expected_base}, but got: {base_accuracy * 100}")

        # Quantize
        tensors = [aidge_core.Tensor(np.reshape(sample, SAMPLE_SHAPE)) for sample in self.samples[:NB_SAMPLES]]
        self._quantize(model, tensors, single_shift=False)
        quant_accuracy = compute_accuracy(model, self.quantized_sample[:NB_SAMPLES], self.labels)
        self.assertAlmostEqual(quant_accuracy * 100, expected_quant, delta=SIGMA, msg=f"[X] Quantized accuracy mismatch for {model_name},Expected accuracy was: {expected_quant}, but got: {quant_accuracy * 100}")

        # Quantize with Single Shift (on a freshly loaded copy of the model)
        model_ss = self._load_model(model_path)
        self._quantize(model_ss, tensors, single_shift=True)
        quant_accuracy_ss = compute_accuracy(model_ss, self.quantized_sample[:NB_SAMPLES], self.labels)
        self.assertAlmostEqual(quant_accuracy_ss * 100, expected_quant_ss, delta=SIGMA, msg=f"[X] Quantized Single Shift accuracy mismatch for {model_name}.Expected accuracy was: {expected_quant_ss}, but got: {quant_accuracy_ss * 100}")

    def test_models(self):
        """Run ``run_model_test`` once per model, each as its own sub-test."""
        for model in EXPECTED_RESULTS:
            with self.subTest(model=model):
                self.run_model_test(model)
# Run the test suite only when this file is executed as a script.
if __name__ == '__main__':
    unittest.main()
Loading