diff --git a/aidge_quantization/unit_tests/test_ptq.py b/aidge_quantization/unit_tests/test_ptq.py
index 56080bff0d1f4a95248fa983316dbafd35565501..f6b243c27b1a08dbbfc5da522e385ceb4ec9c2f4 100644
--- a/aidge_quantization/unit_tests/test_ptq.py
+++ b/aidge_quantization/unit_tests/test_ptq.py
@@ -1,118 +1,137 @@
 import unittest
-import gzip
 import numpy as np
-from pathlib import Path
-
+import gzip
 import aidge_core
-import aidge_backend_cpu
 import aidge_onnx
+import aidge_backend_cpu
 import aidge_quantization
+import sys
+from pathlib import Path
 
-from aidge_core import Log, Level
-
+"""
+    Unit test for the PTQ pipeline
+    ==============================
+    This script tests and validates the accuracy of five small topologies on the MNIST dataset:
+    ["MiniResNet", "ConvNet", "BranchNetV4", "TestNet", "MLP"]
+    It compares the results for three configurations: baseline, quantization, and quantization with single shift.
+    The value of DELTA is the accuracy tolerance of the tests.
+"""
+
+aidge_core.Log.set_console_level(aidge_core.Level.Error)  # silence logs below the Error level
+
 # --------------------------------------------------------------
-# CONFIGS
+# CONFIGURATION
 # --------------------------------------------------------------
 
-NB_SAMPLES = 1000 # max : 1000
-SAMPLE_SHAPE = (1, 1, 28, 28)
-MODEL_NAME = 'MiniResNet.onnx' # 'ConvNet.onnx'
-ACCURACIES = (95.4, 94.4) # (97.9, 97.7)
-NB_BITS = 4
+NB_SAMPLES   = 1000
+SAMPLE_SHAPE = (1, 1, 28, 28)
+NB_BITS      = 4
+TARGET_TYPE  = aidge_core.dtype.int32
+CLIPPING     = aidge_quantization.Clipping.MSE
+NO_QUANT     = False
+OPTIM_SIGNS  = True
+FOLD_GRAPH   = True
+DELTA        = 0.05
+
+EXPECTED_RESULTS = {
+    "MiniResNet.onnx"  : (95.4, 94.4, 95.0),
+    "ConvNet.onnx"     : (97.9, 97.2, 96.7),
+    "BranchNetV4.onnx" : (93.8, 92.7, 93.7),
+    "TestNet.onnx"     : (95.5, 94.0, 94.5),
+    "MLP.onnx"         : (94.7, 92.9, 93.8)
+}
 
 # --------------------------------------------------------------
 # UTILS
 # --------------------------------------------------------------
 
 def propagate(model, scheduler, sample):
+    sample = np.reshape(sample, SAMPLE_SHAPE)
     input_tensor = aidge_core.Tensor(sample)
     scheduler.forward(True, [input_tensor])
     output_node = model.get_output_nodes().pop()
     output_tensor = output_node.get_operator().get_output(0)
     return np.array(output_tensor)
 
-def prepare_sample(sample):
-    sample = np.reshape(sample, SAMPLE_SHAPE)
-    return sample.astype('float32')
-
 def compute_accuracy(model, samples, labels):
-    acc = 0
     scheduler = aidge_core.SequentialScheduler(model)
-    for i, sample in enumerate(samples):
-        x = prepare_sample(sample)
-        y = propagate(model, scheduler, x)
-        if labels[i] == np.argmax(y):
-            acc += 1
+    acc = sum(labels[i] == np.argmax(propagate(model, scheduler, x)) for i, x in enumerate(samples))
     return acc / len(samples)
 
# --------------------------------------------------------------
 # TEST CLASS
 # --------------------------------------------------------------
 
-class test_ptq(unittest.TestCase):
+class TestQuantization(unittest.TestCase):
 
     def setUp(self):
-
-        # load the samples / labels (numpy)
-
         curr_file_dir = Path(__file__).parent.resolve()
         self.samples = np.load(gzip.GzipFile(curr_file_dir / 'assets/mnist_samples.npy.gz', "r"))
-        self.labels = np.load(gzip.GzipFile(curr_file_dir / 'assets/mnist_labels.npy.gz', "r"))
-
-        # load the model in AIDGE
+        self.labels  = np.load(gzip.GzipFile(curr_file_dir / 'assets/mnist_labels.npy.gz', "r"))
+        self.quant_samples = np.round(self.samples.copy() * (2**(NB_BITS-1)-1))
 
-        self.model = aidge_onnx.load_onnx(curr_file_dir / "assets/" / MODEL_NAME, verbose=False)
-        aidge_core.remove_flatten(self.model)
+    def run_model_test(self, model_name):
 
-        self.model.set_datatype(aidge_core.dtype.float32)
-        self.model.set_backend("cpu")
+        expected_base, expected_quant, expected_quant_ssa = EXPECTED_RESULTS[model_name]
 
-    def tearDown(self):
-        pass
+        # load the model ...
+        model_path = Path(__file__).parent / "assets" / model_name
+        model = aidge_onnx.load_onnx(model_path, verbose=False)
+        aidge_core.remove_flatten(model)
 
-    def test_model(self):
+        model.set_datatype(aidge_core.dtype.float32)
+        model.set_backend("cpu")
+
+        # create the tensor subset (used for both evaluation and calibration)
 
-        Log.set_console_level(Level.Info)
-        # compute the base accuracy
-        accuracy = compute_accuracy(self.model, self.samples[0:NB_SAMPLES], self.labels)
-        self.assertAlmostEqual(accuracy * 100, ACCURACIES[0], msg='base accuracy does not meet the baseline !', delta=0.1)
+        tensors = [aidge_core.Tensor(np.reshape(sample, SAMPLE_SHAPE)) for sample in self.samples[:NB_SAMPLES]]
 
-    def test_quant_model(self):
+        # BASELINE ACCURACY
 
-        Log.set_console_level(Level.Debug)
+        base_accuracy = compute_accuracy(model, tensors, self.labels[:NB_SAMPLES])
+        self.assertAlmostEqual(base_accuracy * 100, expected_base, delta=DELTA, msg=f"[X] Baseline accuracy mismatch for {model_name}. Expected accuracy was: {expected_base}, but got: {base_accuracy * 100}")
 
-        # create the calibration dataset
-
-        tensors = []
-        for sample in self.samples[0:NB_SAMPLES]:
-            sample = prepare_sample(sample)
-            tensor = aidge_core.Tensor(sample)
-            tensors.append(tensor)
-
-        # quantize the model
+        # QUANTIZED ACCURACY
 
         aidge_quantization.quantize_network(
-            self.model,
-            NB_BITS,
-            tensors,
-            clipping_mode=aidge_quantization.Clipping.MSE,
-            no_quantization=False,
-            optimize_signs=True,
-            single_shift=False
-        )
-
-        # rescale the inputs
-
-        scaling = 2**(NB_BITS-1)-1
-        for i in range(NB_SAMPLES):
-            self.samples[i] = self.samples[i]*scaling # XXX np.round ???
-
-        # compute the quantized accuracy
-
-        accuracy = compute_accuracy(self.model, self.samples, self.labels)
-        self.assertAlmostEqual(accuracy * 100, ACCURACIES[1], msg='quantized accuracy does not meet the baseline !', delta=0.1)
-
+            network=model,
+            nb_bits=NB_BITS,
+            calibration_set=tensors,
+            target_type=TARGET_TYPE,
+            clipping_mode=CLIPPING,
+            no_quant=NO_QUANT,
+            optimize_signs=OPTIM_SIGNS,
+            single_shift=False,
+            use_cuda=False,
+            fold_graph=FOLD_GRAPH)
+
+        quant_accuracy = compute_accuracy(model, self.quant_samples[:NB_SAMPLES], self.labels)
+        self.assertAlmostEqual(quant_accuracy * 100, expected_quant, delta=DELTA, msg=f"[X] Quantized accuracy mismatch for {model_name}. Expected accuracy was: {expected_quant}, but got: {quant_accuracy * 100}")
+
+        # QUANTIZED ACCURACY WITH SSA
+
+        model = aidge_onnx.load_onnx(model_path, verbose=False)
+        model.set_datatype(aidge_core.dtype.float32)
+        model.set_backend("cpu")
+
+        aidge_quantization.quantize_network(
+            network=model,
+            nb_bits=NB_BITS,
+            calibration_set=tensors,
+            target_type=TARGET_TYPE,
+            clipping_mode=CLIPPING,
+            no_quant=NO_QUANT,
+            optimize_signs=OPTIM_SIGNS,
+            single_shift=True,
+            use_cuda=False,
+            fold_graph=FOLD_GRAPH)
+
+        quant_accuracy_ssa = compute_accuracy(model, self.quant_samples[:NB_SAMPLES], self.labels)
+        self.assertAlmostEqual(quant_accuracy_ssa * 100, expected_quant_ssa, delta=DELTA, msg=f"[X] Quantized accuracy (with SSA) mismatch for {model_name}. Expected accuracy was: {expected_quant_ssa}, but got: {quant_accuracy_ssa * 100}")
+
+    def test_models(self):
+        for model in EXPECTED_RESULTS.keys():
+            with self.subTest(model=model):
+                self.run_model_test(model)
 
 if __name__ == '__main__':
     unittest.main()
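
Note on the new `quant_samples` member: it replaces the old in-place rescaling loop (and settles that loop's `# XXX np.round ???` question) by mapping the normalized float inputs onto the integer grid that the quantized, folded network expects. A minimal standalone sketch of the same computation, with hypothetical pixel values and `NB_BITS = 4` as in the test configuration:

```python
import numpy as np

NB_BITS = 4
scale = 2 ** (NB_BITS - 1) - 1  # 7: the largest magnitude representable in signed 4 bits

# hypothetical normalized MNIST pixels in [0, 1]
samples = np.array([0.0, 0.25, 0.5, 1.0], dtype=np.float32)

# multiply into the quantized input range, then round: without np.round the
# inputs would keep fractional parts that an integer pipeline cannot carry
quant_samples = np.round(samples * scale)
print(quant_samples)  # [0. 2. 4. 7.]
```

Driving all five models through the single `test_models` entry point with `subTest` is a deliberate design choice: each topology reports its pass/fail status independently, so one failing model no longer hides the results of the others.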