Skip to content
Snippets Groups Projects

Real quantization cast for PTQ

Merged Noam Zerah requested to merge noamzerah/aidge_quantization:quantization_cast into dev
4 files
+ 93
222
Compare changes
  • Side-by-side
  • Inline
Files
4
import unittest
import unittest
import gzip
import numpy as np
import numpy as np
from pathlib import Path
import gzip
import aidge_core
import aidge_core
import aidge_backend_cpu
import aidge_onnx
import aidge_onnx
 
import aidge_backend_cpu
import aidge_quantization
import aidge_quantization
 
import sys
 
from pathlib import Path
from aidge_core import Log, Level
"""
Unit test for the PTQ pipeline:
 
This script is designed to test and validate the accuracy of five small model topologies on the MNIST dataset:
 
["MiniResNet", "ConvNet", "BranchNetV4", "TestNet", "MLP"]
 
It compares the results for three configurations: the baseline, quantization, and quantization with single shift.
 
SIGMA is the tolerance, in accuracy percentage points, allowed between a measured accuracy and its expected value.
 
"""
 
aidge_core.Log.set_console_level(aidge_core.Level.Error) # Reduce useless logs
# --------------------------------------------------------------
# CONFIGURATION
# --------------------------------------------------------------

# Number of samples taken from the MNIST sample archive for calibration
# and evaluation.
NB_SAMPLES = 1000  # max : 1000
# Shape each sample is reshaped to before being wrapped in a Tensor.
SAMPLE_SHAPE = (1, 1, 28, 28)
# Bit width handed to quantize_network.
NB_BITS = 4
# Clipping mode handed to quantize_network.
CLIPPING = aidge_quantization.Clipping.MSE

# Single-model settings: ONNX file looked up under assets/ and its expected
# (baseline, quantized) accuracies, in percent.
MODEL_NAME = 'MiniResNet.onnx'  # 'ConvNet.onnx'
ACCURACIES = (95.4, 94.4)  # (97.9, 97.7)

# Expected accuracies, in percent, for each ONNX file looked up under assets/:
# (baseline, quantized, quantized with single shift).
EXPECTED_RESULTS = {
    "MiniResNet.onnx": (95.4, 94.5, 94.7),
    "ConvNet.onnx": (97.9, 97.7, 97.4),
    "BranchNetV4.onnx": (93.8, 93.2, 93.7),
    "TestNet.onnx": (95.5, 94.2, 94.2),
    "MLP.onnx": (94.7, 94.2, 93.3),
}

# Tolerance used as ``delta`` when comparing a measured accuracy (in percent)
# with its entry in EXPECTED_RESULTS.
SIGMA = 0.05
# --------------------------------------------------------------
# UTILS
# --------------------------------------------------------------
def propagate(model, scheduler, sample):
    """Run one forward pass on *sample* and return the output as a NumPy array.

    Args:
        model: Graph whose output node is read after the forward pass.
        scheduler: Scheduler built on *model*; its ``forward`` method is
            called with the input tensor.
        sample: Array-like holding one input; it is reshaped to
            ``SAMPLE_SHAPE`` (a no-op when it already has that shape).

    Returns:
        ``np.ndarray`` built from output 0 of the popped output node.
    """
    sample = np.reshape(sample, SAMPLE_SHAPE)
    input_tensor = aidge_core.Tensor(sample)
    scheduler.forward(True, [input_tensor])
    # NOTE(review): pop() takes one element of get_output_nodes(); this
    # presumably assumes the model has a single output node - confirm.
    output_node = model.get_output_nodes().pop()
    output_tensor = output_node.get_operator().get_output(0)
    return np.array(output_tensor)
def prepare_sample(sample):
    """Return *sample* reshaped to ``SAMPLE_SHAPE`` and cast to float32."""
    reshaped = np.reshape(sample, SAMPLE_SHAPE)
    return reshaped.astype('float32')
def compute_accuracy(model, samples, labels):
    """Return the fraction of *samples* whose predicted class matches its label.

    A single ``SequentialScheduler`` is created for *model* and reused for
    every sample. The predicted class is the ``np.argmax`` of the array
    returned by ``propagate``. Samples are forwarded as given: no dtype
    conversion happens here, so callers that need a specific dtype must
    convert beforehand.

    Args:
        model: Graph to evaluate.
        samples: Sequence of input samples.
        labels: Indexable of expected classes; ``labels[i]`` is compared
            with the prediction for ``samples[i]``.

    Returns:
        Number of matches divided by ``len(samples)``.

    Raises:
        ZeroDivisionError: If *samples* is empty.
    """
    scheduler = aidge_core.SequentialScheduler(model)
    # Count each sample exactly once, in a single pass.
    correct = sum(
        1
        for i, sample in enumerate(samples)
        if labels[i] == np.argmax(propagate(model, scheduler, sample))
    )
    return correct / len(samples)
# --------------------------------------------------------------
# TEST CLASS
# --------------------------------------------------------------
def _load_npy_gz(path):
    """Load a gzip-compressed ``.npy`` file and close the file handle."""
    with gzip.open(path, "rb") as stream:
        return np.load(stream)


class test_ptq(unittest.TestCase):
    """Single-model PTQ check.

    Loads ``MODEL_NAME`` as a float32 CPU model and compares its accuracy
    before and after ``quantize_network`` with ``ACCURACIES`` (in percent),
    using a tolerance of 0.1.
    """

    def setUp(self):
        # load the samples / labels (numpy)
        curr_file_dir = Path(__file__).parent.resolve()
        self.samples = _load_npy_gz(curr_file_dir / 'assets/mnist_samples.npy.gz')
        self.labels = _load_npy_gz(curr_file_dir / 'assets/mnist_labels.npy.gz')

        # load the model in AIDGE
        self.model = aidge_onnx.load_onnx(curr_file_dir / "assets/" / MODEL_NAME, verbose=False)
        aidge_core.remove_flatten(self.model)
        self.model.set_datatype(aidge_core.dtype.float32)
        self.model.set_backend("cpu")

    def tearDown(self):
        pass

    def test_model(self):
        Log.set_console_level(Level.Info)

        # compute the base accuracy
        # Samples are reshaped and cast to float32 here so the check does not
        # depend on compute_accuracy performing that conversion.
        samples = [prepare_sample(sample) for sample in self.samples[0:NB_SAMPLES]]
        accuracy = compute_accuracy(self.model, samples, self.labels)
        self.assertAlmostEqual(accuracy * 100, ACCURACIES[0], msg='base accuracy does not meet the baseline !', delta=0.1)

    def test_quant_model(self):
        Log.set_console_level(Level.Debug)

        # create the calibration dataset
        tensors = [
            aidge_core.Tensor(prepare_sample(sample))
            for sample in self.samples[0:NB_SAMPLES]
        ]

        # quantize the model
        aidge_quantization.quantize_network(
            self.model,
            NB_BITS,
            tensors,
            clipping_mode=aidge_quantization.Clipping.MSE,
            no_quantization=False,
            optimize_signs=True,
            single_shift=False
        )

        # rescale the inputs
        # TODO: decide whether the scaled inputs should be rounded (np.round).
        scaling = 2**(NB_BITS-1)-1
        scaled_samples = [
            prepare_sample(sample * scaling)
            for sample in self.samples[0:NB_SAMPLES]
        ]

        # compute the quantized accuracy
        # Only the rescaled samples are evaluated, so no unscaled input can
        # slip in when NB_SAMPLES is smaller than the archive.
        accuracy = compute_accuracy(self.model, scaled_samples, self.labels)
        self.assertAlmostEqual(accuracy * 100, ACCURACIES[1], msg='quantized accuracy does not meet the baseline !', delta=0.1)


class TestQuantization(unittest.TestCase):
    """Multi-model PTQ check.

    For every entry of ``EXPECTED_RESULTS`` the float64 baseline accuracy,
    the quantized accuracy and the quantized single-shift accuracy are
    compared (in percent) with the expected values, within ``SIGMA``.
    """

    def setUp(self):
        curr_file_dir = Path(__file__).parent.resolve()
        self.samples = _load_npy_gz(curr_file_dir / 'assets/mnist_samples.npy.gz')
        self.labels = _load_npy_gz(curr_file_dir / 'assets/mnist_labels.npy.gz')
        # Inputs for the quantized models: the samples multiplied by the
        # largest positive value of a signed NB_BITS integer. Computed from
        # the array already in memory instead of reading the archive again.
        self.quantized_sample = self.samples * ((1 << (NB_BITS - 1)) - 1)

    @staticmethod
    def _load_model(model_path):
        """Load an ONNX model, remove Flatten nodes, set float64 on CPU."""
        model = aidge_onnx.load_onnx(model_path, verbose=False)
        aidge_core.remove_flatten(model)
        model.set_datatype(aidge_core.dtype.float64)
        model.set_backend("cpu")
        return model

    @staticmethod
    def _quantize(network, tensors, single_shift):
        """Call ``quantize_network`` on *network* with the shared settings."""
        aidge_quantization.quantize_network(network=network,
                                            nb_bits=NB_BITS,
                                            input_dataset=tensors,
                                            clipping_mode=CLIPPING,
                                            target_type=aidge_core.dtype.float64,
                                            no_quantization=False,
                                            optimize_signs=True,
                                            single_shift=single_shift,
                                            use_cuda=False,
                                            fold_graph=True,
                                            bitshift_rounding=False,
                                            verbose=False)

    def run_model_test(self, model_name):
        """Check baseline, quantized and single-shift accuracy of one model.

        Args:
            model_name: Key of ``EXPECTED_RESULTS``; also the ONNX file name
                looked up under ``assets/``.
        """
        model_path = Path(__file__).parent / "assets" / model_name
        model = self._load_model(model_path)

        expected_base, expected_quant, expected_quant_ss = EXPECTED_RESULTS[model_name]

        # Baseline Accuracy
        base_accuracy = compute_accuracy(model, self.samples[:NB_SAMPLES], self.labels)
        self.assertAlmostEqual(base_accuracy * 100, expected_base, delta=SIGMA, msg=f"[X] Baseline accuracy mismatch for {model_name}. Expected accuracy was: {expected_base}, but got: {base_accuracy * 100}")

        # Quantize
        tensors = [aidge_core.Tensor(np.reshape(sample, SAMPLE_SHAPE)) for sample in self.samples[:NB_SAMPLES]]
        self._quantize(model, tensors, single_shift=False)
        quant_accuracy = compute_accuracy(model, self.quantized_sample[:NB_SAMPLES], self.labels)
        self.assertAlmostEqual(quant_accuracy * 100, expected_quant, delta=SIGMA, msg=f"[X] Quantized accuracy mismatch for {model_name},Expected accuracy was: {expected_quant}, but got: {quant_accuracy * 100}")

        # Quantize with Single Shift
        # The model is loaded again so the single-shift run starts from a
        # graph that has not been quantized yet.
        model_ss = self._load_model(model_path)
        self._quantize(model_ss, tensors, single_shift=True)
        quant_accuracy_ss = compute_accuracy(model_ss, self.quantized_sample[:NB_SAMPLES], self.labels)
        self.assertAlmostEqual(quant_accuracy_ss * 100, expected_quant_ss, delta=SIGMA, msg=f"[X] Quantized Single Shift accuracy mismatch for {model_name}.Expected accuracy was: {expected_quant_ss}, but got: {quant_accuracy_ss * 100}")

    def test_models(self):
        """Run ``run_model_test`` for every model, one sub-test per model."""
        for model in EXPECTED_RESULTS:
            with self.subTest(model=model):
                self.run_model_test(model)
# Run the test suite when the file is executed as a script.
if __name__ == '__main__':
    unittest.main()
Loading