
Draft: Dev ptq

Closed Cyril Moineau requested to merge DevPTQ into master
@@ -2,22 +2,43 @@
import aidge_core
import aidge_backend_cpu
import aidge_onnx
import aidge_quantization
import numpy as np
import matplotlib.pyplot as plt
from jinja2 import Environment, FileSystemLoader
import os
import pathlib
import onnx
model = aidge_onnx.load_onnx("MNIST_model/LeNet_MNIST.onnx")
digit = np.load("MNIST_model/digit.npy")
# check whether the input tensor is signed or not
input_signed = np.any(digit[:, 0] < 0)
input_unsigned = not input_signed
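# Note (assumption about the PTQ API): the unsigned flag is forwarded to
# quantize_network below; an unsigned input typically lets a quantizer use the
# full positive range (e.g. [0, 255] for 8 bits) instead of the symmetric
# signed one (e.g. [-127, 127]).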
#print(f"Aidge Input Tensor unsigned : \n{input_unsigned}")
output_model = np.load("MNIST_model/output_digit.npy")
plt.imshow(digit[0][0], cmap='gray')
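# (when run as a plain script rather than a notebook, a plt.show() call would
# be needed to actually render the figure)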
print(output_model)
input_tensor = aidge_core.Tensor(digit)
print(f"Aidge Input Tensor dimensions: \n{input_tensor.dims()}")
#print(output_model)
# renormalize digit symmetrically so that values lie in [-1, 1]
# (an alternative min-max rescaling to [0, 1] would be:
#  digit_norm = (digit - np.min(digit)) / (np.max(digit) - np.min(digit)))
max_el = np.max(digit)
min_el = np.min(digit)
# print(f"max_el = {max_el}")
# print(f"min_el = {min_el}")
if abs(min_el) > max_el:
    max_el = abs(min_el)
digit_norm = np.divide(digit, max_el)
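# Worked example of the symmetric normalization above, with hypothetical
# values: for max_el = 2.0 and min_el = -3.0 the divisor becomes 3.0, so
# every normalized value is guaranteed to land in [-1, 1].
example = np.array([-3.0, 2.0])
example_div = max(np.max(example), abs(np.min(example)))
assert np.all(np.abs(example / example_div) <= 1.0)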
input_tensor = aidge_core.Tensor(digit_norm)
#print(f"Aidge Input Tensor : \n{input_tensor}")
## Create Producer Node for the Graph
input_node = aidge_core.Producer(input_tensor, "X")
@@ -29,18 +50,7 @@ input_node.get_operator().set_backend("cpu")
## Link Producer to the Graph
input_node.add_child(model)
nodesRegexes = {}
nodesRegexes["Flatten"] = aidge_core.NodeRegex("Flatten")
# Graph Regex
graphRegex = ["Flatten;"]
graphMatching = aidge_core.GRegex(nodesRegexes, graphRegex)
all_match = graphMatching.match(model)
print('Number of match : ', all_match.get_nb_match())
for mn in all_match.get_match_nodes():
    aidge_core.remove_flatten(mn)
aidge_core.remove_flatten(model)
model.save("my_supported_LeNet")
@@ -48,29 +58,109 @@ model.save("my_supported_LeNet")
model.set_datatype(aidge_core.DataType.Float32)
model.set_backend("cpu")
# when we normalize the input, we must also rescale the bias of Conv/FC layers before quantizing
for node in model.get_nodes():
    if node.type() == 'Conv' or node.type() == 'FC':
        aidge_quantization.rescale_additive_params(node, max_el)
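# Rationale, as a generic identity (not Aidge-specific): for a layer
# y = W @ x + b, feeding the rescaled input x / max_el gives
# W @ (x / max_el) + b / max_el = y / max_el, so the bias must be divided
# by the same factor for the whole output to stay consistently rescaled.
w_ex, b_ex, x_ex = 2.0, 1.0, 4.0  # hypothetical scalar layer
assert np.isclose((w_ex * x_ex + b_ex) / max_el, w_ex * (x_ex / max_el) + b_ex / max_el)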
# Add a hook to every operator to record its output range during inference
for node in model.get_nodes():
    node.get_operator().add_hook("output_range")
# Create SCHEDULER
scheduler = aidge_core.SequentialScheduler(model)
# Run inference !
scheduler.forward()
# # for debug if needed
# for node in model.get_nodes():
#     print('node name = ', node.name())
#     if node.name() == '__feature_extractor_0__feature_extractor_0_0_Conv_output_0':
#         output_aidge = np.array(node.get_operator().output(0))
#         print('Relu output = ', output_aidge)
#     # if node.name() == '/_Flatten_output_0':
#     #     output_aidge = np.array(node.get_operator().output(0))
#     #     print('MaxPool output = ', output_aidge)
# # Assert results
# for outNode in model.get_output_nodes():
#     output_aidge = np.array(outNode.get_operator().output(0))
#     # assert np.allclose(output_aidge, output_model, rtol=1e-04)
#     print(output_aidge)
#     print("=====")
#     print(output_model)
# PTQ part, for testing
# quantize the network, following the scheduler's ordered graph as in the C++ implementation
nb_bits = 8
aidge_quantization.quantize_network(input_unsigned, scheduler, nb_bits, True)
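# (presumably quantize_network derives its per-layer scaling factors from the
# output ranges recorded by the "output_range" hooks during the float forward
# pass above; this is why the hooks were added and scheduler.forward() was run first)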
model.save("my_quantized_LeNet")
#check the output of the quantized model with quantized input
model_quantized = model  # note: a plain alias of the quantized graph, not a copy
#model_quantized = model.clone()
for node in model_quantized.get_nodes():
    print('node name = ', node.name())
print('Inputs quantization')
quant_scaling_factor = np.power(2, nb_bits - 1) - 1
digit_quant = np.multiply(digit_norm, quant_scaling_factor)
# print(f"digit_quant = {digit_quant}")
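# With nb_bits = 8 the scaling factor is 2**7 - 1 = 127, so digit_quant spans
# the signed 8-bit range [-127, 127] (digit_norm lies in [-1, 1]).
assert quant_scaling_factor == 127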
input_tensor_quant = aidge_core.Tensor(digit_quant)
#print(f"Aidge Input Tensor : \n{input_tensor_quant}")
## Create Producer Node for the Graph
input_node_quant = aidge_core.Producer(input_tensor_quant, "X")
### Configuration for input
input_node_quant.get_operator().set_datatype(aidge_core.DataType.Float32)
input_node_quant.get_operator().set_backend("cpu")
## Link Producer to the Graph: find the first Conv node and plug the quantized
## input producer onto its first input
input_node_graph = None
for node in model_quantized.get_nodes():
    if node.name() == '__feature_extractor_0__feature_extractor_0_0_Conv_output_0':
        input_node_graph = node
input_node_quant.add_child(model_quantized, 0, (input_node_graph, 0))
# Configure the model
model_quantized.set_datatype(aidge_core.DataType.Float32)
model_quantized.set_backend("cpu")
scheduler_quant = aidge_core.SequentialScheduler(model_quantized)
scheduler_quant.forward(verbose=True)
# if debug is needed at some point
'''
for node in model.get_nodes():
    print('node name = ', node.name())
    if node.name() == '/_feature_extractor_0/_feature_extractor_0.1/Relu_output_0':
        output_aidge = np.array(node.get_operator().output(0))
        print('Relu output = ', output_aidge)
    if node.name() == '/MaxPool_output_0':
        output_aidge = np.array(node.get_operator().output(0))
        print('MaxPool output = ', output_aidge)
    if node.name() == '__feature_extractor_0__feature_extractor_0_0_Conv_output_0':
        conv_input = np.array(node.get_operator().input(0))
        weights = np.array(node.get_operator().input(1))
        bias = np.array(node.get_operator().input(2))
        #output = np.array(node.get_operator().output(0))
        print('conv0 input = ', conv_input)
        print('conv0 weights = ', weights)
        print('conv0 bias = ', bias)
    #if node.name() == '__feature_extractor_0__feature_extractor_0_1_Relu_output_0':
    #    output = np.array(node.get_operator().output(0))
    #    print('conv0 RELU output = ', output)
    #if node.name() == '__feature_extractor_0__feature_extractor_0_1_Relu_output_0_rescale_act':
    #    output = np.array(node.get_operator().output(0))
    #    print('conv0 RELU Scaling output = ', output)
'''
# reference output taken from N2D2
output_n2d2_quant_np = np.array([[127., -33., 14., -28., -22., -33., 6., -0., -13., 14.]])
output_n2d2_quant = aidge_core.Tensor(output_n2d2_quant_np)
# Assert results
for outNode in model.get_output_nodes():
    output_aidge = np.array(outNode.get_operator().output(0))
    assert np.allclose(output_aidge, output_model, rtol=1e-04)
for outNode in model_quantized.get_output_nodes():
    output_aidge_quant = np.array(outNode.get_operator().output(0))
    print(output_aidge_quant)
    print("==============================")
    print(output_n2d2_quant)
    assert np.allclose(output_aidge_quant, output_n2d2_quant, rtol=1e-04)