"""
resnet.py

This file allows the generation of a resnet18 CPP export.

In order for this file to work properly, you should first download the imagenet dataset
(search for "ILSVRC2012").
"""

import random
import numpy as np
import os
import shutil
from PIL import Image

# Aidge Modules
import aidge_core
import aidge_onnx
import aidge_backend_cpu
import aidge_quantization
import aidge_export_cpp

from aidge_export_cpp.export_utils import (
    cpp_fuse_to_metaops, 
    exclude_unwanted_producers, 
    set_nodes_names, 
    set_nodes_datatypes,
    normalize)

from aidge_core.export_utils import remove_optional_inputs

# Torch (Dataset)
import torch
import torch.nn.functional as F
from torch import nn
from torchvision import transforms, datasets

# Arguments
import argparse

supported_types = ["float32", "int8"]

parser = argparse.ArgumentParser(description="Export the ResNet18 model with the aidge_export_cpp module.")
parser.add_argument("--dev", action="store_true", help="Export in dev mode")
parser.add_argument("--no_cuda", action="store_true", help="Disable USE_CUDA usage to perform inferences and training.")
parser.add_argument("--dtype", type=str, choices=supported_types, default="float32", help="Specify the targeted datatype : [int8, float32]")
parser.add_argument("--aidge_cmp", action="store_true", help="Use aidge tensor results as reference.")
parser.add_argument(
    '-v', '--verbose',
    action='count',
    default=0,
    help = (
        "Set the verbosity level of the console output."
        "Use -v to increase verbosity, with the following levels in ascending ordern"
        "default WARN - Only warnings and higher (WARN, ERROR, FATAL) are displayed.n"
        "-v NOTICE - Notices and higher (NOTICE, WARN, ERROR, FATAL) are displayed.n"
        "-vv INFO - Informational messages and higher (INFO, NOTICE, WARN, ERROR, FATAL) are displayed.n"
        "-vvv DEBUG - All messages, including debug information, are displayed.n"
        "Available levels in descending order of severityn"
        "DEBUG < INFO < NOTICE < WARN < ERROR < FATAL."
    )
)
args = parser.parse_args()

USE_CUDA = not args.no_cuda

# Setting Aidge verbose level
if args.verbose == 0:
    aidge_core.Log.set_console_level(aidge_core.Level.Error)
elif args.verbose == 1:
    aidge_core.Log.set_console_level(aidge_core.Level.Notice)
elif args.verbose == 2:
    aidge_core.Log.set_console_level(aidge_core.Level.Info)
elif args.verbose >= 3:
    aidge_core.Log.set_console_level(aidge_core.Level.Debug) 

if USE_CUDA:
    import aidge_backend_cuda

# ------------------------------------------------------------
# EXPORT CONFIG
# ------------------------------------------------------------

"""
Export configuration details :
- RNG_SEED :        Fix a random seed for torch to always get the same images from the dataset,
                        therefore always getting the same output. 
- NB_TEST :         Number of example inferences to perform (used to get an accuracy approximation).
- NB_CALIB :        Number of samples used for the calibration step of quantization. 
- MODEL_NAME :      Should be the same name as the onnx file you want to load and export. 
- DO_EXAMPLES :     Perform example inferences (and allow to get accuracy approximation)
- NB_BITS :         Quantization output precision. Should be 8 to work with this export. 
- TARGET_TYPE :     The aidge datatype for tensors to be casted after the quantization step.
- OPTIM_SIGN :      Quantization optional optimization based on data sign. 
- SINGLE_SHIFT :    Quantization option specifying if inserted scaling nodes should be
                        single shift or floating point.
- ROUNDING :        Apply rounding on the data after the single shift step. 
- NO_QUANTIZATION : Skip the quantization step. Should be set to False. 
- CLIPPING :        Clipping method during quantization. 
- FOLD_GRAPH :      The quantization step adds cast nodes to cast the graph into the given TARGET_TYPE.
                        Enabling the FOLD_GRAPH will automatically fold these nodes into the following
                        ones at the end of quantization step. 
- USE_CUDA :        Determine if the quantization step uses the GPU. It is generally recommended
                        to enable this option if you have access to GPUs as the quantization step
                        may take a while to complete.
- DEV_MODE :        The dev mode allows to identify errors more easily export the model with 
                        symbolic links enabling to modify the source files directly in the
                        generated export (make sure you installed the export plugin running
                        `pip install -e .`). 
                        Enabled running this python file, adding the --test argument. 
- AIDGE_MODE :      Saves and export the outputs generated by the aidge inferences in order
                        to compare it with the export outputs. 
                        Enabled running this python file, adding the --aidge_cmp argument.
"""

print(" Available backends : ", aidge_core.Tensor.get_available_backends())

quantize_model = False
NB_BITS = 32
TARGET_TYPE = aidge_core.dtype.float32 

if args.dtype == "float32":
    quantize_model = False
elif args.dtype == "int8":
    quantize_model = True
    NB_BITS = 8
    TARGET_TYPE = aidge_core.dtype.int32    # int8 not yet available
else:
    print(f"[ERROR] Datatype '{args.dtype}' not supported.")
    print(f"[ERROR] Supported datatypes : {supported_types}.")
    exit(1)

RNG_SEED        = 1234 
NB_TEST         = 20 # Test set
NB_CALIB        = 20 # Calibration set
MODEL_NAME      = 'resnet18'
EXPORT_FOLDER   = f"export_{MODEL_NAME}_int8"
DO_EXAMPLES     = True
 
# Quantization params
OPTIM_SIGN      = False   
SINGLE_SHIFT    = True    
ROUNDING        = True 
NO_QUANTIZATION = False
CLIPPING        = aidge_quantization.Clipping.MSE  # 'MAX'
FOLD_GRAPH      = True

# Export modes
DEV_MODE      = args.dev
AIDGE_CMP     = args.aidge_cmp

### Add your paths here ###
IMAGENET_PATH = "/database/ILSVRC2012/val"  # Look for ILSVRC2012/val
VAL_PATH = "/database/ILSVRC2012/val.txt"   # File containing labels of image of val folder (Look for val.txt)
###########################

def print_cfg():
    print('\n RNG_SEED         = ', RNG_SEED)
    print(' MODEL_NAME       = ', MODEL_NAME)
    print(' NB_TEST          = ', NB_TEST)
    print(' NB_CALIB         = ', NB_CALIB)
    print(' NB_BITS          = ', NB_BITS)
    print(' OPTIM_SIGN       = ', OPTIM_SIGN)
    print(' NO_QUANTIZATION  = ', NO_QUANTIZATION)
    print(' CLIPPING         = ', CLIPPING)
    print(' SINGLE_SHIFT     = ', SINGLE_SHIFT)
    print(' TARGET_TYPE      = ', TARGET_TYPE)
    print(' FOLD_GRAPH       = ', FOLD_GRAPH)
    print(' USE_CUDA         = ', USE_CUDA)
    print(' DEV_MODE         = ', DEV_MODE)
    print(' ROUNDING         = ', ROUNDING)

print_cfg()

torch.manual_seed(RNG_SEED)
random.seed(RNG_SEED)
np.random.seed(RNG_SEED)

backend = "cuda" if USE_CUDA else "cpu"

image_label_pairs = []
with open(VAL_PATH, 'r') as f:
    for line in f:
        parts = line.strip().split()
        if len(parts) == 2:
            image_name, label = parts
            image_label_pairs.append((image_name, int(label)))

np.random.seed(RNG_SEED)
NB_SELECT = max(NB_TEST, NB_CALIB)  # Check that NB_TEST and NB_CALIB are fixed
selected_pairs = image_label_pairs[:NB_SELECT]

# --------------------------------------------------------------
# CREATE THE SAMPLES
# --------------------------------------------------------------

transform_val = transforms.Compose([transforms.Resize(256),
                                    transforms.CenterCrop(224),
                                    transforms.ToTensor(),
                                    transforms.Normalize(mean=[0.485,0.456, 0.406], std=[0.229, 0.224, 0.225])
                                    ])

tensors = []
labels  = []
paths   = []
index = 0

for image_name, label in selected_pairs:
    image_path = os.path.join(IMAGENET_PATH, image_name)
    if os.path.exists(image_path):
        try:
            image = Image.open(image_path)
            if image.mode != 'RGB':
                image = image.convert('RGB')
            tensor = transform_val(image)
            tensors.append(tensor)
            labels.append(label)
            paths.append(image_path)
        except Exception as e:
            print(f"Error with image {image_path}: {e}")

backend = "cuda" if USE_CUDA else "cpu"
aidge_tensors = []
for tensor in tensors:
    array = tensor.numpy()
    array = np.reshape(array, (1, 3, 224, 224))
    array = normalize(array)
    aidge_tensor = aidge_core.Tensor(array)
    aidge_tensor.set_backend(backend)
    aidge_tensor.set_datatype(aidge_core.dtype.float32)
    aidge_tensors.append(aidge_tensor)


# --------------------------------------------------------------
# LOAD THE MODEL
# --------------------------------------------------------------

"""
Load the .onnx model and perform some usual graph modifications :
    - Remove the flatten nodes;
    - Fuse the batchnorm nodes into the biases producers. 
"""

model = aidge_onnx.load_onnx(MODEL_NAME + ".onnx", verbose=False)
model.save("imported_model")
aidge_core.remove_flatten(model)
aidge_core.fuse_batchnorm(model)
model.save("imported_model_fused_bn")

# --------------------------------------------------------------
# SET UP THE AIDGE SCHEDULER
# --------------------------------------------------------------

"""
The scheduler is an ordered version of the model, allowing to schedule
nodes to be able to run inferences, for instance. 
"""

# Set up the backend
model.set_datatype(aidge_core.dtype.float32)
model.set_backend(backend)

# Create the Scheduler 
scheduler = aidge_core.SequentialScheduler(model)

# --------------------------------------------------------------
# RUN SOME EXAMPLES INFERENCES
# --------------------------------------------------------------

def propagate(model, scheduler, aidge_tensor):
    """ Propagate the given tensor into the model
    """
    # Run the inference 
    scheduler.forward(True, [aidge_tensor])    
    # Gather the results
    output_node = model.get_output_nodes().pop()
    output_tensor = output_node.get_operator().get_output(0).clone()
    output_tensor.set_backend("cpu")
    return np.array(output_tensor)

accuracy = 0
if (DO_EXAMPLES):
    print('\n EXAMPLE INFERENCES :')
    nb_valid = 0
    base_values = []
    for i in range(NB_TEST):
        output_array = propagate(model, scheduler, aidge_tensors[i])
        print(labels[i], ' VS ', np.argmax(output_array), ' -> ', np.max(output_array))
        base_values.append(np.max(output_array))
        if (labels[i] == np.argmax(output_array)):
            nb_valid += 1
    accuracy = nb_valid / NB_TEST
    print('\n MODEL ACCURACY = ', accuracy * 100, '%')


#--------------------------------------------------------------
# PERFORM THE QUANTIZATION
# --------------------------------------------------------------

if quantize_model:
    aidge_quantization.quantize_network(
        network = model, 
        nb_bits = NB_BITS, 
        input_dataset = aidge_tensors[0:NB_CALIB], 
        clipping_mode = CLIPPING,
        target_type = TARGET_TYPE,
        no_quantization = NO_QUANTIZATION,
        optimize_signs = OPTIM_SIGN, 
        single_shift = SINGLE_SHIFT, 
        use_cuda = USE_CUDA,
        fold_graph = FOLD_GRAPH,
        bitshift_rounding = ROUNDING)

# --------------------------------------------------------------
# RESCALE THE INPUT SAMPLES 
# --------------------------------------------------------------

"""
Once the quantization is done, the graph now only accepts integer inputs. 
So we need to rescale the dataset for the data to be within [0, 255].
Also, tensors should be casted to be the same type as TARGET_TYPE. 
"""
if quantize_model:
    rescaling = 2**(NB_BITS-1)-1
    for i in range(max(NB_TEST, NB_CALIB)):
        array = np.array(aidge_tensors[i]) * rescaling 
        aidge_tensors[i] = aidge_core.Tensor(array)
        aidge_tensors[i].set_datatype(TARGET_TYPE)

# --------------------------------------------------------------
# GENERATE NEW SCHEDULER
# --------------------------------------------------------------

"""
Each time the graph has been change, it has to be reset. 
Here some Quantizer and Cast nodes have been added. 
"""

if quantize_model:
    scheduler.reset_scheduling()

# --------------------------------------------------------------
# PERFORM THE EXAMPLE INFERENCES AGAIN
# --------------------------------------------------------------

model.save("post_ptq")

if (DO_EXAMPLES and quantize_model):
    print('\n QUANTIZED EXAMPLE INFERENCES :')
    nb_valid = 0
    post_values = []
    for i in range(NB_TEST):
        output_array = propagate(model, scheduler, aidge_tensors[i])
        print(labels[i], ' VS ', np.argmax(output_array), ' -> ', np.max(output_array))
        post_values.append(np.max(output_array))
        if (labels[i] == np.argmax(output_array)):
            nb_valid += 1

    quant_accuracy = nb_valid / NB_TEST
    print('\n MODEL ACCURACY = ', accuracy * 100, '%')
    print('\n QUANTIZED ACCURACY = ', quant_accuracy * 100, '%')

if USE_CUDA:    
    model.set_backend("cpu")
    for aidge_tensor in aidge_tensors:
        aidge_tensor.set_backend("cpu")

# --------------------------------------------------------------
# FUSE NODES INTO METAOPS
# --------------------------------------------------------------

"""
Here is made the link between the Aidge model and the CPP
kernels implementation. In aidge, all the nodes calculations
are performed separately (Pad -> Conv -> Quantizer -> ReLU -> ...).

However within the CPP export, some core operators are merged
in meta operators. For instance, the padding, scaling and ReLU are
performed within the Conv kernel. 

In this step, we use graph regex techniques to find the desired patterns
within the graph in order to match the export implementation of the kernels. 
"""

# Expand meta ops
"""
We first need to expand the graph to break all the metaops that may already
exist. For instance, PaddedConv will become Pad -> Conv. 
"""
aidge_core.expand_metaops(model)


model.save("after_expand")

# Exclude unwanted producers 
"""
Before fusing the nodes, we set a tag on the Producers in order to exclude
from the export the ones holding coefficients, as they are directly handled
within the layers parameters. 
"""
exclude_unwanted_producers(model)

# Fuse nodes
cpp_fuse_to_metaops(model)

# Remove optional inputs 
"""
Some optional inputs may be added by the quantization step (for instance with the clipping nodes).
Here we make sure that they will not be considered as actual graph inputs by the export, by
excluding them from the ordered_inputs list of the model. 
"""
remove_optional_inputs(model)

# Reset scheduler to apply graph modifications
"""
The scheduler always needs to be reset after graph manipulation.
"""
scheduler.reset_scheduling()

# Name newly created MetaOps
"""
As names are optional in Aidge, the fuse_to_metaops function will not automatically
give a name to the newly created metaOps. However, in an export context, we need 
our operators to be named, as this will be used to name the corresponding files.
"""
scheduler.generate_scheduling() # Scheduler needs to be generated as it has just been reset
set_nodes_names(scheduler)

# --------------------------------------------------------------
# LOG OUTPUTS FOR THE LAST IMAGE OF THE TEST DATASET
# --------------------------------------------------------------

"""
Here a final inference is made on the input we want to export and run. 
This will ensure that all the feature maps tensors (between the layers)
hold the data corresponding to this specific input. 
Then, the "log_outputs()" function (called later) will store these tensors
into log files that may be exported as well for comparison purpose. 
"""

output_array = propagate(model, scheduler, aidge_tensors[0])

print("### Exported Sample ###")
print("Aidge prediction after quantization :", np.argmax(output_array), "(" + str(np.max(output_array)) + ")")
print("Label :", labels[0])

# --------------------------------------------------------------
# HANDLING DATATYPE
# --------------------------------------------------------------

"""
Now, despite the quantization stage, all the tensors of the model are
still "virtually" in Int32. Before exporting the model, we have to set
tensors' datatypes to Int8, except for biases which should remain in Int32.
"""
if quantize_model:
    set_nodes_datatypes(model)

# Store tensors values into log files
"""
Once the tensors has been casted, the log_outputs() function can be
called to store their values into log files. 
"""

if os.path.isdir("log_outputs"):
    shutil.rmtree("log_outputs")
model.log_outputs("log_outputs")

# --------------------------------------------------------------
# TEST MODE
# --------------------------------------------------------------

"""
The test mode is mainly used for validation and benchmark. The model will be 
exported in a way that each layer's result will be compared with the CPU implementation. 
The timings for each layer will be displayed. 
In case of error, you will be able to enter debug mode, showing in-layer data or 
changing the inputs of the layer, to isolate the source of the issue. 
"""

for node in model.get_nodes():
    node.attributes().dev_mode = DEV_MODE

# --------------------------------------------------------------
# AIDGE CMP
# --------------------------------------------------------------

"""
If the --aidge_cmp option is enabled, the feature maps generated by aidge with the 
backend cpu will be exported in the generated export. It will be used as reference
to verify that the results with the optimized kernels are correct for the exported
model. 
This option has to be passed to each node in order to be used within the Export Nodes.
(JConv, JPad, ...) that you can find in the "export_gen/operator_export" folder. 
"""

for node in model.get_nodes():
    node.attributes().aidge_cmp = AIDGE_CMP

# --------------------------------------------------------------
# EXPORT THE MODEL
# --------------------------------------------------------------

model.save("exported_model")
inputs_tensor = aidge_core.Tensor(np.array(aidge_tensors[0]))
inputs_tensor.set_data_format(aidge_core.dformat.nchw)  # Init the dataformat (default -> nchw)
inputs_tensor.set_data_format(aidge_core.dformat.nhwc)  # Transpose the data  (nchw -> nhwc)
if args.dtype == "int8":
    inputs_tensor.set_datatype(aidge_core.dtype.int8)

aidge_export_cpp.export(EXPORT_FOLDER, 
                        model, 
                        scheduler, 
                        labels = aidge_core.Tensor(labels[0]),
                        inputs_tensor=inputs_tensor,
                        dev_mode = DEV_MODE,
                        aidge_cmp = AIDGE_CMP)