Non-converging results when using MaxPool with aidge_learning

Required prerequisites

Make sure you've read the documentation. Your issue may be addressed there.
Search the issue tracker and discussions to verify that this hasn't already been reported. +1 or comment there if it has.

What commit version of aidge do you use?

Module	Tag version	Commit Hash
aidge/aidge_backend_cpu	v0.7.0	a9513dbac14c74438ba159b4c88d19b7dd7c3479
aidge/aidge_backend_cuda	v0.7.0-1-g8eb0dbb	8eb0dbb2818cc596ba7ff45c08f9cc3fba3afe86
aidge/aidge_backend_opencv	v0.1.7	1ff6cb03cf0f7ce63ff84405e9983acd986abbc4
aidge/aidge_core	0.7.0-14-g86d1a76f	86d1a76f8186dbf893b775902d9db564f92ef560
aidge/aidge_export_arm_cortexm	v0.1.4	ff5e2dc76d7ae06e4173e146ca8e391efcd9760d
aidge/aidge_export_cpp	v0.4.0	566e26e0d629352eb04ec465fe9444a0a5ea8e6a
aidge/aidge_export_tensorrt	0.2.1-3-g82e6ff9	82e6ff9c1e30ecf1fee2928200f79a145eaea096
aidge/aidge_interop_torch	v0.1.0	d9dde6ca880c882633a2ea31f02ddf5ad0d27153
aidge/aidge_learning	v0.4.0	65bc36c1
aidge/aidge_model_explorer	v0.1.0	1c246261aa38b50f4a874c60d4efe17902618efe
aidge/aidge_onnx	v0.6.0	0a73d6a6d85a79f76846031d71013aaafcc20d03
aidge/aidge_quantization	v0.4.2	df50eb71f42cc047c13dfc8f537a4b7eb413eb4a

Problem description

The following code is supposed to converge but does not on Aidge. The issue is probably related to the MaxPool operator.

❌ Results:

Files already downloaded and verified
Nb samples 64, loss: 0.40533682703971863, acc:12.5%, tot_acc:12.5%
Nb samples 128, loss: 0.34302958846092224, acc:9.375%, tot_acc:10.9375%
Nb samples 192, loss: 0.264714777469635, acc:9.375%, tot_acc:10.416666666666668%
Nb samples 256, loss: 0.2155873030424118, acc:9.375%, tot_acc:10.15625%
Nb samples 320, loss: 0.1741018444299698, acc:12.5%, tot_acc:10.625%
Nb samples 384, loss: 0.15113553404808044, acc:9.375%, tot_acc:10.416666666666668%

🐍 Code:

# "cpu" or "cuda"
# Name of the Aidge backend used by this script: it picks the backend module
# imported below and is later passed to model.set_backend() and DataProvider.
BACKEND = "cpu"

import aidge_core

# Only the backend package matching BACKEND is imported.
if BACKEND == "cuda":
    import aidge_backend_cuda
else:
    import aidge_backend_cpu

import aidge_learning

import numpy as np

# required to load CIFAR10 dataset
import torchvision
import torchvision.transforms as transforms




# Small CNN: three (Conv2D, Conv2D, ReLU, MaxPooling2D) stages followed by a
# 10-output fully-connected layer. No activation sits between the two
# convolutions of a stage, and the final Softmax is commented out.
# NOTE(review): only the first MaxPooling2D passes stride_dims; the other two
# rely on the library default (presumably [1, 1] -- TODO confirm). For 32x32
# inputs with unpadded, stride-1 3x3 convolutions that default would give a
# 64 x 4 x 4 = 1024 feature map, matching FC in_channels -- TODO confirm.
model = aidge_core.sequential(
    [
        aidge_core.Conv2D(in_channels=3, out_channels=16, kernel_dims=[3, 3]),
        aidge_core.Conv2D(in_channels=16, out_channels=16, kernel_dims=[3, 3]),
        aidge_core.ReLU(),
        aidge_core.MaxPooling2D(kernel_dims=[2, 2], stride_dims=[2, 2]),
        aidge_core.Conv2D(in_channels=16, out_channels=32, kernel_dims=[3, 3]),
        aidge_core.Conv2D(in_channels=32, out_channels=32, kernel_dims=[3, 3]),
        aidge_core.ReLU(),
        # No stride_dims given here: the pooling stride is the library default.
        aidge_core.MaxPooling2D(kernel_dims=[2, 2]),
        aidge_core.Conv2D(in_channels=32, out_channels=64, kernel_dims=[3, 3]),
        aidge_core.Conv2D(in_channels=64, out_channels=64, kernel_dims=[3, 3]),
        aidge_core.ReLU(),
        # No stride_dims given here either.
        aidge_core.MaxPooling2D(kernel_dims=[2, 2]),
        aidge_core.FC(in_channels=1024, out_channels=10, name="FC_0"),
        # aidge_core.Softmax(axis=1, name="Softmax_0"),
    ]
)



# Select the execution backend and run the whole graph in float32.
model.set_backend(BACKEND)
model.set_datatype(aidge_core.dtype.float32)

# Initialise every Producer output according to the node that consumes it:
# input index 1 of a Conv2D/FC is treated as the weight, index 2 as the bias.
for producer in model.get_nodes():
    if producer.type() != "Producer":
        continue

    tensor = producer.get_operator().get_output(0)
    # First (consumer node, consumer input index) pair fed by output 0.
    consumer, input_idx = producer.output(0)[0]
    consumer_type = consumer.type()

    if consumer_type == "Conv2D" and input_idx == 1:
        # Conv weight
        aidge_core.xavier_uniform_filler(tensor)
    elif consumer_type == "FC" and input_idx == 1:
        # FC weight
        aidge_core.he_filler(tensor)
    elif consumer_type in ("Conv2D", "FC") and input_idx == 2:
        # Conv / FC bias
        aidge_core.constant_filler(tensor, 0.01)
    # Producers feeding anything else are left untouched.




def one_hot_encoding(cls, nb_cls):
    """Build a float32 one-hot Aidge tensor for a class index.

    Args:
        cls: Index of the entry that is set to 1.0.
        nb_cls: Length of the encoded vector.

    Returns:
        An ``aidge_core.Tensor`` holding ``nb_cls`` values, all 0.0 except
        a 1.0 at position ``cls``, with its datatype set to float32.
    """
    encoded = np.zeros(nb_cls)
    encoded[cls] = 1.0
    tensor = aidge_core.Tensor(encoded)
    tensor.set_datatype(aidge_core.dtype.float32)
    return tensor


class aidge_cifar10(aidge_core.Database):
    """Aidge database wrapping the torchvision CIFAR10 training split."""

    def __init__(self):
        """Download (if needed) and load the CIFAR10 training set under ./data."""
        aidge_core.Database.__init__(self)
        # Convert images to tensors, then normalise each of the three
        # channels with mean 0.5 and std 0.5.
        normalize = transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
        pipeline = transforms.Compose([transforms.ToTensor(), normalize])

        self.trainset = torchvision.datasets.CIFAR10(
            root='./data',
            train=True,
            download=True,
            transform=pipeline,
        )

    def get_item(self, idx):
        """Return ``[image tensor, one-hot label tensor]`` for sample ``idx``."""
        image, class_idx = self.trainset[idx]
        image_tensor = aidge_core.Tensor(image.numpy())
        label_tensor = one_hot_encoding(class_idx, 10)
        return [image_tensor, label_tensor]

    def len(self):
        """Return the number of samples in the training set."""
        return len(self.trainset)

    def get_nb_modalities(self):
        """Return the number of tensors per item (image and label)."""
        return 2





# Mini-batch size; the training loop also uses it to scale the printed accuracy.
BATCH_SIZE = 64

# Shuffled batches over the CIFAR10 database; drop_last=True is requested so
# an incomplete final batch is not served.
aidge_database = aidge_cifar10()
aidge_dataprovider = aidge_core.DataProvider(
    aidge_database,
    backend=BACKEND,
    batch_size=BATCH_SIZE,
    shuffle=True,
    drop_last=True,
)







 # Set object for learning
# Scheduler used by the training loop for the forward and backward passes.
scheduler = aidge_core.SequentialScheduler(model)

# SGD optimizer (default constructor arguments) with a constant learning rate
# of 0.01, updating every producer of the model.
learning_rates = aidge_learning.constant_lr(0.01)
opt = aidge_learning.SGD()
opt.set_learning_rate_scheduler(learning_rates)
trainable_params = list(aidge_core.producers(model))
opt.set_parameters(trainable_params)




 
# Running sum of the per-batch accuracy values, used for the cumulative figure.
tot_acc = 0
for i, (batch_input, batch_label) in enumerate(aidge_dataprovider):
    # Forward pass on the current batch.
    scheduler.forward(data=[batch_input])

    # The prediction is read from output 0 of the last node in the graph order.
    last_node = model.get_ordered_nodes()[-1]
    prediction = last_node.get_operator().get_output(0)

    opt.reset_grad(model)
    # Compute the loss and initialize the output gradient of the prediction.
    loss = aidge_learning.loss.MSE(prediction, batch_label)
    acc = aidge_learning.metrics.Accuracy(prediction, batch_label, 1)[0]
    tot_acc += acc

    # Backward pass, then parameter update.
    scheduler.backward()
    opt.update()

    print(f"Nb samples {(i+1)*BATCH_SIZE}, loss: {loss[0]}, acc:{(acc/BATCH_SIZE)*100}%, tot_acc:{(tot_acc/((i+1)*BATCH_SIZE))*100}%")

    # Stop after six batches: enough to show the reported behaviour.
    if i == 5:
        break