Non-convergent results when using MaxPool with aidge_learning
Required prerequisites
- Make sure you've read the documentation. Your issue may be addressed there.
- Search the issue tracker and discussions to verify that this hasn't already been reported. +1 or comment there if it has.
What commit version of aidge do you use
Module | Tag version | Commit Hash |
---|---|---|
aidge/aidge_backend_cpu | v0.7.0 | a9513dbac14c74438ba159b4c88d19b7dd7c3479 |
aidge/aidge_backend_cuda | v0.7.0-1-g8eb0dbb | 8eb0dbb2818cc596ba7ff45c08f9cc3fba3afe86 |
aidge/aidge_backend_opencv | v0.1.7 | 1ff6cb03cf0f7ce63ff84405e9983acd986abbc4 |
aidge/aidge_core | 0.7.0-14-g86d1a76f | 86d1a76f8186dbf893b775902d9db564f92ef560 |
aidge/aidge_export_arm_cortexm | v0.1.4 | ff5e2dc76d7ae06e4173e146ca8e391efcd9760d |
aidge/aidge_export_cpp | v0.4.0 | 566e26e0d629352eb04ec465fe9444a0a5ea8e6a |
aidge/aidge_export_tensorrt | 0.2.1-3-g82e6ff9 | 82e6ff9c1e30ecf1fee2928200f79a145eaea096 |
aidge/aidge_interop_torch | v0.1.0 | d9dde6ca880c882633a2ea31f02ddf5ad0d27153 |
aidge/aidge_learning | v0.4.0 | 65bc36c1 |
aidge/aidge_model_explorer | v0.1.0 | 1c246261aa38b50f4a874c60d4efe17902618efe |
aidge/aidge_onnx | v0.6.0 | 0a73d6a6d85a79f76846031d71013aaafcc20d03 |
aidge/aidge_quantization | v0.4.2 | df50eb71f42cc047c13dfc8f537a4b7eb413eb4a |
Problem description
The following code is supposed to converge, but it does not on Aidge. The issue is probably related to the MaxPool operator.
❌ Results:
Files already downloaded and verified
Nb samples 64, loss: 0.40533682703971863, acc:12.5%, tot_acc:12.5%
Nb samples 128, loss: 0.34302958846092224, acc:9.375%, tot_acc:10.9375%
Nb samples 192, loss: 0.264714777469635, acc:9.375%, tot_acc:10.416666666666668%
Nb samples 256, loss: 0.2155873030424118, acc:9.375%, tot_acc:10.15625%
Nb samples 320, loss: 0.1741018444299698, acc:12.5%, tot_acc:10.625%
Nb samples 384, loss: 0.15113553404808044, acc:9.375%, tot_acc:10.416666666666668%
🐍 Code:
# "cpu" or "cuda"
# Backend name reused below for the model and for the data provider.
BACKEND = "cpu"

import aidge_core

# Import only the backend package that matches the selected backend.
if BACKEND == "cuda":
    import aidge_backend_cuda
else:
    import aidge_backend_cpu
import aidge_learning
import numpy as np
# required to load CIFAR10 dataset
import torchvision
import torchvision.transforms as transforms
# Layers in execution order: three (Conv2D, Conv2D, ReLU, MaxPooling2D) stages
# followed by a fully connected layer with 10 outputs.
# NOTE(review): only the first MaxPooling2D passes stride_dims explicitly; the
# two others rely on the library default.
# NOTE(review): FC in_channels=1024 has to match the flattened output of the
# last pooling layer, which depends on the default stride/padding — TODO confirm.
layers = [
    # Stage 1: 3 -> 16 channels
    aidge_core.Conv2D(in_channels=3, out_channels=16, kernel_dims=[3, 3]),
    aidge_core.Conv2D(in_channels=16, out_channels=16, kernel_dims=[3, 3]),
    aidge_core.ReLU(),
    aidge_core.MaxPooling2D(kernel_dims=[2, 2], stride_dims=[2, 2]),
    # Stage 2: 16 -> 32 channels
    aidge_core.Conv2D(in_channels=16, out_channels=32, kernel_dims=[3, 3]),
    aidge_core.Conv2D(in_channels=32, out_channels=32, kernel_dims=[3, 3]),
    aidge_core.ReLU(),
    aidge_core.MaxPooling2D(kernel_dims=[2, 2]),
    # Stage 3: 32 -> 64 channels
    aidge_core.Conv2D(in_channels=32, out_channels=64, kernel_dims=[3, 3]),
    aidge_core.Conv2D(in_channels=64, out_channels=64, kernel_dims=[3, 3]),
    aidge_core.ReLU(),
    aidge_core.MaxPooling2D(kernel_dims=[2, 2]),
    # Classifier head
    aidge_core.FC(in_channels=1024, out_channels=10, name="FC_0"),
    # aidge_core.Softmax(axis=1, name="Softmax_0"),
]
model = aidge_core.sequential(layers)
# Set backend and datatype
model.set_backend(BACKEND)
model.set_datatype(aidge_core.dtype.float32)

# Initialize parameters (weights and biases).
# Parameters live in "Producer" nodes. The filler is chosen from the type of
# the first node fed by the producer and from the input index it is plugged
# into (1 is treated as the weight input, 2 as the bias input).
for node in model.get_nodes():
    if node.type() != "Producer":
        continue

    tensor = node.get_operator().get_output(0)
    first_link = node.output(0)[0]
    consumer_type = first_link[0].type()
    input_idx = first_link[1]

    if consumer_type == "Conv2D":
        if input_idx == 1:
            # Conv weight
            aidge_core.xavier_uniform_filler(tensor)
        elif input_idx == 2:
            # Conv bias
            aidge_core.constant_filler(tensor, 0.01)
    elif consumer_type == "FC":
        if input_idx == 1:
            # FC weight
            aidge_core.he_filler(tensor)
        elif input_idx == 2:
            # FC bias
            aidge_core.constant_filler(tensor, 0.01)
    # Producers feeding any other node type or input are left untouched.
def one_hot_encoding(cls, nb_cls):
    """Return a float32 aidge tensor of length ``nb_cls`` that one-hot encodes ``cls``.

    Args:
        cls: Index of the entry set to 1.0.
        nb_cls: Total number of entries; all the others are 0.0.

    Returns:
        An ``aidge_core.Tensor`` whose datatype is set to float32.
    """
    encoded = np.array([0.0] * nb_cls)
    encoded[cls] = 1.0
    tensor = aidge_core.Tensor(np.array(encoded))
    tensor.set_datatype(aidge_core.dtype.float32)
    return tensor
class aidge_cifar10(aidge_core.Database):
    """aidge Database exposing the torchvision CIFAR10 training split."""

    def __init__(self):
        """Download CIFAR10 into ./data if needed and set up the transforms."""
        # Initialize the aidge_core.Database base class explicitly.
        aidge_core.Database.__init__(self)

        # Convert images to tensors, then normalize each of the three channels
        # with mean 0.5 and std 0.5.
        to_tensor = transforms.ToTensor()
        normalize = transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
        pipeline = transforms.Compose([to_tensor, normalize])

        self.trainset = torchvision.datasets.CIFAR10(
            root='./data',
            train=True,
            download=True,
            transform=pipeline,
        )

    def get_item(self, idx):
        """Return ``[image tensor, one-hot label tensor]`` for sample ``idx``."""
        image, target = self.trainset[idx]
        return [aidge_core.Tensor(image.numpy()), one_hot_encoding(target, 10)]

    def len(self):
        """Return the number of samples in the training set."""
        return len(self.trainset)

    def get_nb_modalities(self):
        """Return 2: each item holds an image and its label."""
        return 2
# Number of samples per batch; also used below to normalize the accuracy.
BATCH_SIZE = 64

aidge_database = aidge_cifar10()

# Shuffled batches on the selected backend; incomplete last batch is dropped.
aidge_dataprovider = aidge_core.DataProvider(
    aidge_database,
    backend=BACKEND,
    batch_size=BATCH_SIZE,
    shuffle=True,
    drop_last=True,
)
# Scheduler used to run the forward and backward passes of the model.
scheduler = aidge_core.SequentialScheduler(model)

# SGD optimizer with a constant learning rate of 0.01, applied to every
# producer of the model.
learning_rates = aidge_learning.constant_lr(0.01)
opt = aidge_learning.SGD()
opt.set_learning_rate_scheduler(learning_rates)
opt.set_parameters(list(aidge_core.producers(model)))
# Running sum of the per-batch accuracy values.
tot_acc = 0

for i, (batch_input, label) in enumerate(aidge_dataprovider):
    # batch_input.init_grad()  # left disabled
    scheduler.forward(data=[batch_input])

    # The prediction is the first output of the last node in scheduling order.
    pred = model.get_ordered_nodes()[-1].get_operator().get_output(0)

    opt.reset_grad(model)

    # Compute the loss and initialize the output gradient of pred
    loss = aidge_learning.loss.MSE(pred, label)

    # NOTE(review): acc is divided by BATCH_SIZE when printed, so it is
    # presumably a count of correct predictions — TODO confirm.
    acc = aidge_learning.metrics.Accuracy(pred, label, 1)[0]
    tot_acc += acc

    scheduler.backward()
    opt.update()

    print(f"Nb samples {(i+1)*BATCH_SIZE}, loss: {loss[0]}, acc:{(acc/BATCH_SIZE)*100}%, tot_acc:{(tot_acc/((i+1)*BATCH_SIZE))*100}%")

    # Stop after six batches (i runs from 0 to 5).
    if i == 5:
        break