diff --git a/.gitlab/ci/build.gitlab-ci.yml b/.gitlab/ci/build.gitlab-ci.yml
index cf06a8f5fcd065a553fda55511a8f23430e506eb..baea3ca70266f1d3fcd489824da681106c85b73c 100644
--- a/.gitlab/ci/build.gitlab-ci.yml
+++ b/.gitlab/ci/build.gitlab-ci.yml
@@ -31,6 +31,7 @@ build:ubuntu_python:
     - python3 -m pip install virtualenv
     - virtualenv venv
     - source venv/bin/activate
+    - python3 -m pip install numpy coverage onnxruntime # test dependencies, installed here so the venv artifact carries them to the coverage job
     - python3 -m pip install -r requirements.txt
     - python3 -m pip install . -v
   artifacts:
diff --git a/.gitlab/ci/coverage.gitlab-ci.yml b/.gitlab/ci/coverage.gitlab-ci.yml
index 783e85dce05ee6f5e4e27a7febade59119c7050a..88dcd3bea4cbd10e72fd0e3c3fa47fd2b67b4236 100644
--- a/.gitlab/ci/coverage.gitlab-ci.yml
+++ b/.gitlab/ci/coverage.gitlab-ci.yml
@@ -5,7 +5,6 @@ coverage:ubuntu_python:
     - docker
   script:
     - source venv/bin/activate
-    - python3 -m pip install numpy coverage
     - cd ${CI_PROJECT_NAME}
     # Retrieve the installation path of the module, since it is installed with pip.
     - export MODULE_LOCATION=`python -c "import ${CI_PROJECT_NAME} as _; print(_.__path__[0])"`
diff --git a/aidge_interop_torch/unit_tests/test_pytorch.py b/aidge_interop_torch/unit_tests/test_pytorch.py
index 2c8405f83b9834c8f07ac194b3ba3b81a56f70db..463b9ef5ec0207011caad8adaeeb050e1e868296 100755
--- a/aidge_interop_torch/unit_tests/test_pytorch.py
+++ b/aidge_interop_torch/unit_tests/test_pytorch.py
@@ -10,12 +10,12 @@ torch.backends.cudnn.enabled = True
 torch.backends.cudnn.benchmark = True
 torch.set_printoptions(precision=7)
 weight_value = 0.05
-batch_size = 1
+batch_size = 2
 learning_rate = 0.01
 comparison_precision = 0.001
 absolute_presision = 0.0001
 epochs = 10
-
+# aidge_core.Log.set_console_level(aidge_core.Level.Debug)
 # TODO : add tensor test later ...
 
 # class test_tensor_conversion(unittest.TestCase):
@@ -104,7 +104,6 @@ class Test_Networks():
             return -1
 
         if self.test_backward:
-
             loss1 = self.criterion1(output1, label1)
             self.optimizer1.zero_grad()
             loss1.backward()
@@ -112,6 +111,7 @@ class Test_Networks():
 
             loss2 = self.criterion2(output2, label2)
             self.optimizer2.zero_grad()
+
             loss2.backward()
             self.optimizer2.step()
             NaN_flag = False
@@ -121,7 +121,6 @@ class Test_Networks():
             if loss2.isnan():
                 print("Loss2 is NaN")
                 NaN_flag = True
-
             if NaN_flag: return -1
             if self.compare_tensor(loss1, loss2):
                 print("Different loss : ", loss1.item(), "|", loss2.item())
@@ -197,10 +196,9 @@ class Easy_graph(torch.nn.Module):
         super(Easy_graph, self).__init__()
 
         self.layer = torch.nn.Sequential(
-            # torch.nn.LeakyReLU(0.1),
             torch.nn.Flatten(),
             torch.nn.Linear(3, 4),
-            # torch.nn.LeakyReLU(0.1),
+            torch.nn.ReLU(),
             torch.nn.Linear(4, 4),
             torch.nn.ReLU(),
             torch.nn.Linear(4, 4)
@@ -209,6 +207,27 @@
     def forward(self, x):
         x = self.layer(x)
         return x
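+
+# Alternative, larger Easy_graph variant (left commented out):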
+# class Easy_graph(torch.nn.Module):
+
+#     def __init__(self):
+#         super(Easy_graph, self).__init__()
+
+#         self.layer = torch.nn.Sequential(
+#             torch.nn.Flatten(),
+#             torch.nn.Linear(32*32*3, 512),
+#             torch.nn.ReLU(),
+#             torch.nn.Linear(512, 256),
+#             torch.nn.ReLU(),
+#             torch.nn.Linear(256, 128),
+#             torch.nn.ReLU(),
+#             torch.nn.Linear(128, 10)
+#         )
+
+#     def forward(self, x):
+#         x = self.layer(x)
+#         return x
 
 
 
@@ -241,6 +260,8 @@
         torch_model = Easy_graph()
 
         aidge_model = aidge_interop_torch.wrap(torch_model, input_size)
+
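+        # plain SGD optimizer with a constant learning-rate schedule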
         opt = aidge_learning.SGD()
         lrs = aidge_learning.constant_lr(0.01)
         opt.set_learning_rate_scheduler(lrs)
diff --git a/aidge_interop_torch/utils.py b/aidge_interop_torch/utils.py
index 2dac8cecac2608e46ac80c80b89da5830bdea355..a685f57a594b1cca6e67f15c33dd2e73aa2fe3e5 100644
--- a/aidge_interop_torch/utils.py
+++ b/aidge_interop_torch/utils.py
@@ -98,6 +98,8 @@
         self.input_nodes = [None]
         self.scheduler = None
         self.optimizer = None
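+        # Set to True once the gradient graph has been compiled (see backward())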
+        self.grad_compiled = False
 
     def set_optimizer(self, opt):
         self.optimizer = opt
@@ -150,6 +152,11 @@
 
             @staticmethod
             def backward(ctx, grad_output):
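+                # Compile the gradient graph lazily, on the first backward call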
+                if not self.grad_compiled:
+                    aidge_core.compile_gradient(self._graph_view)
+                    self.grad_compiled = True
+
                 if self.multi_outputs_flag:
                     raise RuntimeError(
                         "Backward is not possible if the model has multi-outputs")
@@ -158,17 +165,25 @@
                         "Multi-input is not handled for now in pytorch backpropagation")
                 # convert the output gradient to an AIDGE Tensor
                 aidge_grad_output = torch_tensor_to_aidge(grad_output)
+
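+                # Attach the incoming gradient to the graph's single output tensor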
+                if len(self._graph_view.get_output_nodes()) != 1:
+                    raise RuntimeError(
+                        f"Only a single output is supported, got {len(self._graph_view.get_output_nodes())}")
+                output_node = list(self._graph_view.get_output_nodes())[0]
+                output_tensor = output_node.get_operator().get_output(0)
+                output_tensor.set_grad(aidge_grad_output)
+
                 # run the backpropagation
                 # TODO: remove update from the backprop
                 self.optimizer.reset_grad()
-                self.scheduler.backward([aidge_grad_output])
+                self.scheduler.backward()
                 self.optimizer.update()
                 # get grad of first layer no handling of multi input
-                g = self.input_nodes[0].get_operator().get_output(0).grad()
+                aidge_out_grad = self.input_nodes[0].get_operator().get_output(0).grad()
                 # convert grad to torch
-                t = aidge_tensor_to_torch(g)
-                return t
-
+                torch_out_grad = aidge_tensor_to_torch(aidge_out_grad)
+                return torch_out_grad
 
         # If the layer is at the beginning of the network requires grad is False.
         inputs.requires_grad = True