Commit d21edb2d authored by Cyril Moineau

Merge branch 'bindLoss' into 'dev'

Update torch interop to work with the new backprop.

See merge request !1
parents f82bcf43 fbfe4038
2 merge requests: !2 0.0.1, !1 Update torch interop to work with the new backprop.
Pipeline #45714 passed
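For context, the torch-side usage this change targets looks roughly as follows. This is a minimal sketch assembled from names that appear in the diff below (aidge_interop_torch.wrap, aidge_learning.SGD, aidge_learning.constant_lr, set_learning_rate_scheduler, set_optimizer); the model, shapes and training-loop details are illustrative assumptions, not part of the commit.

import torch
import aidge_interop_torch
import aidge_learning

# Wrap a plain torch model so that forward and backward run through Aidge.
torch_model = torch.nn.Sequential(
    torch.nn.Flatten(),
    torch.nn.Linear(3, 4),
    torch.nn.ReLU(),
)
aidge_model = aidge_interop_torch.wrap(torch_model, (2, 3))  # assumed: input_size is the dummy input shape

# Optimizer and learning-rate scheduler come from aidge_learning, as in the test below.
opt = aidge_learning.SGD()
opt.set_learning_rate_scheduler(aidge_learning.constant_lr(0.01))
aidge_model.set_optimizer(opt)  # mirrors AidgeModule.set_optimizer shown further down

criterion = torch.nn.MSELoss()
x = torch.randn(2, 3)
y = torch.randn(2, 4)

output = aidge_model(x)      # forward pass runs through the Aidge scheduler
loss = criterion(output, y)
loss.backward()              # now compiles the gradient graph and calls scheduler.backward()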
@@ -31,6 +31,7 @@ build:ubuntu_python:
- python3 -m pip install virtualenv
- virtualenv venv
- source venv/bin/activate
- python3 -m pip install numpy coverage onnxruntime # used for tests
- python3 -m pip install -r requirements.txt
- python3 -m pip install . -v
artifacts:
@@ -5,7 +5,6 @@ coverage:ubuntu_python:
- docker
script:
- source venv/bin/activate
- python3 -m pip install numpy coverage
- cd ${CI_PROJECT_NAME}
# Retrieve the installation path of the module, since it is installed with pip.
- export MODULE_LOCATION=`python -c "import ${CI_PROJECT_NAME} as _; print(_.__path__[0])"`
@@ -10,12 +10,12 @@ torch.backends.cudnn.enabled = True
torch.backends.cudnn.benchmark = True
torch.set_printoptions(precision=7)
weight_value = 0.05
batch_size = 1
batch_size = 2
learning_rate = 0.01
comparison_precision = 0.001
absolute_presision = 0.0001
epochs = 10
# aidge_core.Log.set_console_level(aidge_core.Level.Debug)
# TODO : add tensor test later ...
# class test_tensor_conversion(unittest.TestCase):
@@ -104,7 +104,6 @@ class Test_Networks():
return -1
if self.test_backward:
loss1 = self.criterion1(output1, label1)
self.optimizer1.zero_grad()
loss1.backward()
@@ -112,6 +111,7 @@ class Test_Networks():
loss2 = self.criterion2(output2, label2)
self.optimizer2.zero_grad()
loss2.backward()
self.optimizer2.step()
NaN_flag = False
@@ -121,7 +121,6 @@ class Test_Networks():
if loss2.isnan():
print("Loss2 is NaN")
NaN_flag = True
if NaN_flag: return -1
if self.compare_tensor(loss1, loss2):
print("Different loss : ", loss1.item(), "|", loss2.item())
@@ -197,10 +196,9 @@ class Easy_graph(torch.nn.Module):
super(Easy_graph, self).__init__()
self.layer = torch.nn.Sequential(
# torch.nn.LeakyReLU(0.1),
torch.nn.Flatten(),
torch.nn.Linear(3, 4),
# torch.nn.LeakyReLU(0.1),
torch.nn.ReLU(),
torch.nn.Linear(4, 4),
torch.nn.ReLU(),
torch.nn.Linear(4, 4)
@@ -209,6 +207,25 @@ class Easy_graph(torch.nn.Module):
def forward(self, x):
x = self.layer(x)
return x
# class Easy_graph(torch.nn.Module):
# def __init__(self):
# super(Easy_graph, self).__init__()
# self.layer = torch.nn.Sequential(
# torch.nn.Flatten(),
# torch.nn.Linear(32*32*3, 512),
# torch.nn.ReLU(),
# torch.nn.Linear(512, 256),
# torch.nn.ReLU(),
# torch.nn.Linear(256, 128),
# torch.nn.ReLU(),
# torch.nn.Linear(128, 10)
# )
# def forward(self, x):
# x = self.layer(x)
# return x
@@ -241,6 +258,7 @@ class test_interop(unittest.TestCase):
torch_model = Easy_graph()
aidge_model = aidge_interop_torch.wrap(torch_model, input_size)
opt = aidge_learning.SGD()
lrs = aidge_learning.constant_lr(0.01)
opt.set_learning_rate_scheduler(lrs)
@@ -98,6 +98,7 @@ class AidgeModule(torch.nn.Module):
self.input_nodes = [None]
self.scheduler = None
self.optimizer = None
self.grad_compiled = False
def set_optimizer(self, opt):
self.optimizer = opt
@@ -150,6 +151,8 @@ class AidgeModule(torch.nn.Module):
@staticmethod
def backward(ctx, grad_output):
if not self.grad_compiled: aidge_core.compile_gradient(self._graph_view)
if self.multi_outputs_flag:
raise RuntimeError(
"Backward is not possible if the model has multi-outputs")
@@ -158,17 +161,24 @@ class AidgeModule(torch.nn.Module):
"Multi-input is not handled for now in pytorch backpropagation")
# convert the output gradient to an AIDGE Tensor
aidge_grad_output = torch_tensor_to_aidge(grad_output)
if len(self._graph_view.get_output_nodes()) != 1:
raise RuntimeError(
f"We only support one output, got {len(self._graph_view.get_output_nodes())}")
output_node = list(self._graph_view.get_output_nodes())[0]
output_tensor = output_node.get_operator().get_output(0)
output_tensor.set_grad(aidge_grad_output)
# run the backpropagation
# TODO: remove update from the backprop
self.optimizer.reset_grad()
self.scheduler.backward([aidge_grad_output])
self.scheduler.backward()
self.optimizer.update()
# get grad of first layer no handling of multi input
g = self.input_nodes[0].get_operator().get_output(0).grad()
aidge_out_grad = self.input_nodes[0].get_operator().get_output(0).grad()
# convert grad to torch
t = aidge_tensor_to_torch(g)
return t
torch_out_grad = aidge_tensor_to_torch(aidge_out_grad)
return torch_out_grad
# If the layer is at the beginning of the network, requires_grad is False.
inputs.requires_grad = True
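The modified backward() follows the usual torch.autograd.Function bridging pattern: forward runs in an external engine, backward hands torch's incoming gradient to that engine, lets it backpropagate, and returns the gradient of the input to torch. Below is a self-contained toy version of that pattern, with NumPy standing in for Aidge; all names in it are illustrative and not the module's API.

import numpy as np
import torch

class ExternalScale(torch.autograd.Function):
    # Toy external "engine": y = 3 * x, computed outside of torch.

    @staticmethod
    def forward(ctx, x):
        y = x.detach().numpy() * 3.0              # external forward pass
        return torch.from_numpy(y)

    @staticmethod
    def backward(ctx, grad_output):
        # Hand torch's incoming gradient to the external engine and return the
        # input gradient, just as the Aidge version does via set_grad() and
        # scheduler.backward() above.
        grad_input = grad_output.detach().numpy() * 3.0
        return torch.from_numpy(grad_input)

x = torch.ones(2, 3, requires_grad=True)
y = ExternalScale.apply(x)
y.sum().backward()
print(x.grad)   # filled with 3s, produced by the "external" backward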