diff --git a/aidge_backend_cuda/__init__.py b/aidge_backend_cuda/__init__.py
index 7b57dd6b1704b16e2b9869c975ea1feb625f5bcf..889bf0043d14fc8613ab01a461fd9d2ba98d6ad3 100644
--- a/aidge_backend_cuda/__init__.py
+++ b/aidge_backend_cuda/__init__.py
@@ -1,2 +1,2 @@
 from aidge_backend_cuda.aidge_backend_cuda import *  # import so generated by PyBind
-
+from . import benchmark
diff --git a/aidge_backend_cuda/benchmark.py b/aidge_backend_cuda/benchmark.py
new file mode 100644
index 0000000000000000000000000000000000000000..a163c09a3b58739a54cf95c6931d995e0fd25db4
--- /dev/null
+++ b/aidge_backend_cuda/benchmark.py
@@ -0,0 +1,95 @@
+"""Inference benchmarking helpers for the Aidge CUDA backend."""
+
+import time
+
+import numpy as np
+
+import aidge_core
+import aidge_backend_cuda
+
+
+def measure_inference_time(
+    model: aidge_core.GraphView,
+    input_data: list[tuple[str, np.ndarray]],
+    nb_warmup: int = 10,
+    nb_iterations: int = 50,
+    device: int = 0,
+) -> list[float]:
+    """Time repeated forward passes of ``model`` on the CUDA backend.
+
+    The model and the input tensors are switched to the ``"cuda"`` backend,
+    ``nb_warmup`` untimed forward passes are run, then ``nb_iterations``
+    forward passes are timed individually.
+
+    Args:
+        model: Graph to benchmark; its backend is changed in place.
+        input_data: Pairs whose second element is an array fed to the model;
+            arrays are passed to the scheduler in the given order. The first
+            element is not read here (presumably the input name -- confirm).
+        nb_warmup: Number of untimed forward passes run first.
+        nb_iterations: Number of timed forward passes.
+        device: Index of the CUDA device to run on.
+
+    Returns:
+        One wall-clock duration, in seconds, per timed forward pass.
+    """
+    model.set_backend("cuda", device=device)
+    ordered_inputs = [aidge_core.Tensor(item[1]) for item in input_data]
+    for ordered_input in ordered_inputs:
+        ordered_input.set_backend("cuda", device=device)
+
+    scheduler = aidge_core.SequentialScheduler(model)
+    scheduler.generate_scheduling()
+
+    # Warm-up runs: executed but not timed.
+    for _ in range(nb_warmup):
+        scheduler.forward(forward_dims=False, data=ordered_inputs)
+
+    # perf_counter() is wall-clock; process_time() only counts CPU time of
+    # this process and excludes any time the host sleeps while waiting.
+    # NOTE(review): if forward() can return before the device has finished,
+    # a device synchronisation is needed before reading the clock -- confirm.
+    timings = []
+    for _ in range(nb_iterations):
+        start = time.perf_counter()
+        scheduler.forward(forward_dims=False, data=ordered_inputs)
+        timings.append(time.perf_counter() - start)
+    return timings
+
+
+def compute_output(
+    model: aidge_core.GraphView,
+    input_data: list[tuple[str, np.ndarray]],
+    device: int = 0,
+) -> list[np.ndarray]:
+    """Run one forward pass on the CUDA backend and return its outputs.
+
+    Args:
+        model: Graph to run; its backend is changed in place.
+        input_data: Pairs whose second element is an array fed to the model;
+            arrays are passed to the scheduler in the given order. The first
+            element is not read here (presumably the input name -- confirm).
+        device: Index of the CUDA device to run on. Defaults to 0 so the call
+            also works on machines with a single GPU.
+
+    Returns:
+        One NumPy array per entry of ``model.get_ordered_outputs()``, in that
+        order.
+    """
+    model.set_backend("cuda", device=device)
+    ordered_inputs = [aidge_core.Tensor(item[1]) for item in input_data]
+    for ordered_input in ordered_inputs:
+        ordered_input.set_backend("cuda", device=device)
+
+    scheduler = aidge_core.SequentialScheduler(model)
+    scheduler.generate_scheduling()
+    scheduler.forward(forward_dims=False, data=ordered_inputs)
+
+    outputs = []
+    for pair in model.get_ordered_outputs():
+        # pair[0] exposes the operator, pair[1] selects which of its outputs.
+        tensor = pair[0].get_operator().get_output(pair[1])
+        # Switch the tensor to the cpu backend before converting to NumPy.
+        tensor.set_backend("cpu")
+        outputs.append(np.array(tensor))
+    return outputs
\ No newline at end of file