diff --git a/unit_tests/optimizer/Test_Adam.cpp b/unit_tests/optimizer/Test_Adam.cpp
index bd297903d47b90b755ff59ace0e052aa62c309d7..2e9eb8be1cd1a4acbe60f55d1b1c28da412d8a80 100644
--- a/unit_tests/optimizer/Test_Adam.cpp
+++ b/unit_tests/optimizer/Test_Adam.cpp
@@ -30,6 +30,15 @@
 #include "aidge/backend/cpu/operator/SqrtImpl.hpp"
 #include "aidge/utils/TensorUtils.hpp"
 
+#if USE_AIDGE_BACKEND_CUDA
+#include "aidge/backend/cuda/data/TensorImpl.hpp"
+#include "aidge/backend/cuda/operator/AddImpl.hpp"
+#include "aidge/backend/cuda/operator/MulImpl.hpp"
+#include "aidge/backend/cuda/operator/SubImpl.hpp"
+#include "aidge/backend/cuda/operator/DivImpl.hpp"
+#include "aidge/backend/cuda/operator/SqrtImpl.hpp"
+#endif
+
 namespace Aidge {
 TEST_CASE("[learning/Adam] update", "[Optimizer][Adam]") {
     constexpr std::uint16_t NBTRIALS = 10;
@@ -41,105 +50,242 @@ TEST_CASE("[learning/Adam] update", "[Optimizer][Adam]") {
     std::uniform_int_distribution<std::size_t> dimSizeDist(std::size_t(2), std::size_t(5));
     std::uniform_int_distribution<std::size_t> nbDimsDist(std::size_t(1), std::size_t(5));
 
+    SECTION("CPU") {
+        for (std::size_t trial = 0; trial < NBTRIALS; ++trial) {
+            // create a random number of Tensors with random dims and values,
+            // along with matching random gradients and zero-initialized momentum buffers
+            const std::size_t nb_tensors = dimSizeDist(gen);
+            std::vector<std::size_t> size_tensors(nb_tensors, 1);
 
-    for (std::size_t trial = 0; trial < NBTRIALS; ++trial) {
-        // create a random number of Tensor with random dims and random values
-        // Create random Tensor, Random Gradient and random
-        const std::size_t nb_tensors = dimSizeDist(gen);
-        std::vector<std::size_t> size_tensors(nb_tensors, 1);
+            std::vector<std::shared_ptr<Tensor>> tensors(nb_tensors);
+            std::vector<std::unique_ptr<float[]>> val_tensors(nb_tensors);
 
-        std::vector<std::shared_ptr<Tensor>> tensors(nb_tensors);
-        std::vector<std::unique_ptr<float[]>> val_tensors(nb_tensors);
+            std::vector<std::shared_ptr<Tensor>> optim_tensors(nb_tensors);
 
-        std::vector<std::shared_ptr<Tensor>> optim_tensors(nb_tensors);
+            std::vector<std::shared_ptr<Tensor>> grad_tensors(nb_tensors);
+            std::vector<std::unique_ptr<float[]>> val_grad_tensors(nb_tensors);
 
-        std::vector<std::shared_ptr<Tensor>> grad_tensors(nb_tensors);
-        std::vector<std::unique_ptr<float[]>> val_grad_tensors(nb_tensors);
+            std::vector<std::shared_ptr<Tensor>> momentum_tensors(nb_tensors);
+            std::vector<std::unique_ptr<float[]>> val_momentum1_tensors(nb_tensors);
+            std::vector<std::unique_ptr<float[]>> val_momentum2_tensors(nb_tensors);
 
-        std::vector<std::shared_ptr<Tensor>> momentum_tensors(nb_tensors);
-        std::vector<std::unique_ptr<float[]>> val_momentum1_tensors(nb_tensors);
-		std::vector<std::unique_ptr<float[]>> val_momentum2_tensors(nb_tensors);
+            for (std::size_t i = 0; i < nb_tensors; ++i) {
+                std::vector<std::size_t> dims(nbDimsDist(gen));
+                for (std::size_t d = 0; d < dims.size(); ++d) {
+                    dims[d] = dimSizeDist(gen);
+                    size_tensors[i] *= dims[d];
+                }
 
-        for (std::size_t i = 0; i < nb_tensors; ++i) {
-            std::vector<std::size_t> dims(nbDimsDist(gen));
-            for (std::size_t d = 0; d < dims.size(); ++d) {
-                dims[d] = dimSizeDist(gen);
-                size_tensors[i] *= dims[d];
-            }
+                val_tensors[i] = std::make_unique<float[]>(size_tensors[i]);
+                val_grad_tensors[i] = std::make_unique<float[]>(size_tensors[i]);
+                val_momentum1_tensors[i] = std::make_unique<float[]>(size_tensors[i]);
+                val_momentum2_tensors[i] = std::make_unique<float[]>(size_tensors[i]);
+                for (std::size_t j = 0; j < size_tensors[i]; ++j) {
+                    val_tensors[i][j] = valueDist(gen);
+                    val_grad_tensors[i][j] = valueDist(gen);
+                    val_momentum1_tensors[i][j] = 0.0f;
+                    val_momentum2_tensors[i][j] = 0.0f;
+                }
+                tensors[i] = std::make_shared<Tensor>(dims);
+                tensors[i]->setBackend("cpu");
+                tensors[i]->getImpl()->setRawPtr(val_tensors[i].get(), size_tensors[i]);
+                optim_tensors[i] = std::make_shared<Tensor>(dims);
+                optim_tensors[i]->setBackend("cpu");
+                optim_tensors[i]->getImpl()->copy(val_tensors[i].get(), size_tensors[i]);
+                // optim_tensors[i]->initGrad();
 
-            val_tensors[i] = std::make_unique<float[]>(size_tensors[i]);
-            val_grad_tensors[i] = std::make_unique<float[]>(size_tensors[i]);
-            val_momentum1_tensors[i] = std::make_unique<float[]>(size_tensors[i]);
-            val_momentum2_tensors[i] = std::make_unique<float[]>(size_tensors[i]);
-            for (std::size_t j = 0; j < size_tensors[i]; ++j) {
-                val_tensors[i][j] = valueDist(gen);
-                val_grad_tensors[i][j] = valueDist(gen);
-                val_momentum1_tensors[i][j] = 0.0f;
-				val_momentum2_tensors[i][j] = 0.0f;
-            }
-            tensors[i] = std::make_shared<Tensor>(dims);
-            tensors[i]->setBackend("cpu");
-            tensors[i]->getImpl()->setRawPtr(val_tensors[i].get(), size_tensors[i]);
-            optim_tensors[i] = std::make_shared<Tensor>(dims);
-            optim_tensors[i]->setBackend("cpu");
-            optim_tensors[i]->getImpl()->copy(val_tensors[i].get(), size_tensors[i]);
-            // optim_tensors[i]->initGrad();
-
-            grad_tensors[i] = std::make_shared<Tensor>(dims);
-            grad_tensors[i]->setBackend("cpu");
-            grad_tensors[i]->getImpl()->setRawPtr(val_grad_tensors[i].get(), size_tensors[i]);
-
-            momentum_tensors[i] = std::make_shared<Tensor>(dims);
-            momentum_tensors[i]->setBackend("cpu");
-            momentum_tensors[i]->getImpl()->setRawPtr(val_momentum1_tensors[i].get(), size_tensors[i]);
-            momentum_tensors[i]->getImpl()->setRawPtr(val_momentum2_tensors[i].get(), size_tensors[i]);
-
-            REQUIRE((tensors[i]->hasImpl() &&
-                    optim_tensors[i]->hasImpl() &&
-                    grad_tensors[i]->hasImpl()));
-        }
+                grad_tensors[i] = std::make_shared<Tensor>(dims);
+                grad_tensors[i]->setBackend("cpu");
+                grad_tensors[i]->getImpl()->setRawPtr(val_grad_tensors[i].get(), size_tensors[i]);
 
-        // generate parameters
-        float lr = paramDist(gen);
-        float beta1 = paramDist(gen);
-        float beta2 = paramDist(gen);
-        float epsilon = paramDist(gen);
-
-        // set Optimizer
-        Adam opt = Adam(beta1, beta2, epsilon);
-        opt.setParameters(optim_tensors);
-        for (std::size_t t = 0; t < nb_tensors; ++t) {
-            optim_tensors[t]->grad()->getImpl()->setRawPtr(val_grad_tensors[t].get(), size_tensors[t]);
-        }
-        opt.setLearningRateScheduler(learning::ConstantLR(lr));
+                momentum_tensors[i] = std::make_shared<Tensor>(dims);
+                momentum_tensors[i]->setBackend("cpu");
+                momentum_tensors[i]->getImpl()->setRawPtr(val_momentum1_tensors[i].get(), size_tensors[i]);
+                momentum_tensors[i]->getImpl()->setRawPtr(val_momentum2_tensors[i].get(), size_tensors[i]);
 
-        for (std::size_t t = 0; t < nb_tensors; ++t) {
-            const Tensor tmpt1= *(opt.parameters().at(t));
-            const Tensor tmpt2= *tensors[t];
-            REQUIRE(approxEq<float,float>(tmpt2, tmpt1, 1e-5f, 1e-8f));
-        }
+                REQUIRE((tensors[i]->hasImpl() &&
+                        optim_tensors[i]->hasImpl() &&
+                        grad_tensors[i]->hasImpl()));
+            }
+
+            // generate parameters
+            float lr = paramDist(gen);
+            float beta1 = paramDist(gen);
+            float beta2 = paramDist(gen);
+            float epsilon = paramDist(gen);
 
-        for (std::size_t step = 0; step < 10; ++step) {
-            // truth
-            float lr2 = lr * std::sqrt(1.0f - std::pow(beta2, step + 1)) / (1.0f - std::pow(beta1, step + 1));
-            float epsilon2 = epsilon * std::sqrt(1.0f - std::pow(beta2, step + 1));
+            // set Optimizer
+            Adam opt = Adam(beta1, beta2, epsilon);
+            opt.setParameters(optim_tensors);
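+            // point each parameter's gradient at the pre-generated gradient buffer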
             for (std::size_t t = 0; t < nb_tensors; ++t) {
-                for (std::size_t i = 0; i < size_tensors[t]; ++i) {
-                    val_momentum1_tensors[t][i] = beta1 * val_momentum1_tensors[t][i] + (1.0f - beta1) * val_grad_tensors[t][i];
-                    val_momentum2_tensors[t][i] = beta2 * val_momentum2_tensors[t][i] + (1.0f - beta2) * val_grad_tensors[t][i] * val_grad_tensors[t][i];
-                    val_tensors[t][i] = val_tensors[t][i]
-                                      - lr2 * val_momentum1_tensors[t][i] / (std::sqrt(val_momentum2_tensors[t][i]) +  epsilon2);
-                }
+                optim_tensors[t]->grad()->getImpl()->setRawPtr(val_grad_tensors[t].get(), size_tensors[t]);
             }
-            // optimizer
-            opt.update();
-            // tests
+            opt.setLearningRateScheduler(learning::ConstantLR(lr));
+
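+            // before the first update, the optimizer's parameters must still match the originals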
             for (std::size_t t = 0; t < nb_tensors; ++t) {
                 const Tensor tmpt1= *(opt.parameters().at(t));
                 const Tensor tmpt2= *tensors[t];
                 REQUIRE(approxEq<float,float>(tmpt2, tmpt1, 1e-5f, 1e-8f));
             }
+
+            for (std::size_t step = 0; step < 10; ++step) {
+                // truth
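+                // bias-corrected Adam step with the corrections folded into the step size:
+                // lr2 = lr * sqrt(1 - beta2^t) / (1 - beta1^t) and epsilon2 = epsilon * sqrt(1 - beta2^t),
+                // which is equivalent to the usual m_hat / (sqrt(v_hat) + epsilon) update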
+                float lr2 = lr * std::sqrt(1.0f - std::pow(beta2, step + 1)) / (1.0f - std::pow(beta1, step + 1));
+                float epsilon2 = epsilon * std::sqrt(1.0f - std::pow(beta2, step + 1));
+                for (std::size_t t = 0; t < nb_tensors; ++t) {
+                    for (std::size_t i = 0; i < size_tensors[t]; ++i) {
+                        val_momentum1_tensors[t][i] = beta1 * val_momentum1_tensors[t][i] + (1.0f - beta1) * val_grad_tensors[t][i];
+                        val_momentum2_tensors[t][i] = beta2 * val_momentum2_tensors[t][i] + (1.0f - beta2) * val_grad_tensors[t][i] * val_grad_tensors[t][i];
+                        val_tensors[t][i] = val_tensors[t][i]
+                                        - lr2 * val_momentum1_tensors[t][i] / (std::sqrt(val_momentum2_tensors[t][i]) +  epsilon2);
+                    }
+                }
+                // optimizer
+                opt.update();
+                // tests
+                for (std::size_t t = 0; t < nb_tensors; ++t) {
+                    const Tensor tmpt1= *(opt.parameters().at(t));
+                    const Tensor tmpt2= *tensors[t];
+                    REQUIRE(approxEq<float,float>(tmpt2, tmpt1, 1e-5f, 1e-8f));
+                }
+            }
+        }
+    }
+
+#if USE_AIDGE_BACKEND_CUDA
+    SECTION("CUDA") {
+        for (std::size_t trial = 0; trial < NBTRIALS; ++trial) {
+            // create a random number of Tensors with random dims and values,
+            // along with random gradients and zero-initialized momentum buffers,
+            // mirrored in device memory below
+            const std::size_t nb_tensors = dimSizeDist(gen);
+            std::vector<std::size_t> size_tensors(nb_tensors, 1);
+
+            std::vector<std::shared_ptr<Tensor>> tensors(nb_tensors);
+            std::vector<std::unique_ptr<float[]>> val_tensors(nb_tensors);
+
+            std::vector<std::shared_ptr<Tensor>> optim_tensors(nb_tensors);
+
+            std::vector<std::shared_ptr<Tensor>> grad_tensors(nb_tensors);
+            std::vector<std::unique_ptr<float[]>> val_grad_tensors(nb_tensors);
+
+            std::vector<std::shared_ptr<Tensor>> momentum_tensors(nb_tensors);
+            std::vector<std::unique_ptr<float[]>> val_momentum1_tensors(nb_tensors);
+            std::vector<std::unique_ptr<float[]>> val_momentum2_tensors(nb_tensors);
+
+            // Device pointers
+            std::vector<float*> d_val_tensors(nb_tensors);
+            std::vector<float*> d_val_grad_tensors(nb_tensors);
+            std::vector<float*> d_val_momentum1_tensors(nb_tensors);
+            std::vector<float*> d_val_momentum2_tensors(nb_tensors);
+
+            for (std::size_t i = 0; i < nb_tensors; ++i) {
+                std::vector<std::size_t> dims(nbDimsDist(gen));
+                for (std::size_t d = 0; d < dims.size(); ++d) {
+                    dims[d] = dimSizeDist(gen);
+                    size_tensors[i] *= dims[d];
+                }
+
+                val_tensors[i] = std::make_unique<float[]>(size_tensors[i]);
+                val_grad_tensors[i] = std::make_unique<float[]>(size_tensors[i]);
+                val_momentum1_tensors[i] = std::make_unique<float[]>(size_tensors[i]);
+                val_momentum2_tensors[i] = std::make_unique<float[]>(size_tensors[i]);
+                for (std::size_t j = 0; j < size_tensors[i]; ++j) {
+                    val_tensors[i][j] = valueDist(gen);
+                    val_grad_tensors[i][j] = valueDist(gen);
+                    val_momentum1_tensors[i][j] = 0.0f;
+                    val_momentum2_tensors[i][j] = 0.0f;
+                }
+
+                // Allocate device memory
+                cudaMalloc(&d_val_tensors[i], size_tensors[i] * sizeof(float));
+                cudaMalloc(&d_val_grad_tensors[i], size_tensors[i] * sizeof(float));
+                cudaMalloc(&d_val_momentum1_tensors[i], size_tensors[i] * sizeof(float));
+                cudaMalloc(&d_val_momentum2_tensors[i], size_tensors[i] * sizeof(float));
+
+                // Copy data to device
+                cudaMemcpy(d_val_tensors[i], val_tensors[i].get(), size_tensors[i] * sizeof(float), cudaMemcpyHostToDevice);
+                cudaMemcpy(d_val_grad_tensors[i], val_grad_tensors[i].get(), size_tensors[i] * sizeof(float), cudaMemcpyHostToDevice);
+                cudaMemcpy(d_val_momentum1_tensors[i], val_momentum1_tensors[i].get(), size_tensors[i] * sizeof(float), cudaMemcpyHostToDevice);
+                cudaMemcpy(d_val_momentum2_tensors[i], val_momentum2_tensors[i].get(), size_tensors[i] * sizeof(float), cudaMemcpyHostToDevice);
+
+                tensors[i] = std::make_shared<Tensor>(dims);
+                tensors[i]->setBackend("cuda");
+                tensors[i]->getImpl()->setRawPtr(d_val_tensors[i], size_tensors[i]);
+
+                optim_tensors[i] = std::make_shared<Tensor>(dims);
+                optim_tensors[i]->setBackend("cuda");
+                optim_tensors[i]->getImpl()->copy(d_val_tensors[i], size_tensors[i]);
+                // optim_tensors[i]->initGrad();
+
+                grad_tensors[i] = std::make_shared<Tensor>(dims);
+                grad_tensors[i]->setBackend("cuda");
+                grad_tensors[i]->getImpl()->setRawPtr(d_val_grad_tensors[i], size_tensors[i]);
+
+                momentum_tensors[i] = std::make_shared<Tensor>(dims);
+                momentum_tensors[i]->setBackend("cuda");
+                momentum_tensors[i]->getImpl()->setRawPtr(d_val_momentum1_tensors[i], size_tensors[i]);
+                momentum_tensors[i]->getImpl()->setRawPtr(d_val_momentum2_tensors[i], size_tensors[i]);
+
+                REQUIRE((tensors[i]->hasImpl() &&
+                        optim_tensors[i]->hasImpl() &&
+                        grad_tensors[i]->hasImpl()));
+            }
+
+            // generate parameters
+            float lr = paramDist(gen);
+            float beta1 = paramDist(gen);
+            float beta2 = paramDist(gen);
+            float epsilon = paramDist(gen);
+
+            // set Optimizer
+            Adam opt = Adam(beta1, beta2, epsilon);
+            opt.setParameters(optim_tensors);
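+            // point each parameter's gradient at the device-side gradient buffer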
+            for (std::size_t t = 0; t < nb_tensors; ++t) {
+                optim_tensors[t]->grad()->getImpl()->setRawPtr(d_val_grad_tensors[t], size_tensors[t]);
+            }
+            opt.setLearningRateScheduler(learning::ConstantLR(lr));
+
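+            // before the first update, the device-side parameters must still match the originals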
+            for (std::size_t t = 0; t < nb_tensors; ++t) {
+                std::unique_ptr<float[]> temp1 = std::make_unique<float[]>(opt.parameters().at(t)->size());
+                cudaMemcpy(temp1.get(), opt.parameters().at(t)->getImpl()->rawPtr(), sizeof(float) * opt.parameters().at(t)->size(), cudaMemcpyDeviceToHost);
+                std::unique_ptr<float[]> temp2 = std::make_unique<float[]>(tensors[t]->size());
+                cudaMemcpy(temp2.get(), tensors[t]->getImpl()->rawPtr(), sizeof(float) * tensors[t]->size(), cudaMemcpyDeviceToHost);
+                for (std::size_t i = 0; i < tensors[t]->size(); ++i) {
+                    REQUIRE(std::abs(temp1[i] - temp2[i]) <= 1e-8f + 1e-5f * std::abs(temp1[i]));
+                }
+            }
+
+            for (std::size_t step = 0; step < 10; ++step) {
+                // truth
+                float lr2 = lr * std::sqrt(1.0f - std::pow(beta2, step + 1)) / (1.0f - std::pow(beta1, step + 1));
+                float epsilon2 = epsilon * std::sqrt(1.0f - std::pow(beta2, step + 1));
+                for (std::size_t t = 0; t < nb_tensors; ++t) {
+                    for (std::size_t i = 0; i < size_tensors[t]; ++i) {
+                        val_momentum1_tensors[t][i] = beta1 * val_momentum1_tensors[t][i] + (1.0f - beta1) * val_grad_tensors[t][i];
+                        val_momentum2_tensors[t][i] = beta2 * val_momentum2_tensors[t][i] + (1.0f - beta2) * val_grad_tensors[t][i] * val_grad_tensors[t][i];
+                        val_tensors[t][i] = val_tensors[t][i]
+                                        - lr2 * val_momentum1_tensors[t][i] / (std::sqrt(val_momentum2_tensors[t][i]) +  epsilon2);
+                    }
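+                    // push the updated reference values back to the device buffers
+                    // backing tensors[t] and momentum_tensors[t]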
+                    cudaMemcpy(d_val_momentum1_tensors[t], val_momentum1_tensors[t].get(), size_tensors[t] * sizeof(float), cudaMemcpyHostToDevice);
+                    cudaMemcpy(d_val_momentum2_tensors[t], val_momentum2_tensors[t].get(), size_tensors[t] * sizeof(float), cudaMemcpyHostToDevice);
+                    cudaMemcpy(d_val_tensors[t], val_tensors[t].get(), size_tensors[t] * sizeof(float), cudaMemcpyHostToDevice);
+                }
+
+                // optimizer
+                opt.update();
+                // tests
+                for (std::size_t t = 0; t < nb_tensors; ++t) {
+                    std::unique_ptr<float[]> temp1 = std::make_unique<float[]>(opt.parameters().at(t)->size());
+                    cudaMemcpy(temp1.get(), opt.parameters().at(t)->getImpl()->rawPtr(), sizeof(float) * opt.parameters().at(t)->size(), cudaMemcpyDeviceToHost);
+                    std::unique_ptr<float[]> temp2 = std::make_unique<float[]>(tensors[t]->size());
+                    cudaMemcpy(temp2.get(), tensors[t]->getImpl()->rawPtr(), sizeof(float) * tensors[t]->size(), cudaMemcpyDeviceToHost);
+                    for (std::size_t i = 0; i < tensors[t]->size(); ++i) {
+                        REQUIRE(std::abs(temp1[i] - temp2[i]) <= 1e-8f + 1e-5f * std::abs(temp1[i]));
+                    }
+                }
+            }
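+
+            // free the device buffers allocated for this trial; this assumes the Tensor
+            // impls do not take ownership of pointers passed via setRawPtr, mirroring
+            // how the CPU section keeps its host buffers in unique_ptr
+            for (std::size_t i = 0; i < nb_tensors; ++i) {
+                cudaFree(d_val_tensors[i]);
+                cudaFree(d_val_grad_tensors[i]);
+                cudaFree(d_val_momentum1_tensors[i]);
+                cudaFree(d_val_momentum2_tensors[i]);
+            }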
         }
     }
+#endif
 }
 } // namespace Aidge