diff --git a/include/aidge/backend/cuda/operator/AddImpl.hpp b/include/aidge/backend/cuda/operator/AddImpl.hpp
index 429d6f1b04489d9e38ce96d584a1ce9528dd0b2d..42d420f8410f79100fdfdbe3eabb8b43e616a74a 100644
--- a/include/aidge/backend/cuda/operator/AddImpl.hpp
+++ b/include/aidge/backend/cuda/operator/AddImpl.hpp
@@ -36,7 +36,7 @@ public:
         return std::make_unique<AddImpl_cuda>(op);
     }
 
-    virtual std::set<ImplSpec> getAvailableImplSpecs() const override {
+    virtual std::vector<ImplSpec> getAvailableImplSpecs() const override {
         return {
             {DataType::Float64},
             {DataType::Float32},
diff --git a/include/aidge/backend/cuda/operator/AndImpl.hpp b/include/aidge/backend/cuda/operator/AndImpl.hpp
index 4105ec87db2c58e218c629a1c94f31efd37c80ee..e90a4c5fe3d7b4cd529dcb4cb5400a6447f53e3c 100644
--- a/include/aidge/backend/cuda/operator/AndImpl.hpp
+++ b/include/aidge/backend/cuda/operator/AndImpl.hpp
@@ -36,7 +36,7 @@ public:
         return std::make_unique<AndImpl_cuda>(op);
     }
 
-    virtual std::set<ImplSpec> getAvailableImplSpecs() const override {
+    virtual std::vector<ImplSpec> getAvailableImplSpecs() const override {
         return {
             {DataType::Float64},
             {DataType::Float32},
diff --git a/include/aidge/backend/cuda/operator/ArgMaxImpl.hpp b/include/aidge/backend/cuda/operator/ArgMaxImpl.hpp
index a89aebf96914f258f6be616b940ec195ec9ae2a9..7b4628084a913a10e48302597a4d5b77fb7f6d16 100644
--- a/include/aidge/backend/cuda/operator/ArgMaxImpl.hpp
+++ b/include/aidge/backend/cuda/operator/ArgMaxImpl.hpp
@@ -36,7 +36,7 @@ public:
         return std::make_unique<ArgMaxImpl_cuda>(op);
     }
 
-    virtual std::set<ImplSpec> getAvailableImplSpecs() const override {
+    virtual std::vector<ImplSpec> getAvailableImplSpecs() const override {
         return {
             {DataType::Float64},
             {DataType::Float32},
diff --git a/include/aidge/backend/cuda/operator/AvgPoolingImpl.hpp b/include/aidge/backend/cuda/operator/AvgPoolingImpl.hpp
index 7f8fb4075affd3e5f17533ea67b051dbb6395f04..1c4efcf66850330fe9747c500093efa4456fa3f1 100644
--- a/include/aidge/backend/cuda/operator/AvgPoolingImpl.hpp
+++ b/include/aidge/backend/cuda/operator/AvgPoolingImpl.hpp
@@ -37,7 +37,7 @@ public:
         return std::make_unique<AvgPoolingImpl_cuda>(op);
     }
 
-    virtual std::set<ImplSpec> getAvailableImplSpecs() const override {
+    virtual std::vector<ImplSpec> getAvailableImplSpecs() const override {
         return {
             {DataType::Float64},
             {DataType::Float32},
diff --git a/include/aidge/backend/cuda/operator/BatchNormImpl.hpp b/include/aidge/backend/cuda/operator/BatchNormImpl.hpp
index 5ba8656ef8a25ffa53584641a938f637ecff9b94..025ef406fa6a988e758707b11fb2ceab6c829f26 100644
--- a/include/aidge/backend/cuda/operator/BatchNormImpl.hpp
+++ b/include/aidge/backend/cuda/operator/BatchNormImpl.hpp
@@ -37,7 +37,7 @@ public:
         return std::make_unique<BatchNormImpl_cuda>(op);
     }
 
-    virtual std::set<ImplSpec> getAvailableImplSpecs() const override {
+    virtual std::vector<ImplSpec> getAvailableImplSpecs() const override {
         return {
             {DataType::Float64},
             {DataType::Float32},
diff --git a/include/aidge/backend/cuda/operator/ConvImpl.hpp b/include/aidge/backend/cuda/operator/ConvImpl.hpp
index ce94ec6695735c93d5c8d0acfdc6153e91e7147d..27f3781a6824dd71d228b90c71df58b12ea0a6b3 100644
--- a/include/aidge/backend/cuda/operator/ConvImpl.hpp
+++ b/include/aidge/backend/cuda/operator/ConvImpl.hpp
@@ -43,7 +43,7 @@ public:
         return std::make_unique<ConvImpl_cuda<DIM>>(op, true);
     }
 
-    virtual std::set<ImplSpec> getAvailableImplSpecs() const override {
+    virtual std::vector<ImplSpec> getAvailableImplSpecs() const override {
         return {
             {DataType::Any}
         };
diff --git a/include/aidge/backend/cuda/operator/DivImpl.hpp b/include/aidge/backend/cuda/operator/DivImpl.hpp
index 4b15445cb791aa1cf2520018d1015e19aaf10ce3..fbd3c73f1741d05549f06290ba9166b8d11c604d 100644
--- a/include/aidge/backend/cuda/operator/DivImpl.hpp
+++ b/include/aidge/backend/cuda/operator/DivImpl.hpp
@@ -36,7 +36,7 @@ public:
         return std::make_unique<DivImpl_cuda>(op);
     }
 
-    virtual std::set<ImplSpec> getAvailableImplSpecs() const override {
+    virtual std::vector<ImplSpec> getAvailableImplSpecs() const override {
         return {
             {DataType::Float64},
             {DataType::Float32},
diff --git a/include/aidge/backend/cuda/operator/FCImpl.hpp b/include/aidge/backend/cuda/operator/FCImpl.hpp
index f2dd0c90c0096a1b57fb6860e5991d0c1e824be9..8380754ea2419b2baff6de5126f8b6ff3e640178 100644
--- a/include/aidge/backend/cuda/operator/FCImpl.hpp
+++ b/include/aidge/backend/cuda/operator/FCImpl.hpp
@@ -36,7 +36,7 @@ public:
         return std::make_unique<FCImpl_cuda>(op);
     }
 
-    virtual std::set<ImplSpec> getAvailableImplSpecs() const override {
+    virtual std::vector<ImplSpec> getAvailableImplSpecs() const override {
         return {
             {DataType::Float64},
             {DataType::Float32},
diff --git a/include/aidge/backend/cuda/operator/GlobalAveragePoolingImpl.hpp b/include/aidge/backend/cuda/operator/GlobalAveragePoolingImpl.hpp
index 3f0386dcfa68d4b55bebeb524dfedfd5edeb0fe9..5b0cf07ab8687b9746d13af2274465ad923e6571 100644
--- a/include/aidge/backend/cuda/operator/GlobalAveragePoolingImpl.hpp
+++ b/include/aidge/backend/cuda/operator/GlobalAveragePoolingImpl.hpp
@@ -36,7 +36,7 @@ public:
         return std::make_unique<GlobalAveragePoolingImpl_cuda>(op);
     }
 
-    virtual std::set<ImplSpec> getAvailableImplSpecs() const override {
+    virtual std::vector<ImplSpec> getAvailableImplSpecs() const override {
         return {
             {DataType::Any}
         };
diff --git a/include/aidge/backend/cuda/operator/ILayerNormImpl.hpp b/include/aidge/backend/cuda/operator/ILayerNormImpl.hpp
index 742401de7903f19ab4d8f51a153b0e864f21dd47..0d858c4719899094f996ca4f82f075df547a6fd4 100644
--- a/include/aidge/backend/cuda/operator/ILayerNormImpl.hpp
+++ b/include/aidge/backend/cuda/operator/ILayerNormImpl.hpp
@@ -37,7 +37,7 @@ public:
         return std::make_unique<ILayerNormImpl_cuda>(op);
     }
 
-    virtual std::set<ImplSpec> getAvailableImplSpecs() const override {
+    virtual std::vector<ImplSpec> getAvailableImplSpecs() const override {
         return {
             {DataType::Float64},
             {DataType::Float32},
diff --git a/include/aidge/backend/cuda/operator/LnImpl.hpp b/include/aidge/backend/cuda/operator/LnImpl.hpp
index 1617754fbf5dd52e099a9787a25a827851933af9..fbbccc11275b5c11bbaa86d05a2c19a1a46c11c1 100644
--- a/include/aidge/backend/cuda/operator/LnImpl.hpp
+++ b/include/aidge/backend/cuda/operator/LnImpl.hpp
@@ -36,7 +36,7 @@ public:
         return std::make_unique<LnImpl_cuda>(op);
     }
 
-    virtual std::set<ImplSpec> getAvailableImplSpecs() const override {
+    virtual std::vector<ImplSpec> getAvailableImplSpecs() const override {
         return {
             {DataType::Float64},
             {DataType::Float32},
diff --git a/include/aidge/backend/cuda/operator/MaxPoolingImpl.hpp b/include/aidge/backend/cuda/operator/MaxPoolingImpl.hpp
index a203e761beaeccec96b36bbd5a424a193cdb6387..474a408f9697e8e91ffe9c8e2a79a79d7968e80a 100644
--- a/include/aidge/backend/cuda/operator/MaxPoolingImpl.hpp
+++ b/include/aidge/backend/cuda/operator/MaxPoolingImpl.hpp
@@ -37,7 +37,7 @@ public:
         return std::make_unique<MaxPoolingImpl_cuda>(op);
     }
 
-    virtual std::set<ImplSpec> getAvailableImplSpecs() const override {
+    virtual std::vector<ImplSpec> getAvailableImplSpecs() const override {
         return {
             {DataType::Any}
         };
diff --git a/include/aidge/backend/cuda/operator/MulImpl.hpp b/include/aidge/backend/cuda/operator/MulImpl.hpp
index 37d3d5a0df7b63dc63ad13737d8a8b463bf315c8..9a1a4d79d32c7a962d2086319d948e60a9f51049 100644
--- a/include/aidge/backend/cuda/operator/MulImpl.hpp
+++ b/include/aidge/backend/cuda/operator/MulImpl.hpp
@@ -36,7 +36,7 @@ public:
         return std::make_unique<MulImpl_cuda>(op);
     }
 
-    virtual std::set<ImplSpec> getAvailableImplSpecs() const override {
+    virtual std::vector<ImplSpec> getAvailableImplSpecs() const override {
         return {
             {DataType::Float64},
             {DataType::Float32},
diff --git a/include/aidge/backend/cuda/operator/PadImpl.hpp b/include/aidge/backend/cuda/operator/PadImpl.hpp
index d51361d6ee5a3ec9a858d290b3f5fe5251b6fa97..a0f7037c811cd3cb130cffed0bb7746e33220074 100644
--- a/include/aidge/backend/cuda/operator/PadImpl.hpp
+++ b/include/aidge/backend/cuda/operator/PadImpl.hpp
@@ -37,7 +37,7 @@ public:
         return std::make_unique<PadImpl_cuda>(op);
     }
 
-    virtual std::set<ImplSpec> getAvailableImplSpecs() const override {
+    virtual std::vector<ImplSpec> getAvailableImplSpecs() const override {
         return {
             {DataType::Float64},
             {DataType::Float32},
diff --git a/include/aidge/backend/cuda/operator/PowImpl.hpp b/include/aidge/backend/cuda/operator/PowImpl.hpp
index 403648d9a294ee598f117c8b05e6f0875e998307..9b53d8dc04985794238f79cff9c78c44408fb6d7 100644
--- a/include/aidge/backend/cuda/operator/PowImpl.hpp
+++ b/include/aidge/backend/cuda/operator/PowImpl.hpp
@@ -36,7 +36,7 @@ public:
         return std::make_unique<PowImpl_cuda>(op);
     }
 
-    virtual std::set<ImplSpec> getAvailableImplSpecs() const override {
+    virtual std::vector<ImplSpec> getAvailableImplSpecs() const override {
         return {
             {DataType::Float64},
             {DataType::Float32},
diff --git a/include/aidge/backend/cuda/operator/ReLUImpl.hpp b/include/aidge/backend/cuda/operator/ReLUImpl.hpp
index 344923ba1ee08642a3e3e5f685bfd2c7de8a74b4..306a56c4d0959dc4d818a6791173c375f5435360 100644
--- a/include/aidge/backend/cuda/operator/ReLUImpl.hpp
+++ b/include/aidge/backend/cuda/operator/ReLUImpl.hpp
@@ -36,7 +36,7 @@ public:
         return std::make_unique<ReLUImpl_cuda>(op);
     }
 
-    virtual std::set<ImplSpec> getAvailableImplSpecs() const override {
+    virtual std::vector<ImplSpec> getAvailableImplSpecs() const override {
         return {
             {DataType::Any}
         };
diff --git a/include/aidge/backend/cuda/operator/ReduceMeanImpl.hpp b/include/aidge/backend/cuda/operator/ReduceMeanImpl.hpp
index a50ff21b35f0b062c6a9c327ea2892c15055a175..1f6878480d69e19f8c73a12862cc12b2d675440d 100644
--- a/include/aidge/backend/cuda/operator/ReduceMeanImpl.hpp
+++ b/include/aidge/backend/cuda/operator/ReduceMeanImpl.hpp
@@ -36,7 +36,7 @@ public:
         return std::make_unique<ReduceMeanImpl_cuda>(op);
     }
 
-    virtual std::set<ImplSpec> getAvailableImplSpecs() const override {
+    virtual std::vector<ImplSpec> getAvailableImplSpecs() const override {
         return {
             {DataType::Float64},
             {DataType::Float32},
diff --git a/include/aidge/backend/cuda/operator/ReduceSumImpl.hpp b/include/aidge/backend/cuda/operator/ReduceSumImpl.hpp
index a5a7ae48d7e5bd8f370964d7f81795ecbaa5986b..10af90ba3a4ffc1d1464dd73f15313315b0c0032 100644
--- a/include/aidge/backend/cuda/operator/ReduceSumImpl.hpp
+++ b/include/aidge/backend/cuda/operator/ReduceSumImpl.hpp
@@ -36,7 +36,7 @@ public:
         return std::make_unique<ReduceSumImpl_cuda>(op);
     }
 
-    virtual std::set<ImplSpec> getAvailableImplSpecs() const override {
+    virtual std::vector<ImplSpec> getAvailableImplSpecs() const override {
         return {
             {DataType::Float64},
             {DataType::Float32},
diff --git a/include/aidge/backend/cuda/operator/ReshapeImpl.hpp b/include/aidge/backend/cuda/operator/ReshapeImpl.hpp
index d412590c63f925806973038d67ee18e0847f79c2..2c8ebd68cff0313031279f83109043eb17d919b5 100644
--- a/include/aidge/backend/cuda/operator/ReshapeImpl.hpp
+++ b/include/aidge/backend/cuda/operator/ReshapeImpl.hpp
@@ -36,7 +36,7 @@ public:
         return std::make_unique<ReshapeImpl_cuda>(op);
     }
 
-    virtual std::set<ImplSpec> getAvailableImplSpecs() const override {
+    virtual std::vector<ImplSpec> getAvailableImplSpecs() const override {
         return {
             {DataType::Float64},
             {DataType::Float32},
diff --git a/include/aidge/backend/cuda/operator/ShiftGELUImpl.hpp b/include/aidge/backend/cuda/operator/ShiftGELUImpl.hpp
index f83b41ae139482cdb0cd1060846c77ba78fcc0ee..1eff6dfbb1777d8dbd823d7bc9b94894bb2646b9 100644
--- a/include/aidge/backend/cuda/operator/ShiftGELUImpl.hpp
+++ b/include/aidge/backend/cuda/operator/ShiftGELUImpl.hpp
@@ -37,7 +37,7 @@ public:
         return std::make_unique<ShiftGELUImpl_cuda>(op);
     }
 
-    virtual std::set<ImplSpec> getAvailableImplSpecs() const override {
+    virtual std::vector<ImplSpec> getAvailableImplSpecs() const override {
         return {
             {DataType::Float64},
             {DataType::Float32},
diff --git a/include/aidge/backend/cuda/operator/ShiftMaxImpl.hpp b/include/aidge/backend/cuda/operator/ShiftMaxImpl.hpp
index 707b5616fde120f7e8ef38e6dc9f1552cfdb0d59..3e6e3744cb544d0928a9229aa5110cf776f0c507 100644
--- a/include/aidge/backend/cuda/operator/ShiftMaxImpl.hpp
+++ b/include/aidge/backend/cuda/operator/ShiftMaxImpl.hpp
@@ -37,7 +37,7 @@ public:
         return std::make_unique<ShiftMaxImpl_cuda>(op);
     }
 
-    virtual std::set<ImplSpec> getAvailableImplSpecs() const override {
+    virtual std::vector<ImplSpec> getAvailableImplSpecs() const override {
         return {
             {DataType::Float64},
             {DataType::Float32},
diff --git a/include/aidge/backend/cuda/operator/SigmoidImpl.hpp b/include/aidge/backend/cuda/operator/SigmoidImpl.hpp
index bc29b9e5f53716641a692cd63c29f4600f3cdd02..dc1434c8ecc8568bd4f82c7c7ce5db78cc1885a9 100644
--- a/include/aidge/backend/cuda/operator/SigmoidImpl.hpp
+++ b/include/aidge/backend/cuda/operator/SigmoidImpl.hpp
@@ -36,7 +36,7 @@ public:
         return std::make_unique<SigmoidImpl_cuda>(op);
     }
 
-    virtual std::set<ImplSpec> getAvailableImplSpecs() const override {
+    virtual std::vector<ImplSpec> getAvailableImplSpecs() const override {
         return {
             {DataType::Any}
         };
diff --git a/include/aidge/backend/cuda/operator/SubImpl.hpp b/include/aidge/backend/cuda/operator/SubImpl.hpp
index 45c833f3e7f9f25258469a4d1e34e8598df068ef..529d0b2b2dd4a0ec8a3dae5bf0219f8a4f2968c6 100644
--- a/include/aidge/backend/cuda/operator/SubImpl.hpp
+++ b/include/aidge/backend/cuda/operator/SubImpl.hpp
@@ -36,7 +36,7 @@ public:
         return std::make_unique<SubImpl_cuda>(op);
     }
 
-    virtual std::set<ImplSpec> getAvailableImplSpecs() const override {
+    virtual std::vector<ImplSpec> getAvailableImplSpecs() const override {
         return {
             {DataType::Float64},
             {DataType::Float32},
diff --git a/include/aidge/backend/cuda/operator/TanhImpl.hpp b/include/aidge/backend/cuda/operator/TanhImpl.hpp
index 166acd6adee397a3f284363a9db1e71152467b94..a87d7bd8c318149cb625a3cf0122f7eac1ea6149 100644
--- a/include/aidge/backend/cuda/operator/TanhImpl.hpp
+++ b/include/aidge/backend/cuda/operator/TanhImpl.hpp
@@ -36,7 +36,7 @@ public:
         return std::make_unique<TanhImpl_cuda>(op);
     }
 
-    virtual std::set<ImplSpec> getAvailableImplSpecs() const override {
+    virtual std::vector<ImplSpec> getAvailableImplSpecs() const override {
         return {
             {DataType::Any}
         };
diff --git a/unit_tests/Test_AddImpl.cpp b/unit_tests/Test_AddImpl.cpp
index b8129175d88323c896244e531f1dd52a5cbaa19e..dffabe6aab92bdfdd0c79b61ab59e9bc6efb9d94 100644
--- a/unit_tests/Test_AddImpl.cpp
+++ b/unit_tests/Test_AddImpl.cpp
@@ -22,48 +22,27 @@
 using namespace Aidge;
 
 TEST_CASE("[gpu/operator] Add(forward)", "[Add][GPU]") {
-    std::shared_ptr<Tensor> input1 = std::make_shared<Tensor>(Array4D<float,3,3,3,2> {
-        {                                       //
-            {                                   //
-                {{20, 47},{21, 48},{22, 49}},   //
-                {{23, 50},{24, 51},{25, 52}},   //
-                {{26, 53},{27, 54},{28, 55}}    //
-            },                                  //
-            {                                   //
-                {{29, 56},{30, 57},{31, 58}},   //
-                {{32, 59},{33, 60},{34, 61}},   //
-                {{35, 62},{36, 63},{37, 64}}    //
-            },                                  //
-            {                                   //
-                {{38, 65},{39, 66},{40, 67}},   //
-                {{41, 68},{42, 69},{43, 70}},   //
-                {{44, 71},{45, 72},{46, 73}}    //
-            }                                   //
-        }                                       //
-    });                                         //
-    input1->setBackend("cuda");
-    SECTION("One input") {
-        std::shared_ptr<Node> myAdd = Add(1);
-        auto op = std::static_pointer_cast<OperatorTensor>(myAdd -> getOperator());
-        op->associateInput(0, input1);
-        op->setBackend("cuda");
-        op->setDataType(DataType::Float32);
-        myAdd->forward();
-
-        float* computedOutput   = new float[input1->size()]();
-        cudaMemcpy(computedOutput, op->getOutput(0)->getImpl()->rawPtr(), sizeof(float) * input1->size(), cudaMemcpyDeviceToHost);
-        float* targetOutput   = new float[input1->size()]();
-        cudaMemcpy(targetOutput, input1->getImpl()->rawPtr(), sizeof(float) * input1->size(), cudaMemcpyDeviceToHost);
-
-        for(int i = 0; i < input1->size(); i++){
-            REQUIRE(fabs(computedOutput[i] - targetOutput[i]) < 1e-6);
-        }
-
-        delete[] computedOutput;
-        delete[] targetOutput;
-    }
-
-    SECTION("Two inputs") {
+    SECTION("Same input") {
+        std::shared_ptr<Tensor> input1 = std::make_shared<Tensor>(Array4D<float,3,3,3,2> {
+            {                                       //
+                {                                   //
+                    {{20, 47},{21, 48},{22, 49}},   //
+                    {{23, 50},{24, 51},{25, 52}},   //
+                    {{26, 53},{27, 54},{28, 55}}    //
+                },                                  //
+                {                                   //
+                    {{29, 56},{30, 57},{31, 58}},   //
+                    {{32, 59},{33, 60},{34, 61}},   //
+                    {{35, 62},{36, 63},{37, 64}}    //
+                },                                  //
+                {                                   //
+                    {{38, 65},{39, 66},{40, 67}},   //
+                    {{41, 68},{42, 69},{43, 70}},   //
+                    {{44, 71},{45, 72},{46, 73}}    //
+                }                                   //
+            }                                       //
+        });                                         //
+        input1->setBackend("cuda");
         std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array4D<float,3,3,3,2> {
             {
                 {
@@ -84,7 +63,7 @@ TEST_CASE("[gpu/operator] Add(forward)", "[Add][GPU]") {
             }
         });
 
-        std::shared_ptr<Node> myAdd = Add(2);
+        std::shared_ptr<Node> myAdd = Add();
         auto op = std::static_pointer_cast<OperatorTensor>(myAdd -> getOperator());
         op->associateInput(0, input1);
         op->associateInput(1, input1);
@@ -103,47 +82,6 @@ TEST_CASE("[gpu/operator] Add(forward)", "[Add][GPU]") {
         delete[] computedOutput;
     }
 
-    SECTION("Three inputs") {
-        std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array4D<float,3,3,3,2> {
-            {
-                {
-                    {{ 60, 141},{ 63, 144},{ 66, 147}},
-                    {{ 69, 150},{ 72, 153},{ 75, 156}},
-                    {{ 78, 159},{ 81, 162},{ 84, 165}}
-                },
-                {
-                    {{ 87, 168},{ 90, 171},{ 93, 174}},
-                    {{ 96, 177},{ 99, 180},{102, 183}},
-                    {{105, 186},{108, 189},{111, 192}}
-                },
-                {
-                    {{114, 195},{117, 198},{120, 201}},
-                    {{123, 204},{126, 207},{129, 210}},
-                    {{132, 213},{135, 216},{138, 219}}
-                }
-            }
-        });
-
-        std::shared_ptr<Node> myAdd = Add(3);
-        auto op = std::static_pointer_cast<OperatorTensor>(myAdd -> getOperator());
-        op->associateInput(0, input1);
-        op->associateInput(1, input1);
-        op->associateInput(2, input1);
-        op->setDataType(DataType::Float32);
-        op->setBackend("cuda");
-        myAdd->forward();
-
-        float* computedOutput   = new float[input1->size()]();
-        cudaMemcpy(computedOutput, op->getOutput(0)->getImpl()->rawPtr(), sizeof(float) * expectedOutput->size(), cudaMemcpyDeviceToHost);
-
-        for(int i = 0; i < expectedOutput->size(); i++){
-            const float targetOutput = *(static_cast<float*>(expectedOutput->getImpl()->rawPtr()) + i);
-            REQUIRE(fabs(computedOutput[i] - targetOutput) < 1e-6);
-        }
-
-        delete[] computedOutput;
-    }
-
     SECTION("Broadcasting") {
         std::shared_ptr<Tensor> input_0 = std::make_shared<Tensor>(Array4D<float,3,1,3,2> {
         {                                       //
@@ -168,47 +106,80 @@ TEST_CASE("[gpu/operator] Add(forward)", "[Add][GPU]") {
         }                                       //
         });                                     //
 
-        std::shared_ptr<Tensor> input_2 = std::make_shared<Tensor>(Array1D<float,2> {{100,200}});  
-        std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array4D<float,3,3,3,2> {
-            {                                               //
-                {                                           //
-                    {{ 120, 222},{ 124, 226},{ 128, 230}},  //
-                    {{ 126, 228},{ 130, 232},{ 134, 236}},  //
-                    {{ 132, 234},{ 136, 238},{ 140, 242}}   //
-                },                                          //
-                {                                           //
-                    {{ 126, 228},{ 130, 232},{ 134, 236}},  //
-                    {{ 132, 234},{ 136, 238},{ 140, 242}},  //
-                    {{ 138, 240},{ 142, 244},{ 146, 248}}   //
-                },                                          //
-                {                                           //
-                    {{ 132, 234},{ 136, 238},{140, 242}},   //
-                    {{ 138, 240},{ 142, 244},{146, 248}},   //
-                    {{ 144, 246},{ 148, 250},{152, 254}}    //
-                }                                           //
-            }                                               //
-        });                                                 //
+        std::shared_ptr<Tensor> input_2 = std::make_shared<Tensor>(Array1D<float,2> {{100,200}});
         input_0->setBackend("cuda");
         input_1->setBackend("cuda");
         input_2->setBackend("cuda");
-        std::shared_ptr<Node> myAdd = Add(3);
-        auto op = std::static_pointer_cast<OperatorTensor>(myAdd -> getOperator());
-        op->associateInput(0, input_0);
-        op->associateInput(1, input_1);
-        op->associateInput(2, input_2);
-        op->setDataType(DataType::Float32);
-        op->setBackend("cuda");
-        myAdd->forward();
 
-        float* computedOutput   = new float[input1->size()]();
-        cudaMemcpy(computedOutput, op->getOutput(0)->getImpl()->rawPtr(), sizeof(float) * expectedOutput->size(), cudaMemcpyDeviceToHost);
+        /// Input0(d0, 1, d2, d3) + Input1(1, d1, d2, d3) = Output(d0, d1, d2, d3)
+        std::shared_ptr<Tensor> expectedOutput0 = std::make_shared<Tensor>(Array4D<float,3,3,3,2> {
+            {                                         //
+                {                                     //
+                    {{ 20, 22},{ 24, 26},{ 28, 30}},  //
+                    {{ 26, 28},{ 30, 32},{ 34, 36}},  //
+                    {{ 32, 34},{ 36, 38},{ 40, 42}}   //
+                },                                    //
+                {                                     //
+                    {{ 26, 28},{ 30, 32},{ 34, 36}},  //
+                    {{ 32, 34},{ 36, 38},{ 40, 42}},  //
+                    {{ 38, 40},{ 42, 44},{ 46, 48}}   //
+                },                                    //
+                {                                     //
+                    {{ 32, 34},{ 36, 38},{40, 42}},   //
+                    {{ 38, 40},{ 42, 44},{46, 48}},   //
+                    {{ 44, 46},{ 48, 50},{52, 54}}    //
+                }                                     //
+            }                                         //
+        });                                           //
 
-        for(int i = 0; i < expectedOutput->size(); i++){
-            const float targetOutput = *(static_cast<float*>(expectedOutput->getImpl()->rawPtr()) + i);
-            REQUIRE(fabs(computedOutput[i] - targetOutput) < 1e-6);
+        std::shared_ptr<Node> myAdd0 = Add();
+        auto op0 = std::static_pointer_cast<OperatorTensor>(myAdd0 -> getOperator());
+        op0->associateInput(0, input_0);
+        op0->associateInput(1, input_1);
+        op0->setDataType(DataType::Float32);
+        op0->setBackend("cuda");
+        myAdd0->forward();
+        // Host copy of the result; owned by a unique_ptr so it is freed even if a REQUIRE below fails.
+        auto computedOutput0 = std::make_unique<float[]>(expectedOutput0->size());
+        REQUIRE(cudaMemcpy(computedOutput0.get(), op0->getOutput(0)->getImpl()->rawPtr(), sizeof(float) * expectedOutput0->size(), cudaMemcpyDeviceToHost) == cudaSuccess);
+        // expectedOutput0 has no setBackend("cuda") call here, so its raw pointer is dereferenced on the host.
+        for(std::size_t i = 0; i < expectedOutput0->size(); ++i){
+            const float targetOutput = *(static_cast<float*>(expectedOutput0->getImpl()->rawPtr()) + i);
+            REQUIRE(fabs(computedOutput0[i] - targetOutput) < 1e-6);
         }
 
-        delete[] computedOutput;
+        // computedOutput0 is released automatically by its std::unique_ptr.
+
+        /// Input0 = input_0 (3, 1, 3, 2) + Input1 = input_2 (2) -> Output (3, 1, 3, 2)
+        std::shared_ptr<Tensor> expectedOutput1 = std::make_shared<Tensor>(Array4D<float,3,1,3,2> {
+        {                                             //
+            {                                         //
+                {{100, 201},{102, 203},{104, 205}}    //
+            },                                        //
+            {                                         //
+                {{106, 207},{108, 209},{110, 211}}    //
+            },                                        //
+            {                                         //
+                {{112, 213},{114, 215},{116, 217}}    //
+            }                                         //
+        }                                             //
+        });                                           //
+        std::shared_ptr<Node> myAdd1 = Add();
+        auto op1 = std::static_pointer_cast<OperatorTensor>(myAdd1 -> getOperator());
+        op1->associateInput(0, input_0);
+        op1->associateInput(1, input_2);
+        op1->setDataType(DataType::Float32);
+        op1->setBackend("cuda");
+        myAdd1->forward();
+
+        // Host copy of the result; owned by a unique_ptr so it is freed even if a REQUIRE below fails.
+        auto computedOutput1 = std::make_unique<float[]>(expectedOutput1->size());
+        REQUIRE(cudaMemcpy(computedOutput1.get(), op1->getOutput(0)->getImpl()->rawPtr(), sizeof(float) * expectedOutput1->size(), cudaMemcpyDeviceToHost) == cudaSuccess);
+
+        for(std::size_t i = 0; i < expectedOutput1->size(); ++i){
+            const float targetOutput = *(static_cast<float*>(expectedOutput1->getImpl()->rawPtr()) + i);
+            REQUIRE(fabs(computedOutput1[i] - targetOutput) < 1e-6);
+        }
     }
 
     SECTION("Random Input") {
@@ -231,11 +202,11 @@ TEST_CASE("[gpu/operator] Add(forward)", "[Add][GPU]") {
         for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial)
         {
             // Create Add Operator CUDA
-            std::shared_ptr<Node> myAddCUDA = Add(2, "myaddcuda");
+            std::shared_ptr<Node> myAddCUDA = Add("myaddcuda");
             auto op_cuda = std::static_pointer_cast<OperatorTensor>(myAddCUDA -> getOperator());
 
             // Create Add Operator CPU
-            std::shared_ptr<Node> myAddCPU = Add(2, "myaddcpu");
+            std::shared_ptr<Node> myAddCPU = Add("myaddcpu");
             auto op_cpu = std::static_pointer_cast<OperatorTensor>(myAddCPU -> getOperator());
             op_cpu->setDataType(DataType::Float32);
             op_cpu->setBackend("cpu");
@@ -360,16 +331,12 @@ TEST_CASE("[gpu/operator] Add(backward)", "[Add][GPU]") {
         }                                       //
         });                                     //
 
-        std::shared_ptr<Tensor> input_2 = std::make_shared<Tensor>(Array1D<float,2> {{100,200}});
-
         input_0->setBackend("cuda");
         input_1->setBackend("cuda");
-        input_2->setBackend("cuda");
-        std::shared_ptr<Node> myAdd = Add(3);
+        std::shared_ptr<Node> myAdd = Add();
         auto op = std::static_pointer_cast<OperatorTensor>(myAdd -> getOperator());
         op->associateInput(0, input_0);
         op->associateInput(1, input_1);
-        op->associateInput(2, input_2);
         op->setDataType(DataType::Float32);
         op->setBackend("cuda");
         myAdd->forward();
@@ -420,14 +387,11 @@ TEST_CASE("[gpu/operator] Add(backward)", "[Add][GPU]") {
             }                                   //
         }                                       //
         });                                     //
-        std::shared_ptr<Tensor> expectedInput3Grad = std::make_shared<Tensor>(Array1D<float,2> {{729, 756}});
 
         float *computedGrad1Cuda = new float[expectedInput1Grad->size()]();
         cudaMemcpy(computedGrad1Cuda, op->getInput(0)->grad()->getImpl()->rawPtr(), sizeof(float) * expectedInput1Grad->size(), cudaMemcpyDeviceToHost);
         float *computedGrad2Cuda = new float[expectedInput2Grad->size()]();
         cudaMemcpy(computedGrad2Cuda, op->getInput(1)->grad()->getImpl()->rawPtr(), sizeof(float) * expectedInput2Grad->size(), cudaMemcpyDeviceToHost);
-        float *computedGrad3Cuda = new float[expectedInput3Grad->size()]();
-        cudaMemcpy(computedGrad3Cuda, op->getInput(2)->grad()->getImpl()->rawPtr(), sizeof(float) * expectedInput3Grad->size(), cudaMemcpyDeviceToHost);
 
         for(int i = 0; i < expectedInput1Grad->size(); i++){
             const float targetOutput = *(static_cast<float*>(expectedInput1Grad->getImpl()->rawPtr()) + i);
@@ -437,12 +401,7 @@ TEST_CASE("[gpu/operator] Add(backward)", "[Add][GPU]") {
             const float targetOutput = *(static_cast<float*>(expectedInput2Grad->getImpl()->rawPtr()) + i);
             REQUIRE(fabs(computedGrad2Cuda[i] - targetOutput) < 1e-6);
         }
-        for(int i = 0; i < expectedInput3Grad->size(); i++){
-            const float targetOutput = *(static_cast<float*>(expectedInput3Grad->getImpl()->rawPtr()) + i);
-            REQUIRE(fabs(computedGrad3Cuda[i] - targetOutput) < 1e-6);
-        }
 
         delete[] computedGrad1Cuda;
         delete[] computedGrad2Cuda;
-        delete[] computedGrad3Cuda;
 }
\ No newline at end of file