diff --git a/unit_tests/Test_AddImpl.cpp b/unit_tests/Test_AddImpl.cpp index b8129175d88323c896244e531f1dd52a5cbaa19e..dffabe6aab92bdfdd0c79b61ab59e9bc6efb9d94 100644 --- a/unit_tests/Test_AddImpl.cpp +++ b/unit_tests/Test_AddImpl.cpp @@ -22,48 +22,27 @@ using namespace Aidge; TEST_CASE("[gpu/operator] Add(forward)", "[Add][GPU]") { - std::shared_ptr<Tensor> input1 = std::make_shared<Tensor>(Array4D<float,3,3,3,2> { - { // - { // - {{20, 47},{21, 48},{22, 49}}, // - {{23, 50},{24, 51},{25, 52}}, // - {{26, 53},{27, 54},{28, 55}} // - }, // - { // - {{29, 56},{30, 57},{31, 58}}, // - {{32, 59},{33, 60},{34, 61}}, // - {{35, 62},{36, 63},{37, 64}} // - }, // - { // - {{38, 65},{39, 66},{40, 67}}, // - {{41, 68},{42, 69},{43, 70}}, // - {{44, 71},{45, 72},{46, 73}} // - } // - } // - }); // - input1->setBackend("cuda"); - SECTION("One input") { - std::shared_ptr<Node> myAdd = Add(1); - auto op = std::static_pointer_cast<OperatorTensor>(myAdd -> getOperator()); - op->associateInput(0, input1); - op->setBackend("cuda"); - op->setDataType(DataType::Float32); - myAdd->forward(); - - float* computedOutput = new float[input1->size()](); - cudaMemcpy(computedOutput, op->getOutput(0)->getImpl()->rawPtr(), sizeof(float) * input1->size(), cudaMemcpyDeviceToHost); - float* targetOutput = new float[input1->size()](); - cudaMemcpy(targetOutput, input1->getImpl()->rawPtr(), sizeof(float) * input1->size(), cudaMemcpyDeviceToHost); - - for(int i = 0; i < input1->size(); i++){ - REQUIRE(fabs(computedOutput[i] - targetOutput[i]) < 1e-6); - } - - delete[] computedOutput; - delete[] targetOutput; - } - - SECTION("Two inputs") { + SECTION("Same input") { + std::shared_ptr<Tensor> input1 = std::make_shared<Tensor>(Array4D<float,3,3,3,2> { + { // + { // + {{20, 47},{21, 48},{22, 49}}, // + {{23, 50},{24, 51},{25, 52}}, // + {{26, 53},{27, 54},{28, 55}} // + }, // + { // + {{29, 56},{30, 57},{31, 58}}, // + {{32, 59},{33, 60},{34, 61}}, // + {{35, 62},{36, 63},{37, 64}} // + }, // + { // + 
{{38, 65},{39, 66},{40, 67}}, // + {{41, 68},{42, 69},{43, 70}}, // + {{44, 71},{45, 72},{46, 73}} // + } // + } // + }); // + input1->setBackend("cuda"); std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array4D<float,3,3,3,2> { { { @@ -84,7 +63,7 @@ TEST_CASE("[gpu/operator] Add(forward)", "[Add][GPU]") { } }); - std::shared_ptr<Node> myAdd = Add(2); + std::shared_ptr<Node> myAdd = Add(); auto op = std::static_pointer_cast<OperatorTensor>(myAdd -> getOperator()); op->associateInput(0, input1); op->associateInput(1, input1); @@ -103,47 +82,6 @@ TEST_CASE("[gpu/operator] Add(forward)", "[Add][GPU]") { delete[] computedOutput; } - SECTION("Three inputs") { - std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array4D<float,3,3,3,2> { - { - { - {{ 60, 141},{ 63, 144},{ 66, 147}}, - {{ 69, 150},{ 72, 153},{ 75, 156}}, - {{ 78, 159},{ 81, 162},{ 84, 165}} - }, - { - {{ 87, 168},{ 90, 171},{ 93, 174}}, - {{ 96, 177},{ 99, 180},{102, 183}}, - {{105, 186},{108, 189},{111, 192}} - }, - { - {{114, 195},{117, 198},{120, 201}}, - {{123, 204},{126, 207},{129, 210}}, - {{132, 213},{135, 216},{138, 219}} - } - } - }); - - std::shared_ptr<Node> myAdd = Add(3); - auto op = std::static_pointer_cast<OperatorTensor>(myAdd -> getOperator()); - op->associateInput(0, input1); - op->associateInput(1, input1); - op->associateInput(2, input1); - op->setDataType(DataType::Float32); - op->setBackend("cuda"); - myAdd->forward(); - - float* computedOutput = new float[input1->size()](); - cudaMemcpy(computedOutput, op->getOutput(0)->getImpl()->rawPtr(), sizeof(float) * expectedOutput->size(), cudaMemcpyDeviceToHost); - - for(int i = 0; i < expectedOutput->size(); i++){ - const float targetOutput = *(static_cast<float*>(expectedOutput->getImpl()->rawPtr()) + i); - REQUIRE(fabs(computedOutput[i] - targetOutput) < 1e-6); - } - - delete[] computedOutput; - } - SECTION("Broadcasting") { std::shared_ptr<Tensor> input_0 = std::make_shared<Tensor>(Array4D<float,3,1,3,2> { 
{ // @@ -168,47 +106,80 @@ TEST_CASE("[gpu/operator] Add(forward)", "[Add][GPU]") { } // }); // - std::shared_ptr<Tensor> input_2 = std::make_shared<Tensor>(Array1D<float,2> {{100,200}}); - std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array4D<float,3,3,3,2> { - { // - { // - {{ 120, 222},{ 124, 226},{ 128, 230}}, // - {{ 126, 228},{ 130, 232},{ 134, 236}}, // - {{ 132, 234},{ 136, 238},{ 140, 242}} // - }, // - { // - {{ 126, 228},{ 130, 232},{ 134, 236}}, // - {{ 132, 234},{ 136, 238},{ 140, 242}}, // - {{ 138, 240},{ 142, 244},{ 146, 248}} // - }, // - { // - {{ 132, 234},{ 136, 238},{140, 242}}, // - {{ 138, 240},{ 142, 244},{146, 248}}, // - {{ 144, 246},{ 148, 250},{152, 254}} // - } // - } // - }); // + std::shared_ptr<Tensor> input_2 = std::make_shared<Tensor>(Array1D<float,2> {{100,200}}); input_0->setBackend("cuda"); input_1->setBackend("cuda"); input_2->setBackend("cuda"); - std::shared_ptr<Node> myAdd = Add(3); - auto op = std::static_pointer_cast<OperatorTensor>(myAdd -> getOperator()); - op->associateInput(0, input_0); - op->associateInput(1, input_1); - op->associateInput(2, input_2); - op->setDataType(DataType::Float32); - op->setBackend("cuda"); - myAdd->forward(); - float* computedOutput = new float[input1->size()](); - cudaMemcpy(computedOutput, op->getOutput(0)->getImpl()->rawPtr(), sizeof(float) * expectedOutput->size(), cudaMemcpyDeviceToHost); + /// Input0(d0, 1, d2, d3) + Input1(1, d1, d2, d3) = Output(d0, d1, d2, d3) + std::shared_ptr<Tensor> expectedOutput0 = std::make_shared<Tensor>(Array4D<float,3,3,3,2> { + { // + { // + {{ 20, 22},{ 24, 26},{ 28, 30}}, // + {{ 26, 28},{ 30, 32},{ 34, 36}}, // + {{ 32, 34},{ 36, 38},{ 40, 42}} // + }, // + { // + {{ 26, 28},{ 30, 32},{ 34, 36}}, // + {{ 32, 34},{ 36, 38},{ 40, 42}}, // + {{ 38, 40},{ 42, 44},{ 46, 48}} // + }, // + { // + {{ 32, 34},{ 36, 38},{40, 42}}, // + {{ 38, 40},{ 42, 44},{46, 48}}, // + {{ 44, 46},{ 48, 50},{52, 54}} // + } // + } // + }); // - for(int i = 0; 
i < expectedOutput->size(); i++){ - const float targetOutput = *(static_cast<float*>(expectedOutput->getImpl()->rawPtr()) + i); - REQUIRE(fabs(computedOutput[i] - targetOutput) < 1e-6); + std::shared_ptr<Node> myAdd0 = Add(); + auto op0 = std::static_pointer_cast<OperatorTensor>(myAdd0 -> getOperator()); + op0->associateInput(0, input_0); + op0->associateInput(1, input_1); + op0->setDataType(DataType::Float32); + op0->setBackend("cuda"); + myAdd0->forward(); + + float* computedOutput0 = new float[expectedOutput0->size()](); + cudaMemcpy(computedOutput0, op0->getOutput(0)->getImpl()->rawPtr(), sizeof(float) * expectedOutput0->size(), cudaMemcpyDeviceToHost); + + for(int i = 0; i < expectedOutput0->size(); i++){ + const float targetOutput = *(static_cast<float*>(expectedOutput0->getImpl()->rawPtr()) + i); + REQUIRE(fabs(computedOutput0[i] - targetOutput) < 1e-6); } - delete[] computedOutput; + delete[] computedOutput0; + + /// Input0(d0, 1, d2, d3) + Input1(d3) = Output(d0, 1, d2, d3) + std::shared_ptr<Tensor> expectedOutput1 = std::make_shared<Tensor>(Array4D<float,3,1,3,2> { + { // + { // + {{100, 201},{102, 203},{104, 205}} // + }, // + { // + {{106, 207},{108, 209},{110, 211}} // + }, // + { // + {{112, 213},{114, 215},{116, 217}} // + } // + } // + }); // + std::shared_ptr<Node> myAdd1 = Add(); + auto op1 = std::static_pointer_cast<OperatorTensor>(myAdd1 -> getOperator()); + op1->associateInput(0, input_0); + op1->associateInput(1, input_2); + op1->setDataType(DataType::Float32); + op1->setBackend("cuda"); + myAdd1->forward(); + float* computedOutput1 = new float[expectedOutput1->size()](); + cudaMemcpy(computedOutput1, op1->getOutput(0)->getImpl()->rawPtr(), sizeof(float) * expectedOutput1->size(), cudaMemcpyDeviceToHost); + + for(int i = 0; i < expectedOutput1->size(); i++){ + const float targetOutput = *(static_cast<float*>(expectedOutput1->getImpl()->rawPtr()) + i); + REQUIRE(fabs(computedOutput1[i] - targetOutput) < 1e-6); + } + + delete[]
computedOutput1; } SECTION("Random Input") { @@ -231,11 +202,11 @@ TEST_CASE("[gpu/operator] Add(forward)", "[Add][GPU]") { for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) { // Create Add Operator CUDA - std::shared_ptr<Node> myAddCUDA = Add(2, "myaddcuda"); + std::shared_ptr<Node> myAddCUDA = Add("myaddcuda"); auto op_cuda = std::static_pointer_cast<OperatorTensor>(myAddCUDA -> getOperator()); // Create Add Operator CPU - std::shared_ptr<Node> myAddCPU = Add(2, "myaddcpu"); + std::shared_ptr<Node> myAddCPU = Add("myaddcpu"); auto op_cpu = std::static_pointer_cast<OperatorTensor>(myAddCPU -> getOperator()); op_cpu->setDataType(DataType::Float32); op_cpu->setBackend("cpu"); @@ -360,16 +331,12 @@ TEST_CASE("[gpu/operator] Add(backward)", "[Add][GPU]") { } // }); // - std::shared_ptr<Tensor> input_2 = std::make_shared<Tensor>(Array1D<float,2> {{100,200}}); - input_0->setBackend("cuda"); input_1->setBackend("cuda"); - input_2->setBackend("cuda"); - std::shared_ptr<Node> myAdd = Add(3); + std::shared_ptr<Node> myAdd = Add(); auto op = std::static_pointer_cast<OperatorTensor>(myAdd -> getOperator()); op->associateInput(0, input_0); op->associateInput(1, input_1); - op->associateInput(2, input_2); op->setDataType(DataType::Float32); op->setBackend("cuda"); myAdd->forward(); @@ -420,14 +387,11 @@ TEST_CASE("[gpu/operator] Add(backward)", "[Add][GPU]") { } // } // }); // - std::shared_ptr<Tensor> expectedInput3Grad = std::make_shared<Tensor>(Array1D<float,2> {{729, 756}}); float *computedGrad1Cuda = new float[expectedInput1Grad->size()](); cudaMemcpy(computedGrad1Cuda, op->getInput(0)->grad()->getImpl()->rawPtr(), sizeof(float) * expectedInput1Grad->size(), cudaMemcpyDeviceToHost); float *computedGrad2Cuda = new float[expectedInput2Grad->size()](); cudaMemcpy(computedGrad2Cuda, op->getInput(1)->grad()->getImpl()->rawPtr(), sizeof(float) * expectedInput2Grad->size(), cudaMemcpyDeviceToHost); - float *computedGrad3Cuda = new float[expectedInput3Grad->size()](); - 
cudaMemcpy(computedGrad3Cuda, op->getInput(2)->grad()->getImpl()->rawPtr(), sizeof(float) * expectedInput3Grad->size(), cudaMemcpyDeviceToHost); for(int i = 0; i < expectedInput1Grad->size(); i++){ const float targetOutput = *(static_cast<float*>(expectedInput1Grad->getImpl()->rawPtr()) + i); @@ -437,12 +401,7 @@ TEST_CASE("[gpu/operator] Add(backward)", "[Add][GPU]") { const float targetOutput = *(static_cast<float*>(expectedInput2Grad->getImpl()->rawPtr()) + i); REQUIRE(fabs(computedGrad2Cuda[i] - targetOutput) < 1e-6); } - for(int i = 0; i < expectedInput3Grad->size(); i++){ - const float targetOutput = *(static_cast<float*>(expectedInput3Grad->getImpl()->rawPtr()) + i); - REQUIRE(fabs(computedGrad3Cuda[i] - targetOutput) < 1e-6); - } delete[] computedGrad1Cuda; delete[] computedGrad2Cuda; - delete[] computedGrad3Cuda; } \ No newline at end of file