diff --git a/unit_tests/operator/Test_MetaOperator.cpp b/unit_tests/operator/Test_MetaOperator.cpp
index 564d4a41692c033540fa74be7ddf8571f039815c..5fc68acf157400728740eb10dc6c1ae962164df7 100644
--- a/unit_tests/operator/Test_MetaOperator.cpp
+++ b/unit_tests/operator/Test_MetaOperator.cpp
@@ -191,9 +191,9 @@ TEST_CASE("[cpu/operator] MetaOperator", "[MetaOperator][CPU]") {
         std::shared_ptr<Node> myPaddedConv = PaddedConv(3, 4, {3, 3}, "myPaddedConv", {1, 1}, {1, 1, 1, 1});
     }
-
     SECTION("LSTM(forward)") {
-        auto myLSTM = LSTM(32, 64, 16, true, "ltsm");
+        auto pop = Pop();
+        auto myLSTM = LSTM(32, 64, 0, true, "ltsm");
         auto op = std::static_pointer_cast<OperatorTensor>(myLSTM->getOperator());
 
         auto microGraph = std::dynamic_pointer_cast<MetaOperator_Op>(op)->getMicroGraph();
 
@@ -204,28 +204,55 @@ TEST_CASE("[cpu/operator] MetaOperator", "[MetaOperator][CPU]") {
         REQUIRE(myLSTM->nbOutputs() == 2);
 
         std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(
-            Array1D<float, 32>{{1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}});
+            Array2D<float, 16, 32>{});
         std::shared_ptr<Tensor> myInit = std::make_shared<Tensor>(
-            Array2D<float, 1, 64>{{0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0}});
+            Array2D<float, 1, 64>{});
+        std::shared_ptr<Tensor> myInitW = std::make_shared<Tensor>(
+            Array2D<float, 64, 32>{});
+        std::shared_ptr<Tensor> myInitR = std::make_shared<Tensor>(
+            Array2D<float, 64, 64>{});
 
-        op->associateInput(0, myInput);
+        pop->addChild(myLSTM, 0, 0);
+        pop->getOperator()->associateInput(0, myInput);
         op->associateInput(17, myInit);
         op->associateInput(18, myInit);
 
-        op->computeOutputDims();
+        // Weights X
+        myLSTM->input(1).first->getOperator()->setOutput(0, myInitW);
+        myLSTM->input(2).first->getOperator()->setOutput(0, myInitW);
+        myLSTM->input(3).first->getOperator()->setOutput(0, myInitW);
+        myLSTM->input(4).first->getOperator()->setOutput(0, myInitW);
+        // Weights H
+        myLSTM->input(5).first->getOperator()->setOutput(0, myInitR);
+        myLSTM->input(6).first->getOperator()->setOutput(0, myInitR);
+        myLSTM->input(7).first->getOperator()->setOutput(0, myInitR);
+        myLSTM->input(8).first->getOperator()->setOutput(0, myInitR);
+
+        auto g = getConnectedGraphView(myLSTM);
+        g->setDataType(DataType::Float32);
+        g->setBackend("cpu");
+
+        auto scheduler = SequentialScheduler(g);
+        scheduler.forward(true, true);
+
+        g->save("lstm_outside_dims", true, true);
+        microGraph->save("lstm_dims", true, true);
         REQUIRE(op->outputDimsForwarded());
 
-        op->setDataType(DataType::Float32);
-        op->setBackend("cpu");
-
-        op->forward();
 
         auto microGraphScheduler = std::dynamic_pointer_cast<MetaOperator_Op>(op)->getMicroGraphScheduler();
         microGraphScheduler->saveSchedulingDiagram("lstm_scheduling");
-    }
+        REQUIRE(op->getNbConsumedData(0) == 512);
+        REQUIRE(op->getNbConsumedData(1) == 32768);
+        REQUIRE(op->getNbProducedData(0) == 1088);
+        REQUIRE(op->getNbProducedData(1) == 1088);
+        REQUIRE(microGraphScheduler->getStaticScheduling(0).size() == 26);
+        REQUIRE(microGraphScheduler->getStaticScheduling(1).size() == 24);
+        REQUIRE(microGraphScheduler->getStaticScheduling(15).size() == 24);
+    }
 
     SECTION("LSTM(forward_values)") {
-        auto myLSTM = LSTM(2, 3, 1, true, "ltsm");
+        auto myLSTM = LSTM(2, 3, 0, true, "ltsm");
         auto op = std::static_pointer_cast<OperatorTensor>(myLSTM->getOperator());
         auto microGraph = std::dynamic_pointer_cast<MetaOperator_Op>(op)->getMicroGraph();
 
@@ -248,30 +275,32 @@ TEST_CASE("[cpu/operator] MetaOperator", "[MetaOperator][CPU]") {
         op->associateInput(17, myInit);
         op->associateInput(18, myInit);
 
-        op->computeOutputDims();
-        REQUIRE(op->outputDimsForwarded());
-        microGraph->save("lstm_values_dims", false, true);
-
         // Weights X
-        op->associateInput(1, myInitW);
-        op->associateInput(2, myInitW);
-        op->associateInput(3, myInitW);
-        op->associateInput(4, myInitW);
+        myLSTM->input(1).first->getOperator()->setOutput(0, myInitW);
+        myLSTM->input(2).first->getOperator()->setOutput(0, myInitW);
+        myLSTM->input(3).first->getOperator()->setOutput(0, myInitW);
+        myLSTM->input(4).first->getOperator()->setOutput(0, myInitW);
         // Weights H
-        op->associateInput(5, myInitR);
-        op->associateInput(6, myInitR);
-        op->associateInput(7, myInitR);
-        op->associateInput(8, myInitR);
+        myLSTM->input(5).first->getOperator()->setOutput(0, myInitR);
+        myLSTM->input(6).first->getOperator()->setOutput(0, myInitR);
+        myLSTM->input(7).first->getOperator()->setOutput(0, myInitR);
+        myLSTM->input(8).first->getOperator()->setOutput(0, myInitR);
 
-        op->setDataType(DataType::Float32);
-        op->setBackend("cpu");
+        auto g = getConnectedGraphView(myLSTM);
+        g->setDataType(DataType::Float32);
+        g->setBackend("cpu");
+
+        auto scheduler = SequentialScheduler(g);
+        scheduler.forward();
+
+        microGraph->save("lstm_values_dims", false, true);
 
         std::shared_ptr<Tensor> myHiddenState = std::make_shared<Tensor>(
                 Array2D<float, 3, 3>{{{0.0952412, 0.0952412, 0.0952412},
                                       {0.25606447, 0.25606447, 0.25606447},
                                       {0.40323776, 0.40323776, 0.40323776}}});
 
-        op->forward();
+
         auto microGraphScheduler = std::dynamic_pointer_cast<MetaOperator_Op>(op)->getMicroGraphScheduler();
         microGraphScheduler->saveSchedulingDiagram("lstm_values_scheduling");
@@ -280,23 +309,73 @@ TEST_CASE("[cpu/operator] MetaOperator", "[MetaOperator][CPU]") {
 
         REQUIRE(approxEq<float>(*(op->getOutput(1)), *myHiddenState));
     }
-
     SECTION("LSTM(forward_values_seq)") {
+        auto pop = Pop();
+        auto myLSTM = LSTM(2, 3, 2, true, "ltsm");
+        auto myGraph = Sequential({pop, myLSTM});
+        auto op = std::static_pointer_cast<OperatorTensor>(myLSTM->getOperator());
+
+        REQUIRE(myLSTM->nbInputs() == 3 + 8 + 8);
+        REQUIRE(myLSTM->nbData() == 1);
+        REQUIRE(myLSTM->nbOutputs() == 2);
+
+        std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(
+            Array3D<float, 2, 3, 2>{{{{1.0, 2.0}, {3.0, 4.0}, {5.0, 6.0}}, {{2.0, 3.0}, {4.0, 5.0}, {6.0, 7.0}}}});
+        std::shared_ptr<Tensor> myInit = std::make_shared<Tensor>(
+            Array2D<float, 3, 3>{{{0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}}});
+        std::shared_ptr<Tensor> myInitW = std::make_shared<Tensor>(
+            Array2D<float, 3, 2>{{{0.1, 0.1}, {0.1, 0.1}, {0.1, 0.1}}});
+        std::shared_ptr<Tensor> myInitR = std::make_shared<Tensor>(
+            Array2D<float, 3, 3>{{{0.1, 0.1, 0.1}, {0.1, 0.1, 0.1}, {0.1, 0.1, 0.1}}});
+
+        pop->getOperator()->associateInput(0, myInput);
+        op->associateInput(17, myInit);
+        op->associateInput(18, myInit);
+
+        // Weights X
+        myLSTM->input(1).first->getOperator()->setOutput(0, myInitW);
+        myLSTM->input(2).first->getOperator()->setOutput(0, myInitW);
+        myLSTM->input(3).first->getOperator()->setOutput(0, myInitW);
+        myLSTM->input(4).first->getOperator()->setOutput(0, myInitW);
+        // Weights H
+        myLSTM->input(5).first->getOperator()->setOutput(0, myInitR);
+        myLSTM->input(6).first->getOperator()->setOutput(0, myInitR);
+        myLSTM->input(7).first->getOperator()->setOutput(0, myInitR);
+        myLSTM->input(8).first->getOperator()->setOutput(0, myInitR);
+
+        auto g = getConnectedGraphView(myLSTM);
+        g->setDataType(DataType::Float32);
+        g->setBackend("cpu");
+
+        g->save("lstm_seq", true, true);
+
+        auto scheduler = SequentialScheduler(g);
+        scheduler.forward(true, true);
+        scheduler.saveSchedulingDiagram("lstm_seq_schedule");
+
+        std::shared_ptr<Tensor> myHiddenState = std::make_shared<Tensor>(
+                Array2D<float, 3, 3>{{{0.24439372, 0.24439372, 0.24439372},
+                                      {0.49801484, 0.49801484, 0.49801484},
+                                      {0.67162132, 0.67162132, 0.67162132}}});
+
+        myGraph->save("lstm_seq_mygraph", true, true);
+
+        op->getOutput(1)->print();
+        myHiddenState->print();
+
+        REQUIRE(approxEq<float>(*(op->getOutput(1)), *myHiddenState));
+    }
+    SECTION("LSTM(forward_values_seq_flatten)") {
         auto pop = Pop();
         auto myLSTM = LSTM(2, 3, 2, true, "ltsm");
         auto op = std::static_pointer_cast<MetaOperator_Op>(myLSTM->getOperator());
 
-        // NOTE: LSTM really need to be flatten in the graph before execution.
-        // Here, we actually don't use the meta-op as a closed black-box,
-        // because its scheduling cannot be run independently of the input.
-        // Since we use the Pop operator to generate sequential inputs, running
-        // the meta-op internal scheduler would not work because it would not
-        // update its input!
+        // Here we test the LSTM as if it was flattened in the graph.
        // We just borrow its micro-graph into our larger myGraph graph.
         auto myGraph = std::make_shared<GraphView>();
-        myGraph->add(pop);
-        myGraph->add(op->getMicroGraph());
         pop->addChild(op->getMicroGraph()->getOrderedInputs()[0].first, 0, 0);
+        myGraph->add(op->getMicroGraph());
+        myGraph->add(pop);
 
         REQUIRE(myLSTM->nbInputs() == 3 + 8 + 8);
         REQUIRE(myLSTM->nbData() == 1);
@@ -316,18 +395,22 @@ TEST_CASE("[cpu/operator] MetaOperator", "[MetaOperator][CPU]") {
         op->associateInput(18, myInit);
 
         // Weights X
-        op->associateInput(1, myInitW);
-        op->associateInput(2, myInitW);
-        op->associateInput(3, myInitW);
-        op->associateInput(4, myInitW);
+        auto prodX = Producer(myInitW);
+        prodX->addChild(op->getMicroGraph()->getOrderedInputs()[1].first, 0, 1);
+        prodX->addChild(op->getMicroGraph()->getOrderedInputs()[2].first, 0, 1);
+        prodX->addChild(op->getMicroGraph()->getOrderedInputs()[3].first, 0, 1);
+        prodX->addChild(op->getMicroGraph()->getOrderedInputs()[4].first, 0, 1);
         // Weights H
-        op->associateInput(5, myInitR);
-        op->associateInput(6, myInitR);
-        op->associateInput(7, myInitR);
-        op->associateInput(8, myInitR);
+        auto prodH = Producer(myInitR);
+        prodH->addChild(op->getMicroGraph()->getOrderedInputs()[5].first, 0, 1);
+        prodH->addChild(op->getMicroGraph()->getOrderedInputs()[6].first, 0, 1);
+        prodH->addChild(op->getMicroGraph()->getOrderedInputs()[7].first, 0, 1);
+        prodH->addChild(op->getMicroGraph()->getOrderedInputs()[8].first, 0, 1);
 
+        myGraph->add({prodX, prodH});
         myGraph->setDataType(DataType::Float32);
         myGraph->setBackend("cpu");
+        myGraph->save("lstm_seq_flatten", true, true);
 
         std::shared_ptr<Tensor> myHiddenState = std::make_shared<Tensor>(
                 Array2D<float, 3, 3>{{{0.24439372, 0.24439372, 0.24439372},
@@ -335,10 +418,8 @@ TEST_CASE("[cpu/operator] MetaOperator", "[MetaOperator][CPU]") {
                                       {0.67162132, 0.67162132, 0.67162132}}});
 
         auto scheduler = SequentialScheduler(myGraph);
-        scheduler.forward();
-        scheduler.saveSchedulingDiagram("lstm_seq_schedule");
-
-        myGraph->save("lstm_seq", true, true);
+        scheduler.forward(true, true);
+        scheduler.saveSchedulingDiagram("lstm_seq_flatten_schedule");
 
         op->getOutput(1)->print();
         myHiddenState->print();
diff --git a/unit_tests/operator/Test_PaddedConv.cpp b/unit_tests/operator/Test_PaddedConv.cpp
index 3baf0a7aa0f366a8f0dd4e3e9df6700a5cdb0cea..03a592e52b7d057065353a7d99c088d9831c67c7 100644
--- a/unit_tests/operator/Test_PaddedConv.cpp
+++ b/unit_tests/operator/Test_PaddedConv.cpp
@@ -150,12 +150,15 @@ TEST_CASE("[cpu/operator] PaddedConv(forward)", "[PaddedConv][CPU]") {
         });
 
         myConv->getOperator()->associateInput(0,myInput);
-        myConv->getOperator()->associateInput(1,myWeights);
-        myConv->getOperator()->associateInput(2,myBias);
-        myConv->getOperator()->setDataType(DataType::Int32);
-        myConv->getOperator()->setBackend("cpu");
-        op->computeOutputDims();
-        myConv->forward();
+        myConv->input(1).first->getOperator()->setOutput(0, myWeights);
+        myConv->input(2).first->getOperator()->setOutput(0, myBias);
+
+        auto g = getConnectedGraphView(myConv);
+        g->setDataType(DataType::Int32);
+        g->setBackend("cpu");
+
+        auto scheduler = SequentialScheduler(g);
+        scheduler.forward();
 
         REQUIRE(*(op->getOutput(0)) == *myOutput);
     }
@@ -309,12 +312,15 @@ TEST_CASE("[cpu/operator] PaddedConv(forward)", "[PaddedConv][CPU]") {
         });
 
         myConv->getOperator()->associateInput(0,myInput);
-        myConv->getOperator()->associateInput(1,myWeights);
-        myConv->getOperator()->associateInput(2,myBias);
-        myConv->getOperator()->setDataType(DataType::Int32);
-        myConv->getOperator()->setBackend("cpu");
-        op->computeOutputDims();
-        myConv->forward();
+        myConv->input(1).first->getOperator()->setOutput(0, myWeights);
+        myConv->input(2).first->getOperator()->setOutput(0, myBias);
+
+        auto g = getConnectedGraphView(myConv);
+        g->setDataType(DataType::Int32);
+        g->setBackend("cpu");
+
+        auto scheduler = SequentialScheduler(g);
+        scheduler.forward();
 
         REQUIRE(*(op->getOutput(0)) == *myOutput);
     }
diff --git a/unit_tests/recipies/Test_FuseBatchNorm.cpp b/unit_tests/recipies/Test_FuseBatchNorm.cpp
index c4b3bf18a5f5b68d0e41b9cd40966790a0cf7ff6..5386a23177d589640849bf864985b463645622db 100644
--- a/unit_tests/recipies/Test_FuseBatchNorm.cpp
+++ b/unit_tests/recipies/Test_FuseBatchNorm.cpp
@@ -86,14 +86,11 @@ TEST_CASE("[core/recipies] FuseBatchNorm", "[recipies][FuseBatchNorm]") {
     myBNOp -> setInput(4, std::make_shared<Tensor>(Array1D<float,3> {{0.4470, 0.3064, 0.7061}}));
 
     auto g1 = Sequential({
+        myProd,
         myConv,
         myBN
     });
     g1 -> setName("fuseBNGraph");
-    myProd -> addChild(myConv); // set graph input
-
-    myProdOp -> setDataType(DataType::Float32);
-    myProdOp -> setBackend("cpu");
     g1 -> compile("cpu", DataType::Float32);
 
     auto s = SequentialScheduler(g1);
@@ -107,7 +104,7 @@ TEST_CASE("[core/recipies] FuseBatchNorm", "[recipies][FuseBatchNorm]") {
     std::shared_ptr<Tensor> res2 = std::make_shared<Tensor>(*(myConvOp -> getOutput(0)));
 
     REQUIRE(g1 -> outputNodes().size() == 1);
-    REQUIRE(g1 -> inputNodes().size() == 1);
+    REQUIRE(g1 -> inputNodes().size() == 0);
     bool eq = true;
     for (std::size_t i = 0; i < res1->size(); ++i) {
         eq &= std::abs(res1->get<float>(i) - res2->get<float>(i)) < 1.0e-06;
diff --git a/unit_tests/scheduler/Test_Scheduler.cpp b/unit_tests/scheduler/Test_Scheduler.cpp
index 2440fa8264aecdbc718f24a44fb6af782cdfa053..8779933fc7fc7c07305f1018f9469895026f05e4 100644
--- a/unit_tests/scheduler/Test_Scheduler.cpp
+++ b/unit_tests/scheduler/Test_Scheduler.cpp
@@ -218,14 +218,15 @@ TEST_CASE("[cpu/scheduler] SequentialScheduler(forward)") {
         auto add2 = Add(2, "add2");
         auto bias = Producer(biasTensor, "bias");
         auto init = Producer(initTensor, "init");
+        auto input = Producer(in, "input");
 
         std::shared_ptr<GraphView> g = Sequential({add1, mem, add2});
         init->addChild(mem, 0, 1);
         mem->addChild(add1, 1, 1);
         bias->addChild(add2, 0, 1);
-        add1->getOperator()->setInput(0, in);
+        input->addChild(add1, 0, 0);
         // Update GraphView inputs/outputs following previous connections:
-        g->add({mem, add1, add2, init, bias});
+        g->add({mem, add1, add2, init, bias, input});
         g->setBackend("cpu");
         g->setDataType(Aidge::DataType::Int32);