diff --git a/src/recipes/ONNXRecipes.cpp b/src/recipes/ONNXRecipes.cpp
index 0fdaa166c6c2499bc69186e3bc36cad969ee75e6..87b9d2c900f8c5718020cc916d5bf1c35f967d31 100644
--- a/src/recipes/ONNXRecipes.cpp
+++ b/src/recipes/ONNXRecipes.cpp
@@ -198,8 +198,8 @@ void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool Qoperat
         graphView->add(addNodeProd);

         //debug code:
-        // Hard coded visualisation and fix of cast with incorrect type
-        Log::debug("one======");
+        // Hard coded visualisation and fix of cast with incorrect type=====
+        Log::debug("debug======");
         for (const auto nde : quantizeMicro->getNodes()){
             if(nde->type() == "QuantizeLinear"){
                 Log::debug("{} ==================",nde->name());
@@ -212,10 +212,9 @@ void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool Qoperat
                     Log::debug("newdtype {}", std::static_pointer_cast<OperatorTensor>(nde2->getOperator())->getOutput(0)->dataType());
                 }
             }
-
         }
-        //end debug code
+        //end debug code========

         std::shared_ptr<GraphView> replacedGraph = std::make_shared<GraphView>();
         replacedGraph->add(node);
@@ -238,7 +237,6 @@ void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool Qoperat

     if(wholeQlinearMatches.size()<1) Log::warn("No quantized convolutions found");
-    Log::info("found : {} ",wholeQlinearMatches.size());

     for (const auto match : wholeQlinearMatches) {
         bool hasBias = false;
@@ -267,11 +265,16 @@ void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool Qoperat
         if (QoperatorFormat && hasBias){
             //bias and bias scaling factor have to be modified so it corresponds to ONNX's bias scaling factor formula: biasSF = inputSF * weightSF
-            //TEMP: placeholder while quantizer node is not present at the input of convolution node
-            const std::shared_ptr<Tensor> inputSFTensor = std::make_shared<Tensor>(Array1D<double, 1> {1});
-            const std::shared_ptr<Tensor> weightSFTensor = std::static_pointer_cast<OperatorTensor>(node->getParent(1)->getParent(0)->getParent(1)->getOperator())->getOutput(0);
-            inputSFTensor->setDataType(weightSFTensor->dataType());
+            // use the scaling factor of the input QuantizeLinear when it is present, otherwise fall back to a neutral factor of 1
+            std::shared_ptr<Tensor> inputSFTensor;
+            if(node->getParent(0)->getParent(0)->type() == "QuantizeLinear"){
+                inputSFTensor = std::static_pointer_cast<OperatorTensor>(node->getParent(0)->getParent(0)->getParent(1)->getOperator())->getOutput(0);
+            }
+            else{
+                inputSFTensor = std::make_shared<Tensor>(Array1D<double, 1> {1});
+                inputSFTensor->setDataType(std::static_pointer_cast<OperatorTensor>(node->getParent(1)->getParent(0)->getParent(1)->getOperator())->getOutput(0)->dataType());
+            }

             const std::shared_ptr<Node> biasProd = node->getParent(2)->getParent(0)->getParent(0);
             const std::shared_ptr<Node> biasSFProd = node->getParent(2)->getParent(0)->getParent(1);
@@ -348,7 +351,7 @@ void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool Qoperat
             const std::shared_ptr<Node> quantizeLinearB = qlinearMetaOp->getParent(8);
             const auto quantizeNodes = std::static_pointer_cast<MetaOperator_Op>(quantizeLinearB->getOperator())->getMicroGraph()->getNodes();

-            //TODO verify that this change does not impact calculations
+            //TODO: correct overflow and differences when quantization is performed in Int32 and uint8 (may need to fold in int32 or float and skip this quantizelinear node entirely)
             for (const auto node : quantizeNodes){
                 const std::string nodeOPtype= node->type();
                 if(nodeOPtype == "Cast" ){
@@ -379,8 +382,10 @@ void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool Qoperat
             graphView->addChild(dequantGraph,std::pair<NodePtr, IOIndex_t>(node, IOIndex_t(0)),std::pair<NodePtr, IOIndex_t>(dequantMetaOp, IOIndex_t(0)));
         }
+
     graphView->setBackend("cpu");//TODO get dynamically
+
+    //TODO: bias must always be folded; it may be better to fold whenever possible instead of exposing the choice
     if(foldWeights){
         //Fold quantize linear of weights and bias, leaving the quantized producer
         const std::set<SinglePassGraphMatching::MatchingResult> foldQuantize = SinglePassGraphMatching(graphView).match(
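
Note on the bias rescaling handled in the QoperatorFormat branch above: ONNX's QLinearConv convention stores the bias as int32, quantized with a scale equal to the product of the input and weight scales (biasSF = inputSF * weightSF) and a zero-point of 0. The standalone C++ sketch below only illustrates that arithmetic; it does not use the Aidge API, and quantizeBias/inputSF/weightSF are illustrative names rather than identifiers from this file.

// Minimal sketch of the ONNX bias quantization formula biasSF = inputSF * weightSF.
#include <cmath>
#include <cstdint>
#include <iostream>
#include <vector>

std::vector<int32_t> quantizeBias(const std::vector<float>& bias,
                                  float inputSF, float weightSF) {
    const float biasSF = inputSF * weightSF;  // ONNX convention for QLinearConv bias scale
    std::vector<int32_t> quantized;
    quantized.reserve(bias.size());
    for (float b : bias)
        quantized.push_back(static_cast<int32_t>(std::lround(b / biasSF)));  // zero-point is 0
    return quantized;
}

int main() {
    // Example: inputSF = 0.02, weightSF = 0.005 -> biasSF = 1e-4,
    // so a float bias of 0.37 maps to round(0.37 / 1e-4) = 3700.
    for (int32_t q : quantizeBias({0.37f, -0.12f}, 0.02f, 0.005f))
        std::cout << q << "\n";  // prints 3700 and -1200
    return 0;
}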