Skip to content
Snippets Groups Projects
Commit 3c90c384 authored by Lucas Lopez's avatar Lucas Lopez
Browse files

QLinearConv: biasSF now correctly takes inputSF into account (biasSF = inputSF * weightSF, per the ONNX formula)

parent 3233a331
Branches dev
No related tags found
No related merge requests found
Pipeline #66102 failed
......@@ -198,8 +198,8 @@ void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool Qoperat
graphView->add(addNodeProd);
//debug code:
// Hard coded visualisation and fix of cast with incorrect type
Log::debug("one======");
// Hard coded visualisation and fix of cast with incorrect type=====
Log::debug("debug======");
for (const auto nde : quantizeMicro->getNodes()){
if(nde->type() == "QuantizeLinear"){
Log::debug("{} ==================",nde->name());
......@@ -212,10 +212,9 @@ void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool Qoperat
Log::debug("newdtype {}", std::static_pointer_cast<OperatorTensor>(nde2->getOperator())->getOutput(0)->dataType());
}
}
}
}
//end debug code
//end debug code========
std::shared_ptr<GraphView> replacedGraph = std::make_shared<GraphView>();
replacedGraph->add(node);
......@@ -238,7 +237,6 @@ void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool Qoperat
if(wholeQlinearMatches.size()<1) Log::warn("No quantized convolutions found");
Log::info("found : {} ",wholeQlinearMatches.size());
for (const auto match : wholeQlinearMatches) {
bool hasBias = false;
......@@ -267,11 +265,16 @@ void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool Qoperat
if (QoperatorFormat && hasBias){
//bias and bias scaling factor have to be modified so it corresponds to ONNX's bias scaling factor formula: biasSF = inputSF * weightSF
//TEMP: placeholder while quantizer node is not present at the input of convolution node
const std::shared_ptr<Tensor> inputSFTensor = std::make_shared<Tensor>(Array1D<double, 1> {1});
const std::shared_ptr<Tensor> weightSFTensor = std::static_pointer_cast<OperatorTensor>(node->getParent(1)->getParent(0)->getParent(1)->getOperator())->getOutput(0);
inputSFTensor->setDataType(weightSFTensor->dataType());
std::shared_ptr<Tensor> inputSFTensor;
if(node->getParent(0)->getParent(0)->type() == "QuantizeLinear"){
inputSFTensor = std::static_pointer_cast<OperatorTensor>(node->getParent(0)->getParent(0)->getParent(1)->getOperator())->getOutput(0);
}
else{
inputSFTensor = std::make_shared<Tensor>(Array1D<double, 1> {1});
inputSFTensor->setDataType(weightSFTensor->dataType());
}
const std::shared_ptr<Node> biasProd = node->getParent(2)->getParent(0)->getParent(0);
const std::shared_ptr<Node> biasSFProd = node->getParent(2)->getParent(0)->getParent(1);
......@@ -348,7 +351,7 @@ void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool Qoperat
const std::shared_ptr<Node> quantizeLinearB = qlinearMetaOp->getParent(8);
const auto quantizeNodes = std::static_pointer_cast<MetaOperator_Op>(quantizeLinearB->getOperator())->getMicroGraph()->getNodes();
//TODO verify that this change does not impact calculations
//TODO: correct overflow and differences when quantization is performed in Int32 and uint8 (may need to fold in int32 or float and skip this quantizelinear node entirely)
for (const auto node : quantizeNodes){
const std::string nodeOPtype= node->type();
if(nodeOPtype == "Cast" ){
......@@ -379,8 +382,10 @@ void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool Qoperat
graphView->addChild(dequantGraph,std::pair<NodePtr, IOIndex_t>(node, IOIndex_t(0)),std::pair<NodePtr, IOIndex_t>(dequantMetaOp, IOIndex_t(0)));
}
graphView->setBackend("cpu");//TODO get dynamically
//TODO: Bias must be always folded, it may be interesting to just fold when possible instead of giving the choice
if(foldWeights){
//Fold quantize linear of weights and bias, leaving the quantized producer
const std::set<SinglePassGraphMatching::MatchingResult> foldQuantize = SinglePassGraphMatching(graphView).match(
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment