From 3c90c3841fdd9dec94120a24b8d02490386f06a2 Mon Sep 17 00:00:00 2001
From: LOPEZ MAPE Lucas <lucas.lopezmape@cea.fr>
Date: Fri, 21 Feb 2025 15:27:26 +0000
Subject: [PATCH] qlinearconv: biasSF now correctly takes inputSF into account

---
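Notes (not part of the commit message):

Until now the export hard-coded inputSF to 1 when rescaling the bias,
which only matches ONNX's QLinearConv convention when no QuantizeLinear
node precedes the convolution input. This patch reads inputSF from that
QuantizeLinear node whenever one is present, so that the bias scaling
factor follows the ONNX formula biasSF = inputSF * weightSF.

For reference, a minimal standalone sketch of the intended arithmetic
(not code from this repository; it assumes the ONNX scale convention
real = scale * quantized, and the function name is illustrative):

    #include <cmath>
    #include <cstddef>
    #include <cstdint>
    #include <vector>

    // ONNX QLinearConv expects the int32 bias to be quantized with a
    // scale fixed to inputSF * weightSF and a zero point of 0.
    std::vector<int32_t> quantizeBias(const std::vector<double>& bias,
                                      double inputSF, double weightSF) {
        const double biasSF = inputSF * weightSF;
        std::vector<int32_t> quantized(bias.size());
        for (std::size_t i = 0; i < bias.size(); ++i)
            quantized[i] = static_cast<int32_t>(std::lround(bias[i] / biasSF));
        return quantized;
    }

With the previous placeholder (inputSF == 1), biasSF degenerated to
weightSF alone, so the exported bias was off by a factor of inputSF
whenever the convolution input was itself quantized.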
 src/recipes/ONNXRecipes.cpp | 26 ++++++++++++++++----------
 1 file changed, 16 insertions(+), 10 deletions(-)

diff --git a/src/recipes/ONNXRecipes.cpp b/src/recipes/ONNXRecipes.cpp
index 0fdaa16..87b9d2c 100644
--- a/src/recipes/ONNXRecipes.cpp
+++ b/src/recipes/ONNXRecipes.cpp
@@ -198,8 +198,8 @@ void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool Qoperat
             graphView->add(addNodeProd);
 
             //debug code:
-            // Hard coded visualisation and fix of cast with incorrect type
-            Log::debug("one======");
+            // Hard-coded visualisation and fix of Cast nodes with an incorrect type =====
+            Log::debug("debug======");
             for (const auto nde : quantizeMicro->getNodes()){
                 if(nde->type() == "QuantizeLinear"){
                     Log::debug("{} ==================",nde->name());
@@ -212,10 +212,9 @@ void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool Qoperat
                             Log::debug("newdtype {}", std::static_pointer_cast<OperatorTensor>(nde2->getOperator())->getOutput(0)->dataType());
                         }
                     }
-                    
                 }
             }
-            //end debug code
+            //end debug code========
             
             std::shared_ptr<GraphView> replacedGraph = std::make_shared<GraphView>();
             replacedGraph->add(node);
@@ -238,7 +237,6 @@ void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool Qoperat
 
     if(wholeQlinearMatches.size()<1) Log::warn("No quantized convolutions found");
 
-    Log::info("found : {} ",wholeQlinearMatches.size());
     for (const auto match : wholeQlinearMatches) {
         bool hasBias = false;
 
@@ -267,11 +265,17 @@ void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool Qoperat
                 if (QoperatorFormat && hasBias){
                     //bias and bias scaling factor have to be modified so it corresponds to ONNX's bias scaling factor formula: biasSF = inputSF * weightSF
 
-                    //TEMP: placeholder while quantizer node is not present at the input of convolution node
-                    const std::shared_ptr<Tensor> inputSFTensor = std::make_shared<Tensor>(Array1D<double, 1> {1});
-
                     const std::shared_ptr<Tensor> weightSFTensor = std::static_pointer_cast<OperatorTensor>(node->getParent(1)->getParent(0)->getParent(1)->getOperator())->getOutput(0);
-                    inputSFTensor->setDataType(weightSFTensor->dataType());
+
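+                    // Take inputSF from the QuantizeLinear node feeding the convolution input when one is present; otherwise fall back to a neutral scaling factor of 1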
+                    std::shared_ptr<Tensor> inputSFTensor;
+                    if(node->getParent(0)->getParent(0)->type() == "QuantizeLinear"){
+                        inputSFTensor = std::static_pointer_cast<OperatorTensor>(node->getParent(0)->getParent(0)->getParent(1)->getOperator())->getOutput(0);
+                    }
+                    else{
+                        inputSFTensor = std::make_shared<Tensor>(Array1D<double, 1> {1});
+                        inputSFTensor->setDataType(weightSFTensor->dataType());
+                    }
 
                     const std::shared_ptr<Node> biasProd = node->getParent(2)->getParent(0)->getParent(0);
                     const std::shared_ptr<Node> biasSFProd = node->getParent(2)->getParent(0)->getParent(1);
@@ -348,7 +351,7 @@ void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool Qoperat
                 const std::shared_ptr<Node> quantizeLinearB = qlinearMetaOp->getParent(8);
                 const auto quantizeNodes = std::static_pointer_cast<MetaOperator_Op>(quantizeLinearB->getOperator())->getMicroGraph()->getNodes();
 
-                //TODO verify that this change does not impact calculations
+                //TODO: fix overflows and result differences when quantization is performed in int32 and uint8 (may need to fold in int32 or float and skip this QuantizeLinear node entirely)
                 for (const auto node : quantizeNodes){
                     const std::string nodeOPtype= node->type();
                     if(nodeOPtype == "Cast" ){
@@ -379,8 +382,10 @@ void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool Qoperat
 
         graphView->addChild(dequantGraph,std::pair<NodePtr, IOIndex_t>(node, IOIndex_t(0)),std::pair<NodePtr, IOIndex_t>(dequantMetaOp, IOIndex_t(0)));
     }
+
     graphView->setBackend("cpu");//TODO get dynamically
 
+    //TODO: the bias must always be folded; it may be preferable to fold whenever possible instead of leaving the choice to the caller
     if(foldWeights){
         //Fold quantize linear of weights and bias, leaving the quantized producer
         const std::set<SinglePassGraphMatching::MatchingResult> foldQuantize = SinglePassGraphMatching(graphView).match(
-- 
GitLab