ptq rework (single-shift related functions)

dcb0db5e · Benjamin Halimi · f8b03835 · dcb0db5e · dcb0db5e
Commit dcb0db5e authored 1 month ago by Benjamin Halimi
--- a/src/PTQ/PTQ.cpp
+++ b/src/PTQ/PTQ.cpp
@@ -398,17 +398,12 @@ static std::shared_ptr<Node> getPreviousScalingNode(std::shared_ptr<Node> node)
    return currNode;
 }

-void insertScalingBelowProducer(std::shared_ptr<Node> producerNode, double ratio, std::shared_ptr<GraphView> graphView)
-{
-    Log::warn(" INSERT SCALING : DUMMY ! ");
-}
-
 void insertScalingBelowProducer(std::shared_ptr<Node> producerNode, std::shared_ptr<GraphView> graphView)
 {
    std::string scalingNodeName = makeUniqueName(producerNode->name() + "_ProducerScaling", graphView);
    std::shared_ptr<Node> scalingNode = BaseQuantizer(1.0, scalingNodeName);;
    addAttr(scalingNode, "isProducerScaling");
-    // XXX XXX XXX addAttr(scalingNode, "isScaling");
+    // XXX XXX XXX addAttr(scalingNode, "isScaling") ? NO !!!

    scalingNode->getOperator()->setDataType(DataType::Float64); // getDataType(parentNode)
    scalingNode->getOperator()->setBackend(determineBackend(producerNode)); // XXX use the producer parent instead ???
@@ -1095,7 +1090,7 @@ void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_
                    rescaling /= inputIsUnsigned  ? unsignedMax : signedMax;
                    rescaling *= outputIsUnsigned ? unsignedMax : signedMax;

-                    // XXX XXX XXX
+                    // XXX OK
                    //double currScalingFactor = getScalingFactor(quantizerNode);
                    //updateScalingFactor(quantizerNode, currScalingFactor * rescaling);
                    multiplyScalingFactor(node, rescaling);
@@ -1111,7 +1106,7 @@ void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_
    }
 }

-static void insertCompensationNodes(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits)
+static void insertCompensationNodes(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits, bool noQuant)
 {
    // XXX Use the signMap to increase the resolution when possible ...
    double signedMax = (1 << (nbBits - 1)) - 1;    
@@ -1122,7 +1117,7 @@ static void insertCompensationNodes(std::shared_ptr<GraphView> graphView, std::u
    {
        // The appropriate strategy is to check if the Quantizer is not   
        // preceded by an Weighted node (that is not forking), and insert  
-        // a coeff node (Compensation) if so ...
+        // a mul node (Compensation) before it if so ...
 
        if (node->type() == "Quantizer")
        {
@@ -1159,14 +1154,25 @@ static void insertCompensationNodes(std::shared_ptr<GraphView> graphView, std::u

                // Adapt the scaling factor value accordingly

-                double currScalingFactor = getScalingFactor(node); 
-                updateScalingFactor(node, currScalingFactor / signedMax); 
+                multiplyScalingFactor(node, 1.0 / signedMax); // XXX XXX XXX OK
+
+                // Insert a Quantizer for the coeffProducer that will handle  
+                // the single-shift approximation via its scalingFactor ...
+
+                insertScalingBelowProducer(coeffProducer, graphView);
+                
+                if (!noQuant) 
+                {
+                    // XXX XXX XXX double check this ...
+                    std::shared_ptr<Node> coeffQuantizer = mulNode->getParent(1);
+                    appendRoundClip(coeffQuantizer, -(signedMax + 1), signedMax);
+                }
            }
        }
    }
 }

-void performSingleShiftApproximation(std::shared_ptr<GraphView> graphView, bool noQuant)
+static void performSingleShiftApproximation(std::shared_ptr<GraphView> graphView)
 {
    std::vector<std::shared_ptr<Node>> nodeVector = retrieveNodeVector(graphView);

@@ -1177,28 +1183,22 @@ void performSingleShiftApproximation(std::shared_ptr<GraphView> graphView, bool
            std::shared_ptr<Node> linearNode = node->getParent(0);

            double base = getScalingFactor(node);
-
            double approx = std::pow(2, std::ceil(std::log2(base)));
+            double ratio = approx / base;

-            updateScalingFactor(node, approx);
+            // set the scaling factor value to the approximation ...

-            double ratio = base / approx;
+            multiplyScalingFactor(node, ratio);

-            insertScalingBelowProducer(linearNode->getParent(1), ratio, graphView);
-            if (!noQuant)
-                insertRoundBelowProducer(linearNode->getParent(1), graphView);
+            // compensate the ratio using the previous node weights ...

+            multiplyScalingFactor(linearNode->getParent(1), 1.0 / ratio);
            if (nodeHasBias(linearNode))
-            {
-                insertScalingBelowProducer(linearNode->getParent(2), ratio, graphView);
-                if (!noQuant)
-                    insertRoundBelowProducer(linearNode->getParent(2), graphView);
-            }
+                multiplyScalingFactor(linearNode->getParent(2), 1.0 / ratio);
        }
    }
 }

-
 static void printScalingFactors(std::shared_ptr<GraphView> graphView)
 {
    for (auto node : retrieveNodeVector(graphView))
@@ -1263,10 +1263,10 @@ void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits,
    if (singleShift)
    {
        Log::notice( " Inserting the compensation nodes ...");
-        insertCompensationNodes(graphView, nbBits);
+        insertCompensationNodes(graphView, nbBits, noQuant);

        Log::notice(" Performing the Single-Shift approximation ...");
-        performSingleShiftApproximation(graphView, noQuant);
+        performSingleShiftApproximation(graphView);
    }

    if (verbose)

--- a/src/operator/PTQMetaOps.cpp
+++ b/src/operator/PTQMetaOps.cpp
@@ -43,6 +43,12 @@ std::shared_ptr<Node> BaseQuantizer(double scalingFactor, const std::string& nam
    std::shared_ptr<Node> scalingFactorProducer = addProducer<1>(mulNode, 1, {1}, "ScalingFactor");
    scalingFactorProducer->getOperator()->setOutput(0, scalingFactorTensor);

+    // TODO : the above should be replaced by :
+/*
+    std::shared_ptr<Node> scalingFactorProducer = Producer(scalingFactorTensor);
+    scalingFactorProducer->addChild(mulNode, 0, 1);
+*/
+
    std::shared_ptr<GraphView> graphView = Sequential({mulNode});
    graphView->add(scalingFactorProducer);

@@ -84,55 +90,8 @@ void multiplyScalingFactor(std::shared_ptr<Aidge::Node> scalingNode, double coef

    auto producer = mulNode->getParent(1);
    producer->getOperator()->setOutput(0, newScalingFactorTensor);
-    // XXX old way : mulNode->input(1).first->getOperator()->setOutput(0, resultTensor);
-}
-
-/*
-void appendRoundClip(std::shared_ptr<Node> metaOpNode, double clipMin, double clipMax)
-{
-    // Create the new nodes
-
-    std::string name = metaOpNode->name();
-
-    std::shared_ptr<Node> roundNode = Round(name + "_RoundQuant");
-    std::shared_ptr<Node> clipNode = Clip(name + "_ClipQuant", clipMin, clipMax);
-
-    // Retreive the previous microGraph
-
-    auto metaOperatorOp = std::static_pointer_cast<MetaOperator_Op> (metaOpNode->getOperator());
-    auto microGraph = metaOperatorOp->getMicroGraph();
-
-    // Get the Mul node from the microGraph
-
-    std::shared_ptr<Node> mulNode = nullptr;
-    for (auto node : microGraph->getNodes())
-        if (node->type() == "Mul")
-            mulNode = node;
-
-    // Save the backend and datatype
-
-    auto mulOp = std::static_pointer_cast<OperatorTensor> (mulNode->getOperator()); 
-    auto backend = mulOp->getInput(0)->backend();
-    auto dataType = mulOp->getInput(0)->dataType();
-
-    // Create the new microGraph
-
-    std::shared_ptr<GraphView> prevGraphView = Sequential({mulNode});
-    prevGraphView->add(mulNode->getParent(1)); // add the producer
-
-    auto prevGraphViewClone = prevGraphView->clone();
-    std::shared_ptr<GraphView> newGraphView = Sequential({prevGraphViewClone, roundNode, clipNode});
-    
-    // Replace the old microGraph
-
-    microGraph->replace(prevGraphView, newGraphView);
-
-    // Set the backend and datatype
-
-    microGraph->setBackend(backend);
-    microGraph->setDataType(dataType);
+    // XXX prev way : mulNode->input(1).first->getOperator()->setOutput(0, resultTensor);
 }
-*/

 void appendRoundClip(std::shared_ptr<Node> metaOpNode, double clipMin, double clipMax)
 {
@@ -203,32 +162,41 @@ void appendRoundClip(std::shared_ptr<Node> metaOpNode, double clipMin, double cl
 }


-std::shared_ptr<Node> Quantizer(double scalingFactor, double clipMin, double clipMax, const std::string& name)
+void updateScalingFactor(std::shared_ptr<Node> metaOpNode, double scalingFactor)
+{
+    // TODO : implement or remove the function ...
+
+    Log::error(" updateScalingFactor() : not yet implemented ... ");
+}
+
+double getScalingFactor(std::shared_ptr<Node> quantizerNode)
 {
-    // create the nodes
+    // Retrieve the previous microGraph

-    std::shared_ptr<Node> mulNode =  Mul((!name.empty()) ? name + "_MulQuant" : "");
-    std::shared_ptr<Node> roundNode = Round((!name.empty()) ? name + "_RoundQuant" : "");
-    std::shared_ptr<Node> clipNode = Clip((!name.empty()) ? name + "_ClipQuant" : "", clipMin, clipMax);
+    auto quantizerOp = std::static_pointer_cast<MetaOperator_Op> (quantizerNode->getOperator());
+    auto microGraph = quantizerOp->getMicroGraph();

-    // connect the scaling factor producer
+    // Get the Mul node from the microGraph

-    std::shared_ptr<Tensor> scalingFactorTensor = std::make_shared<Tensor>(Array1D<double, 1> {scalingFactor});
-    std::shared_ptr<Node> scalingFactorProducer = addProducer<1>(mulNode, 1, {1}, "ScalingFactor");
-    scalingFactorProducer->getOperator()->setOutput(0, scalingFactorTensor);
+    std::shared_ptr<Node> mulNode = nullptr;
+    for (auto node : microGraph->getNodes())
+        if (node->type() == "Mul")
+            mulNode = node;

-    // create the metaop graph
+    auto mulOp = std::static_pointer_cast<OperatorTensor> (mulNode->getOperator()); 

-    std::shared_ptr<GraphView> graphView = Sequential({mulNode, roundNode, clipNode});
-    std::shared_ptr<GraphView> connectedGraphView = getConnectedGraphView(mulNode); // XXX why not use the graphView ???
+    // Retrieve the scaling factor

-    // return the metaop
+    auto scalingFactorTensor = mulOp->getInput(1);

-    std::shared_ptr<Node> metaopNode = MetaOperator("Quantizer", connectedGraphView, {}, name); // XXX alternative prototype
+    std::shared_ptr<Tensor> fallback;
+    const Tensor& localTensor = scalingFactorTensor->refCastFrom(fallback, DataType::Float64, "cpu");
+    double scalingFactor = localTensor.get<double>(0);

-    return metaopNode;
+    return scalingFactor;
 }

+/*
 static std::shared_ptr<Node> getSubNode(std::shared_ptr<GraphView> graphView, std::string nodeType)
 {
    std::shared_ptr<Node> mulNode = nullptr;
@@ -238,66 +206,49 @@ static std::shared_ptr<Node> getSubNode(std::shared_ptr<GraphView> graphView, st

    return mulNode;
 }
+*/

-void updateScalingFactor(std::shared_ptr<Node> metaOpNode, double scalingFactor)
+void setClipRange(std::shared_ptr<Node> quantizerNode, double min, double max)
 {
-    if(metaOpNode->type() != "Scaling" && metaOpNode->type() != "Quantizer")
-        Log::warn("Cannot update the scaling factor on Node of type {}", metaOpNode->type());
-
-    std::shared_ptr<Tensor> scalingFactorTensor = std::make_shared<Tensor>(Array1D<double, 1> {scalingFactor});
+    auto quantizerOp = std::static_pointer_cast<MetaOperator_Op> (quantizerNode->getOperator());
+    auto microGraph = quantizerOp->getMicroGraph();

-    std::shared_ptr<MetaOperator_Op> metaOp = std::static_pointer_cast<MetaOperator_Op>(metaOpNode->getOperator());
+    std::shared_ptr<Node> clipNode = nullptr;
+    for (auto node : microGraph->getNodes())
+        if (node->type() == "Clip")
+            clipNode = node;

-    std::shared_ptr<Node> mulNode = getSubNode(metaOp->getMicroGraph(), "Mul");
+    // TODO : assert that clipNode is not a nullptr before dereferencing it ...

-    if (!mulNode)
-        Log::warn("Invalid PTQ MetaOperator, no Mul node found inside ! ");
+    auto clipOp = std::static_pointer_cast<Clip_Op> (clipNode->getOperator()); 

-    mulNode->input(1).first->getOperator()->setOutput(0, scalingFactorTensor);
-}
+    // set the attributes

-double getScalingFactor(std::shared_ptr<Node> MetaOpNode)
-{
-    if (MetaOpNode->type() != "Scaling" && MetaOpNode->type() != "Quantizer") {
-        Log::warn("Cannot get the scaling factor on Node of type {}", MetaOpNode->type());
-        return 0;
-    }
+    clipOp->max() = max;
+    clipOp->min() = min;

-    std::shared_ptr<MetaOperator_Op> metaOp = std::static_pointer_cast<MetaOperator_Op>(MetaOpNode->getOperator());
+    // Retrieve the previous min/max tensors

-    std::shared_ptr<Node> mulNode = getSubNode(metaOp->getMicroGraph(), "Mul");
+    auto minTensor = std::static_pointer_cast<OperatorTensor>(clipNode->getOperator())->getInput(1);
+    auto maxTensor = std::static_pointer_cast<OperatorTensor>(clipNode->getOperator())->getInput(2);

-    if (!mulNode) {
-        Log::warn("Invalid PTQ MetaOperator, no Mul found inside node of type {}", MetaOpNode->type());
-        return 0;
-    }
+    // Create the new min/max tensors

-    auto scalingFactorTensor = std::static_pointer_cast<OperatorTensor>(mulNode->getOperator())->getInput(1);
-    std::shared_ptr<Tensor> fallback;
-    const Tensor& localTensor = scalingFactorTensor->refCastFrom(fallback, DataType::Float64, "cpu");
+    std::shared_ptr<Tensor> newMinTensor = std::make_shared<Tensor>(min);
+    newMinTensor->setBackend(minTensor->backend());
+    newMinTensor->setDataType(minTensor->dataType());

-    return localTensor.get<double>(0);
-}
-
-
-void setClipRange(std::shared_ptr<Node> quantizerNode, double min, double max)
-{
-    if (quantizerNode->type() != "Quantizer") {
-        Log::warn("Cannot set the clipping range on Node of type {}", quantizerNode->type());
-        return;
-    }
+    std::shared_ptr<Tensor> newMaxTensor = std::make_shared<Tensor>(max);
+    newMaxTensor->setBackend(maxTensor->backend());
+    newMaxTensor->setDataType(maxTensor->dataType());

-    std::shared_ptr<MetaOperator_Op> metaOp = std::static_pointer_cast<MetaOperator_Op> (quantizerNode->getOperator());
+    // Set the tensors of the producer

-    std::shared_ptr<Node> clipNode = getSubNode(metaOp->getMicroGraph(), "Clip");
+    auto minProducer = clipNode->getParent(1);
+    minProducer->getOperator()->setOutput(0, newMinTensor);

-    if (!clipNode) {
-        Log::warn("Invalid PTQ MetaOperator, no Clip found inside node of type {}", quantizerNode->type());
-        return;
-    }
-
-    std::shared_ptr<Clip_Op> clipOp = std::static_pointer_cast<Clip_Op>(clipNode->getOperator());
-    clipOp->max() = max;
-    clipOp->min() = min;
+    auto maxProducer = clipNode->getParent(2);
+    maxProducer->getOperator()->setOutput(0, newMaxTensor);
 }
+
 }
\ No newline at end of file