Skip to content
Snippets Groups Projects
Commit f8b03835 authored by Benjamin Halimi's avatar Benjamin Halimi
Browse files

wip (mainly appendRoundClip)

parent 5cc3f175
No related branches found
No related tags found
2 merge requests!54Update 0.3.1 -> 0.4.0,!50Enhancement : Quantizer only PTQ
Pipeline #69018 failed
...@@ -20,6 +20,10 @@ ...@@ -20,6 +20,10 @@
namespace Aidge { namespace Aidge {
std::shared_ptr<Aidge::Node> BaseQuantizer(double scalingFactor, const std::string& name); std::shared_ptr<Aidge::Node> BaseQuantizer(double scalingFactor, const std::string& name);
void multiplyScalingFactor(std::shared_ptr<Aidge::Node> scalingNode, double coeff);
void appendRoundClip(std::shared_ptr<Node> metaOpNode, double clipMin, double clipMax);
/// @brief Quantizer acts as a meta-operator to handle scaling operations in the PTQ, replacing the Scaling Operator. /// @brief Quantizer acts as a meta-operator to handle scaling operations in the PTQ, replacing the Scaling Operator.
/// This operator is composed of a sequence of [Mul] -> [Clip] -> [Round] operations. /// This operator is composed of a sequence of [Mul] -> [Clip] -> [Round] operations.
......
...@@ -207,39 +207,6 @@ static int getInputIndex(std::shared_ptr<Node> node, std::shared_ptr<Node> paren ...@@ -207,39 +207,6 @@ static int getInputIndex(std::shared_ptr<Node> node, std::shared_ptr<Node> paren
return index; return index;
} }
// Multiply the scaling factor of a scaling meta-operator by `coeff`.
// The factor is the tensor held by the Producer that feeds input #1 of the
// Mul node inside the meta-operator's micro-graph; it is read back as a
// Float64 scalar, multiplied, and written back as a fresh tensor that keeps
// the original backend and datatype.
void multiplyScalingFactor(std::shared_ptr<Aidge::Node> scalingNode, double coeff)
{
// NOTE(review): assumes `scalingNode` is a MetaOperator — TODO confirm at call sites.
auto metaOperatorOp = std::static_pointer_cast<MetaOperator_Op> (scalingNode->getOperator());
// Locate the Mul node inside the micro-graph. If none exists, `mulNode`
// stays nullptr and the dereference below crashes — callers are expected
// to pass a well-formed scaling meta-operator.
std::shared_ptr<Node> mulNode = nullptr;
auto microGraph = metaOperatorOp->getMicroGraph();
for (auto node : microGraph->getNodes())
if (node->type() == "Mul")
mulNode = node;
// Retrieve the previous scaling factor: cast/copy the factor tensor to a
// CPU Float64 view so the scalar can be read whatever the backend is.
auto scalingFactorTensor = std::static_pointer_cast<OperatorTensor>(mulNode->getOperator())->getInput(1);
std::shared_ptr<Tensor> fallback;
const Tensor& localTensor = scalingFactorTensor->refCastFrom(fallback, DataType::Float64, "cpu");
double prevScalingFactor = localTensor.get<double>(0);
// Build the new scaling factor tensor, preserving the original tensor's
// backend and datatype.
std::shared_ptr<Tensor> newScalingFactorTensor = std::make_shared<Tensor>(prevScalingFactor * coeff);
newScalingFactorTensor->setBackend(scalingFactorTensor->backend());
newScalingFactorTensor->setDataType(scalingFactorTensor->dataType());
// Install the new tensor as the output of the Producer feeding the Mul.
auto producer = mulNode->getParent(1);
producer->getOperator()->setOutput(0, newScalingFactorTensor);
}
// Utility function that insert a node below another one already connected // Utility function that insert a node below another one already connected
static void insertChildren(std::shared_ptr<Node> parent, std::shared_ptr<Node> newNode, std::shared_ptr<GraphView> graphView) static void insertChildren(std::shared_ptr<Node> parent, std::shared_ptr<Node> newNode, std::shared_ptr<GraphView> graphView)
{ {
...@@ -273,6 +240,7 @@ static void insertChildren(std::shared_ptr<Node> parent, std::shared_ptr<Node> n ...@@ -273,6 +240,7 @@ static void insertChildren(std::shared_ptr<Node> parent, std::shared_ptr<Node> n
bool insertRoundBelowProducer(std::shared_ptr<Node> node, std::shared_ptr<GraphView> graphView) bool insertRoundBelowProducer(std::shared_ptr<Node> node, std::shared_ptr<GraphView> graphView)
{ {
/*
if (hasAttr(node, "isProducerScaling") && node->type() != "Round") if (hasAttr(node, "isProducerScaling") && node->type() != "Round")
{ {
std::shared_ptr<Aidge::Node> roundNode = Round(node->name() + "_Round"); std::shared_ptr<Aidge::Node> roundNode = Round(node->name() + "_Round");
...@@ -285,6 +253,9 @@ bool insertRoundBelowProducer(std::shared_ptr<Node> node, std::shared_ptr<GraphV ...@@ -285,6 +253,9 @@ bool insertRoundBelowProducer(std::shared_ptr<Node> node, std::shared_ptr<GraphV
return true; return true;
} }
return false; return false;
*/
Log::warn(" ROUND : DUMMY ! ");
return true;
} }
double getTensorAbsoluteMax(std::shared_ptr<Tensor> tensor) double getTensorAbsoluteMax(std::shared_ptr<Tensor> tensor)
...@@ -429,7 +400,7 @@ static std::shared_ptr<Node> getPreviousScalingNode(std::shared_ptr<Node> node) ...@@ -429,7 +400,7 @@ static std::shared_ptr<Node> getPreviousScalingNode(std::shared_ptr<Node> node)
void insertScalingBelowProducer(std::shared_ptr<Node> producerNode, double ratio, std::shared_ptr<GraphView> graphView) void insertScalingBelowProducer(std::shared_ptr<Node> producerNode, double ratio, std::shared_ptr<GraphView> graphView)
{ {
Log::warn(" DUMMY ! "); Log::warn(" INSERT SCALING : DUMMY ! ");
} }
void insertScalingBelowProducer(std::shared_ptr<Node> producerNode, std::shared_ptr<GraphView> graphView) void insertScalingBelowProducer(std::shared_ptr<Node> producerNode, std::shared_ptr<GraphView> graphView)
...@@ -882,7 +853,7 @@ std::unordered_map<std::shared_ptr<Node>, std::pair<bool, bool>> computeSignMap( ...@@ -882,7 +853,7 @@ std::unordered_map<std::shared_ptr<Node>, std::pair<bool, bool>> computeSignMap(
std::pair<bool, bool> unsignedPair(true, true); std::pair<bool, bool> unsignedPair(true, true);
for (std::shared_ptr<Node> node : graphView->getNodes()) for (std::shared_ptr<Node> node : graphView->getNodes())
if (node->type() != "Producer") if (node->type() != "Producer") // XXX XXX XXX we should use nodeVector instead ...
signMap.insert(std::make_pair(node, unsignedPair)); signMap.insert(std::make_pair(node, unsignedPair));
// ITERATE OVER THE GRAPH // ITERATE OVER THE GRAPH
...@@ -1015,6 +986,7 @@ void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_ ...@@ -1015,6 +986,7 @@ void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_
signMap = computeSignMap(graphView, verbose); signMap = computeSignMap(graphView, verbose);
else else
{ {
// XXX XXX XXX we should use the (retreive) node vector
std::pair<bool, bool> signedPair(false, false); std::pair<bool, bool> signedPair(false, false);
for (std::shared_ptr<Node> node : graphView->getNodes()) for (std::shared_ptr<Node> node : graphView->getNodes())
if (node->type() != "Producer") if (node->type() != "Producer")
...@@ -1030,11 +1002,11 @@ void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_ ...@@ -1030,11 +1002,11 @@ void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_
if (isAffine(node)) if (isAffine(node))
{ {
// Rescale the weight tensor // Rescale the weight tensor
std::shared_ptr<Tensor> weightTensor = getWeightTensor(node); multiplyScalingFactor(node->getParent(1), signedMax);
insertScalingBelowProducer(node->getParent(1),signedMax,graphView);
// UUU Quantize the Producer !!!
if (!noQuant) if (!noQuant)
insertRoundBelowProducer(node->getParent(1),graphView); appendRoundClip(node->getParent(1), -(signedMax + 1), signedMax);
// Rescale the bias tensor // Rescale the bias tensor
if (nodeHasBias(node)) if (nodeHasBias(node))
...@@ -1042,11 +1014,12 @@ void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_ ...@@ -1042,11 +1014,12 @@ void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_
bool inputIsUnsigned = signMap[node].first; bool inputIsUnsigned = signMap[node].first;
double rescaling = inputIsUnsigned ? unsignedMax * signedMax : signedMax * signedMax; double rescaling = inputIsUnsigned ? unsignedMax * signedMax : signedMax * signedMax;
std::shared_ptr<Tensor> biasTensor = getBiasTensor(node); multiplyScalingFactor(node->getParent(2), rescaling);
insertScalingBelowProducer(node->getParent(2),rescaling,graphView);
// XXX TODO : enhance this !
int biasMax = (1 << (12 + nbBits));
if (!noQuant) if (!noQuant)
insertRoundBelowProducer(node->getParent(2),graphView); appendRoundClip(node->getParent(2), -(biasMax + 1), biasMax);
} }
// Compensate the rescaling using the next Scaling node // Compensate the rescaling using the next Scaling node
...@@ -1061,7 +1034,7 @@ void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_ ...@@ -1061,7 +1034,7 @@ void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_
std::shared_ptr<Node> scalingNode = getUniqueChild(node); // TODO : assert if scalingNode is a Scaling ... std::shared_ptr<Node> scalingNode = getUniqueChild(node); // TODO : assert if scalingNode is a Scaling ...
multiplyScalingFactor(scalingNode,rescaling) ; multiplyScalingFactor(scalingNode, rescaling);
} }
if (isMerging(node)) if (isMerging(node))
...@@ -1080,7 +1053,7 @@ void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_ ...@@ -1080,7 +1053,7 @@ void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_
if (node->type() == "MatMul") if (node->type() == "MatMul")
rescaling /= inputIsUnsigned ? unsignedMax : signedMax; rescaling /= inputIsUnsigned ? unsignedMax : signedMax;
multiplyScalingFactor(scalingNode, rescaling) ; multiplyScalingFactor(scalingNode, rescaling);
} }
if (isNotQuantized(node)) if (isNotQuantized(node))
...@@ -1096,7 +1069,7 @@ void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_ ...@@ -1096,7 +1069,7 @@ void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_
// Handle the Scaling Nodes ... // Handle the Scaling Nodes ...
if (hasAttr(node, "isScaling")) if (hasAttr(node, "isScaling"))
{ {
// Don't touch the scalings that precede non-linearities ... // Don't touch the scalings that precede non-linearities ...
...@@ -1107,20 +1080,13 @@ void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_ ...@@ -1107,20 +1080,13 @@ void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_
if (!noQuant && !precedesNonLinearNode) if (!noQuant && !precedesNonLinearNode)
{ {
// Replace the Scaling Node by a Quantizer // Old : Replace the Scaling Node by a Quantizer
auto scalingFactorTensor = std::static_pointer_cast<OperatorTensor>(node->getOperator())->getInput(1); appendRoundClip(node, -(signedMax + 1), signedMax);
std::shared_ptr<Tensor> fallback;
const Tensor& localTensor = scalingFactorTensor->refCastFrom(fallback, DataType::Float64, "cpu");
double oldScalingFactor = localTensor.get<double>(0); //!\\
std::shared_ptr<Node> quantizerNode = Quantizer(oldScalingFactor, -(signedMax + 1), signedMax, node->name());
quantizerNode->getOperator()->setDataType(DataType::Float64); // getDataType(parentNode)
quantizerNode->getOperator()->setBackend(determineBackend(node));
graphView->replace({node, node->getParent(1)}, {quantizerNode});
if (optimizeSigns) if (optimizeSigns)
{ {
/*
double rescaling = 1.0; double rescaling = 1.0;
bool inputIsUnsigned = signMap[node].first; bool inputIsUnsigned = signMap[node].first;
...@@ -1129,11 +1095,16 @@ void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_ ...@@ -1129,11 +1095,16 @@ void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_
rescaling /= inputIsUnsigned ? unsignedMax : signedMax; rescaling /= inputIsUnsigned ? unsignedMax : signedMax;
rescaling *= outputIsUnsigned ? unsignedMax : signedMax; rescaling *= outputIsUnsigned ? unsignedMax : signedMax;
double currScalingFactor = getScalingFactor(quantizerNode); // XXX XXX XXX
updateScalingFactor(quantizerNode, currScalingFactor * rescaling); //double currScalingFactor = getScalingFactor(quantizerNode);
//updateScalingFactor(quantizerNode, currScalingFactor * rescaling);
multiplyScalingFactor(node, rescaling);
// XXX XXX XXX HERE : Fix this !!!
if(outputIsUnsigned) if(outputIsUnsigned)
setClipRange(quantizerNode, 0, unsignedMax); setClipRange(quantizerNode, 0, unsignedMax);
*/
} }
} }
} }
...@@ -1334,7 +1305,8 @@ void clearBiases(std::shared_ptr<GraphView> graphView) ...@@ -1334,7 +1305,8 @@ void clearBiases(std::shared_ptr<GraphView> graphView)
if (node->type() == "FC" || node->type() == "Conv2D") { if (node->type() == "FC" || node->type() == "Conv2D") {
std::shared_ptr<Tensor> biasTensor = std::static_pointer_cast<OperatorTensor>(node->getOperator())->getInput(2); std::shared_ptr<Tensor> biasTensor = std::static_pointer_cast<OperatorTensor>(node->getOperator())->getInput(2);
//rescaleTensor(biasTensor, 0); //rescaleTensor(biasTensor, 0);
insertScalingBelowProducer(node->getParent(2), 0, graphView); //insertScalingBelowProducer(node->getParent(2), 0, graphView);
multiplyScalingFactor(node->getParent(2), 0);
} }
} }
} }
......
...@@ -35,7 +35,7 @@ namespace Aidge ...@@ -35,7 +35,7 @@ namespace Aidge
std::shared_ptr<Node> BaseQuantizer(double scalingFactor, const std::string& name) std::shared_ptr<Node> BaseQuantizer(double scalingFactor, const std::string& name)
{ {
std::shared_ptr<Node> mulNode = Mul(name.empty() ? "" : name + "_MulQuant"); std::shared_ptr<Node> mulNode = Mul(name + "_MulQuant");
// Scaling Factor Producer // Scaling Factor Producer
...@@ -54,6 +54,155 @@ std::shared_ptr<Node> BaseQuantizer(double scalingFactor, const std::string& nam ...@@ -54,6 +54,155 @@ std::shared_ptr<Node> BaseQuantizer(double scalingFactor, const std::string& nam
return metaopNode; return metaopNode;
} }
/// @brief Multiply the scaling factor of a scaling meta-operator by a coefficient.
/// The factor is stored in the Producer feeding input #1 of the Mul node inside
/// the meta-operator's micro-graph; it is read back as a Float64 scalar,
/// multiplied by `coeff`, and written back as a new tensor that preserves the
/// original backend and datatype.
/// @param scalingNode MetaOperator node whose micro-graph contains a Mul node.
/// @param coeff Multiplicative coefficient applied to the current scaling factor.
void multiplyScalingFactor(std::shared_ptr<Aidge::Node> scalingNode, double coeff)
{
    // NOTE(review): assumes `scalingNode` wraps a micro-graph — TODO confirm at call sites.
    auto metaOperatorOp = std::static_pointer_cast<MetaOperator_Op>(scalingNode->getOperator());

    // Locate the Mul node inside the micro-graph (stop at the first one:
    // a scaling micro-graph is expected to contain a single Mul).
    std::shared_ptr<Node> mulNode = nullptr;
    auto microGraph = metaOperatorOp->getMicroGraph();
    for (const auto& node : microGraph->getNodes()) {
        if (node->type() == "Mul") {
            mulNode = node;
            break;
        }
    }

    // Guard: without a Mul node the code below would dereference nullptr.
    if (!mulNode) {
        Log::warn(" multiplyScalingFactor() : no Mul node found in the micro-graph, nothing done.");
        return;
    }

    // Retrieve the previous scaling factor: cast/copy the factor tensor to a
    // CPU Float64 view so the scalar can be read whatever the backend is.
    auto scalingFactorTensor = std::static_pointer_cast<OperatorTensor>(mulNode->getOperator())->getInput(1);
    std::shared_ptr<Tensor> fallback;
    const Tensor& localTensor = scalingFactorTensor->refCastFrom(fallback, DataType::Float64, "cpu");
    double prevScalingFactor = localTensor.get<double>(0);

    // Build the new scaling factor tensor, preserving backend and datatype.
    std::shared_ptr<Tensor> newScalingFactorTensor = std::make_shared<Tensor>(prevScalingFactor * coeff);
    newScalingFactorTensor->setBackend(scalingFactorTensor->backend());
    newScalingFactorTensor->setDataType(scalingFactorTensor->dataType());

    // Install the new tensor as the output of the Producer feeding the Mul.
    auto producer = mulNode->getParent(1);
    producer->getOperator()->setOutput(0, newScalingFactorTensor);
}
/*
void appendRoundClip(std::shared_ptr<Node> metaOpNode, double clipMin, double clipMax)
{
// Create the new nodes
std::string name = metaOpNode->name();
std::shared_ptr<Node> roundNode = Round(name + "_RoundQuant");
std::shared_ptr<Node> clipNode = Clip(name + "_ClipQuant", clipMin, clipMax);
// Retrieve the previous microGraph
auto metaOperatorOp = std::static_pointer_cast<MetaOperator_Op> (metaOpNode->getOperator());
auto microGraph = metaOperatorOp->getMicroGraph();
// Get the Mul node from the microGraph
std::shared_ptr<Node> mulNode = nullptr;
for (auto node : microGraph->getNodes())
if (node->type() == "Mul")
mulNode = node;
// Save the backend and datatype
auto mulOp = std::static_pointer_cast<OperatorTensor> (mulNode->getOperator());
auto backend = mulOp->getInput(0)->backend();
auto dataType = mulOp->getInput(0)->dataType();
// Create the new microGraph
std::shared_ptr<GraphView> prevGraphView = Sequential({mulNode});
prevGraphView->add(mulNode->getParent(1)); // add the producer
auto prevGraphViewClone = prevGraphView->clone();
std::shared_ptr<GraphView> newGraphView = Sequential({prevGraphViewClone, roundNode, clipNode});
// Replace the old microGraph
microGraph->replace(prevGraphView, newGraphView);
// Set the backend and datatype
microGraph->setBackend(backend);
microGraph->setDataType(dataType);
}
*/
// Append [Round] -> [Clip] after the Mul of a scaling meta-operator, turning
// it into a full quantizer micro-graph: [Mul] -> [Round] -> [Clip].
// The rewrite is done by building a brand-new micro-graph (reusing a clone of
// the coefficient Producer), manually re-wiring the outer input/output
// tensors, and assigning it over the old micro-graph in place.
void appendRoundClip(std::shared_ptr<Node> metaOpNode, double clipMin, double clipMax)
{
// Retrieve the previous micro-graph of the meta-operator
auto metaOperatorOp = std::static_pointer_cast<MetaOperator_Op> (metaOpNode->getOperator());
auto microGraph = metaOperatorOp->getMicroGraph();
// Locate the Mul node inside the micro-graph.
// NOTE(review): if no Mul is present, `mulNode` stays nullptr and the cast
// below dereferences it — assumes a well-formed scaling meta-operator.
std::shared_ptr<Node> mulNode = nullptr;
for (auto node : microGraph->getNodes())
if (node->type() == "Mul")
mulNode = node;
auto mulOp = std::static_pointer_cast<OperatorTensor> (mulNode->getOperator());
// Save the backend and datatype of the Mul's data input so the new
// micro-graph can be configured identically.
auto backend = mulOp->getInput(0)->backend();
auto dataType = mulOp->getInput(0)->dataType();
// Create the new micro-graph nodes
auto newMulNode = Mul();
auto roundNode = Round();
auto clipNode = Clip(""); // min/max are fed below as input tensors, not ctor args
auto newCoeffNode = mulNode->getParent(1)->clone(); // clone of the scaling-factor Producer
// Assemble the new micro-graph: Mul -> Round -> Clip
std::shared_ptr<GraphView> newMicroGraph = Sequential({newMulNode, roundNode, clipNode});
newCoeffNode->addChild(newMulNode, 0, 1); // coefficient goes to Mul input #1
newMicroGraph->add(newCoeffNode);
// Manually wire the outer IO tensors onto the new micro-graph
auto newMulOp = std::static_pointer_cast<OperatorTensor> (newMulNode->getOperator());
newMulOp->setInput(0, mulOp->getInput(0)); // mandatory: the new Mul needs the outer input tensor
auto clipOp = std::static_pointer_cast<Clip_Op> (clipNode->getOperator());
clipOp->setOutput(0, mulOp->getOutput(0)); // mandatory: reuse the outer output tensor
// Feed the clip min and max bounds through dedicated Producers
auto minTensor = std::make_shared<Tensor>(clipMin);
auto maxTensor = std::make_shared<Tensor>(clipMax);
auto minNode = Producer(minTensor);
auto maxNode = Producer(maxTensor);
minNode->addChild(clipNode, 0, 1);
maxNode->addChild(clipNode, 0, 2);
newMicroGraph->add(minNode);
newMicroGraph->add(maxNode);
// Propagate the saved backend and datatype to every new node
newMicroGraph->setBackend(backend);
newMicroGraph->setDataType(dataType);
// Reset the scheduling so the next run re-schedules the new topology
SequentialScheduler scheduler(newMicroGraph);
scheduler.resetScheduling();
//scheduler.generateScheduling();
// Replace the meta-operator's micro-graph in place
*microGraph = *newMicroGraph;
}
std::shared_ptr<Node> Quantizer(double scalingFactor, double clipMin, double clipMax, const std::string& name) std::shared_ptr<Node> Quantizer(double scalingFactor, double clipMin, double clipMax, const std::string& name)
{ {
// create the nodes // create the nodes
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment