initial commit (quantizer insertion + normalize params)

cdcb260f · Benjamin Halimi · 3f669a98 · cdcb260f · cdcb260f · cdcb260f
Commit cdcb260f authored 3 months ago by Benjamin Halimi
--- a/include/aidge/operator/PTQMetaOps.hpp
+++ b/include/aidge/operator/PTQMetaOps.hpp
@@ -19,6 +19,10 @@

 namespace Aidge {

+    /// @brief Builds a "BaseQuantizer" meta-operator node wrapping a Mul whose input #1 is fed by a Producer initialised to scalingFactor; name is given to the meta-operator node.
+    std::shared_ptr<Aidge::Node> BaseQuantizer(double scalingFactor, const std::string& name);
+
+
 /// @brief Quantizer acts as a meta-operator to handle scaling operations in the PTQ, replacing the Scaling Operator.
 /// This operator is composed of a sequence of [Mul] -> [Clip] -> [Round] operations.
 ///

--- a/include/aidge/quantization/PTQ/PTQ.hpp
+++ b/include/aidge/quantization/PTQ/PTQ.hpp
@@ -89,15 +89,12 @@ namespace Aidge {
    std::vector<std::shared_ptr<Node>> retrieveNodeVector(std::shared_ptr<GraphView> graphView, bool newSchedule = true, bool verbose = false);
    
    /**
-     * @brief Inserts a scaling node below the given producer node in the graph view. 
-     *        If the node is already a producer scaling node, it accumulates the scaling factor by multiplyins its value directly.
-     *
+     * @brief Inserts a scaling node below the given producer node in the graphView. 
     * @param node A shared pointer to the producer node where the scaling node will be inserted (below).
-     * @param scalingFactor The scaling factor to apply.
     * @param graphView A shared pointer to the graph view in which the nodes are located.
     * @return True if the scaling node was successfully inserted or the scaling factor was accumulated; False otherwise.
     */
-    bool insertScalingBelowProducer(std::shared_ptr<Node> node, double scalingFactor, std::shared_ptr<GraphView> graphView);
+    void insertScalingBelowProducer(std::shared_ptr<Node> node, std::shared_ptr<GraphView> graphView);

    /**
     * @brief Inserts a rounding node below the given producer (also below its ows producerScaling) node in the graph view. 

--- a/src/PTQ/CLE.cpp
+++ b/src/PTQ/CLE.cpp
@@ -78,6 +78,7 @@ std::shared_ptr<Aidge::Tensor> getLocalTensor(std::shared_ptr<Node> node)

 void crossLayerEqualization(std::shared_ptr<GraphView> graphView, double targetDelta)
 {
+/*
    std::vector<std::shared_ptr<Node>> nodeVector = retrieveNodeVector(graphView);

    // Check if the CLE can be applied ...
@@ -137,6 +138,7 @@ void crossLayerEqualization(std::shared_ptr<GraphView> graphView, double targetD
        }
    }
    while (maxRangeDelta > targetDelta);
+*/
 }

 }
\ No newline at end of file
--- a/src/PTQ/PTQ.cpp
+++ b/src/PTQ/PTQ.cpp
@@ -34,6 +34,9 @@
 #include "aidge/recipes/Recipes.hpp"
 #include "aidge/recipes/QuantRecipes.hpp"

+#include "aidge/operator/MetaOperator.hpp"
+
+
 namespace Aidge
 {

@@ -204,8 +207,9 @@ static int getInputIndex(std::shared_ptr<Node> node, std::shared_ptr<Node> paren
    return index;
 }

-void multiplyScalingFactor(std::shared_ptr<Aidge::Node> node, double coeff)
+void multiplyScalingFactor(std::shared_ptr<Aidge::Node> scalingNode, double coeff)
 {
+/*
    AIDGE_ASSERT(node->type() == "Mul" && hasAttr(node, "isProducerScaling") || hasAttr(node, "isScaling"),
        "Cannot update the scaling factor on Node of type {} with no scaling tag", node->type());
    
@@ -217,6 +221,37 @@ void multiplyScalingFactor(std::shared_ptr<Aidge::Node> node, double coeff)

    std::shared_ptr<Tensor> resultTensor = std::make_shared<Tensor>(Array1D<double, 1> {previousScalingFactor * coeff});
    node->input(1).first->getOperator()->setOutput(0, resultTensor);
+*/
+
+    auto metaOperatorOp = std::static_pointer_cast<MetaOperator_Op> (scalingNode->getOperator());
+
+    // Get the Mul node from the microGraph
+
+    std::shared_ptr<Node> mulNode = nullptr;
+    auto microGraph = metaOperatorOp->getMicroGraph();
+    for (auto node : microGraph->getNodes())
+        if (node->type() == "Mul")
+            mulNode = node;
+
+    // Retrieve the previous scaling factor
+
+    auto scalingFactorTensor = std::static_pointer_cast<OperatorTensor>(mulNode->getOperator())->getInput(1);
+
+    std::shared_ptr<Tensor> fallback;
+    const Tensor& localTensor = scalingFactorTensor->refCastFrom(fallback, DataType::Float64, "cpu");
+    double prevScalingFactor = localTensor.get<double>(0);    
+
+    // Create the new scaling factor tensor
+
+    std::shared_ptr<Tensor> newScalingFactorTensor = std::make_shared<Tensor>(prevScalingFactor * coeff);
+    newScalingFactorTensor->setBackend(scalingFactorTensor->backend());
+    newScalingFactorTensor->setDataType(scalingFactorTensor->dataType());
+
+    // Set the tensor of the producer
+
+    auto producer = mulNode->getParent(1);
+    producer->getOperator()->setOutput(0, newScalingFactorTensor);
+    // XXX old way : mulNode->input(1).first->getOperator()->setOutput(0, resultTensor);
 }

 // Utility function that insert a node below another one already connected 
@@ -303,28 +338,6 @@ double getTensorAbsoluteMax(std::shared_ptr<Tensor> tensor)
    return localFlatTensor.get<double>(maxIndex);
 }

-
-// TODO : pass nodeVector by reference ...
-static std::vector<std::shared_ptr<Node>> removeMatchingNodes(std::vector<std::shared_ptr<Node>> nodeVector, std::string nodeType)
-{
-    std::vector<std::shared_ptr<Node>> remainingNodes;
-    for (std::shared_ptr<Node> node : nodeVector)
-        if (node->type() != nodeType)
-            remainingNodes.push_back(node);
-
-    return remainingNodes;
-}
-
-static std::vector<std::shared_ptr<Node>> removeProdScalingNodes(std::vector<std::shared_ptr<Node>> nodeVector)
-{
-    std::vector<std::shared_ptr<Node>> remainingNodes;
-    for (std::shared_ptr<Node> node : nodeVector)
-        if (!hasAttr(node, "isProducerScaling"))
-            remainingNodes.push_back(node);
-
-    return remainingNodes;
-}
-
 static void fixScheduling(std::vector<std::shared_ptr<Node>>& nodeVector) {

    std::vector<std::shared_ptr<Node>> correctedVector;
@@ -344,22 +357,42 @@ static void fixScheduling(std::vector<std::shared_ptr<Node>>& nodeVector) {

 static std::shared_ptr<Tensor> getWeightTensor(std::shared_ptr<Node> node)
 {
-    return std::static_pointer_cast<OperatorTensor>(node->getOperator())->getInput(1);
+    std::shared_ptr<Node> producer = node->getParent(1); // node connected to input #1 of node
+    // When input #1 is fed by a "BaseQuantizer" node, step up to that node's parent #0 instead.
+    if (producer->type() == "BaseQuantizer") // NOTE(review): producer is not null-checked before use - confirm input #1 is always connected
+        producer = producer->getParent(0);
+    // The tensor returned is output #0 of the node selected above.
+    return std::static_pointer_cast<OperatorTensor>(producer->getOperator())->getOutput(0);
 }

 static std::shared_ptr<Tensor> getBiasTensor(std::shared_ptr<Node> node)
 {
-    return std::static_pointer_cast<OperatorTensor>(node->getOperator())->getInput(2);
+    std::shared_ptr<Node> producer = node->getParent(2); // node connected to input #2 of node
+    // When input #2 is fed by a "BaseQuantizer" node, step up to that node's parent #0 instead.
+    if (producer->type() == "BaseQuantizer") // NOTE(review): producer is not null-checked before use - confirm callers only pass nodes whose input #2 is connected
+        producer = producer->getParent(0);
+    // The tensor returned is output #0 of the node selected above.
+    return std::static_pointer_cast<OperatorTensor>(producer->getOperator())->getOutput(0);
 }

 std::vector<std::shared_ptr<Node>> retrieveNodeVector(std::shared_ptr<GraphView> graphView, bool newSchedule, bool verbose)
 {
    std::vector<std::shared_ptr<Node>> nodeVector = graphView->getOrderedNodes();
   
+    // Remove duplicate nodes. Is it still needed ???
+
    fixScheduling(nodeVector); 

-    nodeVector = removeMatchingNodes(nodeVector, "Producer");
-    nodeVector = removeProdScalingNodes(nodeVector);
+    // Remove Producers and their Scalings
+
+    std::vector<std::shared_ptr<Node>> remainingNodes;
+    for (std::shared_ptr<Node> node : nodeVector)
+        if ((node->type() != "Producer") && !hasAttr(node, "isProducerScaling"))
+            remainingNodes.push_back(node);
+
+    nodeVector = remainingNodes;
+
+    // Verbose

    if (verbose) 
    {
@@ -383,6 +416,7 @@ static DataType getDataType(std::shared_ptr<Node> node)
    return op->getOutput(0)->dataType();
 }

+/*
 static std::shared_ptr<Aidge::Node> createScalingNode(std::string name, std::vector<std::string> attributes, double value)
 {
    std::shared_ptr<Node> scalingNode = Mul(name);
@@ -401,26 +435,41 @@ static std::shared_ptr<Aidge::Node> createScalingNode(std::string name, std::vec

    return scalingNode;
 }
+*/

-bool insertScalingBelowProducer(std::shared_ptr<Node> producerNode, double scalingFactor, std::shared_ptr<GraphView> graphView)
+// Returns true when node has exactly 3 parents and getBiasTensor(node) yields a non-null tensor, false otherwise. XXX double check this !
+static bool nodeHasBias(std::shared_ptr<Node> node)
 {
-    if (hasAttr(producerNode, "isProducerRounding"))
-    {
-        // In this case we 'bump' the node to the one above him (an actual ProducerScaling)
-        // because the round node is not usable (only used when SSA is enabled)
-        producerNode = producerNode->getParent(0);
+    if (node->getParents().size() == 3) { // presumably the three slots are data, weight and bias - TODO confirm
+        std::shared_ptr<Tensor> biasTensor = getBiasTensor(node); // NOTE(review): getBiasTensor dereferences parent #2; verify a 3-slot node can never have that slot unconnected
+        if (biasTensor)
+            return true;
     }
+    return false;
+}

-    if (hasAttr(producerNode, "isProducerScaling"))
-    {
-        // We accumulate the previous scaling factors by multiplying the SF of the ProducerScaling node 
-        // (adding new nodes each time would make the graph unusable)
-        multiplyScalingFactor(producerNode, scalingFactor);
-        return true;
+// Starting at node itself, follows parent #0 upwards until a node tagged "isScaling" is met and returns it; if the chain ends first, warns and returns the last node reached. TODO: rework this !
+static std::shared_ptr<Node> getPreviousScalingNode(std::shared_ptr<Node> node)
+{
+    std::shared_ptr<Node> currNode = node;
+    while(!hasAttr(currNode, "isScaling")) {
+        if (currNode->getParents().size() == 0) { // no parent left: the node returned below does not carry the "isScaling" tag
+            Log::warn(" Warning : No previous Scaling node were found ! ");
+            break;
+        }
+        currNode = currNode->getParents()[0]; // only the first parent is followed; NOTE(review): a null first parent is not guarded against - confirm
     }
+    return currNode;
+}

-    AIDGE_ASSERT(producerNode->type() == "Producer", " Cannot apply a scaling factor on node of type: {} which is not a Producer", producerNode->type());
-   
+void insertScalingBelowProducer(std::shared_ptr<Node> producerNode, double ratio, std::shared_ptr<GraphView> graphView)
+{ // Placeholder overload: producerNode, ratio and graphView are all ignored and the graph is left untouched.
+    Log::warn(" DUMMY ! "); // the only effect of this overload is this warning; NOTE(review): any caller expecting ratio to be applied gets no scaling
+}
+
+void insertScalingBelowProducer(std::shared_ptr<Node> producerNode, std::shared_ptr<GraphView> graphView)
+{
+/*
    std::string scalingNodeName = makeUniqueName(producerNode->name() + "_ProducerScaling", graphView);
    std::shared_ptr<Node> scalingNode = createScalingNode(scalingNodeName, {"isProducerScaling"}, scalingFactor);

@@ -429,8 +478,35 @@ bool insertScalingBelowProducer(std::shared_ptr<Node> producerNode, double scali

    insertChildren(producerNode, scalingNode, graphView);
    graphView->add(scalingNode->getParent(1)); // add the scaling factor producer
+*/

-    return true;
+    std::string scalingNodeName = makeUniqueName(producerNode->name() + "_ProducerScaling", graphView);
+    std::shared_ptr<Node> scalingNode = BaseQuantizer(1.0, scalingNodeName);;
+    addAttr(scalingNode, "isProducerScaling");
+    // XXX XXX XXX addAttr(scalingNode, "isScaling");
+
+    scalingNode->getOperator()->setDataType(DataType::Float64); // getDataType(parentNode)
+    scalingNode->getOperator()->setBackend(determineBackend(producerNode)); // XXX use the producer parent instead ???
+
+    insertChildren(producerNode, scalingNode, graphView);
+
+    // XXX XXX XXX is it needed ?
+    // graphView->add(scalingNode->getParent(1));
+}
+
+void insertProducerScalingNodes(std::shared_ptr<GraphView> graphView)
+{ // For every node of graphView accepted by isAffine(), inserts a scaling node below its parent #1, and below its parent #2 when nodeHasBias() holds.
+    std::set<std::shared_ptr<Node>> nodeSet = graphView->getNodes(); // local set: the loop below iterates over this variable, not over graphView directly
+    // NOTE(review): getParent(1) / getParent(2) are forwarded without a null check - confirm affine nodes always have these inputs connected.
+    for (std::shared_ptr<Node> node : nodeSet)
+    {
+        if (isAffine(node))
+        {
+            insertScalingBelowProducer(node->getParent(1), graphView); // parent #1 is what getWeightTensor() reads
+            if (nodeHasBias(node))
+                insertScalingBelowProducer(node->getParent(2), graphView); // parent #2 is what getBiasTensor() reads
+        }
+    }
 }

 // XXX HERE : Branches containing only Seamless nodes should be considered as residual too !!!
@@ -458,47 +534,45 @@ void insertResidualScalingNodes(std::shared_ptr<GraphView> graphView)
                    Log::info(" ### inserting multiplicative node ...");

                    std::string residualNodeName = makeUniqueName(parentNode->name() + "_Res", graphView);
-                    std::shared_ptr<Node> residualNode = createScalingNode(residualNodeName, {"isScaling", "isResidual"}, 1.0);
+                    
+                    // XXX XXX XXX 
+                    // std::shared_ptr<Node> residualNode = createScalingNode(residualNodeName, {"isScaling", "isResidual"}, 1.0);
+                    std::shared_ptr<Node> residualNode = BaseQuantizer(1.0, residualNodeName);
+                    addAttr(residualNode, "isScaling");
+                    addAttr(residualNode, "isResidual");

                    residualNode->getOperator()->setDataType(DataType::Float64); // getDataType(parentNode)
                    residualNode->getOperator()->setBackend(determineBackend(parentNode));

                    graphView->insertParent(node, residualNode, i, 0, 0);
-                    graphView->add(residualNode->getParent(1)); // add the scaling factor producer

+                    // XXX XXX XXX is it needed ? no more !
+                    // graphView->add(residualNode->getParent(1)); 
                }
            }
        }
    }
 }

-static std::shared_ptr<Node> getPreviousScalingNode(std::shared_ptr<Node> node)
-{
-    std::shared_ptr<Node> currNode = node;
-    while(!hasAttr(currNode, "isScaling"))
-    {
-        if (currNode->getParents().size() == 0)
-        {
-            Log::warn(" Warning : No previous Scaling node were found ! ");
-            break;
-        }
-        currNode = currNode->getParents()[0];
-    }
-    return currNode;
-}
-
 void insertScalingNodes(std::shared_ptr<GraphView> graphView)
 {
+    insertProducerScalingNodes(graphView);
    insertResidualScalingNodes(graphView);

    std::set<std::shared_ptr<Node>> nodeSet = graphView->getNodes();

    for (std::shared_ptr<Node> parentNode : nodeSet)
    {
+        // Insert a Scaling node after each node that have to be quantized
+
        if (isAffine(parentNode) || isMerging(parentNode) || isNotQuantized(parentNode))
        {
            std::string scalingNodeName = makeUniqueName(parentNode->name() + "_Scaling", graphView);
-            std::shared_ptr<Node> scalingNode = createScalingNode(scalingNodeName, {"isScaling"}, 1.0);
+
+            // XXX XXX XXX
+            // std::shared_ptr<Node> scalingNode = createScalingNode(scalingNodeName, {"isScaling"}, 1.0);
+            std::shared_ptr<Node> scalingNode = BaseQuantizer(1.0, scalingNodeName);
+            addAttr(scalingNode, "isScaling");

            scalingNode->getOperator()->setDataType(DataType::Float64); // getDataType(parentNode)
            scalingNode->getOperator()->setBackend(determineBackend(parentNode));
@@ -506,12 +580,12 @@ void insertScalingNodes(std::shared_ptr<GraphView> graphView)
            if (parentNode->getChildren().size() > 0) {
                insertChildren(parentNode, scalingNode, graphView);
            } else {
-                // Log::info(" last node reached ! ");
                parentNode->addChild(scalingNode, 0, 0);
                graphView->add(scalingNode);
            }
-
-            graphView->add(scalingNode->getParent(1)); // add the scaling factor producer
+            
+            // XXX XXX XXX is it needed ? no more
+            // graphView->add(scalingNode->getParent(1)); 

            // In the case the node is a non-linear operator we want to add an extra
            // scaling node before it to rescale it's input ...
@@ -519,29 +593,24 @@ void insertScalingNodes(std::shared_ptr<GraphView> graphView)
            if (isNotQuantized(parentNode))
            {
                std::string prevScalingNodeName = makeUniqueName(parentNode->name() + "_PrevScaling", graphView);
-                std::shared_ptr<Node> prevScalingNode = createScalingNode(prevScalingNodeName, {"isScaling"}, 1.0);
+
+                // XXX XXX XXX
+                // std::shared_ptr<Node> prevScalingNode = createScalingNode(prevScalingNodeName, {"isScaling"}, 1.0);
+                std::shared_ptr<Node> prevScalingNode = BaseQuantizer(1.0, prevScalingNodeName);
+                addAttr(prevScalingNode, "isScaling");

                prevScalingNode->getOperator()->setDataType(DataType::Float64); // getDataType(parentNode)
                prevScalingNode->getOperator()->setBackend(determineBackend(parentNode));

                graphView->insertParent(parentNode, prevScalingNode, 0, 0, 0);
-                graphView->add(prevScalingNode->getParent(1)); // add the scaling factor producer
+
+                // XXX XXX XXX is it needed ? no more !
+                // graphView->add(prevScalingNode->getParent(1));
            }
        }
    }
 }

-// XXX double check this !
-static bool nodeHasBias(std::shared_ptr<Node> node)
-{
-    if (node->getParents().size() == 3) {
-        std::shared_ptr<Tensor> biasTensor = getBiasTensor(node);
-        if (biasTensor)
-            return true;
-    }
-    return false;
-}
-
 void normalizeParameters(std::shared_ptr<GraphView> graphView)
 {
    // CREATE THE ACCUMULATED RATIO MAP ///////////////////////////////////////
@@ -574,11 +643,12 @@ void normalizeParameters(std::shared_ptr<GraphView> graphView)
            // Rescale the weight tensor
            
            std::shared_ptr<Tensor> weightTensor = getWeightTensor(node);
-            double scaling = getTensorAbsoluteMax(weightTensor);
-            double ratio = 1.0 / scaling;
+
+            double ratio = 1.0 / getTensorAbsoluteMax(weightTensor);

            //rescaleTensor(weightTensor, ratio);
-            insertScalingBelowProducer(node->getParent(1), ratio, graphView);
+            // XXX XXX XXX insertScalingBelowProducer(node->getParent(1), ratio, graphView);
+            multiplyScalingFactor(node->getParent(1), ratio);

            // Accumulate the ratio

@@ -595,7 +665,8 @@ void normalizeParameters(std::shared_ptr<GraphView> graphView)
            {
                std::shared_ptr<Tensor> biasTensor = getBiasTensor(node);
                //rescaleTensor(biasTensor, accumulatedRatios[node] );
-                insertScalingBelowProducer(node->getParent(2), accumulatedRatios[node], graphView);
+                // XXX XXX XXX insertScalingBelowProducer(node->getParent(2), accumulatedRatios[node], graphView);
+                multiplyScalingFactor(node->getParent(2), accumulatedRatios[node]);
            }
        }


--- a/src/operator/PTQMetaOps.cpp
+++ b/src/operator/PTQMetaOps.cpp
@@ -30,10 +30,30 @@
 #include "aidge/operator/OperatorTensor.hpp"
 #include "aidge/utils/Log.hpp"

-
 namespace Aidge
 {

+std::shared_ptr<Node> BaseQuantizer(double scalingFactor, const std::string& name)
+{ // Builds a "BaseQuantizer" meta-operator node from a graph holding one Mul node and the Producer feeding that Mul's input #1.
+    std::shared_ptr<Node> mulNode =  Mul(name.empty() ? "" : name + "_MulQuant"); // inner Mul: unnamed when name is empty, otherwise name + "_MulQuant"
+
+    // Scaling Factor Producer: a 1-element double tensor holding scalingFactor, set as output #0 of a Producer attached to input #1 of the Mul
+
+    std::shared_ptr<Tensor> scalingFactorTensor = std::make_shared<Tensor>(Array1D<double, 1> {scalingFactor});
+    std::shared_ptr<Node> scalingFactorProducer = addProducer<1>(mulNode, 1, {1}, "ScalingFactor");
+    scalingFactorProducer->getOperator()->setOutput(0, scalingFactorTensor);
+    // Gather both nodes in one GraphView: Sequential() only receives the Mul, so the Producer is added explicitly.
+    std::shared_ptr<GraphView> graphView = Sequential({mulNode});
+    graphView->add(scalingFactorProducer);
+
+    // alternative : capture the Producer ...
+    // std::shared_ptr<GraphView> connectedGraphView = getConnectedGraphView(mulNode);
+
+    std::shared_ptr<Node> metaopNode = MetaOperator("BaseQuantizer", graphView, {}, name); // type string "BaseQuantizer", third argument left empty; XXX an alternative prototype was being considered here
+
+    return metaopNode;
+}
+
 std::shared_ptr<Node> Quantizer(double scalingFactor, double clipMin, double clipMax, const std::string& name)
 {
    // create the nodes