diff --git a/include/aidge/operator/PTQMetaOps.hpp b/include/aidge/operator/PTQMetaOps.hpp index 9ca76fbd40b9366aa82c6521fba931d284da137a..85bf0f6cfc876c15da1ee4f31809948c6efdbc98 100644 --- a/include/aidge/operator/PTQMetaOps.hpp +++ b/include/aidge/operator/PTQMetaOps.hpp @@ -19,6 +19,10 @@ namespace Aidge { + // XXX XXX XXX + std::shared_ptr<Aidge::Node> BaseQuantizer(double scalingFactor, const std::string& name); + + /// @brief Quantizer acts as a meta-operator to handle scaling operations in the PTQ, replacing the Scaling Operator. /// This operator is composed of a sequence of [Mul] -> [Clip] -> [Round] operations. /// diff --git a/include/aidge/quantization/PTQ/PTQ.hpp b/include/aidge/quantization/PTQ/PTQ.hpp index 1c911801c543cac8cb464acaab80e6061703e6e7..5a15f06ccd7922fc931cd1d05af507588cec5162 100644 --- a/include/aidge/quantization/PTQ/PTQ.hpp +++ b/include/aidge/quantization/PTQ/PTQ.hpp @@ -89,15 +89,12 @@ namespace Aidge { std::vector<std::shared_ptr<Node>> retrieveNodeVector(std::shared_ptr<GraphView> graphView, bool newSchedule = true, bool verbose = false); /** - * @brief Inserts a scaling node below the given producer node in the graph view. - * If the node is already a producer scaling node, it accumulates the scaling factor by multiplyins its value directly. - * + * @brief Inserts a scaling node below the given producer node in the graphView. * @param node A shared pointer to the producer node where the scaling node will be inserted (below). - * @param scalingFactor The scaling factor to apply. * @param graphView A shared pointer to the graph view in which the nodes are located. * @return True if the scaling node was successfully inserted or the scaling factor was accumulated; False otherwise. */ - bool insertScalingBelowProducer(std::shared_ptr<Node> node, double scalingFactor, std::shared_ptr<GraphView> graphView); + void insertScalingBelowProducer(std::shared_ptr<Node> node, std::shared_ptr<GraphView> graphView); /** * @brief Inserts a rounding node below the given producer (also below its ows producerScaling) node in the graph view. diff --git a/src/PTQ/CLE.cpp b/src/PTQ/CLE.cpp index 57787a8951a513cd0dc8660c6ef3a99b63e74729..74bbc90c3937db70444d4ad6e8f1b3a51bd80529 100644 --- a/src/PTQ/CLE.cpp +++ b/src/PTQ/CLE.cpp @@ -78,6 +78,7 @@ std::shared_ptr<Aidge::Tensor> getLocalTensor(std::shared_ptr<Node> node) void crossLayerEqualization(std::shared_ptr<GraphView> graphView, double targetDelta) { +/* std::vector<std::shared_ptr<Node>> nodeVector = retrieveNodeVector(graphView); // Check if the CLE can be applied ... @@ -137,6 +138,7 @@ void crossLayerEqualization(std::shared_ptr<GraphView> graphView, double targetD } } while (maxRangeDelta > targetDelta); +*/ } } \ No newline at end of file diff --git a/src/PTQ/PTQ.cpp b/src/PTQ/PTQ.cpp index 0eecc450d7567b8eb0421cd95251ba8ace447a7e..c79cd4eba1292fd4a86531df508bae46c5f7734c 100644 --- a/src/PTQ/PTQ.cpp +++ b/src/PTQ/PTQ.cpp @@ -34,6 +34,9 @@ #include "aidge/recipes/Recipes.hpp" #include "aidge/recipes/QuantRecipes.hpp" +#include "aidge/operator/MetaOperator.hpp" + + namespace Aidge { @@ -204,8 +207,9 @@ static int getInputIndex(std::shared_ptr<Node> node, std::shared_ptr<Node> paren return index; } -void multiplyScalingFactor(std::shared_ptr<Aidge::Node> node, double coeff) +void multiplyScalingFactor(std::shared_ptr<Aidge::Node> scalingNode, double coeff) { +/* AIDGE_ASSERT(node->type() == "Mul" && hasAttr(node, "isProducerScaling") || hasAttr(node, "isScaling"), "Cannot update the scaling factor on Node of type {} with no scaling tag", node->type()); @@ -217,6 +221,37 @@ void multiplyScalingFactor(std::shared_ptr<Aidge::Node> node, double coeff) std::shared_ptr<Tensor> resultTensor = std::make_shared<Tensor>(Array1D<double, 1> {previousScalingFactor * coeff}); node->input(1).first->getOperator()->setOutput(0, resultTensor); +*/ + + auto metaOperatorOp = std::static_pointer_cast<MetaOperator_Op> (scalingNode->getOperator()); + + // Get the Mul node from the microGraph + + std::shared_ptr<Node> mulNode = nullptr; + auto microGraph = metaOperatorOp->getMicroGraph(); + for (auto node : microGraph->getNodes()) + if (node->type() == "Mul") + mulNode = node; + + // Retreive the previous scaling factor + + auto scalingFactorTensor = std::static_pointer_cast<OperatorTensor>(mulNode->getOperator())->getInput(1); + + std::shared_ptr<Tensor> fallback; + const Tensor& localTensor = scalingFactorTensor->refCastFrom(fallback, DataType::Float64, "cpu"); + double prevScalingFactor = localTensor.get<double>(0); + + // Create the new scaling factor tensor + + std::shared_ptr<Tensor> newScalingFactorTensor = std::make_shared<Tensor>(prevScalingFactor * coeff); + newScalingFactorTensor->setBackend(scalingFactorTensor->backend()); + newScalingFactorTensor->setDataType(scalingFactorTensor->dataType()); + + // Set the tensor of the producer + + auto producer = mulNode->getParent(1); + producer->getOperator()->setOutput(0, newScalingFactorTensor); + // XXX old way : mulNode->input(1).first->getOperator()->setOutput(0, resultTensor); } // Utility function that insert a node below another one already connected @@ -303,28 +338,6 @@ double getTensorAbsoluteMax(std::shared_ptr<Tensor> tensor) return localFlatTensor.get<double>(maxIndex); } - -// TODO : pass nodeVector by reference ... -static std::vector<std::shared_ptr<Node>> removeMatchingNodes(std::vector<std::shared_ptr<Node>> nodeVector, std::string nodeType) -{ - std::vector<std::shared_ptr<Node>> remainingNodes; - for (std::shared_ptr<Node> node : nodeVector) - if (node->type() != nodeType) - remainingNodes.push_back(node); - - return remainingNodes; -} - -static std::vector<std::shared_ptr<Node>> removeProdScalingNodes(std::vector<std::shared_ptr<Node>> nodeVector) -{ - std::vector<std::shared_ptr<Node>> remainingNodes; - for (std::shared_ptr<Node> node : nodeVector) - if (!hasAttr(node, "isProducerScaling")) - remainingNodes.push_back(node); - - return remainingNodes; -} - static void fixScheduling(std::vector<std::shared_ptr<Node>>& nodeVector) { std::vector<std::shared_ptr<Node>> correctedVector; @@ -344,22 +357,42 @@ static void fixScheduling(std::vector<std::shared_ptr<Node>>& nodeVector) { static std::shared_ptr<Tensor> getWeightTensor(std::shared_ptr<Node> node) { - return std::static_pointer_cast<OperatorTensor>(node->getOperator())->getInput(1); + std::shared_ptr<Node> producer = node->getParent(1); + + if (producer->type() == "BaseQuantizer") + producer = producer->getParent(0); + + return std::static_pointer_cast<OperatorTensor>(producer->getOperator())->getOutput(0); } static std::shared_ptr<Tensor> getBiasTensor(std::shared_ptr<Node> node) { - return std::static_pointer_cast<OperatorTensor>(node->getOperator())->getInput(2); + std::shared_ptr<Node> producer = node->getParent(2); + + if (producer->type() == "BaseQuantizer") + producer = producer->getParent(0); + + return std::static_pointer_cast<OperatorTensor>(producer->getOperator())->getOutput(0); } std::vector<std::shared_ptr<Node>> retrieveNodeVector(std::shared_ptr<GraphView> graphView, bool newSchedule, bool verbose) { std::vector<std::shared_ptr<Node>> nodeVector = graphView->getOrderedNodes(); + // Remove duplicate nodes. Is it still needed ??? + fixScheduling(nodeVector); - nodeVector = removeMatchingNodes(nodeVector, "Producer"); - nodeVector = removeProdScalingNodes(nodeVector); + // Remove Producers and their Scalings + + std::vector<std::shared_ptr<Node>> remainingNodes; + for (std::shared_ptr<Node> node : nodeVector) + if ((node->type() != "Producer") && !hasAttr(node, "isProducerScaling")) + remainingNodes.push_back(node); + + nodeVector = remainingNodes; + + // Verbose if (verbose) { @@ -383,6 +416,7 @@ static DataType getDataType(std::shared_ptr<Node> node) return op->getOutput(0)->dataType(); } +/* static std::shared_ptr<Aidge::Node> createScalingNode(std::string name, std::vector<std::string> attributes, double value) { std::shared_ptr<Node> scalingNode = Mul(name); @@ -401,26 +435,41 @@ static std::shared_ptr<Aidge::Node> createScalingNode(std::string name, std::vec return scalingNode; } +*/ -bool insertScalingBelowProducer(std::shared_ptr<Node> producerNode, double scalingFactor, std::shared_ptr<GraphView> graphView) +// XXX double check this ! +static bool nodeHasBias(std::shared_ptr<Node> node) { - if (hasAttr(producerNode, "isProducerRounding")) - { - // In this case we 'bump' the node to the one above him (an actual ProducerScaling) - // because the round node is not usable (only used when SSA is enabled) - producerNode = producerNode->getParent(0); + if (node->getParents().size() == 3) { + std::shared_ptr<Tensor> biasTensor = getBiasTensor(node); + if (biasTensor) + return true; } + return false; +} - if (hasAttr(producerNode, "isProducerScaling")) - { - // We accumulate the previous scaling factors by multiplying the SF of the ProducerScaling node - // (adding new nodes each time would make the graph unusable) - multiplyScalingFactor(producerNode, scalingFactor); - return true; +// TODO: rework this ! +static std::shared_ptr<Node> getPreviousScalingNode(std::shared_ptr<Node> node) +{ + std::shared_ptr<Node> currNode = node; + while(!hasAttr(currNode, "isScaling")) { + if (currNode->getParents().size() == 0) { + Log::warn(" Warning : No previous Scaling node were found ! "); + break; + } + currNode = currNode->getParents()[0]; } + return currNode; +} - AIDGE_ASSERT(producerNode->type() == "Producer", " Cannot apply a scaling factor on node of type: {} which is not a Producer", producerNode->type()); - +void insertScalingBelowProducer(std::shared_ptr<Node> producerNode, double ratio, std::shared_ptr<GraphView> graphView) +{ + Log::warn(" DUMMY ! "); +} + +void insertScalingBelowProducer(std::shared_ptr<Node> producerNode, std::shared_ptr<GraphView> graphView) +{ +/* std::string scalingNodeName = makeUniqueName(producerNode->name() + "_ProducerScaling", graphView); std::shared_ptr<Node> scalingNode = createScalingNode(scalingNodeName, {"isProducerScaling"}, scalingFactor); @@ -429,8 +478,35 @@ bool insertScalingBelowProducer(std::shared_ptr<Node> producerNode, double scali insertChildren(producerNode, scalingNode, graphView); graphView->add(scalingNode->getParent(1)); // add the scaling factor producer +*/ - return true; + std::string scalingNodeName = makeUniqueName(producerNode->name() + "_ProducerScaling", graphView); + std::shared_ptr<Node> scalingNode = BaseQuantizer(1.0, scalingNodeName);; + addAttr(scalingNode, "isProducerScaling"); + // XXX XXX XXX addAttr(scalingNode, "isScaling"); + + scalingNode->getOperator()->setDataType(DataType::Float64); // getDataType(parentNode) + scalingNode->getOperator()->setBackend(determineBackend(producerNode)); // XXX use the producer parent instead ??? + + insertChildren(producerNode, scalingNode, graphView); + + // XXX XXX XXX is it needed ? + // graphView->add(scalingNode->getParent(1)); +} + +void insertProducerScalingNodes(std::shared_ptr<GraphView> graphView) +{ + std::set<std::shared_ptr<Node>> nodeSet = graphView->getNodes(); + + for (std::shared_ptr<Node> node : nodeSet) + { + if (isAffine(node)) + { + insertScalingBelowProducer(node->getParent(1), graphView); + if (nodeHasBias(node)) + insertScalingBelowProducer(node->getParent(2), graphView); + } + } } // XXX HERE : Branches containing only Seamless nodes should be considered as residual too !!! @@ -458,47 +534,45 @@ void insertResidualScalingNodes(std::shared_ptr<GraphView> graphView) Log::info(" ### inserting multiplicative node ..."); std::string residualNodeName = makeUniqueName(parentNode->name() + "_Res", graphView); - std::shared_ptr<Node> residualNode = createScalingNode(residualNodeName, {"isScaling", "isResidual"}, 1.0); + + // XXX XXX XXX + // std::shared_ptr<Node> residualNode = createScalingNode(residualNodeName, {"isScaling", "isResidual"}, 1.0); + std::shared_ptr<Node> residualNode = BaseQuantizer(1.0, residualNodeName); + addAttr(residualNode, "isScaling"); + addAttr(residualNode, "isResidual"); residualNode->getOperator()->setDataType(DataType::Float64); // getDataType(parentNode) residualNode->getOperator()->setBackend(determineBackend(parentNode)); graphView->insertParent(node, residualNode, i, 0, 0); - graphView->add(residualNode->getParent(1)); // add the scaling factor producer + // XXX XXX XXX is it needed ? no more ! + // graphView->add(residualNode->getParent(1)); } } } } } -static std::shared_ptr<Node> getPreviousScalingNode(std::shared_ptr<Node> node) -{ - std::shared_ptr<Node> currNode = node; - while(!hasAttr(currNode, "isScaling")) - { - if (currNode->getParents().size() == 0) - { - Log::warn(" Warning : No previous Scaling node were found ! "); - break; - } - currNode = currNode->getParents()[0]; - } - return currNode; -} - void insertScalingNodes(std::shared_ptr<GraphView> graphView) { + insertProducerScalingNodes(graphView); insertResidualScalingNodes(graphView); std::set<std::shared_ptr<Node>> nodeSet = graphView->getNodes(); for (std::shared_ptr<Node> parentNode : nodeSet) { + // Insert a Scaling node after each node that have to be quantized + if (isAffine(parentNode) || isMerging(parentNode) || isNotQuantized(parentNode)) { std::string scalingNodeName = makeUniqueName(parentNode->name() + "_Scaling", graphView); - std::shared_ptr<Node> scalingNode = createScalingNode(scalingNodeName, {"isScaling"}, 1.0); + + // XXX XXX XXX + // std::shared_ptr<Node> scalingNode = createScalingNode(scalingNodeName, {"isScaling"}, 1.0); + std::shared_ptr<Node> scalingNode = BaseQuantizer(1.0, scalingNodeName); + addAttr(scalingNode, "isScaling"); scalingNode->getOperator()->setDataType(DataType::Float64); // getDataType(parentNode) scalingNode->getOperator()->setBackend(determineBackend(parentNode)); @@ -506,12 +580,12 @@ void insertScalingNodes(std::shared_ptr<GraphView> graphView) if (parentNode->getChildren().size() > 0) { insertChildren(parentNode, scalingNode, graphView); } else { - // Log::info(" last node reached ! "); parentNode->addChild(scalingNode, 0, 0); graphView->add(scalingNode); } - - graphView->add(scalingNode->getParent(1)); // add the scaling factor producer + + // XXX XXX XXX is it needed ? no more + // graphView->add(scalingNode->getParent(1)); // In the case the node is a non-linear operator we want to add an extra // scaling node before it to rescale it's input ... @@ -519,29 +593,24 @@ void insertScalingNodes(std::shared_ptr<GraphView> graphView) if (isNotQuantized(parentNode)) { std::string prevScalingNodeName = makeUniqueName(parentNode->name() + "_PrevScaling", graphView); - std::shared_ptr<Node> prevScalingNode = createScalingNode(prevScalingNodeName, {"isScaling"}, 1.0); + + // XXX XXX XXX + // std::shared_ptr<Node> prevScalingNode = createScalingNode(prevScalingNodeName, {"isScaling"}, 1.0); + std::shared_ptr<Node> prevScalingNode = BaseQuantizer(1.0, prevScalingNodeName); + addAttr(prevScalingNode, "isScaling"); prevScalingNode->getOperator()->setDataType(DataType::Float64); // getDataType(parentNode) prevScalingNode->getOperator()->setBackend(determineBackend(parentNode)); graphView->insertParent(parentNode, prevScalingNode, 0, 0, 0); - graphView->add(prevScalingNode->getParent(1)); // add the scaling factor producer + + // XXX XXX XXX is it needed ? no more ! + // graphView->add(prevScalingNode->getParent(1)); } } } } -// XXX double check this ! -static bool nodeHasBias(std::shared_ptr<Node> node) -{ - if (node->getParents().size() == 3) { - std::shared_ptr<Tensor> biasTensor = getBiasTensor(node); - if (biasTensor) - return true; - } - return false; -} - void normalizeParameters(std::shared_ptr<GraphView> graphView) { // CREATE THE ACCUMULATED RATIO MAP /////////////////////////////////////// @@ -574,11 +643,12 @@ void normalizeParameters(std::shared_ptr<GraphView> graphView) // Rescale the weight tensor std::shared_ptr<Tensor> weightTensor = getWeightTensor(node); - double scaling = getTensorAbsoluteMax(weightTensor); - double ratio = 1.0 / scaling; + + double ratio = 1.0 / getTensorAbsoluteMax(weightTensor); //rescaleTensor(weightTensor, ratio); - insertScalingBelowProducer(node->getParent(1), ratio, graphView); + // XXX XXX XXX insertScalingBelowProducer(node->getParent(1), ratio, graphView); + multiplyScalingFactor(node->getParent(1), ratio); // Accumulate the ratio @@ -595,7 +665,8 @@ void normalizeParameters(std::shared_ptr<GraphView> graphView) { std::shared_ptr<Tensor> biasTensor = getBiasTensor(node); //rescaleTensor(biasTensor, accumulatedRatios[node] ); - insertScalingBelowProducer(node->getParent(2), accumulatedRatios[node], graphView); + // XXX XXX XXX insertScalingBelowProducer(node->getParent(2), accumulatedRatios[node], graphView); + multiplyScalingFactor(node->getParent(2), accumulatedRatios[node]); } } diff --git a/src/operator/PTQMetaOps.cpp b/src/operator/PTQMetaOps.cpp index c70a7726c143ed4cd028099f849de25a16ab11d3..9dae53174f1cd754a5134ffea77fabe743d50940 100644 --- a/src/operator/PTQMetaOps.cpp +++ b/src/operator/PTQMetaOps.cpp @@ -30,10 +30,30 @@ #include "aidge/operator/OperatorTensor.hpp" #include "aidge/utils/Log.hpp" - namespace Aidge { +std::shared_ptr<Node> BaseQuantizer(double scalingFactor, const std::string& name) +{ + std::shared_ptr<Node> mulNode = Mul(name.empty() ? "" : name + "_MulQuant"); + + // Scaling Factor Producer + + std::shared_ptr<Tensor> scalingFactorTensor = std::make_shared<Tensor>(Array1D<double, 1> {scalingFactor}); + std::shared_ptr<Node> scalingFactorProducer = addProducer<1>(mulNode, 1, {1}, "ScalingFactor"); + scalingFactorProducer->getOperator()->setOutput(0, scalingFactorTensor); + + std::shared_ptr<GraphView> graphView = Sequential({mulNode}); + graphView->add(scalingFactorProducer); + + // alternative : capture the Producer ... + // std::shared_ptr<GraphView> connectedGraphView = getConnectedGraphView(mulNode); + + std::shared_ptr<Node> metaopNode = MetaOperator("BaseQuantizer", graphView, {}, name); // XXX alternative prototype -> + + return metaopNode; +} + std::shared_ptr<Node> Quantizer(double scalingFactor, double clipMin, double clipMax, const std::string& name) { // create the nodes