diff --git a/src/PTQ/PTQ.cpp b/src/PTQ/PTQ.cpp
index fe717bbdbae7d5bb3bd74fe65124bfce8f59da2c..637407c0433b46908600d33364eb379aaace8307 100644
--- a/src/PTQ/PTQ.cpp
+++ b/src/PTQ/PTQ.cpp
@@ -50,6 +50,7 @@ bool isMerging(std::shared_ptr<Node> node)
 {
     return (mergingNodeTypes.find(node->type()) != mergingNodeTypes.end());
 }
+
 static int getInputIndex(std::shared_ptr<Node> node, std::shared_ptr<Node> parentNode)
 {
     int index = 0;
@@ -58,27 +59,27 @@ static int getInputIndex(std::shared_ptr<Node> node, std::shared_ptr<Node> paren
     return index;
 }
 
-
-void multiplyScalingFactor(std::shared_ptr<Aidge::Node> node,double coeff)
+void multiplyScalingFactor(std::shared_ptr<Aidge::Node> node, double coeff)
 {
     AIDGE_ASSERT(node->type() == "Mul" && (node->attributes()->hasAttr("quantization.ptq.isProducerScaling") || node->attributes()->hasAttr("quantization.ptq.isScaling")), "Cannot update the scaling factor on Node of type {} with no scaling tag", node->type());
     auto scalingFactorTensor = std::static_pointer_cast<OperatorTensor>(node->getOperator())->getInput(1);
+
     std::shared_ptr<Tensor> fallback;
     const Tensor& localTensor = scalingFactorTensor->refCastFrom(fallback, DataType::Float64, "cpu");
     double previousScalingFactor = localTensor.get<double>(0);
 
-    std::shared_ptr<Tensor> finalTensor = std::make_shared<Tensor>(Array1D<double, 1> {previousScalingFactor * coeff});
-    node->input(1).first->getOperator()->setOutput(0, finalTensor);
+
+    std::shared_ptr<Tensor> resultTensor = std::make_shared<Tensor>(Array1D<double, 1> {previousScalingFactor * coeff});
+    node->input(1).first->getOperator()->setOutput(0, resultTensor);
 }
 
-/* Util function to insert a node below another one already connected */
-void insertNodeBetween(std::shared_ptr<Node> parent,
-                       std::shared_ptr<Node> newNode,
-                       std::shared_ptr<GraphView> graphView)
+
+// Utility function that inserts a node below another one already connected
+static void insertChildren(std::shared_ptr<Node> parent, std::shared_ptr<Node> newNode, std::shared_ptr<GraphView> graphView)
 {
     // Checking the parents always have at least 1 children
-    AIDGE_ASSERT(parent->getChildren().size() > 0, "The parent node must have at least one child to insert a new node.");
+    AIDGE_ASSERT(parent->getChildren().size() > 0, " Parent node must have at least one child to insert a new node ! ");
 
-    // Retrieve children connection indexes
+    // Retrieve children connection indexes
     std::vector<std::shared_ptr<Node>> nextNodes = parent->getChildren(0);
     std::vector<int> inputIndices(nextNodes.size());
     for (std::size_t i = 0; i < nextNodes.size(); i++) {
@@ -99,54 +100,20 @@ void insertNodeBetween(std::shared_ptr<Node> parent,
     graphView->add(newNode);
 }
 
-bool insertRoundBelowProducer(std::shared_ptr<Node> node,std::shared_ptr<GraphView> graphView)
+bool insertRoundBelowProducer(std::shared_ptr<Node> node, std::shared_ptr<GraphView> graphView)
 {
-    if(node->attributes()->hasAttr("quantization.ptq.isProducerScaling") && node->type() != "Round")
+    if (node->attributes()->hasAttr("quantization.ptq.isProducerScaling") && node->type() != "Round")
     {
         std::shared_ptr<Aidge::Node> roundNode = Round(node->name() + "_Round");
         roundNode->getOperator()->setDataType(DataType::Float64); // getDataType(parentNode)
         roundNode->getOperator()->setBackend(node->getOperator()->backend());
-        insertNodeBetween(node,roundNode,graphView);
-        roundNode->attributes()->addAttr("quantization.ptq.isProducerRounding",0.0);
+        insertChildren(node, roundNode, graphView);
+        roundNode->attributes()->addAttr("quantization.ptq.isProducerRounding", 0.0);
         return true;
     }
     return false;
 }
 
-bool insertScalingBelowProducer(std::shared_ptr<Node> node,double scalingFactor, std::shared_ptr<GraphView> graphView)
-{
-    if(node->attributes()->hasAttr("quantization.ptq.isProducerRounding"))
-    {
-        //In this case we 'bump' the node to the one above him (an actual ProducerScaling)
-        // because the round node is not usable (only used when SSA is enabled)
-        node = node->getParent(0);
-    }
-    if(node->attributes()->hasAttr("quantization.ptq.isProducerScaling"))
-    {
-        // We accumulate the multiples scaling factors by multiplying the SF of the ProducerScaling node
-        // (adding new nodes each time would make the graph unusable)
-        multiplyScalingFactor(node,scalingFactor);
-        return true;
-    }
-    AIDGE_ASSERT(node->type() == "Producer","Cannot apply a scaling factor on node of type: {} which is not a producer", node->type());
-    std::string scalingNodeName = makeUniqueName(node->name() + "_ProducerScaling", graphView);
-
-    std::shared_ptr<Aidge::Node> scalingNode = Mul(scalingNodeName);
-    scalingNode->attributes()->addAttr("quantization.ptq.isProducerScaling",0.0);
-
-    std::shared_ptr<Tensor> scalingFactorTensor = std::make_shared<Tensor>(Array1D<double, 1> {scalingFactor});
-    std::shared_ptr<Node> scalingFactorProducer = addProducer(scalingNode, 1, {1}, "Factor");
-    scalingFactorProducer->getOperator()->setOutput(0, scalingFactorTensor);
-    graphView->add(scalingFactorProducer);
-
-    scalingNode->getOperator()->setDataType(DataType::Float64);
-    std::string producerBackend = std::static_pointer_cast<OperatorTensor>(node->getOperator())->getOutput(0)->backend();
-    scalingNode->getOperator()->setBackend(producerBackend);
-
-    insertNodeBetween(node, scalingNode, graphView);
-
-    return true;
-}
 
 bool checkArchitecture(std::shared_ptr<GraphView> graphView)
 {
@@ -212,6 +179,7 @@ static std::vector<std::shared_ptr<Node>> removeMatchingNodes(std::vector<std::s
     return remainingNodes;
 }
 
+
 static std::vector<std::shared_ptr<Node>> removeProdScalingNodes(std::vector<std::shared_ptr<Node>> nodeVector)
 {
     std::vector<std::shared_ptr<Node>> remainingNodes;
@@ -291,8 +259,7 @@ void prepareNetwork(std::shared_ptr<GraphView> graphView)
     std::vector<std::shared_ptr<Node>> nodeVector = retrieveNodeVector(graphView);
 
     for (std::shared_ptr<Node> node : nodeVector)
-        if (node->type() == "BatchNorm")
-        {
+        if (node->type() == "BatchNorm") {
            containsBatchNorm = true;
            break;
        }
@@ -310,8 +277,58 @@ static DataType getDataType(std::shared_ptr<Node> node)
 {
     return op->getOutput(0)->dataType();
 }
 
+static std::shared_ptr<Aidge::Node> createScalingNode(std::string name, std::vector<std::string> attributes, double value, std::shared_ptr<GraphView> graphView)
+{
+    std::shared_ptr<Node> scalingNode = Mul(name);
+
+    for (std::string attr : attributes)
+        scalingNode->attributes()->addAttr("quantization.ptq." + attr, 0.0);
+
+    // Add the scaling factor as a producer of the node
+
+    std::shared_ptr<Tensor> scalingFactorTensor = std::make_shared<Tensor>(Array1D<double, 1> {value});
+    std::shared_ptr<Node> scalingFactorProducer = addProducer(scalingNode, 1, {1}, "ScalingFactor");
+
+    scalingFactorProducer->getOperator()->setOutput(0, scalingFactorTensor);
+
+    graphView->add(scalingFactorProducer);
+
+    return scalingNode;
+}
+
+bool insertScalingBelowProducer(std::shared_ptr<Node> producerNode, double scalingFactor, std::shared_ptr<GraphView> graphView)
+{
+    if (producerNode->attributes()->hasAttr("quantization.ptq.isProducerRounding"))
+    {
+        // In this case we 'bump' the node to the one above it (an actual ProducerScaling)
+        // because the round node is not usable (only used when SSA is enabled)
+        producerNode = producerNode->getParent(0);
+    }
+
+    if (producerNode->attributes()->hasAttr("quantization.ptq.isProducerScaling"))
+    {
+        // We accumulate the previous scaling factors by multiplying the SF of the ProducerScaling node
+        // (adding new nodes each time would make the graph unusable)
+        multiplyScalingFactor(producerNode, scalingFactor);
+        return true;
+    }
+
+    AIDGE_ASSERT(producerNode->type() == "Producer", " Cannot apply a scaling factor on node of type: {} which is not a Producer", producerNode->type());
+
+    std::string scalingNodeName = makeUniqueName(producerNode->name() + "_ProducerScaling", graphView);
+    std::shared_ptr<Node> scalingNode = createScalingNode(scalingNodeName, {"isProducerScaling"}, scalingFactor, graphView);
+
+    scalingNode->getOperator()->setDataType(DataType::Float64); // getDataType(parentNode)
+    auto producerOp = std::static_pointer_cast<OperatorTensor>(producerNode->getOperator());
+    scalingNode->getOperator()->setBackend(producerOp->getOutput(0)->backend());
+
+    insertChildren(producerNode, scalingNode, graphView);
+
+    return true;
+}
+
 // XXX HERE : Branches containing only Seamless nodes should be considered as residual too !!!
-void insertResidualNodes(std::shared_ptr<GraphView> graphView)
+void insertResidualScalingNodes(std::shared_ptr<GraphView> graphView)
 {
     // TODO: double check this ...
@@ -330,92 +347,91 @@ void insertResidualNodes(std::shared_ptr<GraphView> graphView)
             if (parentIsForking)
             {
                 // temporary verbose ...
+ Log::info(" ### found residual branch at index {}", i); Log::info(" ### inserting multiplicative node ..."); std::string residualNodeName = makeUniqueName(parentNode->name() + "_Res", graphView); - std::shared_ptr<Node> residualNode = Mul(residualNodeName); - residualNode->attributes()->addAttr("quantization.ptq.isScaling", 0.0); - residualNode->attributes()->addAttr("quantization.ptq.isResidual", 0.0); - - //Adding the SF as a producer of the node - std::shared_ptr<Tensor> scalingFactorTensor = std::make_shared<Tensor>(Array1D<double, 1> {1.0}); - std::shared_ptr<Node> scalingFactorProducer = addProducer(residualNode, 1, {1}, "ScalingFactor"); - scalingFactorProducer->getOperator()->setOutput(0, scalingFactorTensor); + std::shared_ptr<Node> residualNode = createScalingNode(residualNodeName, {"isScaling", "isResidual"}, 1.0, graphView); residualNode->getOperator()->setDataType(DataType::Float64); // getDataType(parentNode) residualNode->getOperator()->setBackend(parentNode->getOperator()->backend()); graphView->insertParent(node, residualNode, i, 0, 0); - graphView->add(scalingFactorProducer); } } } } } +static std::shared_ptr<Node> getPreviousScalingNode(std::shared_ptr<Node> node) +{ + std::shared_ptr<Node> currNode = node; + while(!currNode->attributes()->hasAttr("quantization.ptq.isScaling")) + { + if (currNode->getParents().size() == 0) + { + Log::warn(" Warning : No previous Scaling node were found ! "); + break; + } + currNode = currNode->getParents()[0]; + } + return currNode; +} void insertScalingNodes(std::shared_ptr<GraphView> graphView) { - insertResidualNodes(graphView); + insertResidualScalingNodes(graphView); std::set<std::shared_ptr<Node>> nodeSet = graphView->getNodes(); for (std::shared_ptr<Node> parentNode : nodeSet) { - if (isAffine(parentNode) || isMerging(parentNode)) + if (isAffine(parentNode) || isMerging(parentNode) || (parentNode->type() == "Sigmoid")) { std::string scalingNodeName = makeUniqueName(parentNode->name() + "_Scaling", graphView); - //std::shared_ptr<Node> scalingNode = Scaling(1.0, scalingNodeName); - - //Adding Mul operator with tag "quantization.ptq.isScaling" - std::shared_ptr<Aidge::Node> scalingNode = Mul(scalingNodeName); - scalingNode->attributes()->addAttr("quantization.ptq.isScaling",0.0); - - //Adding the SF as a producer of the node - std::shared_ptr<Tensor> scalingFactorTensor = std::make_shared<Tensor>(Array1D<double, 1> {1.0}); - std::shared_ptr<Node> scalingFactorProducer = addProducer(scalingNode, 1, {1}, "ScalingFactor"); - scalingFactorProducer->getOperator()->setOutput(0, scalingFactorTensor); + std::shared_ptr<Node> scalingNode = createScalingNode(scalingNodeName, {"isScaling"}, 1.0, graphView); scalingNode->getOperator()->setDataType(DataType::Float64); // getDataType(parentNode) scalingNode->getOperator()->setBackend(parentNode->getOperator()->backend()); - if (parentNode->getChildren().size() > 0) - { - insertNodeBetween(parentNode,scalingNode,graphView); - graphView->add(scalingFactorProducer); - } - else - { + if (parentNode->getChildren().size() > 0) { + insertChildren(parentNode, scalingNode, graphView); + } else { // Log::info(" last node reached ! 
"); parentNode->addChild(scalingNode, 0, 0); - graphView->add(scalingFactorProducer); graphView->add(scalingNode); } - } - } -} -static std::shared_ptr<Node> getPreviousScalingNode(std::shared_ptr<Node> mergingNode) -{ - std::shared_ptr<Node> currNode = mergingNode; - while(!currNode->attributes()->hasAttr("quantization.ptq.isScaling")) - { - if (currNode->getParents().size() == 0) - { - Log::warn(" Warning : No previous Scaling node were found ! "); - break; + // Non linear function handling starts here ! + + if (parentNode->type() == "Sigmoid") + { + // If the parent is a forking Scaling node, we need an extra Scaling + // node to completely isolate the non linearity ... + + std::shared_ptr<Node> prevScalingNode = getPreviousScalingNode(parentNode); + bool prevScalingNodeIsForking = (prevScalingNode->getChildren().size() > 1); + + if (prevScalingNodeIsForking) + { + std::string prevScalingNodeName = makeUniqueName(parentNode->name() + "_PrevScaling", graphView); + prevScalingNode = createScalingNode(prevScalingNodeName, {"isScaling"}, 1.0, graphView); + + prevScalingNode->getOperator()->setDataType(DataType::Float64); // getDataType(parentNode) + prevScalingNode->getOperator()->setBackend(parentNode->getOperator()->backend()); + + graphView->insertParent(parentNode, prevScalingNode, 0, 0, 0); + } + } } - currNode = currNode->getParents()[0]; } - return currNode; } // XXX double check this ! static bool nodeHasBias(std::shared_ptr<Node> node) { - if (node->getParents().size() == 3) - { + if (node->getParents().size() == 3) { std::shared_ptr<Tensor> biasTensor = getBiasTensor(node); if (biasTensor) return true; @@ -453,19 +469,19 @@ void normalizeParameters(std::shared_ptr<GraphView> graphView) if (isAffine(node)) { // Rescale the weight tensor + std::shared_ptr<Tensor> weightTensor = getWeightTensor(node); double scaling = getTensorAbsoluteMax(weightTensor); double ratio = 1.0 / scaling; + //rescaleTensor(weightTensor, ratio); insertScalingBelowProducer(node->getParent(1), ratio, graphView); // Accumulate the ratio - if (node == firstNode) - { + + if (node == firstNode) { accumulatedRatios[node] = ratio; - } - else - { + } else { std::shared_ptr<Node> prevNode = node->getParent(0); accumulatedRatios[node] = accumulatedRatios[prevNode] * ratio; } @@ -480,11 +496,30 @@ void normalizeParameters(std::shared_ptr<GraphView> graphView) } } + if (node->type() == "Sigmoid") + { + // Gather the previous scaling factor + + std::shared_ptr<Node> prevScalingNode = getPreviousScalingNode(node); + double prevRatio = accumulatedRatios[prevScalingNode]; + + // Cancel the accumulated ratio + + multiplyScalingFactor(prevScalingNode, 1 / prevRatio); + + // Revert the canceling by using the next scaling node + + accumulatedRatios[node] = prevRatio; + std::shared_ptr<Node> nextScalingNode = node->getChildren(0)[0]; + multiplyScalingFactor(nextScalingNode, prevRatio); + } + if (isMerging(node)) { std::vector<std::shared_ptr<Node>> mergingNodes = node->getParents(); // Compute the max ratio ... + double maxRatio = 0; for (std::shared_ptr<Node> mergingNode : mergingNodes) { @@ -503,7 +538,7 @@ void normalizeParameters(std::shared_ptr<GraphView> graphView) std::shared_ptr<Node> scalingNode = getPreviousScalingNode(mergingNode); - multiplyScalingFactor(scalingNode,1/rescaling); + multiplyScalingFactor(scalingNode, 1 / rescaling); accumulatedRatios[mergingNode] /= rescaling; // optional ... 
             }
@@ -963,39 +998,47 @@ static void insertCompensationNodes(std::shared_ptr<GraphView> graphView, std::u
 
     for (std::shared_ptr<Node> node : nodeVector)
     {
-        // A merging node is always followed by a Quantizer node at this point
+        // The appropriate strategy is to check if the Quantizer is not
+        // preceded by a Weighted node (that is not forking), and insert
+        // a coeff node (Compensation) if so ...
+
+        if (node->type() == "Quantizer")
+        {
+            // Note : this works because a Quantizer has only one Parent ...
 
-        if (node->type() == "Quantizer" && (node->attributes()->hasAttr("quantization.ptq.isResidual") || !isAffine(node->getParent(0))))
-        {
+            std::shared_ptr<Node> parentNode = node->getParent(0);
+            bool parentHasWeight = isAffine(parentNode);
+            bool parentIsForking = (parentNode->getChildren().size() > 1);
 
-            // check if the Quantizer is a residual one, and insert a compensation node if so ...
-            // create and insert the multplicative node before the Quantizer
+            if (parentIsForking || !parentHasWeight) // insert a Compensation Node ...
+            {
+                // Create and insert the multiplicative node before the Quantizer
 
-            std::string mulNodeName = makeUniqueName(node->name() + "_Mul", graphView);
-            std::shared_ptr<Node> mulNode = Mul(mulNodeName);
-
-            mulNode->attributes()->addAttr("quantization.ptq.isCompensation",0.0);
-            mulNode->getOperator()->setDataType(DataType::Float64); // getDataType(parentNode)
-            mulNode->getOperator()->setBackend(node->getOperator()->backend());
+                std::string mulNodeName = makeUniqueName(node->name() + "_Mul", graphView);
+                std::shared_ptr<Node> mulNode = Mul(mulNodeName);
+
+                mulNode->attributes()->addAttr("quantization.ptq.isCompensation", 0.0);
+                mulNode->getOperator()->setDataType(DataType::Float64); // getDataType(parentNode)
+                mulNode->getOperator()->setBackend(node->getOperator()->backend());
 
-            graphView->insertParent(node, mulNode, 0, 0, 0);
+                graphView->insertParent(node, mulNode, 0, 0, 0);
 
-            // Add the coeff producer to the multiplier node
+                // Add the coeff producer to the multiplier node
 
-            std::shared_ptr<Node> coeffProducer = addProducer(mulNode, 1, {1}, "");
-            std::shared_ptr<Tensor> coeffTensor = std::make_shared<Tensor>(Array1D<double, 1> {signedMax});
-            coeffProducer->getOperator()->setOutput(0, coeffTensor);
+                std::shared_ptr<Node> coeffProducer = addProducer(mulNode, 1, {1}, "");
+                std::shared_ptr<Tensor> coeffTensor = std::make_shared<Tensor>(Array1D<double, 1> {signedMax});
+                coeffProducer->getOperator()->setOutput(0, coeffTensor);
 
-            coeffProducer->getOperator()->setDataType(DataType::Float64);
-            coeffProducer->getOperator()->setBackend(node->getOperator()->backend());
+                coeffProducer->getOperator()->setDataType(DataType::Float64);
+                coeffProducer->getOperator()->setBackend(node->getOperator()->backend());
 
-            graphView->add(coeffProducer); // needed ?
+                graphView->add(coeffProducer); // needed ?
-            // Adapt the scaling factor value accordingly
+                // Adapt the scaling factor value accordingly
 
-            double currScalingFactor = getScalingFactor(node);
-            updateScalingFactor(node, currScalingFactor / signedMax);
-
+                double currScalingFactor = getScalingFactor(node);
+                updateScalingFactor(node, currScalingFactor / signedMax);
+            }
         }
     }
 }
@@ -1006,33 +1049,33 @@ void performSingleShiftApproximation(std::shared_ptr<GraphView> graphView, bool
 
     for (std::shared_ptr<Node> node : nodeVector)
     {
-        if (isAffine(node) || (node->type() == "Mul" && node->attributes()->hasAttr("quantization.ptq.isCompensation")))
+        if (node->type() == "Quantizer")
         {
-            std::shared_ptr<Node> scalingNode = (*node->getChildren().begin()); // TODO : use index = 0
+            std::shared_ptr<Node> linearNode = node->getParent(0);
 
-            double base = getScalingFactor(scalingNode);
+            double base = getScalingFactor(node);
 
             double approx = std::pow(2, std::ceil(std::log2(base)));
 
-            updateScalingFactor(scalingNode,approx);
+            updateScalingFactor(node, approx);
 
             double ratio = base / approx;
 
-            insertScalingBelowProducer(node->getParent(1),ratio,graphView);
+            insertScalingBelowProducer(linearNode->getParent(1), ratio, graphView);
 
             if (!noQuant)
-                insertRoundBelowProducer(node->getParent(1),graphView);
+                insertRoundBelowProducer(linearNode->getParent(1), graphView);
 
-            if (nodeHasBias(node))
+            if (nodeHasBias(linearNode))
             {
-                insertScalingBelowProducer(node->getParent(2),ratio,graphView);
-
+                insertScalingBelowProducer(linearNode->getParent(2), ratio, graphView);
                 if (!noQuant)
-                    insertRoundBelowProducer(node->getParent(2),graphView);
+                    insertRoundBelowProducer(linearNode->getParent(2), graphView);
             }
         }
     }
 }
 
+
 static void printScalingFactors(std::shared_ptr<GraphView> graphView)
 {
     for (auto node : retrieveNodeVector(graphView))
@@ -1060,48 +1103,48 @@ static void setupDataType(std::shared_ptr<GraphView> graphView, std::vector<std:
 
 void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits, std::vector<std::shared_ptr<Tensor>> inputDataSet, Clipping clippingMode, bool noQuant, bool optimizeSigns, bool singleShift, bool useCuda, bool verbose)
 {
-    Log::info(" === QUANT PTQ 0.2.21 === ");
+    Log::notice(" === QUANT PTQ 0.2.21 === ");
 
     graphView->setBackend("cpu");
 
-    DataType initialDataType = (inputDataSet[0])->dataType();
-    setupDataType(graphView, inputDataSet, DataType::Float64);
-
     if (!checkArchitecture(graphView))
         return;
 
-    Log::info(" Preparing the network for the PTQ ... ");
+    DataType initialDataType = (inputDataSet[0])->dataType();
+    setupDataType(graphView, inputDataSet, DataType::Float64);
+
+    Log::notice(" Preparing the network for the PTQ ... ");
     prepareNetwork(graphView);
 
-    Log::info(" Inserting the scaling nodes ...");
+    Log::notice(" Inserting the scaling nodes ...");
     insertScalingNodes(graphView);
 
     crossLayerEqualization(graphView);
 
-    Log::info(" Normalizing the parameters ...");
+    Log::notice(" Normalizing the parameters ...");
     normalizeParameters(graphView);
 
-    Log::info(" Computing the value ranges ...");
+    Log::notice(" Computing the value ranges ...");
     std::unordered_map<std::shared_ptr<Node>, double> valueRanges = computeRanges(graphView, inputDataSet, true, useCuda);
 
     //Log::info(" === RANGES (BEFORE ADJUST) ===");
 
-    Log::info(" Optimizing the clipping values ...");
+    Log::notice(" Optimizing the clipping values ...");
     valueRanges = adjustRanges(clippingMode, valueRanges, nbBits, graphView, inputDataSet, useCuda, verbose);
 
     //Log:debug("=== RANGES (AFTER ADJUST) ===");
     //printRanges(graphView, valueRanges);
 
-    Log::info(" Normalizing the activations ...");
+    Log::notice(" Normalizing the activations ...");
     normalizeActivations(graphView, valueRanges);
 
-    Log::info(" Quantizing the normalized network ...");
+    Log::notice(" Quantizing the normalized network ...");
     quantizeNormalizedNetwork(graphView, nbBits, noQuant, optimizeSigns, verbose);
 
     if (singleShift)
     {
-        Log::info( " Inserting the compensation nodes ...");
+        Log::notice( " Inserting the compensation nodes ...");
        insertCompensationNodes(graphView, nbBits);
 
-        Log::info(" Performing the Single-Shift approximation ...");
+        Log::notice(" Performing the Single-Shift approximation ...");
        performSingleShiftApproximation(graphView, noQuant);
    }
@@ -1111,11 +1154,11 @@ void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits,
     if (useCuda)
         graphView->setBackend("cuda");
 
-    Log::info(" Reseting the scheduler ...");
+    Log::notice(" Resetting the scheduler ...");
     SequentialScheduler scheduler(graphView);
     scheduler.resetScheduling();
 
-    Log::info(" Network is quantized !");
+    Log::notice(" Network is quantized !");
 }
 
 std::unordered_map<std::string, double> getWeightRanges(std::shared_ptr<GraphView> graphView)