diff --git a/include/aidge/operator/PTQMetaOps.hpp b/include/aidge/operator/PTQMetaOps.hpp index 33240fa2b4db0663f304cb5641bdbb8982ceaf3c..97e601b44cb1f70771a248b68a4f5c5017d3fd3e 100644 --- a/include/aidge/operator/PTQMetaOps.hpp +++ b/include/aidge/operator/PTQMetaOps.hpp @@ -20,6 +20,10 @@ namespace Aidge { std::shared_ptr<Aidge::Node> BaseQuantizer(double scalingFactor, const std::string& name); + void multiplyScalingFactor(std::shared_ptr<Aidge::Node> scalingNode, double coeff); + void appendRoundClip(std::shared_ptr<Node> metaOpNode, double clipMin, double clipMax); + + /// @brief Quantizer acts as a meta-operator to handle scaling operations in the PTQ, replacing the Scaling Operator. /// This operator is composed of a sequence of [Mul] -> [Clip] -> [Round] operations. diff --git a/src/PTQ/PTQ.cpp b/src/PTQ/PTQ.cpp index edc624bffa26f7a15ace8332ecbe98eb920aa121..acdcdc986d81e8bc4f5f0629132380ad423a74fc 100644 --- a/src/PTQ/PTQ.cpp +++ b/src/PTQ/PTQ.cpp @@ -207,39 +207,6 @@ static int getInputIndex(std::shared_ptr<Node> node, std::shared_ptr<Node> paren return index; } -void multiplyScalingFactor(std::shared_ptr<Aidge::Node> scalingNode, double coeff) -{ - auto metaOperatorOp = std::static_pointer_cast<MetaOperator_Op> (scalingNode->getOperator()); - - // Get the Mul node from the microGraph - - std::shared_ptr<Node> mulNode = nullptr; - auto microGraph = metaOperatorOp->getMicroGraph(); - for (auto node : microGraph->getNodes()) - if (node->type() == "Mul") - mulNode = node; - - // Retreive the previous scaling factor - - auto scalingFactorTensor = std::static_pointer_cast<OperatorTensor>(mulNode->getOperator())->getInput(1); - - std::shared_ptr<Tensor> fallback; - const Tensor& localTensor = scalingFactorTensor->refCastFrom(fallback, DataType::Float64, "cpu"); - double prevScalingFactor = localTensor.get<double>(0); - - // Create the new scaling factor tensor - - std::shared_ptr<Tensor> newScalingFactorTensor = std::make_shared<Tensor>(prevScalingFactor * 
coeff); - newScalingFactorTensor->setBackend(scalingFactorTensor->backend()); - newScalingFactorTensor->setDataType(scalingFactorTensor->dataType()); - - // Set the tensor of the producer - - auto producer = mulNode->getParent(1); - producer->getOperator()->setOutput(0, newScalingFactorTensor); - // XXX old way : mulNode->input(1).first->getOperator()->setOutput(0, resultTensor); -} - // Utility function that insert a node below another one already connected static void insertChildren(std::shared_ptr<Node> parent, std::shared_ptr<Node> newNode, std::shared_ptr<GraphView> graphView) { @@ -273,6 +240,7 @@ static void insertChildren(std::shared_ptr<Node> parent, std::shared_ptr<Node> n bool insertRoundBelowProducer(std::shared_ptr<Node> node, std::shared_ptr<GraphView> graphView) { +/* if (hasAttr(node, "isProducerScaling") && node->type() != "Round") { std::shared_ptr<Aidge::Node> roundNode = Round(node->name() + "_Round"); @@ -285,6 +253,9 @@ bool insertRoundBelowProducer(std::shared_ptr<Node> node, std::shared_ptr<GraphV return true; } return false; +*/ + Log::warn(" ROUND : DUMMY ! "); + return true; } double getTensorAbsoluteMax(std::shared_ptr<Tensor> tensor) @@ -429,7 +400,7 @@ static std::shared_ptr<Node> getPreviousScalingNode(std::shared_ptr<Node> node) void insertScalingBelowProducer(std::shared_ptr<Node> producerNode, double ratio, std::shared_ptr<GraphView> graphView) { - Log::warn(" DUMMY ! "); + Log::warn(" INSERT SCALING : DUMMY ! "); } void insertScalingBelowProducer(std::shared_ptr<Node> producerNode, std::shared_ptr<GraphView> graphView) @@ -882,7 +853,7 @@ std::unordered_map<std::shared_ptr<Node>, std::pair<bool, bool>> computeSignMap( std::pair<bool, bool> unsignedPair(true, true); for (std::shared_ptr<Node> node : graphView->getNodes()) - if (node->type() != "Producer") + if (node->type() != "Producer") // XXX XXX XXX we should use nodeVector instead ... 
signMap.insert(std::make_pair(node, unsignedPair)); // ITERATE OVER THE GRAPH @@ -1015,6 +986,7 @@ void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_ signMap = computeSignMap(graphView, verbose); else { + // TODO: we should use the (retrieved) node vector std::pair<bool, bool> signedPair(false, false); for (std::shared_ptr<Node> node : graphView->getNodes()) if (node->type() != "Producer") @@ -1030,11 +1002,11 @@ void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_ if (isAffine(node)) { // Rescale the weight tensor - std::shared_ptr<Tensor> weightTensor = getWeightTensor(node); - insertScalingBelowProducer(node->getParent(1),signedMax,graphView); + multiplyScalingFactor(node->getParent(1), signedMax); + // Quantize the weight Producer (Round and Clip are appended to parent #1) if (!noQuant) - insertRoundBelowProducer(node->getParent(1),graphView); + appendRoundClip(node->getParent(1), -(signedMax + 1), signedMax); // Rescale the bias tensor if (nodeHasBias(node)) @@ -1042,11 +1014,12 @@ void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_ bool inputIsUnsigned = signMap[node].first; double rescaling = inputIsUnsigned ? unsignedMax * signedMax : signedMax * signedMax; - std::shared_ptr<Tensor> biasTensor = getBiasTensor(node); - insertScalingBelowProducer(node->getParent(2),rescaling,graphView); + multiplyScalingFactor(node->getParent(2), rescaling); + // TODO: enhance this. NOTE(review): the shift is computed in int, so 1 << (12+nbBits) is not representable in a 32-bit int once nbBits >= 19; biasMax is a power of two, so the clip range [-(biasMax+1), biasMax] used below is asymmetric: confirm both are intended + int biasMax = (1 << (12 + nbBits)); if (!noQuant) - insertRoundBelowProducer(node->getParent(2),graphView); + appendRoundClip(node->getParent(2), -(biasMax + 1), biasMax); } // Compensate the rescaling using the next Scaling node @@ -1061,7 +1034,7 @@ void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_ std::shared_ptr<Node> scalingNode = getUniqueChild(node); // TODO : assert if scalingNode is a Scaling ... 
- multiplyScalingFactor(scalingNode,rescaling) ; + multiplyScalingFactor(scalingNode, rescaling); } if (isMerging(node)) @@ -1080,7 +1053,7 @@ void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_ if (node->type() == "MatMul") rescaling /= inputIsUnsigned ? unsignedMax : signedMax; - multiplyScalingFactor(scalingNode, rescaling) ; + multiplyScalingFactor(scalingNode, rescaling); } if (isNotQuantized(node)) @@ -1096,7 +1069,7 @@ void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_ // Handle the Scaling Nodes ... - if (hasAttr(node, "isScaling")) + if (hasAttr(node, "isScaling")) { // Don't touch the scalings that precede non-linearities ... @@ -1107,20 +1080,13 @@ void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_ if (!noQuant && !precedesNonLinearNode) { - // Replace the Scaling Node by a Quantizer + // Old : Replace the Scaling Node by a Quantizer - auto scalingFactorTensor = std::static_pointer_cast<OperatorTensor>(node->getOperator())->getInput(1); - std::shared_ptr<Tensor> fallback; - const Tensor& localTensor = scalingFactorTensor->refCastFrom(fallback, DataType::Float64, "cpu"); - double oldScalingFactor = localTensor.get<double>(0); //!\\ - - std::shared_ptr<Node> quantizerNode = Quantizer(oldScalingFactor, -(signedMax + 1), signedMax, node->name()); - quantizerNode->getOperator()->setDataType(DataType::Float64); // getDataType(parentNode) - quantizerNode->getOperator()->setBackend(determineBackend(node)); - graphView->replace({node, node->getParent(1)}, {quantizerNode}); + appendRoundClip(node, -(signedMax + 1), signedMax); if (optimizeSigns) { +/* double rescaling = 1.0; bool inputIsUnsigned = signMap[node].first; @@ -1129,11 +1095,16 @@ void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_ rescaling /= inputIsUnsigned ? unsignedMax : signedMax; rescaling *= outputIsUnsigned ? 
unsignedMax : signedMax; - double currScalingFactor = getScalingFactor(quantizerNode); - updateScalingFactor(quantizerNode, currScalingFactor * rescaling); + // XXX XXX XXX + //double currScalingFactor = getScalingFactor(quantizerNode); + //updateScalingFactor(quantizerNode, currScalingFactor * rescaling); + multiplyScalingFactor(node, rescaling); + // FIXME: the quantizerNode declaration was removed above, so the setClipRange call below cannot work as is; fix this !!! + if(outputIsUnsigned) - setClipRange(quantizerNode, 0, unsignedMax); + setClipRange(quantizerNode, 0, unsignedMax); +*/ } } } @@ -1334,7 +1305,8 @@ void clearBiases(std::shared_ptr<GraphView> graphView) if (node->type() == "FC" || node->type() == "Conv2D") { std::shared_ptr<Tensor> biasTensor = std::static_pointer_cast<OperatorTensor>(node->getOperator())->getInput(2); //rescaleTensor(biasTensor, 0); - insertScalingBelowProducer(node->getParent(2), 0, graphView); + //insertScalingBelowProducer(node->getParent(2), 0, graphView); + multiplyScalingFactor(node->getParent(2), 0); } } } diff --git a/src/operator/PTQMetaOps.cpp b/src/operator/PTQMetaOps.cpp index 9dae53174f1cd754a5134ffea77fabe743d50940..a5d49cc630269e97a5e5e9979a4c4d3c90b28f3f 100644 --- a/src/operator/PTQMetaOps.cpp +++ b/src/operator/PTQMetaOps.cpp @@ -35,7 +35,7 @@ namespace Aidge std::shared_ptr<Node> BaseQuantizer(double scalingFactor, const std::string& name) { - std::shared_ptr<Node> mulNode = Mul(name.empty() ? 
"" : name + "_MulQuant"); + std::shared_ptr<Node> mulNode = Mul(name + "_MulQuant"); // NOTE(review): with an empty name the node is now called "_MulQuant" (the removed line passed "" in that case) // Scaling Factor Producer @@ -54,6 +54,155 @@ std::shared_ptr<Node> BaseQuantizer(double scalingFactor, const std::string& nam return metaopNode; } +void multiplyScalingFactor(std::shared_ptr<Aidge::Node> scalingNode, double coeff) +{ + auto metaOperatorOp = std::static_pointer_cast<MetaOperator_Op> (scalingNode->getOperator()); // unchecked cast: scalingNode is assumed to hold a MetaOperator_Op + + // Get the Mul node from the microGraph (NOTE(review): mulNode stays nullptr if there is none, and it is dereferenced below without a check) + + std::shared_ptr<Node> mulNode = nullptr; + auto microGraph = metaOperatorOp->getMicroGraph(); + for (auto node : microGraph->getNodes()) + if (node->type() == "Mul") + mulNode = node; + + // Retrieve the previous scaling factor (input #1 of the Mul), read as a Float64 value on cpu + + auto scalingFactorTensor = std::static_pointer_cast<OperatorTensor>(mulNode->getOperator())->getInput(1); + + std::shared_ptr<Tensor> fallback; + const Tensor& localTensor = scalingFactorTensor->refCastFrom(fallback, DataType::Float64, "cpu"); + double prevScalingFactor = localTensor.get<double>(0); + + // Create the new scaling factor tensor (previous factor times coeff) with the backend and data type of the previous one + + std::shared_ptr<Tensor> newScalingFactorTensor = std::make_shared<Tensor>(prevScalingFactor * coeff); + newScalingFactorTensor->setBackend(scalingFactorTensor->backend()); + newScalingFactorTensor->setDataType(scalingFactorTensor->dataType()); + + // Set the new tensor as output #0 of the Mul's parent #1 (the scaling factor producer) + + auto producer = mulNode->getParent(1); + producer->getOperator()->setOutput(0, newScalingFactorTensor); + // XXX old way : mulNode->input(1).first->getOperator()->setOutput(0, resultTensor); +} + +/* +void appendRoundClip(std::shared_ptr<Node> metaOpNode, double clipMin, double clipMax) +{ + // Create the new nodes + + std::string name = metaOpNode->name(); + + std::shared_ptr<Node> roundNode = Round(name + "_RoundQuant"); + std::shared_ptr<Node> clipNode = Clip(name + "_ClipQuant", clipMin, clipMax); + + // Retrieve the previous microGraph + + auto metaOperatorOp = std::static_pointer_cast<MetaOperator_Op> (metaOpNode->getOperator()); + auto microGraph = 
metaOperatorOp->getMicroGraph(); + + // Get the Mul node from the microGraph + + std::shared_ptr<Node> mulNode = nullptr; + for (auto node : microGraph->getNodes()) + if (node->type() == "Mul") + mulNode = node; + + // Save the backend and datatype + + auto mulOp = std::static_pointer_cast<OperatorTensor> (mulNode->getOperator()); + auto backend = mulOp->getInput(0)->backend(); + auto dataType = mulOp->getInput(0)->dataType(); + + // Create the new microGraph + + std::shared_ptr<GraphView> prevGraphView = Sequential({mulNode}); + prevGraphView->add(mulNode->getParent(1)); // add the producer + + auto prevGraphViewClone = prevGraphView->clone(); + std::shared_ptr<GraphView> newGraphView = Sequential({prevGraphViewClone, roundNode, clipNode}); + + // Replace the old microGraph + + microGraph->replace(prevGraphView, newGraphView); + + // Set the backend and datatype + + microGraph->setBackend(backend); + microGraph->setDataType(dataType); +} +*/ + +void appendRoundClip(std::shared_ptr<Node> metaOpNode, double clipMin, double clipMax) +{ + // Retrieve the current microGraph of the meta-operator (unchecked cast to MetaOperator_Op) + + auto metaOperatorOp = std::static_pointer_cast<MetaOperator_Op> (metaOpNode->getOperator()); + auto microGraph = metaOperatorOp->getMicroGraph(); + + // Get the Mul node from the microGraph (NOTE(review): mulNode stays nullptr if there is none, and it is dereferenced right after the loop) + + std::shared_ptr<Node> mulNode = nullptr; + for (auto node : microGraph->getNodes()) + if (node->type() == "Mul") + mulNode = node; + + auto mulOp = std::static_pointer_cast<OperatorTensor> (mulNode->getOperator()); + + // save the backend and data type of input #0 of the Mul + + auto backend = mulOp->getInput(0)->backend(); + auto dataType = mulOp->getInput(0)->dataType(); + + // create the new microGraph nodes: a Mul, a Round and a Clip (no names given), and a clone of the Mul's parent #1 + + auto newMulNode = Mul(); + auto roundNode = Round(); + auto clipNode = Clip(""); // clipMin and clipMax are not passed here, they are connected further down through Producers + auto newCoeffNode = mulNode->getParent(1)->clone(); // clone of the scaling factor producer (alternative: Producer(coeffTensor)) + + // create the new micrograph from the new Mul, Round and Clip nodes + + std::shared_ptr<GraphView> newMicroGraph = Sequential({newMulNode, roundNode, clipNode}); + 
newCoeffNode->addChild(newMulNode, 0, 1); // the trailing 1 (presumably the input index on the Mul side) has to be given explicitly + newMicroGraph->add(newCoeffNode); + + // manually share the I/O tensors of the old Mul with the new nodes + + auto newMulOp = std::static_pointer_cast<OperatorTensor> (newMulNode->getOperator()); + newMulOp->setInput(0, mulOp->getInput(0)); // MANDATORY (because we need an input tensor) + auto clipOp = std::static_pointer_cast<Clip_Op> (clipNode->getOperator()); + clipOp->setOutput(0, mulOp->getOutput(0)); // MANDATORY as well: output #0 of the Clip is set to the output #0 tensor of the old Mul + + // Connect the clip min and max tensors: one Producer each, attached to the Clip with indices 1 and 2 + + auto minTensor = std::make_shared<Tensor>(clipMin); + auto maxTensor = std::make_shared<Tensor>(clipMax); + auto minNode = Producer(minTensor); + auto maxNode = Producer(maxTensor); + minNode->addChild(clipNode, 0, 1); + maxNode->addChild(clipNode, 0, 2); + newMicroGraph->add(minNode); + newMicroGraph->add(maxNode); + + // set the backend and the data type saved above + + newMicroGraph->setBackend(backend); + newMicroGraph->setDataType(dataType); + + // reset the scheduling + + SequentialScheduler scheduler(newMicroGraph); + scheduler.resetScheduling(); + //scheduler.generateScheduling(); + + // set the micrograph: the GraphView held by the meta-operator is overwritten in place (NOTE(review): relies on GraphView copy assignment, confirm it is safe here) + + *microGraph = *newMicroGraph; +} + + std::shared_ptr<Node> Quantizer(double scalingFactor, double clipMin, double clipMax, const std::string& name) { // create the nodes