diff --git a/src/PTQ/PTQ.cpp b/src/PTQ/PTQ.cpp index acdcdc986d81e8bc4f5f0629132380ad423a74fc..f622ae5c7ec02cb7dd8f3c53287774b3f1a8e59a 100644 --- a/src/PTQ/PTQ.cpp +++ b/src/PTQ/PTQ.cpp @@ -398,17 +398,12 @@ static std::shared_ptr<Node> getPreviousScalingNode(std::shared_ptr<Node> node) return currNode; } -void insertScalingBelowProducer(std::shared_ptr<Node> producerNode, double ratio, std::shared_ptr<GraphView> graphView) -{ - Log::warn(" INSERT SCALING : DUMMY ! "); -} - void insertScalingBelowProducer(std::shared_ptr<Node> producerNode, std::shared_ptr<GraphView> graphView) { std::string scalingNodeName = makeUniqueName(producerNode->name() + "_ProducerScaling", graphView); std::shared_ptr<Node> scalingNode = BaseQuantizer(1.0, scalingNodeName);; addAttr(scalingNode, "isProducerScaling"); - // XXX XXX XXX addAttr(scalingNode, "isScaling"); + // XXX XXX XXX addAttr(scalingNode, "isScaling") ? NO !!! scalingNode->getOperator()->setDataType(DataType::Float64); // getDataType(parentNode) scalingNode->getOperator()->setBackend(determineBackend(producerNode)); // XXX use the producer parent instead ??? @@ -1095,7 +1090,7 @@ void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_ rescaling /= inputIsUnsigned ? unsignedMax : signedMax; rescaling *= outputIsUnsigned ? unsignedMax : signedMax; - // XXX XXX XXX + // XXX OK //double currScalingFactor = getScalingFactor(quantizerNode); //updateScalingFactor(quantizerNode, currScalingFactor * rescaling); multiplyScalingFactor(node, rescaling); @@ -1111,7 +1106,7 @@ void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_ } } -static void insertCompensationNodes(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits) +static void insertCompensationNodes(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits, bool noQuant) { // XXX Use the signMap to increase the resolution when possible ... 
double signedMax = (1 << (nbBits - 1)) - 1; @@ -1122,7 +1117,7 @@ static void insertCompensationNodes(std::shared_ptr<GraphView> graphView, std::u { // The appropriate strategy is to check if the Quantizer is not // preceded by an Weighted node (that is not forking), and insert - // a coeff node (Compensation) if so ... + // a mul node (Compensation) before it if so ... if (node->type() == "Quantizer") { @@ -1159,14 +1154,25 @@ static void insertCompensationNodes(std::shared_ptr<GraphView> graphView, std::u // Adapt the scaling factor value accordingly - double currScalingFactor = getScalingFactor(node); - updateScalingFactor(node, currScalingFactor / signedMax); + multiplyScalingFactor(node, 1.0 / signedMax); // XXX XXX XXX OK + + // Insert a Quantizer for the coeffProducer that will handle + // the single-shift approximation via its scalingFactor ... + + insertScalingBelowProducer(coeffProducer, graphView); + + if (!noQuant) + { + // XXX XXX XXX double check this ... + std::shared_ptr<Node> coeffQuantizer = mulNode->getParent(1); + appendRoundClip(coeffQuantizer, -(signedMax + 1), signedMax); + } } } } } -void performSingleShiftApproximation(std::shared_ptr<GraphView> graphView, bool noQuant) +static void performSingleShiftApproximation(std::shared_ptr<GraphView> graphView) { std::vector<std::shared_ptr<Node>> nodeVector = retrieveNodeVector(graphView); @@ -1177,28 +1183,22 @@ void performSingleShiftApproximation(std::shared_ptr<GraphView> graphView, bool std::shared_ptr<Node> linearNode = node->getParent(0); double base = getScalingFactor(node); - double approx = std::pow(2, std::ceil(std::log2(base))); + double ratio = approx / base; - updateScalingFactor(node, approx); + // set the scaling factor value to the approximation ... 
- double ratio = base / approx; + multiplyScalingFactor(node, ratio); - insertScalingBelowProducer(linearNode->getParent(1), ratio, graphView); - if (!noQuant) - insertRoundBelowProducer(linearNode->getParent(1), graphView); + // compensate the ratio using the previous node weights ... + multiplyScalingFactor(linearNode->getParent(1), 1.0 / ratio); if (nodeHasBias(linearNode)) - { - insertScalingBelowProducer(linearNode->getParent(2), ratio, graphView); - if (!noQuant) - insertRoundBelowProducer(linearNode->getParent(2), graphView); - } + multiplyScalingFactor(linearNode->getParent(2), 1.0 / ratio); } } } - static void printScalingFactors(std::shared_ptr<GraphView> graphView) { for (auto node : retrieveNodeVector(graphView)) @@ -1263,10 +1263,10 @@ void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits, if (singleShift) { Log::notice( " Inserting the compensation nodes ..."); - insertCompensationNodes(graphView, nbBits); + insertCompensationNodes(graphView, nbBits, noQuant); Log::notice(" Performing the Single-Shift approximation ..."); - performSingleShiftApproximation(graphView, noQuant); + performSingleShiftApproximation(graphView); } if (verbose) diff --git a/src/operator/PTQMetaOps.cpp b/src/operator/PTQMetaOps.cpp index a5d49cc630269e97a5e5e9979a4c4d3c90b28f3f..8febfd5ba0512dd88a83a37496b111ebd9a7c7ae 100644 --- a/src/operator/PTQMetaOps.cpp +++ b/src/operator/PTQMetaOps.cpp @@ -43,6 +43,12 @@ std::shared_ptr<Node> BaseQuantizer(double scalingFactor, const std::string& nam std::shared_ptr<Node> scalingFactorProducer = addProducer<1>(mulNode, 1, {1}, "ScalingFactor"); scalingFactorProducer->getOperator()->setOutput(0, scalingFactorTensor); + // TODO : the above should be replaced by : +/* + std::shared_ptr<Node> scalingFactorProducer = Producer(scalingFactorTensor); + scalingFactorProducer->addChild(mulNode, 0, 1); +*/ + std::shared_ptr<GraphView> graphView = Sequential({mulNode}); graphView->add(scalingFactorProducer); @@ -84,55 +90,8 
@@ void multiplyScalingFactor(std::shared_ptr<Aidge::Node> scalingNode, double coef auto producer = mulNode->getParent(1); producer->getOperator()->setOutput(0, newScalingFactorTensor); - // XXX old way : mulNode->input(1).first->getOperator()->setOutput(0, resultTensor); -} - -/* -void appendRoundClip(std::shared_ptr<Node> metaOpNode, double clipMin, double clipMax) -{ - // Create the new nodes - - std::string name = metaOpNode->name(); - - std::shared_ptr<Node> roundNode = Round(name + "_RoundQuant"); - std::shared_ptr<Node> clipNode = Clip(name + "_ClipQuant", clipMin, clipMax); - - // Retreive the previous microGraph - - auto metaOperatorOp = std::static_pointer_cast<MetaOperator_Op> (metaOpNode->getOperator()); - auto microGraph = metaOperatorOp->getMicroGraph(); - - // Get the Mul node from the microGraph - - std::shared_ptr<Node> mulNode = nullptr; - for (auto node : microGraph->getNodes()) - if (node->type() == "Mul") - mulNode = node; - - // Save the backend and datatype - - auto mulOp = std::static_pointer_cast<OperatorTensor> (mulNode->getOperator()); - auto backend = mulOp->getInput(0)->backend(); - auto dataType = mulOp->getInput(0)->dataType(); - - // Create the new microGraph - - std::shared_ptr<GraphView> prevGraphView = Sequential({mulNode}); - prevGraphView->add(mulNode->getParent(1)); // add the producer - - auto prevGraphViewClone = prevGraphView->clone(); - std::shared_ptr<GraphView> newGraphView = Sequential({prevGraphViewClone, roundNode, clipNode}); - - // Replace the old microGraph - - microGraph->replace(prevGraphView, newGraphView); - - // Set the backend and datatype - - microGraph->setBackend(backend); - microGraph->setDataType(dataType); + // XXX prev way : mulNode->input(1).first->getOperator()->setOutput(0, resultTensor); } -*/ void appendRoundClip(std::shared_ptr<Node> metaOpNode, double clipMin, double clipMax) { @@ -203,32 +162,41 @@ void appendRoundClip(std::shared_ptr<Node> metaOpNode, double clipMin, double cl } 
-std::shared_ptr<Node> Quantizer(double scalingFactor, double clipMin, double clipMax, const std::string& name) +void updateScalingFactor(std::shared_ptr<Node> metaOpNode, double scalingFactor) +{ + // TODO : implement or remove the function ... + // NOTE(review): stub - only logs an error; metaOpNode and scalingFactor are unused and the node is left unchanged + + Log::error(" updateScalingFactor() : not yet implemented ... "); +} + +double getScalingFactor(std::shared_ptr<Node> quantizerNode) { - // create the nodes + // Retrieve the previous microGraph - std::shared_ptr<Node> mulNode = Mul((!name.empty()) ? name + "_MulQuant" : ""); - std::shared_ptr<Node> roundNode = Round((!name.empty()) ? name + "_RoundQuant" : ""); - std::shared_ptr<Node> clipNode = Clip((!name.empty()) ? name + "_ClipQuant" : "", clipMin, clipMax); + auto quantizerOp = std::static_pointer_cast<MetaOperator_Op> (quantizerNode->getOperator()); + auto microGraph = quantizerOp->getMicroGraph(); - // connect the scaling factor producer + // Get the Mul node from the microGraph - std::shared_ptr<Tensor> scalingFactorTensor = std::make_shared<Tensor>(Array1D<double, 1> {scalingFactor}); - std::shared_ptr<Node> scalingFactorProducer = addProducer<1>(mulNode, 1, {1}, "ScalingFactor"); - scalingFactorProducer->getOperator()->setOutput(0, scalingFactorTensor); + std::shared_ptr<Node> mulNode = nullptr; + for (auto node : microGraph->getNodes()) + if (node->type() == "Mul") + mulNode = node; - // create the metaop graph + // NOTE(review): mulNode stays nullptr when the micro-graph holds no Mul node and is dereferenced just below without a check (the removed version warned and returned 0) - confirm every caller passes a meta-operator that contains a Mul + auto mulOp = std::static_pointer_cast<OperatorTensor> (mulNode->getOperator()); - std::shared_ptr<GraphView> graphView = Sequential({mulNode, roundNode, clipNode}); - std::shared_ptr<GraphView> connectedGraphView = getConnectedGraphView(mulNode); // XXX why not use the graphView ??? 
+ // Retrieve the scaling factor - // return the metaop + auto scalingFactorTensor = mulOp->getInput(1); - std::shared_ptr<Node> metaopNode = MetaOperator("Quantizer", connectedGraphView, {}, name); // XXX alternative prototype + std::shared_ptr<Tensor> fallback; + const Tensor& localTensor = scalingFactorTensor->refCastFrom(fallback, DataType::Float64, "cpu"); + double scalingFactor = localTensor.get<double>(0); - return metaopNode; + return scalingFactor; } +/* static std::shared_ptr<Node> getSubNode(std::shared_ptr<GraphView> graphView, std::string nodeType) { std::shared_ptr<Node> mulNode = nullptr; @@ -238,66 +206,49 @@ static std::shared_ptr<Node> getSubNode(std::shared_ptr<GraphView> graphView, st return mulNode; } +*/ -void updateScalingFactor(std::shared_ptr<Node> metaOpNode, double scalingFactor) +void setClipRange(std::shared_ptr<Node> quantizerNode, double min, double max) { - if(metaOpNode->type() != "Scaling" && metaOpNode->type() != "Quantizer") - Log::warn("Cannot update the scaling factor on Node of type {}", metaOpNode->type()); - - std::shared_ptr<Tensor> scalingFactorTensor = std::make_shared<Tensor>(Array1D<double, 1> {scalingFactor}); + auto quantizerOp = std::static_pointer_cast<MetaOperator_Op> (quantizerNode->getOperator()); + auto microGraph = quantizerOp->getMicroGraph(); - std::shared_ptr<MetaOperator_Op> metaOp = std::static_pointer_cast<MetaOperator_Op>(metaOpNode->getOperator()); + std::shared_ptr<Node> clipNode = nullptr; + for (auto node : microGraph->getNodes()) + if (node->type() == "Clip") + clipNode = node; - std::shared_ptr<Node> mulNode = getSubNode(metaOp->getMicroGraph(), "Mul"); + // TODO : assert that clipNode is not a nullptr before it is dereferenced ... - if (!mulNode) - Log::warn("Invalid PTQ MetaOperator, no Mul node found inside ! 
"); + auto clipOp = std::static_pointer_cast<Clip_Op> (clipNode->getOperator()); - mulNode->input(1).first->getOperator()->setOutput(0, scalingFactorTensor); -} + // set the attributes -double getScalingFactor(std::shared_ptr<Node> MetaOpNode) -{ - if (MetaOpNode->type() != "Scaling" && MetaOpNode->type() != "Quantizer") { - Log::warn("Cannot get the scaling factor on Node of type {}", MetaOpNode->type()); - return 0; - } + clipOp->max() = max; + clipOp->min() = min; - std::shared_ptr<MetaOperator_Op> metaOp = std::static_pointer_cast<MetaOperator_Op>(MetaOpNode->getOperator()); + // Retrieve the previous min/max tensors - std::shared_ptr<Node> mulNode = getSubNode(metaOp->getMicroGraph(), "Mul"); + auto minTensor = std::static_pointer_cast<OperatorTensor>(clipNode->getOperator())->getInput(1); + auto maxTensor = std::static_pointer_cast<OperatorTensor>(clipNode->getOperator())->getInput(2); - if (!mulNode) { - Log::warn("Invalid PTQ MetaOperator, no Mul found inside node of type {}", MetaOpNode->type()); - return 0; - } + // Create the new min/max tensors - auto scalingFactorTensor = std::static_pointer_cast<OperatorTensor>(mulNode->getOperator())->getInput(1); - std::shared_ptr<Tensor> fallback; - const Tensor& localTensor = scalingFactorTensor->refCastFrom(fallback, DataType::Float64, "cpu"); + std::shared_ptr<Tensor> newMinTensor = std::make_shared<Tensor>(min); + newMinTensor->setBackend(minTensor->backend()); + newMinTensor->setDataType(minTensor->dataType()); - return localTensor.get<double>(0); -} - - -void setClipRange(std::shared_ptr<Node> quantizerNode, double min, double max) -{ - if (quantizerNode->type() != "Quantizer") { - Log::warn("Cannot set the clipping range on Node of type {}", quantizerNode->type()); - return; - } + std::shared_ptr<Tensor> newMaxTensor = std::make_shared<Tensor>(max); + newMaxTensor->setBackend(maxTensor->backend()); + newMaxTensor->setDataType(maxTensor->dataType()); - std::shared_ptr<MetaOperator_Op> metaOp = 
std::static_pointer_cast<MetaOperator_Op> (quantizerNode->getOperator()); + // Set the tensors of the producer - std::shared_ptr<Node> clipNode = getSubNode(metaOp->getMicroGraph(), "Clip"); + auto minProducer = clipNode->getParent(1); + minProducer->getOperator()->setOutput(0, newMinTensor); - if (!clipNode) { - Log::warn("Invalid PTQ MetaOperator, no Clip found inside node of type {}", quantizerNode->type()); - return; - } - - std::shared_ptr<Clip_Op> clipOp = std::static_pointer_cast<Clip_Op>(clipNode->getOperator()); - clipOp->max() = max; - clipOp->min() = min; + auto maxProducer = clipNode->getParent(2); + maxProducer->getOperator()->setOutput(0, newMaxTensor); } + } \ No newline at end of file