Skip to content
Snippets Groups Projects
Commit dcb0db5e authored by Benjamin Halimi's avatar Benjamin Halimi
Browse files

ptq rework (single-shift related functions)

parent f8b03835
No related branches found
No related tags found
2 merge requests!54Update 0.3.1 -> 0.4.0,!50Enhancement : Quantizer only PTQ
Pipeline #69329 failed
......@@ -398,17 +398,12 @@ static std::shared_ptr<Node> getPreviousScalingNode(std::shared_ptr<Node> node)
return currNode;
}
// Placeholder overload of insertScalingBelowProducer() that accepts a ratio.
// It performs no insertion: the body only emits a warning through Log::warn().
// None of the three parameters (producerNode, ratio, graphView) is read.
void insertScalingBelowProducer(std::shared_ptr<Node> producerNode, double ratio, std::shared_ptr<GraphView> graphView)
{
// NOTE(review): stub only - a caller passing a ratio gets this warning and no scaling node.
Log::warn(" INSERT SCALING : DUMMY ! ");
}
void insertScalingBelowProducer(std::shared_ptr<Node> producerNode, std::shared_ptr<GraphView> graphView)
{
std::string scalingNodeName = makeUniqueName(producerNode->name() + "_ProducerScaling", graphView);
std::shared_ptr<Node> scalingNode = BaseQuantizer(1.0, scalingNodeName);;
addAttr(scalingNode, "isProducerScaling");
// XXX XXX XXX addAttr(scalingNode, "isScaling");
// XXX XXX XXX addAttr(scalingNode, "isScaling") ? NO !!!
scalingNode->getOperator()->setDataType(DataType::Float64); // getDataType(parentNode)
scalingNode->getOperator()->setBackend(determineBackend(producerNode)); // XXX use the producer parent instead ???
......@@ -1095,7 +1090,7 @@ void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_
rescaling /= inputIsUnsigned ? unsignedMax : signedMax;
rescaling *= outputIsUnsigned ? unsignedMax : signedMax;
// XXX XXX XXX
// XXX OK
//double currScalingFactor = getScalingFactor(quantizerNode);
//updateScalingFactor(quantizerNode, currScalingFactor * rescaling);
multiplyScalingFactor(node, rescaling);
......@@ -1111,7 +1106,7 @@ void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_
}
}
static void insertCompensationNodes(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits)
static void insertCompensationNodes(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits, bool noQuant)
{
// XXX Use the signMap to increase the resolution when possible ...
double signedMax = (1 << (nbBits - 1)) - 1;
......@@ -1122,7 +1117,7 @@ static void insertCompensationNodes(std::shared_ptr<GraphView> graphView, std::u
{
// The appropriate strategy is to check if the Quantizer is not
// preceded by a Weighted node (that is not forking), and insert
// a coeff node (Compensation) if so ...
// a mul node (Compensation) before it if so ...
if (node->type() == "Quantizer")
{
......@@ -1159,14 +1154,25 @@ static void insertCompensationNodes(std::shared_ptr<GraphView> graphView, std::u
// Adapt the scaling factor value accordingly
double currScalingFactor = getScalingFactor(node);
updateScalingFactor(node, currScalingFactor / signedMax);
multiplyScalingFactor(node, 1.0 / signedMax); // XXX XXX XXX OK
// Insert a Quantizer for the coeffProducer that will handle
// the single-shift approximation via its scalingFactor ...
insertScalingBelowProducer(coeffProducer, graphView);
if (!noQuant)
{
// XXX XXX XXX double check this ...
std::shared_ptr<Node> coeffQuantizer = mulNode->getParent(1);
appendRoundClip(coeffQuantizer, -(signedMax + 1), signedMax);
}
}
}
}
}
void performSingleShiftApproximation(std::shared_ptr<GraphView> graphView, bool noQuant)
static void performSingleShiftApproximation(std::shared_ptr<GraphView> graphView)
{
std::vector<std::shared_ptr<Node>> nodeVector = retrieveNodeVector(graphView);
......@@ -1177,28 +1183,22 @@ void performSingleShiftApproximation(std::shared_ptr<GraphView> graphView, bool
std::shared_ptr<Node> linearNode = node->getParent(0);
double base = getScalingFactor(node);
double approx = std::pow(2, std::ceil(std::log2(base)));
double ratio = approx / base;
updateScalingFactor(node, approx);
// set the scaling factor value to the approximation ...
double ratio = base / approx;
multiplyScalingFactor(node, ratio);
insertScalingBelowProducer(linearNode->getParent(1), ratio, graphView);
if (!noQuant)
insertRoundBelowProducer(linearNode->getParent(1), graphView);
// compensate the ratio using the previous node weights ...
multiplyScalingFactor(linearNode->getParent(1), 1.0 / ratio);
if (nodeHasBias(linearNode))
{
insertScalingBelowProducer(linearNode->getParent(2), ratio, graphView);
if (!noQuant)
insertRoundBelowProducer(linearNode->getParent(2), graphView);
}
multiplyScalingFactor(linearNode->getParent(2), 1.0 / ratio);
}
}
}
static void printScalingFactors(std::shared_ptr<GraphView> graphView)
{
for (auto node : retrieveNodeVector(graphView))
......@@ -1263,10 +1263,10 @@ void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits,
if (singleShift)
{
Log::notice( " Inserting the compensation nodes ...");
insertCompensationNodes(graphView, nbBits);
insertCompensationNodes(graphView, nbBits, noQuant);
Log::notice(" Performing the Single-Shift approximation ...");
performSingleShiftApproximation(graphView, noQuant);
performSingleShiftApproximation(graphView);
}
if (verbose)
......
......@@ -43,6 +43,12 @@ std::shared_ptr<Node> BaseQuantizer(double scalingFactor, const std::string& nam
std::shared_ptr<Node> scalingFactorProducer = addProducer<1>(mulNode, 1, {1}, "ScalingFactor");
scalingFactorProducer->getOperator()->setOutput(0, scalingFactorTensor);
// TODO : the above should be replaced by :
/*
std::shared_ptr<Node> scalingFactorProducer = Producer(scalingFactorTensor);
scalingFactorProducer->addChild(mulNode, 0, 1);
*/
std::shared_ptr<GraphView> graphView = Sequential({mulNode});
graphView->add(scalingFactorProducer);
......@@ -84,55 +90,8 @@ void multiplyScalingFactor(std::shared_ptr<Aidge::Node> scalingNode, double coef
auto producer = mulNode->getParent(1);
producer->getOperator()->setOutput(0, newScalingFactorTensor);
// XXX old way : mulNode->input(1).first->getOperator()->setOutput(0, resultTensor);
}
/*
void appendRoundClip(std::shared_ptr<Node> metaOpNode, double clipMin, double clipMax)
{
// Create the new nodes
std::string name = metaOpNode->name();
std::shared_ptr<Node> roundNode = Round(name + "_RoundQuant");
std::shared_ptr<Node> clipNode = Clip(name + "_ClipQuant", clipMin, clipMax);
// Retrieve the previous microGraph
auto metaOperatorOp = std::static_pointer_cast<MetaOperator_Op> (metaOpNode->getOperator());
auto microGraph = metaOperatorOp->getMicroGraph();
// Get the Mul node from the microGraph
std::shared_ptr<Node> mulNode = nullptr;
for (auto node : microGraph->getNodes())
if (node->type() == "Mul")
mulNode = node;
// Save the backend and datatype
auto mulOp = std::static_pointer_cast<OperatorTensor> (mulNode->getOperator());
auto backend = mulOp->getInput(0)->backend();
auto dataType = mulOp->getInput(0)->dataType();
// Create the new microGraph
std::shared_ptr<GraphView> prevGraphView = Sequential({mulNode});
prevGraphView->add(mulNode->getParent(1)); // add the producer
auto prevGraphViewClone = prevGraphView->clone();
std::shared_ptr<GraphView> newGraphView = Sequential({prevGraphViewClone, roundNode, clipNode});
// Replace the old microGraph
microGraph->replace(prevGraphView, newGraphView);
// Set the backend and datatype
microGraph->setBackend(backend);
microGraph->setDataType(dataType);
// XXX prev way : mulNode->input(1).first->getOperator()->setOutput(0, resultTensor);
}
*/
void appendRoundClip(std::shared_ptr<Node> metaOpNode, double clipMin, double clipMax)
{
......@@ -203,32 +162,41 @@ void appendRoundClip(std::shared_ptr<Node> metaOpNode, double clipMin, double cl
}
std::shared_ptr<Node> Quantizer(double scalingFactor, double clipMin, double clipMax, const std::string& name)
// Stub for updating the scaling factor of a node: not implemented.
// The body only reports an error through Log::error(); neither metaOpNode nor
// scalingFactor is read, so the node is left unchanged.
// NOTE(review): any remaining caller gets no update - presumably such call
// sites should use multiplyScalingFactor() instead; confirm before removing.
void updateScalingFactor(std::shared_ptr<Node> metaOpNode, double scalingFactor)
{
// TODO : implement or remove the function ...
Log::error(" updateScalingFactor() : not yet implemented ... ");
}
double getScalingFactor(std::shared_ptr<Node> quantizerNode)
{
// create the nodes
// Retrieve the previous microGraph
std::shared_ptr<Node> mulNode = Mul((!name.empty()) ? name + "_MulQuant" : "");
std::shared_ptr<Node> roundNode = Round((!name.empty()) ? name + "_RoundQuant" : "");
std::shared_ptr<Node> clipNode = Clip((!name.empty()) ? name + "_ClipQuant" : "", clipMin, clipMax);
auto quantizerOp = std::static_pointer_cast<MetaOperator_Op> (quantizerNode->getOperator());
auto microGraph = quantizerOp->getMicroGraph();
// connect the scaling factor producer
// Get the Mul node from the microGraph
std::shared_ptr<Tensor> scalingFactorTensor = std::make_shared<Tensor>(Array1D<double, 1> {scalingFactor});
std::shared_ptr<Node> scalingFactorProducer = addProducer<1>(mulNode, 1, {1}, "ScalingFactor");
scalingFactorProducer->getOperator()->setOutput(0, scalingFactorTensor);
std::shared_ptr<Node> mulNode = nullptr;
for (auto node : microGraph->getNodes())
if (node->type() == "Mul")
mulNode = node;
// create the metaop graph
auto mulOp = std::static_pointer_cast<OperatorTensor> (mulNode->getOperator());
std::shared_ptr<GraphView> graphView = Sequential({mulNode, roundNode, clipNode});
std::shared_ptr<GraphView> connectedGraphView = getConnectedGraphView(mulNode); // XXX why not use the graphView ???
// Retrieve the scaling factor
// return the metaop
auto scalingFactorTensor = mulOp->getInput(1);
std::shared_ptr<Node> metaopNode = MetaOperator("Quantizer", connectedGraphView, {}, name); // XXX alternative prototype
std::shared_ptr<Tensor> fallback;
const Tensor& localTensor = scalingFactorTensor->refCastFrom(fallback, DataType::Float64, "cpu");
double scalingFactor = localTensor.get<double>(0);
return metaopNode;
return scalingFactor;
}
/*
static std::shared_ptr<Node> getSubNode(std::shared_ptr<GraphView> graphView, std::string nodeType)
{
std::shared_ptr<Node> mulNode = nullptr;
......@@ -238,66 +206,49 @@ static std::shared_ptr<Node> getSubNode(std::shared_ptr<GraphView> graphView, st
return mulNode;
}
*/
void updateScalingFactor(std::shared_ptr<Node> metaOpNode, double scalingFactor)
void setClipRange(std::shared_ptr<Node> quantizerNode, double min, double max)
{
if(metaOpNode->type() != "Scaling" && metaOpNode->type() != "Quantizer")
Log::warn("Cannot update the scaling factor on Node of type {}", metaOpNode->type());
std::shared_ptr<Tensor> scalingFactorTensor = std::make_shared<Tensor>(Array1D<double, 1> {scalingFactor});
auto quantizerOp = std::static_pointer_cast<MetaOperator_Op> (quantizerNode->getOperator());
auto microGraph = quantizerOp->getMicroGraph();
std::shared_ptr<MetaOperator_Op> metaOp = std::static_pointer_cast<MetaOperator_Op>(metaOpNode->getOperator());
std::shared_ptr<Node> clipNode = nullptr;
for (auto node : microGraph->getNodes())
if (node->type() == "Clip")
clipNode = node;
std::shared_ptr<Node> mulNode = getSubNode(metaOp->getMicroGraph(), "Mul");
// TODO : assert that we've got not a nullptr ...
if (!mulNode)
Log::warn("Invalid PTQ MetaOperator, no Mul node found inside ! ");
auto clipOp = std::static_pointer_cast<Clip_Op> (clipNode->getOperator());
mulNode->input(1).first->getOperator()->setOutput(0, scalingFactorTensor);
}
// set the attributes
double getScalingFactor(std::shared_ptr<Node> MetaOpNode)
{
if (MetaOpNode->type() != "Scaling" && MetaOpNode->type() != "Quantizer") {
Log::warn("Cannot get the scaling factor on Node of type {}", MetaOpNode->type());
return 0;
}
clipOp->max() = max;
clipOp->min() = min;
std::shared_ptr<MetaOperator_Op> metaOp = std::static_pointer_cast<MetaOperator_Op>(MetaOpNode->getOperator());
// Retrieve the previous min/max tensors
std::shared_ptr<Node> mulNode = getSubNode(metaOp->getMicroGraph(), "Mul");
auto minTensor = std::static_pointer_cast<OperatorTensor>(clipNode->getOperator())->getInput(1);
auto maxTensor = std::static_pointer_cast<OperatorTensor>(clipNode->getOperator())->getInput(2);
if (!mulNode) {
Log::warn("Invalid PTQ MetaOperator, no Mul found inside node of type {}", MetaOpNode->type());
return 0;
}
// Create the new min/max tensors
auto scalingFactorTensor = std::static_pointer_cast<OperatorTensor>(mulNode->getOperator())->getInput(1);
std::shared_ptr<Tensor> fallback;
const Tensor& localTensor = scalingFactorTensor->refCastFrom(fallback, DataType::Float64, "cpu");
std::shared_ptr<Tensor> newMinTensor = std::make_shared<Tensor>(min);
newMinTensor->setBackend(minTensor->backend());
newMinTensor->setDataType(minTensor->dataType());
return localTensor.get<double>(0);
}
void setClipRange(std::shared_ptr<Node> quantizerNode, double min, double max)
{
if (quantizerNode->type() != "Quantizer") {
Log::warn("Cannot set the clipping range on Node of type {}", quantizerNode->type());
return;
}
std::shared_ptr<Tensor> newMaxTensor = std::make_shared<Tensor>(max);
newMaxTensor->setBackend(maxTensor->backend());
newMaxTensor->setDataType(maxTensor->dataType());
std::shared_ptr<MetaOperator_Op> metaOp = std::static_pointer_cast<MetaOperator_Op> (quantizerNode->getOperator());
// Set the tensors of the producer
std::shared_ptr<Node> clipNode = getSubNode(metaOp->getMicroGraph(), "Clip");
auto minProducer = clipNode->getParent(1);
minProducer->getOperator()->setOutput(0, newMinTensor);
if (!clipNode) {
Log::warn("Invalid PTQ MetaOperator, no Clip found inside node of type {}", quantizerNode->type());
return;
}
std::shared_ptr<Clip_Op> clipOp = std::static_pointer_cast<Clip_Op>(clipNode->getOperator());
clipOp->max() = max;
clipOp->min() = min;
auto maxProducer = clipNode->getParent(2);
maxProducer->getOperator()->setOutput(0, newMaxTensor);
}
}
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment