Skip to content
Snippets Groups Projects
Commit f8b03835 authored by Benjamin Halimi's avatar Benjamin Halimi
Browse files

wip (mainly appendRoundClip)

parent 5cc3f175
No related branches found
No related tags found
2 merge requests!54Update 0.3.1 -> 0.4.0,!50Enhancement : Quantizer only PTQ
Pipeline #69018 failed
...@@ -20,6 +20,10 @@ ...@@ -20,6 +20,10 @@
namespace Aidge { namespace Aidge {
std::shared_ptr<Aidge::Node> BaseQuantizer(double scalingFactor, const std::string& name); std::shared_ptr<Aidge::Node> BaseQuantizer(double scalingFactor, const std::string& name);
void multiplyScalingFactor(std::shared_ptr<Aidge::Node> scalingNode, double coeff);
void appendRoundClip(std::shared_ptr<Node> metaOpNode, double clipMin, double clipMax);
/// @brief Quantizer acts as a meta-operator to handle scaling operations in the PTQ, replacing the Scaling Operator. /// @brief Quantizer acts as a meta-operator to handle scaling operations in the PTQ, replacing the Scaling Operator.
/// This operator is composed of a sequence of [Mul] -> [Clip] -> [Round] operations. /// This operator is composed of a sequence of [Mul] -> [Clip] -> [Round] operations.
......
...@@ -207,39 +207,6 @@ static int getInputIndex(std::shared_ptr<Node> node, std::shared_ptr<Node> paren ...@@ -207,39 +207,6 @@ static int getInputIndex(std::shared_ptr<Node> node, std::shared_ptr<Node> paren
return index; return index;
} }
// Multiply the scaling factor of a scaling meta-operator by `coeff`.
// The factor is the tensor held by the Producer that feeds input #1 of the
// Mul node inside the meta-operator's micro-graph; it is read back as a
// Float64 scalar, multiplied, and written back as a fresh tensor that keeps
// the original backend and datatype.
void multiplyScalingFactor(std::shared_ptr<Aidge::Node> scalingNode, double coeff)
{
// NOTE(review): assumes `scalingNode` is a MetaOperator — TODO confirm at call sites.
auto metaOperatorOp = std::static_pointer_cast<MetaOperator_Op> (scalingNode->getOperator());
// Locate the Mul node inside the micro-graph. If none exists, `mulNode`
// stays nullptr and the dereference below crashes — callers are expected
// to pass a well-formed scaling meta-operator.
std::shared_ptr<Node> mulNode = nullptr;
auto microGraph = metaOperatorOp->getMicroGraph();
for (auto node : microGraph->getNodes())
if (node->type() == "Mul")
mulNode = node;
// Retrieve the previous scaling factor: cast/copy the factor tensor to a
// CPU Float64 view so the scalar can be read whatever the backend is.
auto scalingFactorTensor = std::static_pointer_cast<OperatorTensor>(mulNode->getOperator())->getInput(1);
std::shared_ptr<Tensor> fallback;
const Tensor& localTensor = scalingFactorTensor->refCastFrom(fallback, DataType::Float64, "cpu");
double prevScalingFactor = localTensor.get<double>(0);
// Build the new scaling factor tensor, preserving the original tensor's
// backend and datatype.
std::shared_ptr<Tensor> newScalingFactorTensor = std::make_shared<Tensor>(prevScalingFactor * coeff);
newScalingFactorTensor->setBackend(scalingFactorTensor->backend());
newScalingFactorTensor->setDataType(scalingFactorTensor->dataType());
// Install the new tensor as the output of the Producer feeding the Mul.
auto producer = mulNode->getParent(1);
producer->getOperator()->setOutput(0, newScalingFactorTensor);
}
// Utility function that insert a node below another one already connected // Utility function that insert a node below another one already connected
static void insertChildren(std::shared_ptr<Node> parent, std::shared_ptr<Node> newNode, std::shared_ptr<GraphView> graphView) static void insertChildren(std::shared_ptr<Node> parent, std::shared_ptr<Node> newNode, std::shared_ptr<GraphView> graphView)
{ {
...@@ -273,6 +240,7 @@ static void insertChildren(std::shared_ptr<Node> parent, std::shared_ptr<Node> n ...@@ -273,6 +240,7 @@ static void insertChildren(std::shared_ptr<Node> parent, std::shared_ptr<Node> n
bool insertRoundBelowProducer(std::shared_ptr<Node> node, std::shared_ptr<GraphView> graphView) bool insertRoundBelowProducer(std::shared_ptr<Node> node, std::shared_ptr<GraphView> graphView)
{ {
/*
if (hasAttr(node, "isProducerScaling") && node->type() != "Round") if (hasAttr(node, "isProducerScaling") && node->type() != "Round")
{ {
std::shared_ptr<Aidge::Node> roundNode = Round(node->name() + "_Round"); std::shared_ptr<Aidge::Node> roundNode = Round(node->name() + "_Round");
...@@ -285,6 +253,9 @@ bool insertRoundBelowProducer(std::shared_ptr<Node> node, std::shared_ptr<GraphV ...@@ -285,6 +253,9 @@ bool insertRoundBelowProducer(std::shared_ptr<Node> node, std::shared_ptr<GraphV
return true; return true;
} }
return false; return false;
*/
Log::warn(" ROUND : DUMMY ! ");
return true;
} }
double getTensorAbsoluteMax(std::shared_ptr<Tensor> tensor) double getTensorAbsoluteMax(std::shared_ptr<Tensor> tensor)
...@@ -429,7 +400,7 @@ static std::shared_ptr<Node> getPreviousScalingNode(std::shared_ptr<Node> node) ...@@ -429,7 +400,7 @@ static std::shared_ptr<Node> getPreviousScalingNode(std::shared_ptr<Node> node)
void insertScalingBelowProducer(std::shared_ptr<Node> producerNode, double ratio, std::shared_ptr<GraphView> graphView) void insertScalingBelowProducer(std::shared_ptr<Node> producerNode, double ratio, std::shared_ptr<GraphView> graphView)
{ {
Log::warn(" DUMMY ! "); Log::warn(" INSERT SCALING : DUMMY ! ");
} }
void insertScalingBelowProducer(std::shared_ptr<Node> producerNode, std::shared_ptr<GraphView> graphView) void insertScalingBelowProducer(std::shared_ptr<Node> producerNode, std::shared_ptr<GraphView> graphView)
...@@ -882,7 +853,7 @@ std::unordered_map<std::shared_ptr<Node>, std::pair<bool, bool>> computeSignMap( ...@@ -882,7 +853,7 @@ std::unordered_map<std::shared_ptr<Node>, std::pair<bool, bool>> computeSignMap(
std::pair<bool, bool> unsignedPair(true, true); std::pair<bool, bool> unsignedPair(true, true);
for (std::shared_ptr<Node> node : graphView->getNodes()) for (std::shared_ptr<Node> node : graphView->getNodes())
if (node->type() != "Producer") if (node->type() != "Producer") // XXX XXX XXX we should use nodeVector instead ...
signMap.insert(std::make_pair(node, unsignedPair)); signMap.insert(std::make_pair(node, unsignedPair));
// ITERATE OVER THE GRAPH // ITERATE OVER THE GRAPH
...@@ -1015,6 +986,7 @@ void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_ ...@@ -1015,6 +986,7 @@ void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_
signMap = computeSignMap(graphView, verbose); signMap = computeSignMap(graphView, verbose);
else else
{ {
// XXX XXX XXX we should use the (retreive) node vector
std::pair<bool, bool> signedPair(false, false); std::pair<bool, bool> signedPair(false, false);
for (std::shared_ptr<Node> node : graphView->getNodes()) for (std::shared_ptr<Node> node : graphView->getNodes())
if (node->type() != "Producer") if (node->type() != "Producer")
...@@ -1030,11 +1002,11 @@ void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_ ...@@ -1030,11 +1002,11 @@ void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_
if (isAffine(node)) if (isAffine(node))
{ {
// Rescale the weight tensor // Rescale the weight tensor
std::shared_ptr<Tensor> weightTensor = getWeightTensor(node); multiplyScalingFactor(node->getParent(1), signedMax);
insertScalingBelowProducer(node->getParent(1),signedMax,graphView);
// UUU Quantize the Producer !!!
if (!noQuant) if (!noQuant)
insertRoundBelowProducer(node->getParent(1),graphView); appendRoundClip(node->getParent(1), -(signedMax + 1), signedMax);
// Rescale the bias tensor // Rescale the bias tensor
if (nodeHasBias(node)) if (nodeHasBias(node))
...@@ -1042,11 +1014,12 @@ void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_ ...@@ -1042,11 +1014,12 @@ void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_
bool inputIsUnsigned = signMap[node].first; bool inputIsUnsigned = signMap[node].first;
double rescaling = inputIsUnsigned ? unsignedMax * signedMax : signedMax * signedMax; double rescaling = inputIsUnsigned ? unsignedMax * signedMax : signedMax * signedMax;
std::shared_ptr<Tensor> biasTensor = getBiasTensor(node); multiplyScalingFactor(node->getParent(2), rescaling);
insertScalingBelowProducer(node->getParent(2),rescaling,graphView);
// XXX TODO : enhance this !
int biasMax = (1 << (12 + nbBits));
if (!noQuant) if (!noQuant)
insertRoundBelowProducer(node->getParent(2),graphView); appendRoundClip(node->getParent(2), -(biasMax + 1), biasMax);
} }
// Compensate the rescaling using the next Scaling node // Compensate the rescaling using the next Scaling node
...@@ -1061,7 +1034,7 @@ void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_ ...@@ -1061,7 +1034,7 @@ void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_
std::shared_ptr<Node> scalingNode = getUniqueChild(node); // TODO : assert if scalingNode is a Scaling ... std::shared_ptr<Node> scalingNode = getUniqueChild(node); // TODO : assert if scalingNode is a Scaling ...
multiplyScalingFactor(scalingNode,rescaling) ; multiplyScalingFactor(scalingNode, rescaling);
} }
if (isMerging(node)) if (isMerging(node))
...@@ -1080,7 +1053,7 @@ void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_ ...@@ -1080,7 +1053,7 @@ void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_
if (node->type() == "MatMul") if (node->type() == "MatMul")
rescaling /= inputIsUnsigned ? unsignedMax : signedMax; rescaling /= inputIsUnsigned ? unsignedMax : signedMax;
multiplyScalingFactor(scalingNode, rescaling) ; multiplyScalingFactor(scalingNode, rescaling);
} }
if (isNotQuantized(node)) if (isNotQuantized(node))
...@@ -1096,7 +1069,7 @@ void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_ ...@@ -1096,7 +1069,7 @@ void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_
// Handle the Scaling Nodes ... // Handle the Scaling Nodes ...
if (hasAttr(node, "isScaling")) if (hasAttr(node, "isScaling"))
{ {
// Don't touch the scalings that precede non-linearities ... // Don't touch the scalings that precede non-linearities ...
...@@ -1107,20 +1080,13 @@ void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_ ...@@ -1107,20 +1080,13 @@ void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_
if (!noQuant && !precedesNonLinearNode) if (!noQuant && !precedesNonLinearNode)
{ {
// Replace the Scaling Node by a Quantizer // Old : Replace the Scaling Node by a Quantizer
auto scalingFactorTensor = std::static_pointer_cast<OperatorTensor>(node->getOperator())->getInput(1); appendRoundClip(node, -(signedMax + 1), signedMax);
std::shared_ptr<Tensor> fallback;
const Tensor& localTensor = scalingFactorTensor->refCastFrom(fallback, DataType::Float64, "cpu");
double oldScalingFactor = localTensor.get<double>(0); //!\\
std::shared_ptr<Node> quantizerNode = Quantizer(oldScalingFactor, -(signedMax + 1), signedMax, node->name());
quantizerNode->getOperator()->setDataType(DataType::Float64); // getDataType(parentNode)
quantizerNode->getOperator()->setBackend(determineBackend(node));
graphView->replace({node, node->getParent(1)}, {quantizerNode});
if (optimizeSigns) if (optimizeSigns)
{ {
/*
double rescaling = 1.0; double rescaling = 1.0;
bool inputIsUnsigned = signMap[node].first; bool inputIsUnsigned = signMap[node].first;
...@@ -1129,11 +1095,16 @@ void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_ ...@@ -1129,11 +1095,16 @@ void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_
rescaling /= inputIsUnsigned ? unsignedMax : signedMax; rescaling /= inputIsUnsigned ? unsignedMax : signedMax;
rescaling *= outputIsUnsigned ? unsignedMax : signedMax; rescaling *= outputIsUnsigned ? unsignedMax : signedMax;
double currScalingFactor = getScalingFactor(quantizerNode); // XXX XXX XXX
updateScalingFactor(quantizerNode, currScalingFactor * rescaling); //double currScalingFactor = getScalingFactor(quantizerNode);
//updateScalingFactor(quantizerNode, currScalingFactor * rescaling);
multiplyScalingFactor(node, rescaling);
// XXX XXX XXX HERE : Fix this !!!
if(outputIsUnsigned) if(outputIsUnsigned)
setClipRange(quantizerNode, 0, unsignedMax); setClipRange(quantizerNode, 0, unsignedMax);
*/
} }
} }
} }
...@@ -1334,7 +1305,8 @@ void clearBiases(std::shared_ptr<GraphView> graphView) ...@@ -1334,7 +1305,8 @@ void clearBiases(std::shared_ptr<GraphView> graphView)
if (node->type() == "FC" || node->type() == "Conv2D") { if (node->type() == "FC" || node->type() == "Conv2D") {
std::shared_ptr<Tensor> biasTensor = std::static_pointer_cast<OperatorTensor>(node->getOperator())->getInput(2); std::shared_ptr<Tensor> biasTensor = std::static_pointer_cast<OperatorTensor>(node->getOperator())->getInput(2);
//rescaleTensor(biasTensor, 0); //rescaleTensor(biasTensor, 0);
insertScalingBelowProducer(node->getParent(2), 0, graphView); //insertScalingBelowProducer(node->getParent(2), 0, graphView);
multiplyScalingFactor(node->getParent(2), 0);
} }
} }
} }
......
...@@ -35,7 +35,7 @@ namespace Aidge ...@@ -35,7 +35,7 @@ namespace Aidge
std::shared_ptr<Node> BaseQuantizer(double scalingFactor, const std::string& name) std::shared_ptr<Node> BaseQuantizer(double scalingFactor, const std::string& name)
{ {
std::shared_ptr<Node> mulNode = Mul(name.empty() ? "" : name + "_MulQuant"); std::shared_ptr<Node> mulNode = Mul(name + "_MulQuant");
// Scaling Factor Producer // Scaling Factor Producer
...@@ -54,6 +54,155 @@ std::shared_ptr<Node> BaseQuantizer(double scalingFactor, const std::string& nam ...@@ -54,6 +54,155 @@ std::shared_ptr<Node> BaseQuantizer(double scalingFactor, const std::string& nam
return metaopNode; return metaopNode;
} }
/// @brief Multiply the scaling factor of a scaling meta-operator by a coefficient.
/// The factor is stored in the Producer feeding input #1 of the Mul node inside
/// the meta-operator's micro-graph; it is read back as a Float64 scalar,
/// multiplied by `coeff`, and written back as a new tensor that preserves the
/// original backend and datatype.
/// @param scalingNode MetaOperator node whose micro-graph contains a Mul node.
/// @param coeff Multiplicative coefficient applied to the current scaling factor.
void multiplyScalingFactor(std::shared_ptr<Aidge::Node> scalingNode, double coeff)
{
    // NOTE(review): assumes `scalingNode` wraps a micro-graph — TODO confirm at call sites.
    auto metaOperatorOp = std::static_pointer_cast<MetaOperator_Op>(scalingNode->getOperator());

    // Locate the Mul node inside the micro-graph (stop at the first one:
    // a scaling micro-graph is expected to contain a single Mul).
    std::shared_ptr<Node> mulNode = nullptr;
    auto microGraph = metaOperatorOp->getMicroGraph();
    for (const auto& node : microGraph->getNodes()) {
        if (node->type() == "Mul") {
            mulNode = node;
            break;
        }
    }

    // Guard: without a Mul node the code below would dereference nullptr.
    if (!mulNode) {
        Log::warn(" multiplyScalingFactor() : no Mul node found in the micro-graph, nothing done.");
        return;
    }

    // Retrieve the previous scaling factor: cast/copy the factor tensor to a
    // CPU Float64 view so the scalar can be read whatever the backend is.
    auto scalingFactorTensor = std::static_pointer_cast<OperatorTensor>(mulNode->getOperator())->getInput(1);
    std::shared_ptr<Tensor> fallback;
    const Tensor& localTensor = scalingFactorTensor->refCastFrom(fallback, DataType::Float64, "cpu");
    double prevScalingFactor = localTensor.get<double>(0);

    // Build the new scaling factor tensor, preserving backend and datatype.
    std::shared_ptr<Tensor> newScalingFactorTensor = std::make_shared<Tensor>(prevScalingFactor * coeff);
    newScalingFactorTensor->setBackend(scalingFactorTensor->backend());
    newScalingFactorTensor->setDataType(scalingFactorTensor->dataType());

    // Install the new tensor as the output of the Producer feeding the Mul.
    auto producer = mulNode->getParent(1);
    producer->getOperator()->setOutput(0, newScalingFactorTensor);
}
/*
void appendRoundClip(std::shared_ptr<Node> metaOpNode, double clipMin, double clipMax)
{
// Create the new nodes
std::string name = metaOpNode->name();
std::shared_ptr<Node> roundNode = Round(name + "_RoundQuant");
std::shared_ptr<Node> clipNode = Clip(name + "_ClipQuant", clipMin, clipMax);
// Retrieve the previous microGraph
auto metaOperatorOp = std::static_pointer_cast<MetaOperator_Op> (metaOpNode->getOperator());
auto microGraph = metaOperatorOp->getMicroGraph();
// Get the Mul node from the microGraph
std::shared_ptr<Node> mulNode = nullptr;
for (auto node : microGraph->getNodes())
if (node->type() == "Mul")
mulNode = node;
// Save the backend and datatype
auto mulOp = std::static_pointer_cast<OperatorTensor> (mulNode->getOperator());
auto backend = mulOp->getInput(0)->backend();
auto dataType = mulOp->getInput(0)->dataType();
// Create the new microGraph
std::shared_ptr<GraphView> prevGraphView = Sequential({mulNode});
prevGraphView->add(mulNode->getParent(1)); // add the producer
auto prevGraphViewClone = prevGraphView->clone();
std::shared_ptr<GraphView> newGraphView = Sequential({prevGraphViewClone, roundNode, clipNode});
// Replace the old microGraph
microGraph->replace(prevGraphView, newGraphView);
// Set the backend and datatype
microGraph->setBackend(backend);
microGraph->setDataType(dataType);
}
*/
// Append [Round] -> [Clip] after the Mul of a scaling meta-operator, turning
// it into a full quantizer micro-graph: [Mul] -> [Round] -> [Clip].
// The rewrite is done by building a brand-new micro-graph (reusing a clone of
// the coefficient Producer), manually re-wiring the outer input/output
// tensors, and assigning it over the old micro-graph in place.
void appendRoundClip(std::shared_ptr<Node> metaOpNode, double clipMin, double clipMax)
{
// Retrieve the previous micro-graph of the meta-operator
auto metaOperatorOp = std::static_pointer_cast<MetaOperator_Op> (metaOpNode->getOperator());
auto microGraph = metaOperatorOp->getMicroGraph();
// Locate the Mul node inside the micro-graph.
// NOTE(review): if no Mul is present, `mulNode` stays nullptr and the cast
// below dereferences it — assumes a well-formed scaling meta-operator.
std::shared_ptr<Node> mulNode = nullptr;
for (auto node : microGraph->getNodes())
if (node->type() == "Mul")
mulNode = node;
auto mulOp = std::static_pointer_cast<OperatorTensor> (mulNode->getOperator());
// Save the backend and datatype of the Mul's data input so the new
// micro-graph can be configured identically.
auto backend = mulOp->getInput(0)->backend();
auto dataType = mulOp->getInput(0)->dataType();
// Create the new micro-graph nodes
auto newMulNode = Mul();
auto roundNode = Round();
auto clipNode = Clip(""); // min/max are fed below as input tensors, not ctor args
auto newCoeffNode = mulNode->getParent(1)->clone(); // clone of the scaling-factor Producer
// Assemble the new micro-graph: Mul -> Round -> Clip
std::shared_ptr<GraphView> newMicroGraph = Sequential({newMulNode, roundNode, clipNode});
newCoeffNode->addChild(newMulNode, 0, 1); // coefficient goes to Mul input #1
newMicroGraph->add(newCoeffNode);
// Manually wire the outer IO tensors onto the new micro-graph
auto newMulOp = std::static_pointer_cast<OperatorTensor> (newMulNode->getOperator());
newMulOp->setInput(0, mulOp->getInput(0)); // mandatory: the new Mul needs the outer input tensor
auto clipOp = std::static_pointer_cast<Clip_Op> (clipNode->getOperator());
clipOp->setOutput(0, mulOp->getOutput(0)); // mandatory: reuse the outer output tensor
// Feed the clip min and max bounds through dedicated Producers
auto minTensor = std::make_shared<Tensor>(clipMin);
auto maxTensor = std::make_shared<Tensor>(clipMax);
auto minNode = Producer(minTensor);
auto maxNode = Producer(maxTensor);
minNode->addChild(clipNode, 0, 1);
maxNode->addChild(clipNode, 0, 2);
newMicroGraph->add(minNode);
newMicroGraph->add(maxNode);
// Propagate the saved backend and datatype to every new node
newMicroGraph->setBackend(backend);
newMicroGraph->setDataType(dataType);
// Reset the scheduling so the next run re-schedules the new topology
SequentialScheduler scheduler(newMicroGraph);
scheduler.resetScheduling();
//scheduler.generateScheduling();
// Replace the meta-operator's micro-graph in place
*microGraph = *newMicroGraph;
}
std::shared_ptr<Node> Quantizer(double scalingFactor, double clipMin, double clipMax, const std::string& name) std::shared_ptr<Node> Quantizer(double scalingFactor, double clipMin, double clipMax, const std::string& name)
{ {
// create the nodes // create the nodes
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment