Skip to content
Snippets Groups Projects
Commit cdcb260f authored by Benjamin Halimi's avatar Benjamin Halimi
Browse files

initial commit (quantizer insertion + normalize params)

parent 3f669a98
No related branches found
No related tags found
2 merge requests!54Update 0.3.1 -> 0.4.0,!50Enhancement : Quantizer only PTQ
Pipeline #68344 failed
......@@ -19,6 +19,10 @@
namespace Aidge {
// XXX XXX XXX
std::shared_ptr<Aidge::Node> BaseQuantizer(double scalingFactor, const std::string& name);
/// @brief Quantizer acts as a meta-operator to handle scaling operations in the PTQ, replacing the Scaling Operator.
/// This operator is composed of a sequence of [Mul] -> [Clip] -> [Round] operations.
///
......
......@@ -89,15 +89,12 @@ namespace Aidge {
std::vector<std::shared_ptr<Node>> retrieveNodeVector(std::shared_ptr<GraphView> graphView, bool newSchedule = true, bool verbose = false);
/**
* @brief Inserts a scaling node below the given producer node in the graph view.
* If the node is already a producer scaling node, it accumulates the scaling factor by multiplying its value directly.
*
* @brief Inserts a scaling node below the given producer node in the graphView.
* @param node A shared pointer to the producer node where the scaling node will be inserted (below).
* @param scalingFactor The scaling factor to apply.
* @param graphView A shared pointer to the graph view in which the nodes are located.
* @return True if the scaling node was successfully inserted or the scaling factor was accumulated; False otherwise.
*/
bool insertScalingBelowProducer(std::shared_ptr<Node> node, double scalingFactor, std::shared_ptr<GraphView> graphView);
void insertScalingBelowProducer(std::shared_ptr<Node> node, std::shared_ptr<GraphView> graphView);
/**
* @brief Inserts a rounding node below the given producer (also below its own producerScaling) node in the graph view.
......
......@@ -78,6 +78,7 @@ std::shared_ptr<Aidge::Tensor> getLocalTensor(std::shared_ptr<Node> node)
void crossLayerEqualization(std::shared_ptr<GraphView> graphView, double targetDelta)
{
/*
std::vector<std::shared_ptr<Node>> nodeVector = retrieveNodeVector(graphView);
// Check if the CLE can be applied ...
......@@ -137,6 +138,7 @@ void crossLayerEqualization(std::shared_ptr<GraphView> graphView, double targetD
}
}
while (maxRangeDelta > targetDelta);
*/
}
}
\ No newline at end of file
......@@ -34,6 +34,9 @@
#include "aidge/recipes/Recipes.hpp"
#include "aidge/recipes/QuantRecipes.hpp"
#include "aidge/operator/MetaOperator.hpp"
namespace Aidge
{
......@@ -204,8 +207,9 @@ static int getInputIndex(std::shared_ptr<Node> node, std::shared_ptr<Node> paren
return index;
}
void multiplyScalingFactor(std::shared_ptr<Aidge::Node> node, double coeff)
void multiplyScalingFactor(std::shared_ptr<Aidge::Node> scalingNode, double coeff)
{
/*
AIDGE_ASSERT(node->type() == "Mul" && hasAttr(node, "isProducerScaling") || hasAttr(node, "isScaling"),
"Cannot update the scaling factor on Node of type {} with no scaling tag", node->type());
......@@ -217,6 +221,37 @@ void multiplyScalingFactor(std::shared_ptr<Aidge::Node> node, double coeff)
std::shared_ptr<Tensor> resultTensor = std::make_shared<Tensor>(Array1D<double, 1> {previousScalingFactor * coeff});
node->input(1).first->getOperator()->setOutput(0, resultTensor);
*/
auto metaOperatorOp = std::static_pointer_cast<MetaOperator_Op> (scalingNode->getOperator());
// Get the Mul node from the microGraph
std::shared_ptr<Node> mulNode = nullptr;
auto microGraph = metaOperatorOp->getMicroGraph();
for (auto node : microGraph->getNodes())
if (node->type() == "Mul")
mulNode = node;
// Retreive the previous scaling factor
auto scalingFactorTensor = std::static_pointer_cast<OperatorTensor>(mulNode->getOperator())->getInput(1);
std::shared_ptr<Tensor> fallback;
const Tensor& localTensor = scalingFactorTensor->refCastFrom(fallback, DataType::Float64, "cpu");
double prevScalingFactor = localTensor.get<double>(0);
// Create the new scaling factor tensor
std::shared_ptr<Tensor> newScalingFactorTensor = std::make_shared<Tensor>(prevScalingFactor * coeff);
newScalingFactorTensor->setBackend(scalingFactorTensor->backend());
newScalingFactorTensor->setDataType(scalingFactorTensor->dataType());
// Set the tensor of the producer
auto producer = mulNode->getParent(1);
producer->getOperator()->setOutput(0, newScalingFactorTensor);
// XXX old way : mulNode->input(1).first->getOperator()->setOutput(0, resultTensor);
}
// Utility function that inserts a node below another one already connected
......@@ -303,28 +338,6 @@ double getTensorAbsoluteMax(std::shared_ptr<Tensor> tensor)
return localFlatTensor.get<double>(maxIndex);
}
// TODO : pass nodeVector by reference ...
static std::vector<std::shared_ptr<Node>> removeMatchingNodes(std::vector<std::shared_ptr<Node>> nodeVector, std::string nodeType)
{
std::vector<std::shared_ptr<Node>> remainingNodes;
for (std::shared_ptr<Node> node : nodeVector)
if (node->type() != nodeType)
remainingNodes.push_back(node);
return remainingNodes;
}
static std::vector<std::shared_ptr<Node>> removeProdScalingNodes(std::vector<std::shared_ptr<Node>> nodeVector)
{
std::vector<std::shared_ptr<Node>> remainingNodes;
for (std::shared_ptr<Node> node : nodeVector)
if (!hasAttr(node, "isProducerScaling"))
remainingNodes.push_back(node);
return remainingNodes;
}
static void fixScheduling(std::vector<std::shared_ptr<Node>>& nodeVector) {
std::vector<std::shared_ptr<Node>> correctedVector;
......@@ -344,22 +357,42 @@ static void fixScheduling(std::vector<std::shared_ptr<Node>>& nodeVector) {
static std::shared_ptr<Tensor> getWeightTensor(std::shared_ptr<Node> node)
{
return std::static_pointer_cast<OperatorTensor>(node->getOperator())->getInput(1);
std::shared_ptr<Node> producer = node->getParent(1);
if (producer->type() == "BaseQuantizer")
producer = producer->getParent(0);
return std::static_pointer_cast<OperatorTensor>(producer->getOperator())->getOutput(0);
}
static std::shared_ptr<Tensor> getBiasTensor(std::shared_ptr<Node> node)
{
return std::static_pointer_cast<OperatorTensor>(node->getOperator())->getInput(2);
std::shared_ptr<Node> producer = node->getParent(2);
if (producer->type() == "BaseQuantizer")
producer = producer->getParent(0);
return std::static_pointer_cast<OperatorTensor>(producer->getOperator())->getOutput(0);
}
std::vector<std::shared_ptr<Node>> retrieveNodeVector(std::shared_ptr<GraphView> graphView, bool newSchedule, bool verbose)
{
std::vector<std::shared_ptr<Node>> nodeVector = graphView->getOrderedNodes();
// Remove duplicate nodes. Is it still needed ???
fixScheduling(nodeVector);
nodeVector = removeMatchingNodes(nodeVector, "Producer");
nodeVector = removeProdScalingNodes(nodeVector);
// Remove Producers and their Scalings
std::vector<std::shared_ptr<Node>> remainingNodes;
for (std::shared_ptr<Node> node : nodeVector)
if ((node->type() != "Producer") && !hasAttr(node, "isProducerScaling"))
remainingNodes.push_back(node);
nodeVector = remainingNodes;
// Verbose
if (verbose)
{
......@@ -383,6 +416,7 @@ static DataType getDataType(std::shared_ptr<Node> node)
return op->getOutput(0)->dataType();
}
/*
static std::shared_ptr<Aidge::Node> createScalingNode(std::string name, std::vector<std::string> attributes, double value)
{
std::shared_ptr<Node> scalingNode = Mul(name);
......@@ -401,26 +435,41 @@ static std::shared_ptr<Aidge::Node> createScalingNode(std::string name, std::vec
return scalingNode;
}
*/
bool insertScalingBelowProducer(std::shared_ptr<Node> producerNode, double scalingFactor, std::shared_ptr<GraphView> graphView)
// XXX double check this !
static bool nodeHasBias(std::shared_ptr<Node> node)
{
if (hasAttr(producerNode, "isProducerRounding"))
{
// In this case we 'bump' the node to the one above him (an actual ProducerScaling)
// because the round node is not usable (only used when SSA is enabled)
producerNode = producerNode->getParent(0);
if (node->getParents().size() == 3) {
std::shared_ptr<Tensor> biasTensor = getBiasTensor(node);
if (biasTensor)
return true;
}
return false;
}
if (hasAttr(producerNode, "isProducerScaling"))
{
// We accumulate the previous scaling factors by multiplying the SF of the ProducerScaling node
// (adding new nodes each time would make the graph unusable)
multiplyScalingFactor(producerNode, scalingFactor);
return true;
// TODO: rework this !
/**
 * @brief Walks up the graph from @p node, always following parent #0, until a node
 *        tagged "isScaling" is reached.
 *
 * If the walk reaches a node that has no parent, or whose parent #0 is not connected,
 * a warning is logged and the last visited node is returned even though it does not
 * carry the "isScaling" attribute.
 *
 * @param node Starting node (returned as is when it is already tagged "isScaling").
 * @return The first "isScaling" node found, or the last visited node when none exists.
 */
static std::shared_ptr<Node> getPreviousScalingNode(std::shared_ptr<Node> node)
{
    std::shared_ptr<Node> currNode = node;
    while (!hasAttr(currNode, "isScaling")) {
        const auto parents = currNode->getParents();
        // Stop on a missing parent as well, so that a null node is never inspected
        if (parents.size() == 0 || !parents[0]) {
            Log::warn(" Warning : No previous Scaling node were found ! ");
            break;
        }
        currNode = parents[0];
    }
    return currNode;
}
AIDGE_ASSERT(producerNode->type() == "Producer", " Cannot apply a scaling factor on node of type: {} which is not a Producer", producerNode->type());
/**
 * @brief Placeholder overload: performs no graph modification and only logs a warning.
 *
 * @param producerNode Ignored.
 * @param ratio        Ignored.
 * @param graphView    Ignored.
 */
void insertScalingBelowProducer(std::shared_ptr<Node> producerNode, double ratio, std::shared_ptr<GraphView> graphView)
{
    // The arguments are deliberately unused
    static_cast<void>(producerNode);
    static_cast<void>(ratio);
    static_cast<void>(graphView);

    Log::warn(" DUMMY ! ");
}
/**
 * @brief Inserts a BaseQuantizer node (scaling factor 1.0) below the given producer node.
 *
 * The inserted node is named after the producer with a "_ProducerScaling" suffix
 * (made unique within @p graphView), tagged "isProducerScaling", set to Float64 and
 * placed on the backend determined from @p producerNode.
 *
 * @param producerNode Node below which the scaling node is inserted.
 * @param graphView    Graph view in which the nodes are located.
 */
void insertScalingBelowProducer(std::shared_ptr<Node> producerNode, std::shared_ptr<GraphView> graphView)
{
    std::string scalingNodeName = makeUniqueName(producerNode->name() + "_ProducerScaling", graphView);
    std::shared_ptr<Node> scalingNode = BaseQuantizer(1.0, scalingNodeName);

    // Only the producer-scaling tag is set here; the "isScaling" tag is not added.
    addAttr(scalingNode, "isProducerScaling");

    scalingNode->getOperator()->setDataType(DataType::Float64); // getDataType(parentNode)
    scalingNode->getOperator()->setBackend(determineBackend(producerNode)); // TODO: use the producer parent instead ?

    insertChildren(producerNode, scalingNode, graphView);

    // TODO: confirm that graphView->add(scalingNode->getParent(1)) is not needed here.
}
/**
 * @brief Inserts a scaling node below the weight producer, and below the bias producer
 *        when there is one, of every affine node of the graph view.
 *
 * @param graphView Graph view whose affine nodes are processed.
 */
void insertProducerScalingNodes(std::shared_ptr<GraphView> graphView)
{
    // Iterate over a local copy of the node set, as in the original loop
    const std::set<std::shared_ptr<Node>> snapshot = graphView->getNodes();

    for (const std::shared_ptr<Node>& candidate : snapshot)
    {
        if (!isAffine(candidate))
            continue;

        // Weights (input #1) always get a scaling node
        insertScalingBelowProducer(candidate->getParent(1), graphView);

        // Bias (input #2) only when the node actually has one
        if (nodeHasBias(candidate))
            insertScalingBelowProducer(candidate->getParent(2), graphView);
    }
}
// XXX HERE : Branches containing only Seamless nodes should be considered as residual too !!!
......@@ -458,47 +534,45 @@ void insertResidualScalingNodes(std::shared_ptr<GraphView> graphView)
Log::info(" ### inserting multiplicative node ...");
std::string residualNodeName = makeUniqueName(parentNode->name() + "_Res", graphView);
std::shared_ptr<Node> residualNode = createScalingNode(residualNodeName, {"isScaling", "isResidual"}, 1.0);
// XXX XXX XXX
// std::shared_ptr<Node> residualNode = createScalingNode(residualNodeName, {"isScaling", "isResidual"}, 1.0);
std::shared_ptr<Node> residualNode = BaseQuantizer(1.0, residualNodeName);
addAttr(residualNode, "isScaling");
addAttr(residualNode, "isResidual");
residualNode->getOperator()->setDataType(DataType::Float64); // getDataType(parentNode)
residualNode->getOperator()->setBackend(determineBackend(parentNode));
graphView->insertParent(node, residualNode, i, 0, 0);
graphView->add(residualNode->getParent(1)); // add the scaling factor producer
// XXX XXX XXX is it needed ? no more !
// graphView->add(residualNode->getParent(1));
}
}
}
}
}
/**
 * @brief Walks up the graph from @p node, always following parent #0, until a node
 *        tagged "isScaling" is reached.
 *
 * If the walk reaches a node that has no parent, or whose parent #0 is not connected,
 * a warning is logged and the last visited node is returned even though it does not
 * carry the "isScaling" attribute.
 *
 * @param node Starting node (returned as is when it is already tagged "isScaling").
 * @return The first "isScaling" node found, or the last visited node when none exists.
 */
static std::shared_ptr<Node> getPreviousScalingNode(std::shared_ptr<Node> node)
{
    std::shared_ptr<Node> currNode = node;
    while (!hasAttr(currNode, "isScaling"))
    {
        const auto parents = currNode->getParents();
        // Stop on a missing parent as well, so that a null node is never inspected
        if (parents.size() == 0 || !parents[0])
        {
            Log::warn(" Warning : No previous Scaling node were found ! ");
            break;
        }
        currNode = parents[0];
    }
    return currNode;
}
void insertScalingNodes(std::shared_ptr<GraphView> graphView)
{
insertProducerScalingNodes(graphView);
insertResidualScalingNodes(graphView);
std::set<std::shared_ptr<Node>> nodeSet = graphView->getNodes();
for (std::shared_ptr<Node> parentNode : nodeSet)
{
// Insert a Scaling node after each node that have to be quantized
if (isAffine(parentNode) || isMerging(parentNode) || isNotQuantized(parentNode))
{
std::string scalingNodeName = makeUniqueName(parentNode->name() + "_Scaling", graphView);
std::shared_ptr<Node> scalingNode = createScalingNode(scalingNodeName, {"isScaling"}, 1.0);
// XXX XXX XXX
// std::shared_ptr<Node> scalingNode = createScalingNode(scalingNodeName, {"isScaling"}, 1.0);
std::shared_ptr<Node> scalingNode = BaseQuantizer(1.0, scalingNodeName);
addAttr(scalingNode, "isScaling");
scalingNode->getOperator()->setDataType(DataType::Float64); // getDataType(parentNode)
scalingNode->getOperator()->setBackend(determineBackend(parentNode));
......@@ -506,12 +580,12 @@ void insertScalingNodes(std::shared_ptr<GraphView> graphView)
if (parentNode->getChildren().size() > 0) {
insertChildren(parentNode, scalingNode, graphView);
} else {
// Log::info(" last node reached ! ");
parentNode->addChild(scalingNode, 0, 0);
graphView->add(scalingNode);
}
graphView->add(scalingNode->getParent(1)); // add the scaling factor producer
// XXX XXX XXX is it needed ? no more
// graphView->add(scalingNode->getParent(1));
// In the case the node is a non-linear operator we want to add an extra
// scaling node before it to rescale its input ...
......@@ -519,29 +593,24 @@ void insertScalingNodes(std::shared_ptr<GraphView> graphView)
if (isNotQuantized(parentNode))
{
std::string prevScalingNodeName = makeUniqueName(parentNode->name() + "_PrevScaling", graphView);
std::shared_ptr<Node> prevScalingNode = createScalingNode(prevScalingNodeName, {"isScaling"}, 1.0);
// XXX XXX XXX
// std::shared_ptr<Node> prevScalingNode = createScalingNode(prevScalingNodeName, {"isScaling"}, 1.0);
std::shared_ptr<Node> prevScalingNode = BaseQuantizer(1.0, prevScalingNodeName);
addAttr(prevScalingNode, "isScaling");
prevScalingNode->getOperator()->setDataType(DataType::Float64); // getDataType(parentNode)
prevScalingNode->getOperator()->setBackend(determineBackend(parentNode));
graphView->insertParent(parentNode, prevScalingNode, 0, 0, 0);
graphView->add(prevScalingNode->getParent(1)); // add the scaling factor producer
// XXX XXX XXX is it needed ? no more !
// graphView->add(prevScalingNode->getParent(1));
}
}
}
}
// XXX double check this !
static bool nodeHasBias(std::shared_ptr<Node> node)
{
if (node->getParents().size() == 3) {
std::shared_ptr<Tensor> biasTensor = getBiasTensor(node);
if (biasTensor)
return true;
}
return false;
}
void normalizeParameters(std::shared_ptr<GraphView> graphView)
{
// CREATE THE ACCUMULATED RATIO MAP ///////////////////////////////////////
......@@ -574,11 +643,12 @@ void normalizeParameters(std::shared_ptr<GraphView> graphView)
// Rescale the weight tensor
std::shared_ptr<Tensor> weightTensor = getWeightTensor(node);
double scaling = getTensorAbsoluteMax(weightTensor);
double ratio = 1.0 / scaling;
double ratio = 1.0 / getTensorAbsoluteMax(weightTensor);
//rescaleTensor(weightTensor, ratio);
insertScalingBelowProducer(node->getParent(1), ratio, graphView);
// XXX XXX XXX insertScalingBelowProducer(node->getParent(1), ratio, graphView);
multiplyScalingFactor(node->getParent(1), ratio);
// Accumulate the ratio
......@@ -595,7 +665,8 @@ void normalizeParameters(std::shared_ptr<GraphView> graphView)
{
std::shared_ptr<Tensor> biasTensor = getBiasTensor(node);
//rescaleTensor(biasTensor, accumulatedRatios[node] );
insertScalingBelowProducer(node->getParent(2), accumulatedRatios[node], graphView);
// XXX XXX XXX insertScalingBelowProducer(node->getParent(2), accumulatedRatios[node], graphView);
multiplyScalingFactor(node->getParent(2), accumulatedRatios[node]);
}
}
......
......@@ -30,10 +30,30 @@
#include "aidge/operator/OperatorTensor.hpp"
#include "aidge/utils/Log.hpp"
namespace Aidge
{
std::shared_ptr<Node> BaseQuantizer(double scalingFactor, const std::string& name)
{
std::shared_ptr<Node> mulNode = Mul(name.empty() ? "" : name + "_MulQuant");
// Scaling Factor Producer
std::shared_ptr<Tensor> scalingFactorTensor = std::make_shared<Tensor>(Array1D<double, 1> {scalingFactor});
std::shared_ptr<Node> scalingFactorProducer = addProducer<1>(mulNode, 1, {1}, "ScalingFactor");
scalingFactorProducer->getOperator()->setOutput(0, scalingFactorTensor);
std::shared_ptr<GraphView> graphView = Sequential({mulNode});
graphView->add(scalingFactorProducer);
// alternative : capture the Producer ...
// std::shared_ptr<GraphView> connectedGraphView = getConnectedGraphView(mulNode);
std::shared_ptr<Node> metaopNode = MetaOperator("BaseQuantizer", graphView, {}, name); // XXX alternative prototype ->
return metaopNode;
}
std::shared_ptr<Node> Quantizer(double scalingFactor, double clipMin, double clipMax, const std::string& name)
{
// create the nodes
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment