Commit 1d1fa3a4 authored by Noam Zerah

Adding changes

parent f38da961
@@ -29,6 +29,34 @@ namespace Aidge {
/// @return A shared pointer to an instance of the meta-operator node.
std::shared_ptr<Aidge::Node> Quantizer(double scalingFactor, double clipMin, double clipMax, const std::string& name);
/// @brief IntQuantizer acts as an extension of the Quantizer meta-operator, enabling seamless integration
/// into computation graphs with a data type other than Float while preserving floating-point precision.
///
/// This operator modifies the provided Quantizer by inserting explicit casting operations before and after
/// the quantization process. It first casts the input to Float64, applies the quantization steps (Mul, Clip, Round),
/// and then casts the result back to the target data type. This ensures compatibility with integer-based computation graphs
/// while maintaining the precision of floating-point operations.
///
/// @param oldQuantizer A shared pointer to the existing Quantizer node that will be adapted.
/// @param targetType The target data type to which the final output should be cast after the quantization process.
/// @param name The name of the meta-operator node created.
/// @return A shared pointer to a new instance of the modified meta-operator node.
std::shared_ptr<Node> IntQuantizer(std::shared_ptr<Node> oldQuantizer, DataType targetType, const std::string& name);
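
A minimal usage sketch (hypothetical node names; assumes a Quantizer built with the factory above):

    // Wrap an existing float Quantizer so it can sit inside an Int32 graph.
    std::shared_ptr<Node> quantizer = Quantizer(0.5, -128.0, 127.0, "q0");
    std::shared_ptr<Node> intQuantizer = IntQuantizer(quantizer, DataType::Int32, "q0_int");
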
/// @brief BitShiftQuantizer acts as an extension of the Quantizer meta-operator, converting it into an
/// integer-only equivalent for computation graphs with a data type other than Float.
///
/// Instead of casting around a floating-point multiplication, this operator replaces the Quantizer's
/// scaling step with a bitwise shift: the scaling factor is converted to a shift amount (log2 of the
/// factor; left shift for positive exponents, right shift for negative ones), followed by the original
/// Clip. The resulting micro-graph operates directly on the target data type, so it suits integer-only
/// backends, but it is only exact when the scaling factor is a power of two (e.g. in single-shift mode).
///
/// @param oldQuantizer A shared pointer to the existing Quantizer node that will be adapted.
/// @param targetType The target data type to which the final output should be cast after the quantization process.
/// @param name The name of the meta-operator node created.
/// @return A shared pointer to a new instance of the modified meta-operator node.
std::shared_ptr<Node> BitShiftQuantizer(std::shared_ptr<Node> oldQuantizer, DataType targetType, const std::string& name);
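
A hedged sketch of how the scaling factor maps to a shift amount (mirroring the std::log2 conversion in the implementation further down; only power-of-two factors are exact), reusing the quantizer node from the previous sketch:

    // scalingFactor = 256.0   -> log2 =  8 -> left shift by 8
    // scalingFactor = 1.0/256 -> log2 = -8 -> right shift by 8
    std::shared_ptr<Node> bsQuantizer = BitShiftQuantizer(quantizer, DataType::Int32, "q0_bitshift");
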
/// @brief Updates the scaling factor of a PTQ meta-operator node, allowing dynamic adjustment of the scaling parameter.
/// This function sets a new scaling factor on the specified meta-operator node by modifying the scalar applied in its Mul operation.
/// The meta-operator node must be a PTQ-specific operator, such as a Quantizer or Scaling node.
......
@@ -93,7 +93,9 @@ void init_PTQ(py::module &m) {
:type verbose: bool
)mydelimiter");
m.def("quantize_network", &quantizeNetwork ,py::arg("network"), py::arg("nb_bits"), py::arg("input_dataset"), py::arg("clipping_mode") = Clipping::MAX , py::arg("no_quantization") = false, py::arg("optimize_signs") = false, py::arg("single_shift") = false, py::arg("use_cuda") = false, py::arg("verbose") = false,
m.def("quantize_network", &quantizeNetwork ,py::arg("network"), py::arg("nb_bits"), py::arg("input_dataset"),
py::arg("clipping_mode") = Clipping::MAX ,py::arg("target_type") = DataType::Float64 ,py::arg("no_quantization") = true, py::arg("optimize_signs") = false,
py::arg("single_shift") = false, py::arg("use_cuda") = false, py::arg("fold_graph") = true, py::arg("verbose") = false,
R"mydelimiter(
Main quantization routine. Performs every step of the quantization pipeline.
:param network: The GraphView to be quantized.
......
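
For reference, a hedged C++ call sketch of the updated routine, assuming the quantizeNetwork signature mirrors the bound argument order above (all values illustrative):

    quantizeNetwork(network, 8, inputDataset,
                    Clipping::MAX, DataType::Int32,
                    /*noQuantization=*/false, /*optimizeSigns=*/false,
                    /*singleShift=*/true, /*useCuda=*/false,
                    /*foldGraph=*/true, /*verbose=*/false);
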
@@ -29,6 +29,8 @@
#include "aidge/operator/Conv.hpp"
#include "aidge/operator/ArgMax.hpp"
#include "aidge/operator/Reshape.hpp"
#include "aidge/operator/Cast.hpp"
#include "aidge/recipes/Recipes.hpp"
#include "aidge/recipes/QuantRecipes.hpp"
@@ -195,6 +197,59 @@ static void insertChildren(std::shared_ptr<Node> parent, std::shared_ptr<Node> n
graphView->add(newNode);
}
void applyConstFold(std::shared_ptr<GraphView> &graphView)
{
// Mark every Producer as constant so that the folding recipe can evaluate it
for (const std::shared_ptr<Node> node : graphView->getNodes())
{
if (node->type() == "Producer")
{
const auto& producer = std::static_pointer_cast<Producer_Op>(node->getOperator());
producer->constant() = true;
}
}
constantFolding(graphView);
}
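
A sketch of the intended effect, assuming the constantFolding recipe can evaluate the operators involved:

    // After applyConstFold, constant chains such as Producer -> Round -> Cast
    // collapse into a single constant Producer.
    applyConstFold(graphView);
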
bool castQuantizedGraph(std::shared_ptr<GraphView> &graphView, Aidge::DataType targetType, bool singleShift)
{
// Work on a copy of the graph's node set, since we will replace some nodes while iterating
std::vector<std::shared_ptr<Node>> nodeVector(graphView->getNodes().begin(), graphView->getNodes().end());
for (std::shared_ptr<Node> node : nodeVector)
{
if (node->type() == "Round" && node->attributes()->hasAttr("isProducerRounding"))
{
// Keep the producer rounding in Float64 and cast its output back to the target type
std::shared_ptr<Aidge::Node> castNode = Cast(targetType, node->name() + "_Cast");
castNode->getOperator()->setDataType(targetType);
castNode->getOperator()->setBackend(node->getOperator()->backend());
insertChildren(node, castNode, graphView);
castNode->attributes()->addAttr("isProducerCasting", 0.0);
node->getOperator()->setDataType(DataType::Float64);
}
else if(node->type() == "Quantizer")
{
if(singleShift)
{
std::shared_ptr<Node> newBitShiftQuantizer = BitShiftQuantizer(node,targetType,node->name()+"_BitShift_Quantizer");
newBitShiftQuantizer->getOperator()->setBackend(node->getOperator()->backend());
graphView->replace({node},{newBitShiftQuantizer});
}
else // If single shift is not enabled, use the alternative IntQuantizer (which casts the data before and after the regular Quantizer operations)
{
std::shared_ptr<Node> newIntQuantizer = IntQuantizer(node,targetType,node->name());
newIntQuantizer->getOperator()->setBackend(node->getOperator()->backend());
graphView->replace({node},{newIntQuantizer});
}
}
else if (node->type() != "Producer" &&
!node->attributes()->hasAttr("isProducerScaling"))
{
node->getOperator()->setDataType(targetType);
}
}
return true;
}
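
A hypothetical pipeline step showing how this routine might be invoked (arguments illustrative):

    // Move a quantized float graph to Int32, using bit-shift quantizers
    // when the network was quantized in single-shift mode.
    castQuantizedGraph(graphView, DataType::Int32, /*singleShift=*/true);
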
bool insertRoundBelowProducer(std::shared_ptr<Node> node, std::shared_ptr<GraphView> graphView)
{
@@ -1224,11 +1279,19 @@ void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits,
{
setupDataType(graphView, inputDataSet, targetType);
}
if(foldGraph)
{
Log::notice("Applying constant folding recipe to the graph ...");
applyConstFold(graphView);
}
// Mandatory to handle all of the newly added connections!
graphView->updateInputsOutputs();
if (verbose)
printScalingFactors(graphView);
// if (useCuda)
//     graphView->setBackend("cuda");
Log::notice(" Resetting the scheduler ...");
SequentialScheduler scheduler(graphView);
......
@@ -18,6 +18,8 @@
#include "aidge/operator/Clip.hpp"
#include "aidge/operator/Mul.hpp"
#include "aidge/operator/Round.hpp"
#include "aidge/operator/BitShift.hpp"
#include "aidge/operator/Cast.hpp"
#include "aidge/graph/Node.hpp"
#include "aidge/graph/OpArgs.hpp"
@@ -33,7 +35,15 @@
namespace Aidge
{
static std::shared_ptr<Node> getSubNode(std::shared_ptr<GraphView> graphView, std::string nodeType)
{
// Return the last node of the requested type found in the graph (nullptr if none)
std::shared_ptr<Node> resultNode = nullptr;
for (std::shared_ptr<Node> node : graphView->getNodes())
if (node->type() == nodeType)
resultNode = node;
return resultNode;
}
std::shared_ptr<Node> Quantizer(double scalingFactor, double clipMin, double clipMax, const std::string& name)
{
// create the nodes
@@ -59,15 +69,97 @@ std::shared_ptr<Node> Quantizer(double scalingFactor, double clipMin, double cli
return metaopNode;
}
std::shared_ptr<Node> BitShiftQuantizer(std::shared_ptr<Node> oldQuantizer, DataType targetType, const std::string& name)
{
double scalingFactor = getScalingFactor(oldQuantizer);
std::shared_ptr<MetaOperator_Op> metaOp = std::static_pointer_cast<MetaOperator_Op>(oldQuantizer->getOperator());
std::shared_ptr<Node> oldclipNode = getSubNode(metaOp->getMicroGraph(), "Clip");
if (!oldclipNode) {
Log::warn("Invalid PTQ MetaOperator, no Clip found inside node of type {}", oldQuantizer->type());
return nullptr;
}
std::shared_ptr<Clip_Op> clipOp = std::static_pointer_cast<Clip_Op>(oldclipNode->getOperator());
int shift = std::log2(scalingFactor); // exact only when the scaling factor is a power of two
BitShift_Op::BitShiftDirection direction = BitShift_Op::BitShiftDirection::left;
if (shift < 0)
{
direction = BitShift_Op::BitShiftDirection::right;
shift = -shift;
}
std::shared_ptr<Node> bitShiftNode = BitShift(direction,false,(!name.empty()) ? name + "_MulIQuant" : "");
std::shared_ptr<Node> clipNode = Clip((!name.empty()) ? name + "_IClipQuant" : "", clipOp->min(), clipOp->max());
std::shared_ptr<Tensor> bitshiftTensor = std::make_shared<Tensor>(Array1D<int, 1> {shift});
std::shared_ptr<Node> bitshiftProducer = addProducer(bitShiftNode, 1, {1}, "ScalingFactor");
bitshiftProducer->getOperator()->setOutput(0, bitshiftTensor);
bitshiftProducer->attributes()->addAttr("quantization.ptq.ShiftAmount",shift);
bitshiftProducer->getOperator()->setDataType(targetType);
// set the data types of the quantization nodes
bitShiftNode->getOperator()->setDataType(targetType);
clipNode->getOperator()->setDataType(targetType);
// create the metaop graph
std::shared_ptr<GraphView> graphView = Sequential({bitShiftNode,clipNode});
std::shared_ptr<GraphView> connectedGraphView = getConnectedGraphView(bitShiftNode); // XXX why not use the graphView ???
// return the metaop
std::shared_ptr<Node> metaopNode = MetaOperator("BitShiftQuantizer", connectedGraphView, {}, name); // XXX alternative prototype
return metaopNode;
}
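
A numeric sketch of the behavioral difference, assuming the second BitShift argument disables rounding (as passed above; std::round requires <cmath>):

    // With scalingFactor = 1/256, the BitShift path truncates while the
    // float Quantizer path rounds:
    int shifted = 1000 >> 8;                                  // == 3
    int rounded = static_cast<int>(std::round(1000 / 256.0)); // == 4
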
std::shared_ptr<Node> IntQuantizer(std::shared_ptr<Node> oldQuantizer, DataType targetType, const std::string& name)
{
double scalingFactor = getScalingFactor(oldQuantizer);
std::shared_ptr<MetaOperator_Op> metaOp = std::static_pointer_cast<MetaOperator_Op>(oldQuantizer->getOperator());
std::shared_ptr<Node> oldclipNode = getSubNode(metaOp->getMicroGraph(), "Clip");
if (!oldclipNode) {
Log::warn("Invalid PTQ MetaOperator, no Clip found inside node of type {}", oldQuantizer->type());
return nullptr;
}
std::shared_ptr<Clip_Op> clipOp = std::static_pointer_cast<Clip_Op>(oldclipNode->getOperator());
std::shared_ptr<Node> castPreNode = Cast(DataType::Float64,((!name.empty()) ? name + "_PreCast" : ""));
std::shared_ptr<Node> mulNode = Mul((!name.empty()) ? name + "_MulIQuant" : "");
std::shared_ptr<Node> roundNode = Round((!name.empty()) ? name + "_IRoundQuant" : "");
std::shared_ptr<Node> clipNode = Clip((!name.empty()) ? name + "_IClipQuant" : "", clipOp->min(), clipOp->max());
std::shared_ptr<Node> castPostNode = Cast(targetType,((!name.empty()) ? name + "_PostCast" : ""));
// set the data types of the quantization nodes
castPreNode->getOperator()->setDataType(DataType::Float64);
mulNode->getOperator()->setDataType(DataType::Float64);
roundNode->getOperator()->setDataType(DataType::Float64);
clipNode->getOperator()->setDataType(DataType::Float64);
castPostNode->getOperator()->setDataType(targetType);
// connect the scaling factor producer
std::shared_ptr<Tensor> scalingFactorTensor = std::make_shared<Tensor>(Array1D<double, 1> {scalingFactor});
std::shared_ptr<Node> scalingFactorProducer = addProducer<1>(mulNode, 1, {1}, "ScalingFactor");
scalingFactorProducer->getOperator()->setOutput(0, scalingFactorTensor);
// create the metaop graph
std::shared_ptr<GraphView> graphView = Sequential({castPreNode, mulNode, roundNode, clipNode, castPostNode});
std::shared_ptr<GraphView> connectedGraphView = getConnectedGraphView(mulNode); // XXX why not use the graphView ???
// return the metaop
std::shared_ptr<Node> metaopNode = MetaOperator("IntQuantizer", connectedGraphView, {}, name); // XXX alternative prototype
return metaopNode;
}
......