Commit 1d1fa3a4 authored by Noam Zerah

Adding changes

parent f38da961
@@ -29,6 +29,34 @@ namespace Aidge {
/// @return A shared pointer to an instance of the meta-operator node.
std::shared_ptr<Aidge::Node> Quantizer(double scalingFactor, double clipMin, double clipMax, const std::string& name);
/// @brief IntQuantizer acts as an extension of the Quantizer meta-operator, enabling seamless integration
/// into computation graphs with a data type other than Float while preserving floating-point precision.
///
/// This operator modifies the provided Quantizer by inserting explicit casting operations before and after
/// the quantization process. It first casts the input to Float64, applies the quantization steps (Mul, Clip, Round),
/// and then casts the result back to the target data type. This ensures compatibility with integer-based computation graphs
/// while maintaining the precision of floating-point operations.
///
/// @param oldQuantizer A shared pointer to the existing Quantizer node that will be adapted.
/// @param targetType The target data type to which the final output should be cast after the quantization process.
/// @param name The name of the meta-operator node created.
/// @return A shared pointer to a new instance of the modified meta-operator node.
std::shared_ptr<Node> IntQuantizer(std::shared_ptr<Node> oldQuantizer, DataType targetType, const std::string& name);
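
A minimal usage sketch (hypothetical node names; assumes a Quantizer built with the factory above):

    // Wrap an existing float Quantizer so it can sit inside an Int32 graph.
    std::shared_ptr<Node> quantizer = Quantizer(0.5, -128.0, 127.0, "q0");
    std::shared_ptr<Node> intQuantizer = IntQuantizer(quantizer, DataType::Int32, "q0_int");
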
/// @brief BitShiftQuantizer acts as an extension of the Quantizer meta-operator, converting it into an
/// integer-only equivalent for computation graphs with a data type other than Float.
///
/// Instead of casting around a floating-point multiplication, this operator replaces the Quantizer's
/// scaling step with a bitwise shift: the scaling factor is converted to a shift amount (log2 of the
/// factor; left shift for positive exponents, right shift for negative ones), followed by the original
/// Clip. The resulting micro-graph operates directly on the target data type, so it suits integer-only
/// backends, but it is only exact when the scaling factor is a power of two (e.g. in single-shift mode).
///
/// @param oldQuantizer A shared pointer to the existing Quantizer node that will be adapted.
/// @param targetType The target data type to which the final output should be cast after the quantization process.
/// @param name The name of the meta-operator node created.
/// @return A shared pointer to a new instance of the modified meta-operator node.
std::shared_ptr<Node> BitShiftQuantizer(std::shared_ptr<Node> oldQuantizer, DataType targetType, const std::string& name);
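
A hedged sketch of how the scaling factor maps to a shift amount (mirroring the std::log2 conversion in the implementation further down; only power-of-two factors are exact), reusing the quantizer node from the previous sketch:

    // scalingFactor = 256.0   -> log2 =  8 -> left shift by 8
    // scalingFactor = 1.0/256 -> log2 = -8 -> right shift by 8
    std::shared_ptr<Node> bsQuantizer = BitShiftQuantizer(quantizer, DataType::Int32, "q0_bitshift");
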
/// @brief Updates the scaling factor of a PTQ meta-operator node, allowing dynamic adjustment of the scaling parameter.
/// This function sets a new scaling factor on the specified meta-operator node by modifying the scalar applied in its Mul operation.
/// The meta-operator node must be a PTQ-specific operator, such as a Quantizer or Scaling node.
......
@@ -93,7 +93,9 @@ void init_PTQ(py::module &m) {
:type verbose: bool
)mydelimiter");
m.def("quantize_network", &quantizeNetwork ,py::arg("network"), py::arg("nb_bits"), py::arg("input_dataset"), py::arg("clipping_mode") = Clipping::MAX , py::arg("no_quantization") = false, py::arg("optimize_signs") = false, py::arg("single_shift") = false, py::arg("use_cuda") = false, py::arg("verbose") = false,
m.def("quantize_network", &quantizeNetwork ,py::arg("network"), py::arg("nb_bits"), py::arg("input_dataset"),
py::arg("clipping_mode") = Clipping::MAX ,py::arg("target_type") = DataType::Float64 ,py::arg("no_quantization") = true, py::arg("optimize_signs") = false,
py::arg("single_shift") = false, py::arg("use_cuda") = false, py::arg("fold_graph") = true, py::arg("verbose") = false,
R"mydelimiter(
Main quantization routine. Performs every step of the quantization pipeline.
:param network: The GraphView to be quantized.
......
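
For reference, a hedged C++ call sketch of the updated routine, assuming the quantizeNetwork signature mirrors the bound argument order above (all values illustrative):

    quantizeNetwork(network, 8, inputDataset,
                    Clipping::MAX, DataType::Int32,
                    /*noQuantization=*/false, /*optimizeSigns=*/false,
                    /*singleShift=*/true, /*useCuda=*/false,
                    /*foldGraph=*/true, /*verbose=*/false);
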
@@ -29,6 +29,8 @@
#include "aidge/operator/Conv.hpp"
#include "aidge/operator/ArgMax.hpp"
#include "aidge/operator/Reshape.hpp"
#include "aidge/operator/Cast.hpp"
#include "aidge/recipes/Recipes.hpp"
#include "aidge/recipes/QuantRecipes.hpp"
@@ -195,6 +197,59 @@ static void insertChildren(std::shared_ptr<Node> parent, std::shared_ptr<Node> n
graphView->add(newNode);
}
void applyConstFold(std::shared_ptr<GraphView> &graphView)
{
// Mark every Producer as constant so that the folding recipe can evaluate it
for (const std::shared_ptr<Node> node : graphView->getNodes())
{
if (node->type() == "Producer")
{
const auto& producer = std::static_pointer_cast<Producer_Op>(node->getOperator());
producer->constant() = true;
}
}
constantFolding(graphView);
}
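
A sketch of the intended effect, assuming the constantFolding recipe can evaluate the operators involved:

    // After applyConstFold, constant chains such as Producer -> Round -> Cast
    // collapse into a single constant Producer.
    applyConstFold(graphView);
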
bool castQuantizedGraph(std::shared_ptr<GraphView> &graphView, Aidge::DataType targetType, bool singleShift)
{
// Work on a copy of the graph's node set, since we will replace some nodes while iterating
std::vector<std::shared_ptr<Node>> nodeVector(graphView->getNodes().begin(), graphView->getNodes().end());
for (std::shared_ptr<Node> node : nodeVector)
{
if (node->type() == "Round" && node->attributes()->hasAttr("isProducerRounding"))
{
// Keep the producer rounding in Float64 and cast its output back to the target type
std::shared_ptr<Aidge::Node> castNode = Cast(targetType, node->name() + "_Cast");
castNode->getOperator()->setDataType(targetType);
castNode->getOperator()->setBackend(node->getOperator()->backend());
insertChildren(node, castNode, graphView);
castNode->attributes()->addAttr("isProducerCasting", 0.0);
node->getOperator()->setDataType(DataType::Float64);
}
else if(node->type() == "Quantizer")
{
if(singleShift)
{
std::shared_ptr<Node> newBitShiftQuantizer = BitShiftQuantizer(node,targetType,node->name()+"_BitShift_Quantizer");
newBitShiftQuantizer->getOperator()->setBackend(node->getOperator()->backend());
graphView->replace({node},{newBitShiftQuantizer});
}
else // If single shift is not enabled, use the alternative IntQuantizer (which casts the data before and after the regular Quantizer operations)
{
std::shared_ptr<Node> newIntQuantizer = IntQuantizer(node,targetType,node->name());
newIntQuantizer->getOperator()->setBackend(node->getOperator()->backend());
graphView->replace({node},{newIntQuantizer});
}
}
else if (node->type() != "Producer" &&
!node->attributes()->hasAttr("isProducerScaling"))
{
node->getOperator()->setDataType(targetType);
}
}
return true;
}
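
A hypothetical pipeline step showing how this routine might be invoked (arguments illustrative):

    // Move a quantized float graph to Int32, using bit-shift quantizers
    // when the network was quantized in single-shift mode.
    castQuantizedGraph(graphView, DataType::Int32, /*singleShift=*/true);
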
bool insertRoundBelowProducer(std::shared_ptr<Node> node, std::shared_ptr<GraphView> graphView)
{
@@ -1224,11 +1279,19 @@ void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits,
{
setupDataType(graphView, inputDataSet, targetType);
}
if(foldGraph)
{
Log::notice("Applying constant folding recipe to the graph ...");
applyConstFold(graphView);
}
// Mandatory to handle all of the newly added connections!
graphView->updateInputsOutputs();
if (verbose)
printScalingFactors(graphView);
// if (useCuda)
//     graphView->setBackend("cuda");
Log::notice(" Resetting the scheduler ...");
SequentialScheduler scheduler(graphView);
......
@@ -18,6 +18,8 @@
#include "aidge/operator/Clip.hpp"
#include "aidge/operator/Mul.hpp"
#include "aidge/operator/Round.hpp"
#include "aidge/operator/BitShift.hpp"
#include "aidge/operator/Cast.hpp"
#include "aidge/graph/Node.hpp"
#include "aidge/graph/OpArgs.hpp"
@@ -33,7 +35,15 @@
namespace Aidge
{
static std::shared_ptr<Node> getSubNode(std::shared_ptr<GraphView> graphView, std::string nodeType)
{
// Return the last node of the requested type found in the graph (nullptr if none)
std::shared_ptr<Node> resultNode = nullptr;
for (std::shared_ptr<Node> node : graphView->getNodes())
if (node->type() == nodeType)
resultNode = node;
return resultNode;
}
std::shared_ptr<Node> Quantizer(double scalingFactor, double clipMin, double clipMax, const std::string& name)
{
// create the nodes
@@ -59,15 +69,97 @@ std::shared_ptr<Node> Quantizer(double scalingFactor, double clipMin, double cli
return metaopNode;
}
std::shared_ptr<Node> BitShiftQuantizer(std::shared_ptr<Node> oldQuantizer, DataType targetType, const std::string& name)
{
double scalingFactor = getScalingFactor(oldQuantizer);
std::shared_ptr<MetaOperator_Op> metaOp = std::static_pointer_cast<MetaOperator_Op>(oldQuantizer->getOperator());
std::shared_ptr<Node> oldclipNode = getSubNode(metaOp->getMicroGraph(), "Clip");
if (!oldclipNode) {
Log::warn("Invalid PTQ MetaOperator, no Clip found inside node of type {}", oldQuantizer->type());
return nullptr;
}
std::shared_ptr<Clip_Op> clipOp = std::static_pointer_cast<Clip_Op>(oldclipNode->getOperator());
int shift = std::log2(scalingFactor); // exact only when the scaling factor is a power of two
BitShift_Op::BitShiftDirection direction = BitShift_Op::BitShiftDirection::left;
if (shift < 0)
{
direction = BitShift_Op::BitShiftDirection::right;
shift = -shift;
}
std::shared_ptr<Node> bitShiftNode = BitShift(direction,false,(!name.empty()) ? name + "_MulIQuant" : "");
std::shared_ptr<Node> clipNode = Clip((!name.empty()) ? name + "_IClipQuant" : "", clipOp->min(), clipOp->max());
std::shared_ptr<Tensor> bitshiftTensor = std::make_shared<Tensor>(Array1D<int, 1> {shift});
std::shared_ptr<Node> bitshiftProducer = addProducer(bitShiftNode, 1, {1}, "ScalingFactor");
bitshiftProducer->getOperator()->setOutput(0, bitshiftTensor);
bitshiftProducer->attributes()->addAttr("quantization.ptq.ShiftAmount",shift);
bitshiftProducer->getOperator()->setDataType(targetType);
// set the data types of the quantization nodes
bitShiftNode->getOperator()->setDataType(targetType);
clipNode->getOperator()->setDataType(targetType);
// create the metaop graph
std::shared_ptr<GraphView> graphView = Sequential({bitShiftNode,clipNode});
std::shared_ptr<GraphView> connectedGraphView = getConnectedGraphView(bitShiftNode); // XXX why not use the graphView ???
// return the metaop
std::shared_ptr<Node> metaopNode = MetaOperator("BitShiftQuantizer", connectedGraphView, {}, name); // XXX alternative prototype
return metaopNode;
}
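
A numeric sketch of the behavioral difference, assuming the second BitShift argument disables rounding (as passed above; std::round requires <cmath>):

    // With scalingFactor = 1/256, the BitShift path truncates while the
    // float Quantizer path rounds:
    int shifted = 1000 >> 8;                                  // == 3
    int rounded = static_cast<int>(std::round(1000 / 256.0)); // == 4
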
std::shared_ptr<Node> IntQuantizer(std::shared_ptr<Node> oldQuantizer, DataType targetType, const std::string& name)
{
double scalingFactor = getScalingFactor(oldQuantizer);
std::shared_ptr<MetaOperator_Op> metaOp = std::static_pointer_cast<MetaOperator_Op>(oldQuantizer->getOperator());
std::shared_ptr<Node> oldclipNode = getSubNode(metaOp->getMicroGraph(), "Clip");
if (!oldclipNode) {
Log::warn("Invalid PTQ MetaOperator, no Clip found inside node of type {}", oldQuantizer->type());
return nullptr;
}
std::shared_ptr<Clip_Op> clipOp = std::static_pointer_cast<Clip_Op>(oldclipNode->getOperator());
std::shared_ptr<Node> castPreNode = Cast(DataType::Float64,((!name.empty()) ? name + "_PreCast" : ""));
std::shared_ptr<Node> mulNode = Mul((!name.empty()) ? name + "_MulIQuant" : "");
std::shared_ptr<Node> roundNode = Round((!name.empty()) ? name + "_IRoundQuant" : "");
std::shared_ptr<Node> clipNode = Clip((!name.empty()) ? name + "_IClipQuant" : "", clipOp->min(), clipOp->max());
std::shared_ptr<Node> castPostNode = Cast(targetType,((!name.empty()) ? name + "_PostCast" : ""));
// set the data types of the quantization nodes
castPreNode->getOperator()->setDataType(DataType::Float64);
mulNode->getOperator()->setDataType(DataType::Float64);
roundNode->getOperator()->setDataType(DataType::Float64);
clipNode->getOperator()->setDataType(DataType::Float64);
castPostNode->getOperator()->setDataType(targetType);
// connect the scaling factor producer
std::shared_ptr<Tensor> scalingFactorTensor = std::make_shared<Tensor>(Array1D<double, 1> {scalingFactor});
std::shared_ptr<Node> scalingFactorProducer = addProducer<1>(mulNode, 1, {1}, "ScalingFactor");
scalingFactorProducer->getOperator()->setOutput(0, scalingFactorTensor);
// create the metaop graph
std::shared_ptr<GraphView> graphView = Sequential({castPreNode, mulNode, roundNode, clipNode, castPostNode});
std::shared_ptr<GraphView> connectedGraphView = getConnectedGraphView(mulNode); // XXX why not use the graphView ???
// return the metaop
std::shared_ptr<Node> metaopNode = MetaOperator("IntQuantizer", connectedGraphView, {}, name); // XXX alternative prototype
return metaopNode;
}
......