From 87f5891aa200535fd5b8744defed0e1eb00bf73c Mon Sep 17 00:00:00 2001
From: bhalimi <benjamin.halimi@cea.fr>
Date: Wed, 8 Jan 2025 10:32:06 +0000
Subject: [PATCH 01/26] improve tensor manipulation routines + enhance
 insertCompensationNodes

---
 src/PTQ/CLE.cpp     |  73 +++++++++++++++++------
 src/PTQ/PTQ.cpp     | 137 ++++++++++++++++++++++++++------------------
 src/QAT/QAT_LSQ.cpp |   9 +--
 3 files changed, 138 insertions(+), 81 deletions(-)

diff --git a/src/PTQ/CLE.cpp b/src/PTQ/CLE.cpp
index 2c81815..0fe9575 100644
--- a/src/PTQ/CLE.cpp
+++ b/src/PTQ/CLE.cpp
@@ -19,6 +19,12 @@
 #include "aidge/utils/Log.hpp"
 #include "aidge/operator/OperatorTensor.hpp"
 
+#include "aidge/operator/Mul.hpp"
+#include "aidge/operator/ArgMax.hpp"
+#include "aidge/operator/Abs.hpp"
+#include "aidge/operator/Reshape.hpp"
+#include "aidge/operator/Round.hpp"
+
 namespace Aidge
 {
 
@@ -34,27 +40,58 @@ static std::shared_ptr<Tensor> getBiasTensor(std::shared_ptr<Node> node)
 
 static void rescaleTensor(std::shared_ptr<Tensor> tensor, double scaling)
 {
-    // Get the tensor data pointer
-    double * castedTensor = static_cast<double *> (tensor->getImpl()->rawPtr());
-
-    // Rescale the tensor
-    for(std::size_t i = 0; i < tensor->size(); i++)
-        castedTensor[i] *= scaling;
+    // Multiply every element of 'tensor' by 'scaling' through a standalone Mul operator.
+    auto mulOp = Mul_Op();
+    mulOp.setDataType(tensor->dataType());
+    mulOp.setBackend(tensor->backend());
+
+    // Keep the factor in double: Array1D<float, 1>{scaling} would narrow it to float first.
+    std::shared_ptr<Aidge::Tensor> scalingTensor = std::make_shared<Aidge::Tensor>(Aidge::Array1D<double, 1> {scaling});
+    scalingTensor->setDataType(tensor->dataType());
+    scalingTensor->setBackend(tensor->backend());
+
+    mulOp.associateInput(0, tensor);
+    mulOp.associateInput(1, scalingTensor);
+    mulOp.forward();
+
+    // Write the product back into the caller's tensor.
+    *tensor = *(mulOp.getOutput(0));
 }
 
-static double getTensorAbsoluteMax(std::shared_ptr <Tensor> tensor)
+// TODO : make the retrieval of argmax values backend independent (refCastFrom)
+static double getTensorAbsoluteMax(std::shared_ptr<Tensor> tensor)
 {
-    // Get the tensor data pointer and edit it
-    double * castedTensor = static_cast<double*> (tensor->getImpl()->rawPtr());
-
-    // Get the tensor absolute max value
-    double maxValue = 0.0f;
-    for(std::size_t i = 0; i < tensor->size(); ++i) {
-        if(std::fabs(castedTensor[i]) > maxValue) {
-            maxValue = std::fabs(castedTensor[i]);
-        }
-    }
-    return maxValue;
+    // get the abs tensor
+
+    std::shared_ptr<Tensor> absTensor = std::make_shared<Tensor>(tensor->abs());
+
+    // flatten the abs tensor
+
+    std::int64_t nbElement = tensor->size();
+
+    auto reshapeOp = Reshape_Op({nbElement});
+    reshapeOp.setDataType(tensor->dataType());
+    reshapeOp.setBackend(tensor->backend());
+
+    reshapeOp.associateInput(0, absTensor);
+    reshapeOp.forward();
+    std::shared_ptr<Tensor> flatTensor = reshapeOp.getOutput(0);
+
+    // Get the argmax
+
+    auto argmaxOp = ArgMax_Op(0, true, false);
+    argmaxOp.setDataType(tensor->dataType());
+    argmaxOp.setBackend(tensor->backend());
+
+    argmaxOp.associateInput(0, flatTensor);
+    argmaxOp.forward();
+    std::shared_ptr<Tensor> argmaxTensor = argmaxOp.getOutput(0);
+
+    // Return the max
+
+    int maxIndex = std::round(argmaxTensor->get<double>(0));
+
+    return flatTensor->get<double>(maxIndex);
 }
 
 void crossLayerEqualization(std::shared_ptr<GraphView> graphView, double targetDelta)
diff --git a/src/PTQ/PTQ.cpp b/src/PTQ/PTQ.cpp
index 0e26313..6e0b29e 100644
--- a/src/PTQ/PTQ.cpp
+++ b/src/PTQ/PTQ.cpp
@@ -28,6 +28,12 @@
 #include "aidge/operator/BatchNorm.hpp"
 #include "aidge/operator/Conv.hpp"
 
+#include "aidge/operator/ArgMax.hpp"
+#include "aidge/operator/Abs.hpp"
+#include "aidge/operator/Reshape.hpp"
+#include "aidge/operator/Round.hpp"
+
+
 #include "aidge/recipes/Recipes.hpp"
 #include "aidge/recipes/QuantRecipes.hpp"
 
@@ -66,51 +72,75 @@ bool checkArchitecture(std::shared_ptr<GraphView> graphView)
     return true;
 }
 
-static void fillTensor(std::shared_ptr<Tensor> tensor, double value)
+static void rescaleTensor(std::shared_ptr<Tensor> tensor, double scaling)
 {
-    // Get the tensor data pointer
-    double * castedTensor = static_cast <double *> (tensor->getImpl()->rawPtr());
+    auto mulOp = Mul_Op();
+    mulOp.setDataType(tensor->dataType());
+    mulOp.setBackend(tensor->backend());
 
-    // Fill the tensor
-    for(std::size_t i = 0; i < tensor->size(); i++)
-        castedTensor[i] = value;
-}
+    std::shared_ptr<Aidge::Tensor> scalingTensor = std::make_shared<Aidge::Tensor>(Aidge::Array1D<double, 1> {scaling});
+    scalingTensor->setDataType(tensor->dataType());
+    scalingTensor->setBackend(tensor->backend());
 
-static void rescaleTensor(std::shared_ptr<Tensor> tensor, double scaling)
-{
-    // Get the tensor data pointer
-    double * castedTensor = static_cast <double *> (tensor->getImpl()->rawPtr());
+    mulOp.associateInput(0, tensor);
+    mulOp.associateInput(1, scalingTensor);
 
-    // Rescale the tensor
-    for(std::size_t i = 0; i < tensor->size(); i++)
-        castedTensor[i] *= scaling;
+    // 'scaling' stays in double from the caller to the Mul input, so no precision is lost.
+    mulOp.forward();
+    // Write the product back into the caller's tensor.
+    *tensor = *(mulOp.getOutput(0));
 }
 
 static void roundTensor(std::shared_ptr<Tensor> tensor)
 {
-    // Get the tensor data pointer
-    double * castedTensor = static_cast <double *> (tensor->getImpl()->rawPtr());
+    auto roundOp = Round_Op();
+    roundOp.setDataType(tensor->dataType());
+    roundOp.setBackend(tensor->backend());
 
-    // Rescale the tensor
-    for(std::size_t i = 0; i < tensor->size(); i++)
-        castedTensor[i] = std::nearbyint(castedTensor[i]);//Round
+    roundOp.associateInput(0, tensor);
+    roundOp.forward();
+    
+    auto outTensor = roundOp.getOutput(0);
+    *tensor = *outTensor;
 }
 
-static double getTensorAbsoluteMax(std::shared_ptr <Tensor> tensor)
+// TODO : make the retrieval of argmax values backend independent (refCastFrom)
+static double getTensorAbsoluteMax(std::shared_ptr<Tensor> tensor)
 {
-    // Get the tensor data pointer and edit it
-    double * castedTensor = static_cast<double*>(tensor->getImpl()->rawPtr());
-
-    // Get the tensor absolute max value
-    double maxValue = 0.0f;
-    for(std::size_t i = 0; i < tensor->size(); ++i) {
-        if(std::fabs(castedTensor[i]) > maxValue) {
-            maxValue = std::fabs(castedTensor[i]);
-        }
-    }
-    return maxValue;
+    // get the abs tensor
+
+    std::shared_ptr<Tensor> absTensor = std::make_shared<Tensor>(tensor->abs());
+
+    // flatten the abs tensor
+
+    std::int64_t nbElement = tensor->size();
+
+    auto reshapeOp = Reshape_Op({nbElement});
+    reshapeOp.setDataType(tensor->dataType());
+    reshapeOp.setBackend(tensor->backend());
+
+    reshapeOp.associateInput(0, absTensor);
+    reshapeOp.forward();
+    std::shared_ptr<Tensor> flatTensor = reshapeOp.getOutput(0);
+
+    // Get the argmax
+
+    auto argmaxOp = ArgMax_Op(0, true, false);
+    argmaxOp.setDataType(tensor->dataType());
+    argmaxOp.setBackend(tensor->backend());
+
+    argmaxOp.associateInput(0, flatTensor);
+    argmaxOp.forward();
+    std::shared_ptr<Tensor> argmaxTensor = argmaxOp.getOutput(0);
+
+    // Return the max
+
+    int maxIndex = std::round(argmaxTensor->get<double>(0));
+
+    return flatTensor->get<double>(maxIndex);
 }
 
+
 // TODO : pass nodeVector by reference ...
 static std::vector<std::shared_ptr<Node>> removeMatchingNodes(std::vector<std::shared_ptr<Node>> nodeVector, std::string nodeType)
 {
@@ -876,50 +906,42 @@ static void insertCompensationNodes(std::shared_ptr<GraphView> graphView, std::u
 
     for (std::shared_ptr<Node> node : nodeVector)
     {
-        // A merging node is always followed by a scaling node at this point ...
+        // A merging node is always followed by a Quantizer node at this point
 
         if (node->type() == "Quantizer")
         {   
+            // check if the Quantizer is a residual one, and insert a compensation node if so ...
+
             bool prevNodeIsForking = ((node->getParent(0))->getChildren().size() > 1);
             bool prevNodeIsAffine = isAffine(node->getParent(0));
             bool insertNode = prevNodeIsForking || !prevNodeIsAffine;
 
             if (insertNode)
             {
-                // create and insert the multplicative node
+                // create and insert the multiplicative node before the Quantizer
 
                 std::string mulNodeName = makeUniqueName(node->name() + "_Mul", graphView);
                 std::shared_ptr<Node> mulNode = Mul(mulNodeName);
-
                 mulNode->getOperator()->setDataType(DataType::Float64); // getDataType(parentNode)
                 mulNode->getOperator()->setBackend("cpu");
 
                 graphView->insertParent(node, mulNode, 0, 0, 0);
 
-                // create and insert the producer node
-
-                std::shared_ptr<Tensor> inputTensor = std::static_pointer_cast<Tensor> (mulNode->getOperator()->getRawInput(0));
-                std::shared_ptr<Tensor> coeffTensor = std::make_shared<Tensor>();
+                // Add the coeff producer to the multiplier node
 
-                coeffTensor->setDataType(DataType::Float64); // getDataType(parentNode)
-                coeffTensor->setBackend("cpu"); 
+                std::shared_ptr<Node> coeffProducer = addProducer(mulNode, 1, {1}, ""); 
+                std::shared_ptr<Tensor> coeffTensor = std::make_shared<Tensor>(Array1D<double, 1> {signedMax});
+                coeffProducer->getOperator()->setOutput(0, coeffTensor);
 
-                coeffTensor->resize(inputTensor->dims());
-                fillTensor(coeffTensor, 1); 
+                coeffProducer->getOperator()->setDataType(DataType::Float64);
+                coeffProducer->getOperator()->setBackend("cpu"); 
 
-                std::shared_ptr<Node> producerNode = Producer(coeffTensor, makeUniqueName("coeff", graphView));
-                producerNode->addChild(mulNode);
-                graphView->add(producerNode);
+                graphView->add(coeffProducer); // needed ?
 
-                // rescale the coeffs and edit scaling factor
+                // Adapt the scaling factor value accordingly
 
-                fillTensor(coeffTensor, signedMax);
-
-                double currScalingFactor = getScalingFactor(node); // XXX bad naming !
+                double currScalingFactor = getScalingFactor(node); 
                 updateScalingFactor(node, currScalingFactor / signedMax);
-
-                // TODO : double check this !!!
-                //std::cout << getTensorAbsoluteMax(coeffTensor) << std::endl;
             }
         }
     }
@@ -931,7 +953,8 @@ void performSingleShiftApproximation(std::shared_ptr<GraphView> graphView, bool
 
     for (std::shared_ptr<Node> node : nodeVector)
     {
-        // Use A meatoperator of type Scaling of MulCompensation instead
+        // TODO : use Compensation nodes instead of Mul nodes
+
         if (isAffine(node) || (node->type() == "Mul"))
         {
             std::shared_ptr<Node> scalingNode = (*node->getChildren().begin());
@@ -940,7 +963,7 @@ void performSingleShiftApproximation(std::shared_ptr<GraphView> graphView, bool
 
             double approx = std::pow(2, std::ceil(std::log2(base)));
 
-            updateScalingFactor(scalingNode,approx);
+            updateScalingFactor(scalingNode, approx);
 
             double ratio = base / approx;
 
@@ -954,7 +977,7 @@ void performSingleShiftApproximation(std::shared_ptr<GraphView> graphView, bool
                 std::shared_ptr<Tensor> biasTensor = getBiasTensor(node);
                 rescaleTensor(biasTensor, ratio);
                 if (!noQuant)
-                roundTensor(biasTensor);
+                    roundTensor(biasTensor);
             }
         }
     }
@@ -1058,8 +1081,8 @@ void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits,
     if (useCuda)
         graphView->setBackend("cuda");
 
-    //std::cout << " === SCALINGS (AFTER CAST) ===" << std::endl;
-    //printScalingFactors(graphView);
+    std::cout << " === SCALINGS (AFTER CAST) ===" << std::endl;
+    printScalingFactors(graphView);
 
     Log::info(" Reseting the scheduler ...");
     SequentialScheduler scheduler(graphView);
diff --git a/src/QAT/QAT_LSQ.cpp b/src/QAT/QAT_LSQ.cpp
index 9b51e84..a09dbb2 100644
--- a/src/QAT/QAT_LSQ.cpp
+++ b/src/QAT/QAT_LSQ.cpp
@@ -89,19 +89,16 @@ void QuantLSQ::insertQuantizers(std::shared_ptr<GraphView> graphView, size_t nbB
 static float getTensorAbsMean(std::shared_ptr<Tensor> tensor)
 {
     auto backend = tensor->backend();
+
     if (backend == "cuda")
         tensor->setBackend("cpu");
 
-    float acc = 0;
-    float* castedTensor = static_cast<float *> (tensor->getImpl()->rawPtr());
-    for(std::size_t i = 0; i < tensor->size(); i++)
-        acc += std::abs(castedTensor[i]);
-    acc /= static_cast<float> (tensor->size());
+    float value = (*tensor).abs().mean().get<float>(0);
 
     if (backend == "cuda")
         tensor->setBackend("cuda");
 
-    return acc;
+    return value;
 }
 
 static std::map<std::string, float> collectInputStats(std::shared_ptr<GraphView> graphView, std::shared_ptr<Tensor> calibrationData, bool useCuda)
-- 
GitLab


From 261345f10db68b69077bef647fd645196c18baf3 Mon Sep 17 00:00:00 2001
From: bhalimi <benjamin.halimi@cea.fr>
Date: Wed, 8 Jan 2025 10:37:27 +0000
Subject: [PATCH 02/26] comment verbose

---
 src/PTQ/PTQ.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/PTQ/PTQ.cpp b/src/PTQ/PTQ.cpp
index 6e0b29e..7f750f0 100644
--- a/src/PTQ/PTQ.cpp
+++ b/src/PTQ/PTQ.cpp
@@ -1081,8 +1081,8 @@ void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits,
     if (useCuda)
         graphView->setBackend("cuda");
 
-    std::cout << " === SCALINGS (AFTER CAST) ===" << std::endl;
-    printScalingFactors(graphView);
+    //std::cout << " === SCALINGS (AFTER CAST) ===" << std::endl;
+    //printScalingFactors(graphView);
 
     Log::info(" Reseting the scheduler ...");
     SequentialScheduler scheduler(graphView);
-- 
GitLab


From 227a9c7e575656ffc7094c0b4e66a42c931d54ee Mon Sep 17 00:00:00 2001
From: bhalimi <benjamin.halimi@cea.fr>
Date: Wed, 8 Jan 2025 16:27:21 +0000
Subject: [PATCH 03/26] minor change

---
 src/PTQ/PTQ.cpp | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/PTQ/PTQ.cpp b/src/PTQ/PTQ.cpp
index 7f750f0..3677ae0 100644
--- a/src/PTQ/PTQ.cpp
+++ b/src/PTQ/PTQ.cpp
@@ -215,6 +215,8 @@ void prepareNetwork(std::shared_ptr<GraphView> graphView)
 {
     removeFlatten(graphView);
 
+    sanitizeNodeNames(graphView);
+
     bool containsBatchNorm = false;
     std::vector<std::shared_ptr<Node>> nodeVector = retrieveNodeVector(graphView);
 
@@ -1078,6 +1080,7 @@ void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits,
     //printScalingFactors(graphView);
 
     setupDataType(graphView, inputDataSet, initialDataType);
+
     if (useCuda)
         graphView->setBackend("cuda");
 
-- 
GitLab


From 9998b41f2a26ef738e1fbb829540b6c36dd2a0d3 Mon Sep 17 00:00:00 2001
From: bhalimi <benjamin.halimi@cea.fr>
Date: Mon, 13 Jan 2025 13:01:34 +0000
Subject: [PATCH 04/26] rework the LSQ code

---
 include/aidge/quantization/QAT/QAT_LSQ.hpp |  18 +-
 python_binding/pybind_QAT_LSQ.cpp          |   5 +-
 src/QAT/QAT_LSQ.cpp                        | 204 +++++++--------------
 3 files changed, 77 insertions(+), 150 deletions(-)

diff --git a/include/aidge/quantization/QAT/QAT_LSQ.hpp b/include/aidge/quantization/QAT/QAT_LSQ.hpp
index 4970be0..d7d03ca 100644
--- a/include/aidge/quantization/QAT/QAT_LSQ.hpp
+++ b/include/aidge/quantization/QAT/QAT_LSQ.hpp
@@ -20,22 +20,14 @@ namespace Aidge {
 namespace QuantLSQ {
 
 /**
- * @brief Insert the LSQ quantizer nodes in a given GraphView
- * @param graphView The GraphView containing the graph to quantize.
+ * @brief Given a GraphView with parameters properly initialized, insert
+ * the LSQ quantizer nodes, and set up the adjustment of their step-sizes.
+ * @param graphView The GraphView containing the network to quantize.
  * @param nbBits Number of quantization bits.
- * @param span Fixed output span of the quantizers.
  */
-void insertQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits, float step_size);
+void setupQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits);
 
-/**
- * @brief Given a GraphView with parameters properly initialized and some calibration data,
- * insert the LSQ quantizer nodes, and adjust their step-sizes.
- * @param graphView The GraphView containing the graph to quantize.
- * @param nbBits Number of quantization bits.
- * @param calibrationData Calibration data used to adjust the spans.
- * @param scale Multiplicative constant applied to the spans.
- */
-void insertAndInitQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits, std::shared_ptr<Tensor> calibrationData);
+void devLSQ(std::shared_ptr<Tensor> tensor);
 
 }
 }
diff --git a/python_binding/pybind_QAT_LSQ.cpp b/python_binding/pybind_QAT_LSQ.cpp
index 206985e..0b9fcc2 100644
--- a/python_binding/pybind_QAT_LSQ.cpp
+++ b/python_binding/pybind_QAT_LSQ.cpp
@@ -23,8 +23,9 @@ void init_QAT_LSQ(py::module &m) {
 
     auto mQuantLSQ = m.def_submodule("lsq");
 
-    mQuantLSQ.def("insert_quantizers", &QuantLSQ::insertQuantizers, py::arg("network"), py::arg("nb_bits"), py::arg("step_size"));
+    mQuantLSQ.def("setup_quantizers", &QuantLSQ::setupQuantizers, py::arg("network"), py::arg("nb_bits"));
+
+    mQuantLSQ.def("dev_lsq", &QuantLSQ::devLSQ, py::arg("tensor"));
 
-    mQuantLSQ.def("insert_and_init_quantizers", &QuantLSQ::insertAndInitQuantizers, py::arg("network"), py::arg("nb_bits"), py::arg("calibration_data"));
 }
 } // namespace Aidge
diff --git a/src/QAT/QAT_LSQ.cpp b/src/QAT/QAT_LSQ.cpp
index a09dbb2..04f2027 100644
--- a/src/QAT/QAT_LSQ.cpp
+++ b/src/QAT/QAT_LSQ.cpp
@@ -23,7 +23,42 @@
 
 namespace Aidge {
 
-void QuantLSQ::insertQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits, float stepSize)
+static float getTensorAbsMean(std::shared_ptr<Tensor> tensor)
+{
+    auto valueTensor = (*tensor).abs().mean();
+    std::shared_ptr<Tensor> fallback;
+    const Tensor& localTensor = valueTensor.refCastFrom(fallback, DataType::Float32, "cpu");
+    return localTensor.get<float>(0);
+}
+
+// Compute the initial step size of an LSQ quantizer node from the data currently on its
+// input #0, and store it as the output of the producer connected to its input #1.
+static bool initStepSize(std::shared_ptr<Node> quantizer)
+{
+    const auto quantizerOp = std::static_pointer_cast<LSQ_Op>(quantizer->getOperator());
+    // Mean absolute value of the quantizer's data input (input #0).
+    float inputAbsMean = getTensorAbsMean(quantizerOp->getInput(0));
+    // Step size = 2 * mean(|x|) / sqrt(second element of the operator's range).
+    float stepSize = 2.0f * (inputAbsMean / std::sqrt(quantizerOp->range().second));
+
+    auto stepSizeTensor = std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}}));
+    // Give the step-size tensor the same backend and data type as the data input.
+    // XXX Manage backend here ?
+    stepSizeTensor->setBackend(quantizerOp->getInput(0)->backend());
+    stepSizeTensor->setDataType(quantizerOp->getInput(0)->dataType());
+    // The step size is held by the parent node connected to input #1 of the quantizer.
+    auto stepSizeProducer = quantizer->getParent(1);
+
+    stepSizeProducer->getOperator()->setOutput(0, stepSizeTensor);
+
+    std::cout << " [ INIT STEP SIZE = " << stepSize << " ] " << std::endl;
+    // NOTE(review): presumably 'false' tells the node not to run this hook again - confirm.
+    return false;
+}
+
+// INPUT QUANTIZERS INSERTION
+
+static void setupInputQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits)
 {
     const auto matches = SinglePassGraphMatching(graphView).match("(Conv2D#|FC#)");
 
@@ -34,177 +69,76 @@ void QuantLSQ::insertQuantizers(std::shared_ptr<GraphView> graphView, size_t nbB
         std::pair<int, int> signedRange = {-std::pow(2, nbBits - 1), std::pow(2, nbBits - 1) - 1};
         std::pair<int, int> unsignedRange = {0, std::pow(2, nbBits) - 1};
 
-        // INPUT QUANTIZERS INSERTION
+        // Create the input quantizer node
 
-        // TODO : double check this, and use createUniqueName()
-        auto inputQuantizerName = makeUniqueName(linearNode->name() + "_lsq_i", graphView);  
-        auto inputQuantizerNode = LSQ(signedRange, inputQuantizerName);
+        auto quantizerName = makeUniqueName(linearNode->name() + "_lsq_i", graphView);  
+        auto quantizerNode = LSQ(signedRange, quantizerName);
 
-        // Set the step size
+        // Init the step-size using the node call stack
 
-        auto inputStepSizeOp = inputQuantizerNode->getParent(1)->getOperator();
-        auto inputStepSizeTensor = std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}}));
-        inputStepSizeOp->setOutput(0, inputStepSizeTensor);
+        quantizerNode->addBeforeForward([quantizerNode](){ return initStepSize(quantizerNode); });
 
         // Absorb the ReLU when possible ...
 
-        // XXX is this safe ???
-        bool nodeHasParent = static_cast<bool> (linearNode->getParents()[0]); 
-        // bool nodeHasParent = (linearNode->getParents().size() != 0);
+        bool nodeHasParent = static_cast<bool> (linearNode->getParents()[0]);  // XXX is this safe ?
 
         if (nodeHasParent) {
             auto parentNode = linearNode->getParents()[0];
             if (parentNode->type() == "ReLU") {
-                auto inputQuantizerOp = std::static_pointer_cast<LSQ_Op> (inputQuantizerNode->getOperator());
-                inputQuantizerOp->range() = unsignedRange;
+                auto quantizerOp = std::static_pointer_cast<LSQ_Op> (quantizerNode->getOperator());
+                quantizerOp->range() = unsignedRange;
                 graphView->replace({parentNode}, {}); 
             }
         }
 
-        // We need to handle the case where the linear node is the first one ...
+        // Insert the quantizer in the graphView ...
+        // (We need to handle the case where the linear node is the first one)
 
         if (nodeHasParent) {
-            graphView->insertParent(linearNode, inputQuantizerNode, 0, 0, 0);
+            graphView->insertParent(linearNode, quantizerNode, 0, 0, 0);
         } else {
-            inputQuantizerNode->addChild(graphView);
-            graphView->add(inputQuantizerNode);
+            quantizerNode->addChild(graphView);
+            graphView->add(quantizerNode);
         }
-
-        // PARAM QUANTIZERS INSERTION
-
-        // TODO : double check this, and use createUniqueName()
-        auto paramQuantizerName = makeUniqueName(linearNode->name() + "_lsq_p", graphView);  
-        auto paramQuantizerNode = LSQ(signedRange, paramQuantizerName); 
-        graphView->insertParent(linearNode, paramQuantizerNode, 1, 0, 0);
-
-        // Set the step size
-
-        auto paramStepSizeOp = paramQuantizerNode->getParent(1)->getOperator();
-        auto paramStepSizeTensor = std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}}));
-        paramStepSizeOp->setOutput(0, paramStepSizeTensor);
     }
-
 }
 
-static float getTensorAbsMean(std::shared_ptr<Tensor> tensor)
-{
-    auto backend = tensor->backend();
-
-    if (backend == "cuda")
-        tensor->setBackend("cpu");
-
-    float value = (*tensor).abs().mean().get<float>(0);
-
-    if (backend == "cuda")
-        tensor->setBackend("cuda");
-
-    return value;
-}
+// PARAM QUANTIZERS INSERTION
 
-static std::map<std::string, float> collectInputStats(std::shared_ptr<GraphView> graphView, std::shared_ptr<Tensor> calibrationData, bool useCuda)
+static void setupParamQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits)
 {
-    // Propagate the calibration tensor
+    const auto matches = SinglePassGraphMatching(graphView).match("(Conv2D#|FC#)");
 
-    SequentialScheduler scheduler(graphView);
-    scheduler.resetScheduling();
-    scheduler.forward(true, {calibrationData});
+    std::pair<int, int> signedRange = {-std::pow(2, nbBits - 1), std::pow(2, nbBits - 1) - 1};
 
-    // Store the input tensor statistics
+    for (const auto& match : matches) 
+    {       
+        auto linearNode = match.graph->rootNode(); 
 
-    if (useCuda)
-        graphView->setBackend("cpu"); 
+        // TODO : double check this, and use createUniqueName()
+        auto quantizerName = makeUniqueName(linearNode->name() + "_lsq_p", graphView);  
+        auto quantizerNode = LSQ(signedRange, quantizerName); 
 
-    std::map<std::string, float> inputStats;
-    for (auto node : graphView->getNodes())
-    {
-        if (node->type() == "FC" || node->type() == "Conv2D") // TODO: use graph matching !!!
-        {
-            const auto op = std::static_pointer_cast<LSQ_Op>(node->getOperator());
-            float inputAbsMean = getTensorAbsMean(op->getInput(0));
-            inputStats.insert(std::make_pair(node->name(), inputAbsMean));
-            fmt::println("{} -> {}", node->name(), inputAbsMean);
-        }
-    }
+        // Init the step-size using the node call stack
 
-    if (useCuda)
-        graphView->setBackend("cuda");
+        quantizerNode->addBeforeForward([quantizerNode](){ return initStepSize(quantizerNode); });
 
-    return inputStats;
-}
+        // Insert the quantizer in the graphView
 
-static std::map<std::string, float> collectParamStats(std::shared_ptr<GraphView> graphView, bool useCuda)
-{
-    if (useCuda)
-        graphView->setBackend("cpu");
-
-    std::map<std::string, float> paramStats;
-    for (auto node : graphView->getNodes())
-    {
-        if (node->type() == "FC" || node->type() == "Conv2D") // TODO: use graph matching !!!
-        {
-            const auto op = std::static_pointer_cast<LSQ_Op>(node->getOperator());
-            float paramAbsMean = getTensorAbsMean(op->getInput(1));
-            paramStats.insert(std::make_pair(node->name(), paramAbsMean));
-            fmt::println("{} -> {}", node->name(), paramAbsMean);
-        }
+        graphView->insertParent(linearNode, quantizerNode, 1, 0, 0);
     }
-    
-    if (useCuda)
-        graphView->setBackend("cuda");
-
-    return paramStats;
 }
 
-static void adjustQuantizersStepSizes(std::shared_ptr<GraphView> graphView, std::map<std::string, float> inputStats, std::map<std::string, float> paramStats)
+void QuantLSQ::setupQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits)
 {
-    const auto matches = SinglePassGraphMatching(graphView).match("(Conv2D#|FC#)");
-
-    for (const auto& match : matches) 
-    {
-        auto linearNode = match.graph->rootNode();
-
-        // INPUT QUANTIZERS STEP-SIZES
-
-        auto inputQuantNode = linearNode->getParent(0);
-        auto inputQuantOp = std::static_pointer_cast<LSQ_Op>(inputQuantNode->getOperator());
-
-        float absMean = inputStats[linearNode->name()];
-        float stepSize = 2.0f * (absMean / std::sqrt(inputQuantOp->range().second));
-
-        auto inputStepSizeOp = inputQuantNode->getParent(1)->getOperator();
-        // XXX inputStepSizeOp->setOutput(0, std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}})));
-        auto inputStepSizeTensor = std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}}));
-        inputStepSizeOp->setOutput(0, inputStepSizeTensor);
-
-        // PARAM QUANTIZERS STEP-SIZES
-
-        auto paramQuantNode = linearNode->getParent(1);
-        auto paramQuantOp = std::static_pointer_cast<LSQ_Op>(paramQuantNode->getOperator());
-
-        absMean = paramStats[linearNode->name()];
-        stepSize = 2.0f * (absMean / std::sqrt(paramQuantOp->range().second));
-
-        auto paramStepSizeOp = paramQuantNode->getParent(1)->getOperator();
-        // XXX paramStepSizeOp->setOutput(0, std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}})));
-        auto paramStepSizeTensor = std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}}));
-        paramStepSizeOp->setOutput(0, paramStepSizeTensor);
-    }
+    setupInputQuantizers(graphView, nbBits);
+    setupParamQuantizers(graphView, nbBits);
 }
 
-void QuantLSQ::insertAndInitQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits, std::shared_ptr<Tensor> calibrationData)
+void QuantLSQ::devLSQ(std::shared_ptr<Tensor> tensor)
 {
-    bool useCuda = (calibrationData->backend() == "cuda");
-
-    // Collect the tensor statisics
-    auto inputStats = collectInputStats(graphView, calibrationData, useCuda);
-
-    auto paramStats = collectParamStats(graphView, useCuda);
-
-    // Insert the quantizers
-    insertQuantizers(graphView, nbBits, 1.0);
-
-    // Adjust the quantizers step-sizes
-    adjustQuantizersStepSizes(graphView, inputStats, paramStats);
+    float mean = (tensor->mean()).get<float> (0);
+    std::cout << " MEAN  = " << mean << std::endl;
 }
 
 }
\ No newline at end of file
-- 
GitLab


From 4f1169676c6d3845d35416a4e3f0e3e98e7d9700 Mon Sep 17 00:00:00 2001
From: Noam ZERAH <noam.zerah@cea.fr>
Date: Wed, 8 Jan 2025 16:07:59 +0000
Subject: [PATCH 05/26] Adding the isScaling tag in the PTQ pipeline in order
 to replace the previous and now deprecated Scaling Metaoperator

---
 aidge_quantization/_version.py                |  4 +
 include/aidge/quantization/PTQ/PTQMetaOps.hpp | 14 +--
 include/aidge/quantization_version.h          |  6 +-
 python_binding/pybind_PTQ.cpp                 |  9 ++
 src/PTQ/Clipping.cpp                          |  2 +-
 src/PTQ/PTQ.cpp                               | 88 ++++++++++++-------
 src/PTQ/PTQMetaOps.cpp                        | 39 ++++----
 7 files changed, 101 insertions(+), 61 deletions(-)
 create mode 100644 aidge_quantization/_version.py

diff --git a/aidge_quantization/_version.py b/aidge_quantization/_version.py
new file mode 100644
index 0000000..d4ec20e
--- /dev/null
+++ b/aidge_quantization/_version.py
@@ -0,0 +1,4 @@
+# file generated by setuptools_scm
+# don't change, don't track in version control
+__version__ = version = '0.2.1.dev60+g8044e79.d20250106'
+__version_tuple__ = version_tuple = (0, 2, 1, 'dev60', 'g8044e79.d20250106')
diff --git a/include/aidge/quantization/PTQ/PTQMetaOps.hpp b/include/aidge/quantization/PTQ/PTQMetaOps.hpp
index 62fac87..a8028c6 100644
--- a/include/aidge/quantization/PTQ/PTQMetaOps.hpp
+++ b/include/aidge/quantization/PTQ/PTQMetaOps.hpp
@@ -37,13 +37,13 @@ namespace Aidge {
 /// @return A shared pointer to an instance of the meta-operator node.
 std::shared_ptr<Aidge::Node> Quantizer(double scalingFactor, double clipMin, double clipMax, const std::string& name);
 
-/// @brief The purpose of Scaling is to encapsulate the Mul operator and tag it as a PTQ node rather than a regular Mul operator.
-/// Therefore, this meta-operator consists solely of a [Mul] operation.
-///
-/// @param scalingFactor The scaling factor to apply to the input (a scalar to multiply the input with).
-/// @param name The name of the meta-operator node created.
-/// @return A shared pointer to an instance of the scaling node.
-std::shared_ptr<Aidge::Node> Scaling(double scalingFactor, const std::string& name = "");
+/// @brief Updates the scaling factor of a "Mul" node in a graph if the node is marked as a scaling node. 
+/// This function multiplies the existing scaling factor by a given coefficient. It verifies that the node is of the correct type ("Mul") 
+/// and has the `isScaling` attribute. If these conditions are not met, a warning is logged.
+/// @param node A shared pointer to an `Aidge::Node` object representing the node to modify.
+/// @param coeff  A double representing the multiplication coefficient to apply to the scaling factor.
+void multiplyScalingFactor(std::shared_ptr<Aidge::Node> node, double coeff);
+
 
 /// @brief Updates the scaling factor of a PTQ meta-operator node, allowing for dynamic adjustment of the scaling parameter.
 /// This function sets a new scaling factor for a specified meta-operator node, modifying the scalar applied in the [Mul] operation.
diff --git a/include/aidge/quantization_version.h b/include/aidge/quantization_version.h
index 546263a..f14a045 100644
--- a/include/aidge/quantization_version.h
+++ b/include/aidge/quantization_version.h
@@ -3,9 +3,9 @@
 
 namespace Aidge {
 static constexpr const int PROJECT_VERSION_MAJOR = 0;
-static constexpr const int PROJECT_VERSION_MINOR = 2;
+static constexpr const int PROJECT_VERSION_MINOR = 3;
 static constexpr const int PROJECT_VERSION_PATCH = 0;
-static constexpr const char * PROJECT_VERSION = "0.2.0";
-static constexpr const char * PROJECT_GIT_HASH = "f50c860";
+static constexpr const char * PROJECT_VERSION = "0.3.0";
+static constexpr const char * PROJECT_GIT_HASH = "8c89214";
 }
 #endif // VERSION_H
diff --git a/python_binding/pybind_PTQ.cpp b/python_binding/pybind_PTQ.cpp
index b5193bd..7f7c57d 100644
--- a/python_binding/pybind_PTQ.cpp
+++ b/python_binding/pybind_PTQ.cpp
@@ -17,6 +17,7 @@
 #include "aidge/quantization/PTQ/Clipping.hpp"
 #include "aidge/quantization/PTQ/CLE.hpp"
 #include "aidge/quantization/PTQ/PTQ.hpp"
+#include "aidge/quantization/PTQ/PTQMetaOps.hpp"
 
 #include "aidge/graph/GraphView.hpp"
 
@@ -48,6 +49,14 @@ void init_PTQ(py::module &m) {
     :type network: :py:class:`aidge_core.GraphView`
     )mydelimiter");
 
+    m.def( "multiply_scaling_factor",&multiplyScalingFactor,py::arg("node"), py::arg("coeff")
+     R"mydelimiter(
+    Updates the scaling factor of a "Mul" node in a graph if the node is marked as a scaling node. This function multiplies the existing scaling factor by a given coefficient.
+    :param node: A node representing the node to modify.
+    :param coeff: A floating value representing the multiplication coefficient to apply to the scaling factor.
+    )mydelimiter"
+    );
+    
     m.def("normalize_parameters", &normalizeParameters, py::arg("network"),
     R"mydelimiter(
     Normalize the parameters of each parametrized node, so that they fit in the [-1:1] range.
diff --git a/src/PTQ/Clipping.cpp b/src/PTQ/Clipping.cpp
index 57ad7a8..1901e38 100644
--- a/src/PTQ/Clipping.cpp
+++ b/src/PTQ/Clipping.cpp
@@ -222,7 +222,7 @@ std::map<std::string, double> adjustRanges(Clipping clippingMode, std::map<std::
 
         for (std::shared_ptr<Node> node : graphView->getNodes())
         {
-            if (node->type() == "Scaling")
+            if (node->attributes()->hasAttr("isScaling"))
             {
                 std::vector<int> histogram = histograms[node->name()];
 
diff --git a/src/PTQ/PTQ.cpp b/src/PTQ/PTQ.cpp
index 3677ae0..2d431f6 100644
--- a/src/PTQ/PTQ.cpp
+++ b/src/PTQ/PTQ.cpp
@@ -264,12 +264,19 @@ void insertResidualNodes(std::shared_ptr<GraphView> graphView)
                     Log::info(" ### inserting multiplicative node ...");
 
                     std::string residualNodeName = makeUniqueName(parentNode->name() + "_Res", graphView);
-                    std::shared_ptr<Node> residualNode = Scaling(1.0, residualNodeName);
+                    std::shared_ptr<Node> residualNode = Mul(residualNodeName);
+                    residualNode->attributes()->addAttr("isScaling", 0.0);
+                    
+                    //Adding the SF as a producer of the node
+                    std::shared_ptr<Tensor> scalingFactorTensor = std::make_shared<Tensor>(Array1D<double, 1> {1.0});
+                    std::shared_ptr<Node> scalingFactorProducer = addProducer(residualNode, 1, {1}, "ScalingFactor"); 
+                    scalingFactorProducer->getOperator()->setOutput(0, scalingFactorTensor);
 
-                    residualNode->getOperator()->setDataType(DataType::Float64); //getDataType(parentNode)
+                    residualNode->getOperator()->setDataType(DataType::Float64); // getDataType(parentNode)
                     residualNode->getOperator()->setBackend("cpu");
 
                     graphView->insertParent(node, residualNode, i, 0, 0);
+                    graphView->add(scalingFactorProducer);
                 }
             }
         }
@@ -295,7 +302,16 @@ void insertScalingNodes(std::shared_ptr<GraphView> graphView)
         if (isAffine(parentNode) || isMerging(parentNode))
         {
             std::string scalingNodeName = makeUniqueName(parentNode->name() + "_Scaling", graphView);
-            std::shared_ptr<Node> scalingNode = Scaling(1.0, scalingNodeName);
+            //std::shared_ptr<Node> scalingNode = Scaling(1.0, scalingNodeName);
+            
+            //Adding Mul operator with tag "isScaling"
+            std::shared_ptr<Aidge::Node> scalingNode = Mul(scalingNodeName);
+            scalingNode->attributes()->addAttr("isScaling",0.0);
+
+            //Adding the SF as a producer of the node
+            std::shared_ptr<Tensor> scalingFactorTensor = std::make_shared<Tensor>(Array1D<double, 1> {1.0});
+            std::shared_ptr<Node> scalingFactorProducer = addProducer(scalingNode, 1, {1}, "ScalingFactor"); 
+            scalingFactorProducer->getOperator()->setOutput(0, scalingFactorTensor);
 
             scalingNode->getOperator()->setDataType(DataType::Float64); // getDataType(parentNode)
             scalingNode->getOperator()->setBackend("cpu");
@@ -320,12 +336,14 @@ void insertScalingNodes(std::shared_ptr<GraphView> graphView)
                 for (std::size_t i = 0; i < nextNodes.size(); i++)
                     scalingNode->addChild(nextNodes[i], 0, inputIndices[i]);
 
+                graphView->add(scalingFactorProducer);
                 graphView->add(scalingNode);
             }
             else
             {
                 // Log::info(" last node reached ! ");
                 parentNode->addChild(scalingNode, 0, 0);
+                graphView->add(scalingFactorProducer);
                 graphView->add(scalingNode);
             }
         }
@@ -335,7 +353,7 @@ void insertScalingNodes(std::shared_ptr<GraphView> graphView)
 static std::shared_ptr<Node> getPreviousScalingNode(std::shared_ptr<Node> mergingNode)
 {
     std::shared_ptr<Node> currNode = mergingNode;
-    while(currNode->type() != "Scaling")
+    while(!currNode->attributes()->hasAttr("isScaling"))
     {
         if (currNode->getParents().size() == 0)
         {
@@ -378,7 +396,7 @@ void normalizeParameters(std::shared_ptr<GraphView> graphView)
     for (std::shared_ptr<Node> node : nodeVector)
     {
         // Scaling nodes still have a ratio of 1, so they are seamless ...
-        if (node->type() == "ReLU" || node->type() == "Scaling" || isSeamless(node))
+        if (node->type() == "ReLU" || node->attributes()->hasAttr("isScaling") || isSeamless(node))
         {
             if (node != firstNode)
             {
@@ -439,8 +457,9 @@ void normalizeParameters(std::shared_ptr<GraphView> graphView)
 
                 std::shared_ptr<Node> scalingNode = getPreviousScalingNode(mergingNode);
 
-                double currScalingFactor = getScalingFactor(scalingNode);
-                updateScalingFactor(scalingNode, currScalingFactor / rescaling);
+                //double currScalingFactor = getScalingFactor(scalingNode);
+                //updateScalingFactor(scalingNode, currScalingFactor / rescaling);
+                multiplyScalingFactor(scalingNode,1/rescaling);
 
                 accumulatedRatios[mergingNode->name()] /= rescaling; // optional ...
             }
@@ -465,7 +484,7 @@ std::map<std::string, double> computeRanges(std::shared_ptr<GraphView> graphView
     std::set<std::shared_ptr<Node>> nodeSet = graphView->getNodes();
     for (std::shared_ptr<Node> node : nodeSet)
     {
-        if ((scalingNodesOnly && (node->type() == "Scaling")) || (!scalingNodesOnly && (node->type() != "Producer")))
+        if ((scalingNodesOnly && (node->attributes()->hasAttr("isScaling"))) || (!scalingNodesOnly && (node->type() != "Producer")))
         {
             std::shared_ptr<Operator> nodeOperator = node->getOperator();
             std::shared_ptr<Tensor> valueTensor = std::static_pointer_cast<Tensor> (nodeOperator->getRawOutput(0));
@@ -487,7 +506,7 @@ std::map<std::string, double> computeRanges(std::shared_ptr<GraphView> graphView
     // std::shared_ptr<Node> inputNode = getFirstNode(graphView);
 
     for (std::shared_ptr<Node> node : nodeSet)
-        if ((scalingNodesOnly && (node->type() == "Scaling")) || (!scalingNodesOnly && (node->type() != "Producer")))
+        if ((scalingNodesOnly && (node->attributes()->hasAttr("isScaling"))) || (!scalingNodesOnly && (node->type() != "Producer")))
             valueRanges.insert(std::make_pair(node->name(), 0));
 
     if (useCuda)
@@ -514,7 +533,7 @@ std::map<std::string, double> computeRanges(std::shared_ptr<GraphView> graphView
         std::map<std::string, double> sampleRanges;
         for (std::shared_ptr<Node> node : nodeSet)
         {
-            if ((scalingNodesOnly && (node->type() == "Scaling")) || (!scalingNodesOnly && (node->type() != "Producer")))
+            if ((scalingNodesOnly && (node->attributes()->hasAttr("isScaling"))) || (!scalingNodesOnly && (node->type() != "Producer")))
             {
                 std::shared_ptr<Operator> nodeOperator = node->getOperator();
                 std::shared_ptr<Tensor> valueTensor = std::static_pointer_cast<Tensor> (nodeOperator->getRawOutput(0));
@@ -536,7 +555,7 @@ std::map<std::string, double> computeRanges(std::shared_ptr<GraphView> graphView
 
         for (std::shared_ptr<Node> node : nodeSet)
         {
-            if ((scalingNodesOnly && (node->type() == "Scaling")) || (!scalingNodesOnly && (node->type() != "Producer")))
+            if ((scalingNodesOnly && (node->attributes()->hasAttr("isScaling"))) || (!scalingNodesOnly && (node->type() != "Producer")))
                 {
                     std::string nodeName = node->name();
                     if (sampleRanges[nodeName] > valueRanges[nodeName])
@@ -589,7 +608,7 @@ void normalizeActivations(std::shared_ptr<GraphView> graphView, std::map<std::st
         // Here prevNode is either a 'Affine' or a 'Merging'
         // => do not split the cases, just handle the bias ...
 
-        if (node->type() == "Scaling") 
+        if (node->attributes()->hasAttr("isScaling")) 
         {
             // retrieve the previous scaling factor ...
             std::shared_ptr<Node> prevNode = node->getParent(0);
@@ -598,8 +617,9 @@ void normalizeActivations(std::shared_ptr<GraphView> graphView, std::map<std::st
             // ValueRanges must contains all the scaling nodes !!!
             double scalingFactor = valueRanges[node->name()]; 
 
-            double currScalingFactor = getScalingFactor(node);
-            updateScalingFactor(node, currScalingFactor / (scalingFactor / prevScalingFactor));
+            //double currScalingFactor = getScalingFactor(node);
+            //updateScalingFactor(node, currScalingFactor / (scalingFactor / prevScalingFactor));
+            multiplyScalingFactor(node,1/(scalingFactor / prevScalingFactor));
 
             scalingFactors[node->name()] = scalingFactor;
 
@@ -642,8 +662,9 @@ void normalizeActivations(std::shared_ptr<GraphView> graphView, std::map<std::st
                 std::shared_ptr<Node> scalingNode = getPreviousScalingNode(mergingNode);
                 //Log::info(" SCALING NODE : {} {}", scalingNode->type(), scalingNode->name());
 
-                double currScalingFactor = getScalingFactor(scalingNode);
-                updateScalingFactor(scalingNode, currScalingFactor * rescaling);                
+                //double currScalingFactor = getScalingFactor(scalingNode);
+                //updateScalingFactor(scalingNode, currScalingFactor * rescaling);    
+                multiplyScalingFactor(scalingNode,rescaling) ;          
             }
         }
     }
@@ -679,7 +700,7 @@ std::map<std::string, std::pair<bool, bool>> computeSignMap(std::shared_ptr<Grap
             signMap[node->name()].second = false;
         } 
 
-        if (node->type() == "Scaling") 
+        if (node->attributes()->hasAttr("isScaling")) 
         {
             signMap[node->name()].second = false;
 
@@ -726,7 +747,7 @@ std::map<std::string, std::pair<bool, bool>> computeSignMap(std::shared_ptr<Grap
                 // Arbitration : Signed type wins !
                 for(std::shared_ptr<Node> parent : parentNodes)
                 {
-                    while (parent->type() != "Scaling")
+                    while (!parent->attributes()->hasAttr("isScaling"))
                     {
                         signMap[parent->name()] = std::make_pair(false, false);
                         // We are on a branch so nodes always have 1 parent ...
@@ -842,8 +863,9 @@ void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_
             
             std::shared_ptr<Node> scalingNode = *(node->getChildren().begin()); // Assert if scalingNode is a Scaling ...
 
-            double currScalingFactor = getScalingFactor(scalingNode);
-            updateScalingFactor(scalingNode, currScalingFactor * rescaling);
+           // double currScalingFactor = getScalingFactor(scalingNode);
+           // updateScalingFactor(scalingNode, currScalingFactor * rescaling);
+            multiplyScalingFactor(scalingNode,rescaling) ;          
         }
         
         if (isMerging(node))
@@ -858,23 +880,27 @@ void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_
 
             std::shared_ptr<Node> scalingNode = *(node->getChildren().begin()); // Assert if scalingNode is a Scaling ...
         
-            double currScalingFactor = getScalingFactor(scalingNode); // XXX bad naming
-            updateScalingFactor(scalingNode, currScalingFactor * rescaling);
+           // double currScalingFactor = getScalingFactor(scalingNode); // XXX bad naming
+           // updateScalingFactor(scalingNode, currScalingFactor * rescaling);
+            multiplyScalingFactor(scalingNode,rescaling) ;          
         }
         
         // Handle the Scaling Nodes ...
 
-        if (node->type() == "Scaling")
+        if (node->attributes()->hasAttr("isScaling"))
         {
             if (!noQuant) 
             {  
                 // Replace  the  Scaling Node by Quantizer
+                auto scalingFactorTensor = std::static_pointer_cast<OperatorTensor>(node->getOperator())->getInput(1);
+                std::shared_ptr<Tensor> fallback;
+                const Tensor& localTensor = scalingFactorTensor->refCastFrom(fallback, DataType::Float64, "cpu");
+                double old_sf = localTensor.get<double>(0);//!\\ 
 
-                std::shared_ptr<Node> quantizerNode = Quantizer(getScalingFactor(node), -(signedMax + 1), signedMax, node->name());
+                std::shared_ptr<Node> quantizerNode = Quantizer(old_sf, -(signedMax + 1), signedMax, node->name());
                 quantizerNode->getOperator()->setDataType(DataType::Float64); // getDataType(parentNode)
                 quantizerNode->getOperator()->setBackend("cpu");
-
-                graphView->replace({node}, {quantizerNode});
+                graphView->replace({node,node->getParent(1)}, {quantizerNode});
 
                 if (optimizeSigns)
                 {
@@ -888,6 +914,7 @@ void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_
 
                     double currScalingFactor = getScalingFactor(quantizerNode);
                     updateScalingFactor(quantizerNode, currScalingFactor * rescaling);
+                    
 
                     if(outputIsUnsigned)
                     {
@@ -965,7 +992,7 @@ void performSingleShiftApproximation(std::shared_ptr<GraphView> graphView, bool
 
             double approx = std::pow(2, std::ceil(std::log2(base)));
 
-            updateScalingFactor(scalingNode, approx);
+            updateScalingFactor(scalingNode,approx);
 
             double ratio = base / approx;
 
@@ -989,7 +1016,7 @@ static void printScalingFactors(std::shared_ptr<GraphView> graphView)
 {
     Log::info(" === SCALING FACTORS === ");
     for (auto node : retrieveNodeVector(graphView))
-        if (node->type() == "Scaling" || node->type() == "Quantizer")
+        if (node->attributes()->hasAttr("isScaling") || node->type() == "Quantizer")
         {
             double scalingFactor = getScalingFactor(node);
             Log::info(" {:.6f} ({})", scalingFactor, node->name());
@@ -1019,8 +1046,8 @@ static void printRanges(std::shared_ptr<GraphView> graphView, std::map<std::stri
 
     auto scheduling = scheduler.getStaticScheduling();
     for (auto node : scheduling)
-        if (node->type() == "Scaling")
-            fmt::println("{} range = {}", node->name(), valueRanges[node->name()]);
+        if (node->attributes()->hasAttr("isScaling"))
+            std::cout << node->name() << " range = " << valueRanges[node->name()] << std::endl;
 }
 
 void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits, std::vector<std::shared_ptr<Tensor>> inputDataSet, Clipping clippingMode, bool noQuant, bool optimizeSigns, bool singleShift, bool useCuda, bool verbose)
@@ -1042,7 +1069,6 @@ void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits,
     insertScalingNodes(graphView);
 
     crossLayerEqualization(graphView);
-
     Log::info(" Normalizing the parameters ...");
     normalizeParameters(graphView);
 
diff --git a/src/PTQ/PTQMetaOps.cpp b/src/PTQ/PTQMetaOps.cpp
index 527d853..4c17f9b 100644
--- a/src/PTQ/PTQMetaOps.cpp
+++ b/src/PTQ/PTQMetaOps.cpp
@@ -61,23 +61,6 @@ std::shared_ptr<Node> Quantizer(double scalingFactor, double clipMin, double cli
     return metaopNode; 
 }
 
-std::shared_ptr<Node> Scaling(double scalingFactor, const std::string& name)
-{
-    std::shared_ptr<Tensor> scalingFactorTensor = std::make_shared<Tensor>(Array1D<double, 1> {scalingFactor});
-
-    std::shared_ptr<Node> mulNode = Mul((!name.empty()) ? name + "_Scaling" : "");
-
-    std::shared_ptr<Node> scalingFactorProducer = addProducer<1>(mulNode, 1, {1}, "ScalingFactor"); 
-    scalingFactorProducer->getOperator()->setOutput(0, scalingFactorTensor);
-
-    std::shared_ptr<GraphView> graphView  = Sequential({mulNode});
-    std::shared_ptr<GraphView> connectedGraphView  = getConnectedGraphView(mulNode);
-
-    NodePtr metaopNode = MetaOperator("Scaling", connectedGraphView, {}, name);
-
-    return metaopNode;
-}
-
 static std::shared_ptr<Node> getSubNode(std::shared_ptr<GraphView> graphView, std::string nodeType)
 {
     std::shared_ptr<Node> mulNode = nullptr;
@@ -88,9 +71,27 @@ static std::shared_ptr<Node> getSubNode(std::shared_ptr<GraphView> graphView, st
     return mulNode;
 }
 
+void multiplyScalingFactor(std::shared_ptr<Aidge::Node> node,double coeff)
+{
+    if(node->type() == "Mul" && node->attributes()->hasAttr("isScaling"))
+    {
+        auto scalingFactorTensor = std::static_pointer_cast<OperatorTensor>(node->getOperator())->getInput(1);
+        std::shared_ptr<Tensor> fallback;
+        const Tensor& localTensor = scalingFactorTensor->refCastFrom(fallback, DataType::Float64, "cpu");
+        double previousScalingFactor = localTensor.get<double>(0);
+        std::shared_ptr<Tensor> finalTensor = std::make_shared<Tensor>(Array1D<double, 1> {previousScalingFactor * coeff});
+        node->input(1).first->getOperator()->setOutput(0, finalTensor);
+    }
+    else
+    {
+        Log::warn(" Cannot update the scaling factor on Node of type {} with no scaling tag", node->type());
+    }
+}
+
+
 void updateScalingFactor(std::shared_ptr<Node> metaOpNode, double scalingFactor)
 {
-    if(metaOpNode->type() != "Scaling" && metaOpNode->type() != "Quantizer")
+    if(metaOpNode->type() != "Quantizer")
         Log::warn(" Cannot update the scaling factor on Node of type {}", metaOpNode->type());
 
     std::shared_ptr<Tensor> scalingFactorTensor = std::make_shared<Tensor>(Array1D<double, 1> {scalingFactor});
@@ -107,7 +108,7 @@ void updateScalingFactor(std::shared_ptr<Node> metaOpNode, double scalingFactor)
 
 double getScalingFactor(std::shared_ptr<Node> MetaOpNode)
 {
-    if (MetaOpNode->type() != "Scaling" && MetaOpNode->type() != "Quantizer") {
+    if (MetaOpNode->type() != "Quantizer") {
         Log::warn(" Cannot get the scaling factor on Node of type {}", MetaOpNode->type());
         return 0;
     }
-- 
GitLab


From a98dbceaad16441d7449022992f3885332e7aaf4 Mon Sep 17 00:00:00 2001
From: Noam ZERAH <noam.zerah@cea.fr>
Date: Mon, 13 Jan 2025 15:43:30 +0000
Subject: [PATCH 06/26] Refactoring Scaling Metaop deletions by removing old
 getScalingFactor and updateScalingFactor; Adding clear tag isCompensation for
 Mul used as compensation nodes

---
 .../PTQ => operator}/PTQMetaOps.hpp           |  8 ----
 include/aidge/quantization/PTQ/PTQ.hpp        |  8 ++++
 include/aidge/quantization_version.h          |  2 +-
 python_binding/pybind_PTQ.cpp                 |  2 +-
 src/PTQ/PTQ.cpp                               | 37 +++++++++++--------
 src/{PTQ => operator}/PTQMetaOps.cpp          | 18 +--------
 6 files changed, 33 insertions(+), 42 deletions(-)
 rename include/aidge/{quantization/PTQ => operator}/PTQMetaOps.hpp (86%)
 rename src/{PTQ => operator}/PTQMetaOps.cpp (84%)

diff --git a/include/aidge/quantization/PTQ/PTQMetaOps.hpp b/include/aidge/operator/PTQMetaOps.hpp
similarity index 86%
rename from include/aidge/quantization/PTQ/PTQMetaOps.hpp
rename to include/aidge/operator/PTQMetaOps.hpp
index a8028c6..22fb71e 100644
--- a/include/aidge/quantization/PTQ/PTQMetaOps.hpp
+++ b/include/aidge/operator/PTQMetaOps.hpp
@@ -37,14 +37,6 @@ namespace Aidge {
 /// @return A shared pointer to an instance of the meta-operator node.
 std::shared_ptr<Aidge::Node> Quantizer(double scalingFactor, double clipMin, double clipMax, const std::string& name);
 
-/// @brief Updates the scaling factor of a "Mul" node in a graph if the node is marked as a scaling node. 
-/// This function multiplies the existing scaling factor by a given coefficient. It verifies that the node is of the correct type ("Mul") 
-/// and has the `isScaling` attribute. If these conditions are not met, a warning is logged.
-/// @param node A shared pointer to an `Aidge::Node` object representing the node to modify.
-/// @param coeff  A double representing the multiplication coefficient to apply to the scaling factor.
-void multiplyScalingFactor(std::shared_ptr<Aidge::Node> node, double coeff);
-
-
 /// @brief Updates the scaling factor of a PTQ meta-operator node, allowing for dynamic adjustment of the scaling parameter.
 /// This function sets a new scaling factor for a specified meta-operator node, modifying the scalar applied in the [Mul] operation.
 /// The meta-operator node must be a PTQ-specific operator, such as a Quantizer or Scaling node.
diff --git a/include/aidge/quantization/PTQ/PTQ.hpp b/include/aidge/quantization/PTQ/PTQ.hpp
index d2b8b7f..e7cbddd 100644
--- a/include/aidge/quantization/PTQ/PTQ.hpp
+++ b/include/aidge/quantization/PTQ/PTQ.hpp
@@ -74,6 +74,14 @@ namespace Aidge {
      */
     bool checkArchitecture(std::shared_ptr<GraphView> graphView);
 
+    /**
+     * @brief This function multiplies the existing scaling factor by a given coefficient. It verifies that the node is of the correct type ("Mul") 
+     * and has the `isScaling` attribute. If these conditions are not met, a warning is logged.
+     * @param node A shared pointer to an `Aidge::Node` object representing the node to modify.
+     * @param coeff  A double representing the multiplication coefficient to apply to the scaling factor.
+     */
+    void multiplyScalingFactor(std::shared_ptr<Aidge::Node> node, double coeff);
+
 
     void prepareNetwork(std::shared_ptr<GraphView> graphView);
 
diff --git a/include/aidge/quantization_version.h b/include/aidge/quantization_version.h
index f14a045..740621a 100644
--- a/include/aidge/quantization_version.h
+++ b/include/aidge/quantization_version.h
@@ -6,6 +6,6 @@ static constexpr const int PROJECT_VERSION_MAJOR = 0;
 static constexpr const int PROJECT_VERSION_MINOR = 3;
 static constexpr const int PROJECT_VERSION_PATCH = 0;
 static constexpr const char * PROJECT_VERSION = "0.3.0";
-static constexpr const char * PROJECT_GIT_HASH = "8c89214";
+static constexpr const char * PROJECT_GIT_HASH = "b4af1ce";
 }
 #endif // VERSION_H
diff --git a/python_binding/pybind_PTQ.cpp b/python_binding/pybind_PTQ.cpp
index 7f7c57d..2c25dc6 100644
--- a/python_binding/pybind_PTQ.cpp
+++ b/python_binding/pybind_PTQ.cpp
@@ -49,7 +49,7 @@ void init_PTQ(py::module &m) {
     :type network: :py:class:`aidge_core.GraphView`
     )mydelimiter");
 
-    m.def( "multiply_scaling_factor",&multiplyScalingFactor,py::arg("node"), py::arg("coeff")
+    m.def( "multiply_scaling_factor",&multiplyScalingFactor,py::arg("node"), py::arg("coeff"),
      R"mydelimiter(
     Updates the scaling factor of a "Mul" node in a graph if the node is marked as a scaling node. This function multiplies the existing scaling factor by a given coefficient.
     :param node: A node representing the node to modify.
diff --git a/src/PTQ/PTQ.cpp b/src/PTQ/PTQ.cpp
index 2d431f6..23d9f01 100644
--- a/src/PTQ/PTQ.cpp
+++ b/src/PTQ/PTQ.cpp
@@ -12,7 +12,7 @@
 #include "aidge/quantization/PTQ/CLE.hpp"
 #include "aidge/quantization/PTQ/Clipping.hpp"
 #include "aidge/quantization/PTQ/PTQ.hpp"
-#include "aidge/quantization/PTQ/PTQMetaOps.hpp"
+#include "aidge/operator/PTQMetaOps.hpp"
 
 
 #include "aidge/data/Tensor.hpp"
@@ -72,6 +72,23 @@ bool checkArchitecture(std::shared_ptr<GraphView> graphView)
     return true;
 }
 
+void multiplyScalingFactor(std::shared_ptr<Aidge::Node> node,double coeff)
+{
+    if(node->type() == "Mul" && node->attributes()->hasAttr("isScaling"))
+    {
+        auto scalingFactorTensor = std::static_pointer_cast<OperatorTensor>(node->getOperator())->getInput(1);
+        std::shared_ptr<Tensor> fallback;
+        const Tensor& localTensor = scalingFactorTensor->refCastFrom(fallback, DataType::Float64, "cpu");
+        double previousScalingFactor = localTensor.get<double>(0);
+        std::shared_ptr<Tensor> finalTensor = std::make_shared<Tensor>(Array1D<double, 1> {previousScalingFactor * coeff});
+        node->input(1).first->getOperator()->setOutput(0, finalTensor);
+    }
+    else
+    {
+        Log::warn(" Cannot update the scaling factor on Node of type {} with no scaling tag", node->type());
+    }
+}
+
 static void rescaleTensor(std::shared_ptr<Tensor> tensor, float scaling)
 {
     auto mulOp = Mul_Op();
@@ -457,8 +474,6 @@ void normalizeParameters(std::shared_ptr<GraphView> graphView)
 
                 std::shared_ptr<Node> scalingNode = getPreviousScalingNode(mergingNode);
 
-                //double currScalingFactor = getScalingFactor(scalingNode);
-                //updateScalingFactor(scalingNode, currScalingFactor / rescaling);
                 multiplyScalingFactor(scalingNode,1/rescaling);
 
                 accumulatedRatios[mergingNode->name()] /= rescaling; // optional ...
@@ -617,8 +632,6 @@ void normalizeActivations(std::shared_ptr<GraphView> graphView, std::map<std::st
             // ValueRanges must contains all the scaling nodes !!!
             double scalingFactor = valueRanges[node->name()]; 
 
-            //double currScalingFactor = getScalingFactor(node);
-            //updateScalingFactor(node, currScalingFactor / (scalingFactor / prevScalingFactor));
             multiplyScalingFactor(node,1/(scalingFactor / prevScalingFactor));
 
             scalingFactors[node->name()] = scalingFactor;
@@ -661,9 +674,7 @@ void normalizeActivations(std::shared_ptr<GraphView> graphView, std::map<std::st
 
                 std::shared_ptr<Node> scalingNode = getPreviousScalingNode(mergingNode);
                 //Log::info(" SCALING NODE : {} {}", scalingNode->type(), scalingNode->name());
-
-                //double currScalingFactor = getScalingFactor(scalingNode);
-                //updateScalingFactor(scalingNode, currScalingFactor * rescaling);    
+                
                 multiplyScalingFactor(scalingNode,rescaling) ;          
             }
         }
@@ -863,8 +874,6 @@ void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_
             
             std::shared_ptr<Node> scalingNode = *(node->getChildren().begin()); // Assert if scalingNode is a Scaling ...
 
-           // double currScalingFactor = getScalingFactor(scalingNode);
-           // updateScalingFactor(scalingNode, currScalingFactor * rescaling);
             multiplyScalingFactor(scalingNode,rescaling) ;          
         }
         
@@ -880,8 +889,6 @@ void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_
 
             std::shared_ptr<Node> scalingNode = *(node->getChildren().begin()); // Assert if scalingNode is a Scaling ...
         
-           // double currScalingFactor = getScalingFactor(scalingNode); // XXX bad naming
-           // updateScalingFactor(scalingNode, currScalingFactor * rescaling);
             multiplyScalingFactor(scalingNode,rescaling) ;          
         }
         
@@ -951,6 +958,8 @@ static void insertCompensationNodes(std::shared_ptr<GraphView> graphView, std::u
 
                 std::string mulNodeName = makeUniqueName(node->name() + "_Mul", graphView);
                 std::shared_ptr<Node> mulNode = Mul(mulNodeName);
+                
+                mulNode->attributes()->addAttr("isCompensation",0.0);
                 mulNode->getOperator()->setDataType(DataType::Float64); // getDataType(parentNode)
                 mulNode->getOperator()->setBackend("cpu");
 
@@ -982,9 +991,7 @@ void performSingleShiftApproximation(std::shared_ptr<GraphView> graphView, bool
 
     for (std::shared_ptr<Node> node : nodeVector)
     {
-        // TODO : use Compensation nodes instead of Mul nodes
-
-        if (isAffine(node) || (node->type() == "Mul"))
+        if (isAffine(node) || (node->type() == "Mul" && node->attributes()->hasAttr("isCompensation")))
         {
             std::shared_ptr<Node> scalingNode = (*node->getChildren().begin());
 
diff --git a/src/PTQ/PTQMetaOps.cpp b/src/operator/PTQMetaOps.cpp
similarity index 84%
rename from src/PTQ/PTQMetaOps.cpp
rename to src/operator/PTQMetaOps.cpp
index 4c17f9b..facfed2 100644
--- a/src/PTQ/PTQMetaOps.cpp
+++ b/src/operator/PTQMetaOps.cpp
@@ -9,7 +9,7 @@
  *
  ********************************************************************************/
 
-#include "aidge/quantization/PTQ/PTQMetaOps.hpp"
+#include "aidge/operator/PTQMetaOps.hpp"
 
 #include <array>
 #include <memory>
@@ -71,22 +71,6 @@ static std::shared_ptr<Node> getSubNode(std::shared_ptr<GraphView> graphView, st
     return mulNode;
 }
 
-void multiplyScalingFactor(std::shared_ptr<Aidge::Node> node,double coeff)
-{
-    if(node->type() == "Mul" && node->attributes()->hasAttr("isScaling"))
-    {
-        auto scalingFactorTensor = std::static_pointer_cast<OperatorTensor>(node->getOperator())->getInput(1);
-        std::shared_ptr<Tensor> fallback;
-        const Tensor& localTensor = scalingFactorTensor->refCastFrom(fallback, DataType::Float64, "cpu");
-        double previousScalingFactor = localTensor.get<double>(0);
-        std::shared_ptr<Tensor> finalTensor = std::make_shared<Tensor>(Array1D<double, 1> {previousScalingFactor * coeff});
-        node->input(1).first->getOperator()->setOutput(0, finalTensor);
-    }
-    else
-    {
-        Log::warn(" Cannot update the scaling factor on Node of type {} with no scaling tag", node->type());
-    }
-}
 
 
 void updateScalingFactor(std::shared_ptr<Node> metaOpNode, double scalingFactor)
-- 
GitLab


From 48427337c51e5e257d2794d304af0bd5b777529b Mon Sep 17 00:00:00 2001
From: Noam ZERAH <noam.zerah@cea.fr>
Date: Mon, 13 Jan 2025 15:56:11 +0000
Subject: [PATCH 07/26] Changing include in python bindings

---
 python_binding/pybind_PTQ.cpp | 2 --
 1 file changed, 2 deletions(-)

diff --git a/python_binding/pybind_PTQ.cpp b/python_binding/pybind_PTQ.cpp
index 2c25dc6..61a3cb9 100644
--- a/python_binding/pybind_PTQ.cpp
+++ b/python_binding/pybind_PTQ.cpp
@@ -17,8 +17,6 @@
 #include "aidge/quantization/PTQ/Clipping.hpp"
 #include "aidge/quantization/PTQ/CLE.hpp"
 #include "aidge/quantization/PTQ/PTQ.hpp"
-#include "aidge/quantization/PTQ/PTQMetaOps.hpp"
-
 #include "aidge/graph/GraphView.hpp"
 
 namespace py = pybind11;
-- 
GitLab


From 496491774df40049dcb9e11640514ba0de7956e2 Mon Sep 17 00:00:00 2001
From: Noam ZERAH <noam.zerah@cea.fr>
Date: Wed, 15 Jan 2025 11:05:21 +0000
Subject: [PATCH 08/26] rebasing with dev

---
 include/aidge/quantization_version.h |  2 +-
 src/PTQ/PTQ.cpp                      | 48 ++++++++++++----------------
 2 files changed, 22 insertions(+), 28 deletions(-)

diff --git a/include/aidge/quantization_version.h b/include/aidge/quantization_version.h
index 740621a..d773aa8 100644
--- a/include/aidge/quantization_version.h
+++ b/include/aidge/quantization_version.h
@@ -6,6 +6,6 @@ static constexpr const int PROJECT_VERSION_MAJOR = 0;
 static constexpr const int PROJECT_VERSION_MINOR = 3;
 static constexpr const int PROJECT_VERSION_PATCH = 0;
 static constexpr const char * PROJECT_VERSION = "0.3.0";
-static constexpr const char * PROJECT_GIT_HASH = "b4af1ce";
+static constexpr const char * PROJECT_GIT_HASH = "94747bf";
 }
 #endif // VERSION_H
diff --git a/src/PTQ/PTQ.cpp b/src/PTQ/PTQ.cpp
index 23d9f01..9dee442 100644
--- a/src/PTQ/PTQ.cpp
+++ b/src/PTQ/PTQ.cpp
@@ -283,6 +283,7 @@ void insertResidualNodes(std::shared_ptr<GraphView> graphView)
                     std::string residualNodeName = makeUniqueName(parentNode->name() + "_Res", graphView);
                     std::shared_ptr<Node> residualNode = Mul(residualNodeName);
                     residualNode->attributes()->addAttr("isScaling", 0.0);
+                    residualNode->attributes()->addAttr("isResidual", 0.0);
                     
                     //Adding the SF as a producer of the node
                     std::shared_ptr<Tensor> scalingFactorTensor = std::make_shared<Tensor>(Array1D<double, 1> {1.0});
@@ -944,43 +945,36 @@ static void insertCompensationNodes(std::shared_ptr<GraphView> graphView, std::u
     {
         // A merging node is always followed by a Quantizer node at this point
 
-        if (node->type() == "Quantizer")
+        if (node->type() == "Quantizer" && node->attributes()->hasAttr("isResidual"))
         {   
             // check if the Quantizer is a residual one, and insert a compensation node if so ...
+            // create and insert the multplicative node before the Quantizer
 
-            bool prevNodeIsForking = ((node->getParent(0))->getChildren().size() > 1);
-            bool prevNodeIsAffine = isAffine(node->getParent(0));
-            bool insertNode = prevNodeIsForking || !prevNodeIsAffine;
-
-            if (insertNode)
-            {
-                // create and insert the multplicative node before the Quantizer
-
-                std::string mulNodeName = makeUniqueName(node->name() + "_Mul", graphView);
-                std::shared_ptr<Node> mulNode = Mul(mulNodeName);
-                
-                mulNode->attributes()->addAttr("isCompensation",0.0);
-                mulNode->getOperator()->setDataType(DataType::Float64); // getDataType(parentNode)
-                mulNode->getOperator()->setBackend("cpu");
+            std::string mulNodeName = makeUniqueName(node->name() + "_Mul", graphView);
+            std::shared_ptr<Node> mulNode = Mul(mulNodeName);
+            
+            mulNode->attributes()->addAttr("isCompensation",0.0);
+            mulNode->getOperator()->setDataType(DataType::Float64); // getDataType(parentNode)
+            mulNode->getOperator()->setBackend("cpu");
 
-                graphView->insertParent(node, mulNode, 0, 0, 0);
+            graphView->insertParent(node, mulNode, 0, 0, 0);
 
-                // Add the coeff producer to the multiplier node
+            // Add the coeff producer to the multiplier node
 
-                std::shared_ptr<Node> coeffProducer = addProducer(mulNode, 1, {1}, ""); 
-                std::shared_ptr<Tensor> coeffTensor = std::make_shared<Tensor>(Array1D<double, 1> {signedMax});
-                coeffProducer->getOperator()->setOutput(0, coeffTensor);
+            std::shared_ptr<Node> coeffProducer = addProducer(mulNode, 1, {1}, ""); 
+            std::shared_ptr<Tensor> coeffTensor = std::make_shared<Tensor>(Array1D<double, 1> {signedMax});
+            coeffProducer->getOperator()->setOutput(0, coeffTensor);
 
-                coeffProducer->getOperator()->setDataType(DataType::Float64);
-                coeffProducer->getOperator()->setBackend("cpu"); 
+            coeffProducer->getOperator()->setDataType(DataType::Float64);
+            coeffProducer->getOperator()->setBackend("cpu"); 
 
-                graphView->add(coeffProducer); // needed ?
+            graphView->add(coeffProducer); // needed ?
 
-                // Adapt the scaling factor value accordingly
+            // Adapt the scaling factor value accordingly
 
-                double currScalingFactor = getScalingFactor(node); 
-                updateScalingFactor(node, currScalingFactor / signedMax);
-            }
+            double currScalingFactor = getScalingFactor(node); 
+            updateScalingFactor(node, currScalingFactor / signedMax);
+            
         }
     }
 }
-- 
GitLab


From c9adaf08fdbbddd76a76e60d5811c2cc77660138 Mon Sep 17 00:00:00 2001
From: Noam ZERAH <noam.zerah@cea.fr>
Date: Mon, 20 Jan 2025 15:53:11 +0000
Subject: [PATCH 09/26] Fixing isResidual bug in SSA when using tag; replacement
 of std::cout with Log::debug

---
 include/aidge/quantization_version.h |  2 +-
 src/PTQ/PTQ.cpp                      | 14 +++++++-------
 src/QAT/QAT_LSQ.cpp                  |  5 ++---
 3 files changed, 10 insertions(+), 11 deletions(-)

diff --git a/include/aidge/quantization_version.h b/include/aidge/quantization_version.h
index d773aa8..429e4bd 100644
--- a/include/aidge/quantization_version.h
+++ b/include/aidge/quantization_version.h
@@ -6,6 +6,6 @@ static constexpr const int PROJECT_VERSION_MAJOR = 0;
 static constexpr const int PROJECT_VERSION_MINOR = 3;
 static constexpr const int PROJECT_VERSION_PATCH = 0;
 static constexpr const char * PROJECT_VERSION = "0.3.0";
-static constexpr const char * PROJECT_GIT_HASH = "94747bf";
+static constexpr const char * PROJECT_GIT_HASH = "e464870";
 }
 #endif // VERSION_H
diff --git a/src/PTQ/PTQ.cpp b/src/PTQ/PTQ.cpp
index 9dee442..a81b2b7 100644
--- a/src/PTQ/PTQ.cpp
+++ b/src/PTQ/PTQ.cpp
@@ -14,7 +14,6 @@
 #include "aidge/quantization/PTQ/PTQ.hpp"
 #include "aidge/operator/PTQMetaOps.hpp"
 
-
 #include "aidge/data/Tensor.hpp"
 #include "aidge/graph/GraphView.hpp"
 #include "aidge/graph/Node.hpp"
@@ -945,8 +944,9 @@ static void insertCompensationNodes(std::shared_ptr<GraphView> graphView, std::u
     {
         // A merging node is always followed by a Quantizer node at this point
 
-        if (node->type() == "Quantizer" && node->attributes()->hasAttr("isResidual"))
+        if (node->type() == "Quantizer" && (node->attributes()->hasAttr("isResidual") || !isAffine(node->getParent(0))))
         {   
+
             // check if the Quantizer is a residual one, and insert a compensation node if so ...
             // create and insert the multplicative node before the Quantizer
 
@@ -1048,7 +1048,7 @@ static void printRanges(std::shared_ptr<GraphView> graphView, std::map<std::stri
     auto scheduling = scheduler.getStaticScheduling();
     for (auto node : scheduling)
         if (node->attributes()->hasAttr("isScaling"))
-            std::cout << node->name() << " range = " << valueRanges[node->name()] << std::endl;
+            Log::debug("{} range = {}",node->name(),valueRanges[node->name()]);
 }
 
 void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits, std::vector<std::shared_ptr<Tensor>> inputDataSet, Clipping clippingMode, bool noQuant, bool optimizeSigns, bool singleShift, bool useCuda, bool verbose)
@@ -1076,13 +1076,13 @@ void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits,
     Log::info(" Computing the value ranges ...");
     std::map<std::string, double> valueRanges = computeRanges(graphView, inputDataSet, true, useCuda);
 
-    //std::cout << " === RANGES (BEFORE ADJUST) ===" << std::endl;
+    //Log:debug("=== RANGES (BEFORE ADJUST) ===");
     //printRanges(graphView, valueRanges);
 
     Log::info(" Optimizing the clipping values ...");
     valueRanges = adjustRanges(clippingMode, valueRanges, nbBits, graphView, inputDataSet, useCuda, verbose);
 
-    //std::cout << " === RANGES (AFTER ADJUST) ===" << std::endl;
+    //Log:debug("=== RANGES (AFTER ADJUST) ===");
     //printRanges(graphView, valueRanges);
 
     Log::info(" Normalizing the activations ...");
@@ -1103,7 +1103,7 @@ void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits,
     if (verbose)
         printScalingFactors(graphView);
 
-    //std::cout << " === SCALINGS (BEFORE CAST) ===" << std::endl;
+    //Log::debug(" === SCALINGS (BEFORE CAST) ===");
     //printScalingFactors(graphView);
 
     setupDataType(graphView, inputDataSet, initialDataType);
@@ -1111,7 +1111,7 @@ void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits,
     if (useCuda)
         graphView->setBackend("cuda");
 
-    //std::cout << " === SCALINGS (AFTER CAST) ===" << std::endl;
+    //Log::debug(" === SCALINGS (AFTER CAST) ===");
     //printScalingFactors(graphView);
 
     Log::info(" Reseting the scheduler ...");
diff --git a/src/QAT/QAT_LSQ.cpp b/src/QAT/QAT_LSQ.cpp
index 04f2027..8a42770 100644
--- a/src/QAT/QAT_LSQ.cpp
+++ b/src/QAT/QAT_LSQ.cpp
@@ -13,7 +13,6 @@
 #include "aidge/operator/LSQ.hpp"
 #include "aidge/operator/ReLU.hpp"
 
-
 #include "aidge/data/Tensor.hpp"
 #include "aidge/graph/GraphView.hpp"
 #include "aidge/scheduler/SequentialScheduler.hpp"
@@ -51,7 +50,7 @@ static bool initStepSize(std::shared_ptr<Node> quantizer)
 
     stepSizeProducer->getOperator()->setOutput(0, stepSizeTensor);
 
-    std::cout << " [ INIT STEP SIZE = " << stepSize << " ] " << std::endl;
+    Log::debug("[ INIT STEP SIZE = {} ]",stepSize);
 
     return false;
 }
@@ -138,7 +137,7 @@ void QuantLSQ::setupQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBi
 void QuantLSQ::devLSQ(std::shared_ptr<Tensor> tensor)
 {
     float mean = (tensor->mean()).get<float> (0);
-    std::cout << " MEAN  = " << mean << std::endl;
+    Log::debug("MEAN = {}",mean);
 }
 
 }
\ No newline at end of file
-- 
GitLab


From f1eb07af4e073ace093647ae7d80e4481d2eb9aa Mon Sep 17 00:00:00 2001
From: Noam ZERAH <noam.zerah@cea.fr>
Date: Fri, 17 Jan 2025 15:29:47 +0000
Subject: [PATCH 10/26] Starting Work on adding Scaling Nodes (Tagged Mul)
 below Producers

---
 aidge_quantization/_version.py         |   2 +-
 include/aidge/quantization/PTQ/PTQ.hpp |   1 +
 src/PTQ/CLE.cpp                        |  43 +++++-
 src/PTQ/PTQ.cpp                        | 178 +++++++++++++++++++++----
 4 files changed, 193 insertions(+), 31 deletions(-)

diff --git a/aidge_quantization/_version.py b/aidge_quantization/_version.py
index d4ec20e..2d34d35 100644
--- a/aidge_quantization/_version.py
+++ b/aidge_quantization/_version.py
@@ -1,4 +1,4 @@
 # file generated by setuptools_scm
 # don't change, don't track in version control
 __version__ = version = '0.2.1.dev60+g8044e79.d20250106'
-__version_tuple__ = version_tuple = (0, 2, 1, 'dev60', 'g8044e79.d20250106')
+__version_tuple__ = version_tuple = (0, 2, 1, 'dev60', 'g8044e79.d20250106')
\ No newline at end of file
diff --git a/include/aidge/quantization/PTQ/PTQ.hpp b/include/aidge/quantization/PTQ/PTQ.hpp
index e7cbddd..74a49c8 100644
--- a/include/aidge/quantization/PTQ/PTQ.hpp
+++ b/include/aidge/quantization/PTQ/PTQ.hpp
@@ -66,6 +66,7 @@ namespace Aidge {
      * @return The scheduled vector of nodes
      */
     std::vector<std::shared_ptr<Node>> retrieveNodeVector(std::shared_ptr<GraphView> graphView, bool newSchedule = true, bool verbose = false);
+    bool insertScalingBelowProducer(std::shared_ptr<Node> node,double sf, std::shared_ptr<GraphView> graphView);
 
     /**
      * @brief Determine whether an input GraphView can be quantized or not.
diff --git a/src/PTQ/CLE.cpp b/src/PTQ/CLE.cpp
index 0fe9575..d0383eb 100644
--- a/src/PTQ/CLE.cpp
+++ b/src/PTQ/CLE.cpp
@@ -130,17 +130,48 @@ void crossLayerEqualization(std::shared_ptr<GraphView> graphView, double targetD
         {
             std::shared_ptr<Node> n1 = affineNodeVector[i];
             std::shared_ptr<Node> n2 = affineNodeVector[i+1];
+            std::cout << "CLE\n";
+            std::cout << "node name is: " << n1->name() << std::endl;
+            std::cout << "node name is: " << n2->name() << std::endl;
+            std::cout << "node parent name is: " << n1->name() << std::endl;
+            std::cout << "node parent name is: " << n2->name() << std::endl;
+
+            std::shared_ptr<Aidge::Tensor> n1localTensor, n2localTensor;
+            if(n1->getParent(1)->attributes()->hasAttr("isProducerScaling"))
+            {
+                std::static_pointer_cast<OperatorTensor>(n1->getParent(1)->getOperator())->getOutput(0)->print();
+                n1localTensor = std::static_pointer_cast<OperatorTensor>(n1->getParent(1)->getOperator())->getOutput(0);
+            }
+            else
+            {
+                n1localTensor = getWeightTensor(n1);
+            }
+
+            if(n2->getParent(1)->attributes()->hasAttr("isProducerScaling"))
+            {
+                n2localTensor = std::static_pointer_cast<OperatorTensor>(n2->getParent(1)->getOperator())->getOutput(0);
+
+            }
+            else
+            {
+                n2localTensor = getWeightTensor(n2);
+            }
+
+            double r1 = getTensorAbsoluteMax(n1localTensor);
+            double r2 = getTensorAbsoluteMax(n2localTensor);
+            std::cout << "valeur: " << r1 <<std::endl;
+            std::cout << "valeur: " << r2 <<std::endl;
 
-            double r1 = getTensorAbsoluteMax(getWeightTensor(n1));
-            double r2 = getTensorAbsoluteMax(getWeightTensor(n2));
 
             double s1 = std::sqrt(r1 * r2) / r1;
             double s2 = std::sqrt(r1 * r2) / r2;
 
-            rescaleTensor(getWeightTensor(n1), s1);
-            rescaleTensor(getWeightTensor(n2), s2);
-
-            rescaleTensor(getBiasTensor(n1), s1);
+            //rescaleTensor(getWeightTensor(n1), s1);
+            insertScalingBelowProducer(n1->getParent(1),s1,graphView);
+            //rescaleTensor(getWeightTensor(n2), s2);
+            insertScalingBelowProducer(n2->getParent(1),s2,graphView);
+            //rescaleTensor(getBiasTensor(n1), s1);
+            insertScalingBelowProducer(n1->getParent(2),s1,graphView);
 
             double rangeDelta = std::abs(r1 - r2);
             if (rangeDelta > maxRangeDelta)
diff --git a/src/PTQ/PTQ.cpp b/src/PTQ/PTQ.cpp
index a81b2b7..25e5f20 100644
--- a/src/PTQ/PTQ.cpp
+++ b/src/PTQ/PTQ.cpp
@@ -54,6 +54,120 @@ bool isMerging(std::shared_ptr<Node> node)
 {
     return (mergingNodeTypes.find(node->type()) != mergingNodeTypes.end());
 }
+static int getInputIndex(std::shared_ptr<Node> node, std::shared_ptr<Node> parentNode)
+{
+    int index = 0;
+    while (node->getParent(index) != parentNode) 
+        index++;
+    return index;
+}
+
+void multiplyScalingFactor(std::shared_ptr<Aidge::Node> node,double coeff)
+{
+    if(node->type() == "Mul" && node->attributes()->hasAttr("isProducerScaling"))
+    {
+        auto scalingFactorTensor = std::static_pointer_cast<OperatorTensor>(node->getOperator())->getInput(1);
+        std::shared_ptr<Tensor> fallback;
+        const Tensor& localTensor = scalingFactorTensor->refCastFrom(fallback, DataType::Float64, "cpu");
+        double previousScalingFactor = localTensor.get<double>(0);
+        std::shared_ptr<Tensor> finalTensor = std::make_shared<Tensor>(Array1D<double, 1> {previousScalingFactor * coeff});
+        node->input(1).first->getOperator()->setOutput(0, finalTensor);
+    }
+    else
+    {
+        Log::warn(" Cannot update the scaling factor on Node of type {} with no scaling tag", node->type());
+    }
+}
+bool insertRoundBelowProducer(std::shared_ptr<Node> node,std::shared_ptr<GraphView> graphView)
+{
+    std::shared_ptr<Aidge::Node> roundNode = Round(node->name() + "_Round");
+    roundNode->getOperator()->setDataType(DataType::Float64); // getDataType(parentNode)
+    roundNode->getOperator()->setBackend("cpu");
+
+    if (node->getChildren().size() > 0)
+    {
+        // SCALING NODE INSERTION
+        
+        // We always have one output from Affine and Add nodes, but possibly multiple childs
+        std::vector<std::shared_ptr<Node>> nextNodes = node->getChildren(0); 
+
+        // For each node in nextNodes store the connexion index
+        std::vector<int> inputIndices(nextNodes.size());
+        for (std::size_t i = 0; i < nextNodes.size(); i++)
+            inputIndices[i] = getInputIndex(nextNodes[i], node);
+            
+        for (std::shared_ptr<Node> nextNode : nextNodes)
+            node->removeChild(nextNode, 0);
+
+        node->addChild(roundNode, 0, 0);
+
+        for (std::size_t i = 0; i < nextNodes.size(); i++)
+            roundNode->addChild(nextNodes[i], 0, inputIndices[i]);
+            graphView->add(roundNode);
+    }
+    else
+    {
+        Log::warn("Unusual producer ");
+        node->addChild(roundNode, 0, 0);
+        graphView->add(roundNode);
+    }
+    return true;
+}
+bool insertScalingBelowProducer(std::shared_ptr<Node> node,double sf, std::shared_ptr<GraphView> graphView)
+{
+    if(node->attributes()->hasAttr("isProducerScaling"))
+    {
+        multiplyScalingFactor(node,sf);
+        return true;
+    }
+    if(node->type() != "Producer")
+    {
+        Log::warn(" Cannot apply a scaling factor on a node which is not a producer", node->type());
+        return false;
+    }
+    std::string scalingNodeName = makeUniqueName(node->name() + "_ProducerScaling", graphView);
+    
+    std::shared_ptr<Aidge::Node> scalingNode = Mul(scalingNodeName);
+    scalingNode->attributes()->addAttr("isProducerScaling",0.0);
+    
+    std::shared_ptr<Tensor> scalingFactorTensor = std::make_shared<Tensor>(Array1D<double, 1> {sf});
+    std::shared_ptr<Node> scalingFactorProducer = addProducer(scalingNode, 1, {1}, "Factor"); 
+    scalingFactorProducer->getOperator()->setOutput(0, scalingFactorTensor);
+    graphView->add(scalingFactorProducer);
+    
+    scalingNode->getOperator()->setDataType(DataType::Float64); // getDataType(parentNode)
+    scalingNode->getOperator()->setBackend("cpu");
+
+    if (node->getChildren().size() > 0)
+    {
+        // SCALING NODE INSERTION
+        
+        // We always have one output from Affine and Add nodes, but possibly multiple childs
+        std::vector<std::shared_ptr<Node>> nextNodes = node->getChildren(0); 
+
+        // For each node in nextNodes store the connexion index
+        std::vector<int> inputIndices(nextNodes.size());
+        for (std::size_t i = 0; i < nextNodes.size(); i++)
+            inputIndices[i] = getInputIndex(nextNodes[i], node);
+            
+        for (std::shared_ptr<Node> nextNode : nextNodes)
+            node->removeChild(nextNode, 0);
+
+        node->addChild(scalingNode, 0, 0);
+
+        for (std::size_t i = 0; i < nextNodes.size(); i++)
+            scalingNode->addChild(nextNodes[i], 0, inputIndices[i]);
+
+        graphView->add(scalingNode);
+    }
+    else
+    {
+        Log::warn("Unusual producer ");
+        node->addChild(scalingNode, 0, 0);
+        graphView->add(scalingNode);
+    }
+    return true;
+}
 
 bool checkArchitecture(std::shared_ptr<GraphView> graphView)
 {
@@ -167,6 +281,15 @@ static std::vector<std::shared_ptr<Node>> removeMatchingNodes(std::vector<std::s
 
     return remainingNodes;
 }
+static std::vector<std::shared_ptr<Node>> removeProdScalingNodes(std::vector<std::shared_ptr<Node>> nodeVector)
+{
+    std::vector<std::shared_ptr<Node>> remainingNodes;
+    for (std::shared_ptr<Node> node : nodeVector)
+        if (!node->attributes()->hasAttr("isProducerScaling"))
+            remainingNodes.push_back(node);
+
+    return remainingNodes;
+}
 
 static void fixScheduling(std::vector<std::shared_ptr<Node>>& nodeVector) {
 
@@ -211,6 +334,7 @@ std::vector<std::shared_ptr<Node>> retrieveNodeVector(std::shared_ptr<GraphView>
 
     fixScheduling(nodeVector);
     nodeVector = removeMatchingNodes(nodeVector, "Producer");
+    nodeVector = removeProdScalingNodes(nodeVector);
 
     if (verbose) 
     {
@@ -300,13 +424,6 @@ void insertResidualNodes(std::shared_ptr<GraphView> graphView)
     }
 }
 
-static int getInputIndex(std::shared_ptr<Node> node, std::shared_ptr<Node> parentNode)
-{
-    int index = 0;
-    while (node->getParent(index) != parentNode) 
-        index++;
-    return index;
-}
 
 void insertScalingNodes(std::shared_ptr<GraphView> graphView)
 {
@@ -429,7 +546,8 @@ void normalizeParameters(std::shared_ptr<GraphView> graphView)
             std::shared_ptr<Tensor> weightTensor = getWeightTensor(node);
             double scaling = getTensorAbsoluteMax(weightTensor);
             double ratio = 1.0 / scaling;
-            rescaleTensor(weightTensor, ratio);
+            //rescaleTensor(weightTensor, ratio);
+            insertScalingBelowProducer(node->getParent(1),ratio,graphView);
 
             // Accumulate the ratio
             if (node == firstNode)
@@ -447,7 +565,8 @@ void normalizeParameters(std::shared_ptr<GraphView> graphView)
             if (nodeHasBias(node))
             {
                 std::shared_ptr<Tensor> biasTensor = getBiasTensor(node);
-                rescaleTensor(biasTensor, accumulatedRatios[node->name()] );
+                //rescaleTensor(biasTensor, accumulatedRatios[node->name()] );
+                insertScalingBelowProducer(node->getParent(2),accumulatedRatios[node->name()],graphView);
             }
         }
 
@@ -606,7 +725,7 @@ void normalizeActivations(std::shared_ptr<GraphView> graphView, std::map<std::st
     for (std::shared_ptr<Node> node : nodeVector)
     {
         // Seamless scaling factor propagation ...
-    
+
         if (isAffine(node) || isSeamless(node) || node->type() == "ReLU") 
         {
             if (node == firstNode)
@@ -620,11 +739,13 @@ void normalizeActivations(std::shared_ptr<GraphView> graphView, std::map<std::st
             }
         }
 
+
         // Here prevNode is either a 'Affine' or a 'Merging'
         // => do not split the cases, just handle the bias ...
 
         if (node->attributes()->hasAttr("isScaling")) 
         {
+
             // retrieve the previous scaling factor ...
             std::shared_ptr<Node> prevNode = node->getParent(0);
             double prevScalingFactor = scalingFactors[prevNode->name()];
@@ -640,11 +761,13 @@ void normalizeActivations(std::shared_ptr<GraphView> graphView, std::map<std::st
 
             if (isAffine(prevNode))
             {
+
                 bool prevNodeHasBias = nodeHasBias(prevNode);
                 if (prevNodeHasBias)  
-                {
+                {                
                     std::shared_ptr<Tensor> biasTensor = getBiasTensor(prevNode);
-                    rescaleTensor(biasTensor, 1.0 / prevScalingFactor);
+                    //rescaleTensor(biasTensor, 1.0 / prevScalingFactor);
+                    insertScalingBelowProducer(prevNode->getParent(2),1.0 / prevScalingFactor,graphView);
                 }
             }
         }
@@ -842,10 +965,12 @@ void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_
             // Rescale the weight tensor
 
             std::shared_ptr<Tensor> weightTensor = getWeightTensor(node);
-            rescaleTensor(weightTensor, signedMax);
+            //rescaleTensor(weightTensor, signedMax);
+            insertScalingBelowProducer(node->getParent(1),signedMax,graphView);
 
             if (!noQuant)
-                roundTensor(weightTensor);
+                insertRoundBelowProducer(node->getParent(1),graphView);
+                //roundTensor(weightTensor);
 
             // Rescale the bias tensor
 
@@ -856,10 +981,12 @@ void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_
                 
 
                 std::shared_ptr<Tensor> biasTensor = getBiasTensor(node);
-                rescaleTensor(biasTensor, rescaling);
+                //rescaleTensor(biasTensor, rescaling);
+                insertScalingBelowProducer(node->getParent(2),rescaling,graphView);
 
                 if (!noQuant)
-                    roundTensor(biasTensor);
+                    insertRoundBelowProducer(node->getParent(2),graphView);
+                    //roundTensor(biasTensor);
             }
 
             // Compensate the rescaling using the next Scaling node
@@ -997,17 +1124,20 @@ void performSingleShiftApproximation(std::shared_ptr<GraphView> graphView, bool
 
             double ratio = base / approx;
 
-            std::shared_ptr<Tensor> weightTensor = getWeightTensor(node);
-            rescaleTensor(weightTensor, ratio);
+            //std::shared_ptr<Tensor> weightTensor = getWeightTensor(node);
+            //rescaleTensor(weightTensor, ratio);
+            insertScalingBelowProducer(node->getParent(1),ratio,graphView);
             if (!noQuant)
-                roundTensor(weightTensor);
+                insertRoundBelowProducer(node->getParent(1),graphView);
 
             if (nodeHasBias(node))
             {
-                std::shared_ptr<Tensor> biasTensor = getBiasTensor(node);
-                rescaleTensor(biasTensor, ratio);
+                //std::shared_ptr<Tensor> biasTensor = getBiasTensor(node);
+                //rescaleTensor(biasTensor, ratio);
+                insertScalingBelowProducer(node->getParent(2),ratio,graphView);
+
                 if (!noQuant)
-                    roundTensor(biasTensor);
+                    insertRoundBelowProducer(node->getParent(2),graphView);
             }
         }
     }
@@ -1084,7 +1214,6 @@ void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits,
 
     //Log:debug("=== RANGES (AFTER ADJUST) ===");
     //printRanges(graphView, valueRanges);
-
     Log::info(" Normalizing the activations ...");
     normalizeActivations(graphView, valueRanges);
 
@@ -1143,7 +1272,8 @@ void clearBiases(std::shared_ptr<GraphView> graphView)
     for (std::shared_ptr<Node> node : graphView->getNodes()) {
         if (node->type() == "FC" || node->type() == "Conv2D") {
             std::shared_ptr<Tensor> biasTensor = std::static_pointer_cast<OperatorTensor>(node->getOperator())->getInput(2);
-            rescaleTensor(biasTensor, 0);
+            //rescaleTensor(biasTensor, 0);
+            insertScalingBelowProducer(node->getParent(2),0,graphView);
         }
     }
 }
-- 
GitLab


From cf51e87cbaf34ea4372a2cfdf64ce9d32b3bfc28 Mon Sep 17 00:00:00 2001
From: Noam ZERAH <noam.zerah@cea.fr>
Date: Mon, 20 Jan 2025 14:22:55 +0000
Subject: [PATCH 11/26] Correcting the Single Shift Approximation error with
 the new method for updating weight and bias

---
 include/aidge/quantization_version.h |  2 +-
 src/PTQ/PTQ.cpp                      | 13 ++++++++++++-
 src/operator/PTQMetaOps.cpp          | 16 ++++++++--------
 3 files changed, 21 insertions(+), 10 deletions(-)

diff --git a/include/aidge/quantization_version.h b/include/aidge/quantization_version.h
index 429e4bd..37853e3 100644
--- a/include/aidge/quantization_version.h
+++ b/include/aidge/quantization_version.h
@@ -6,6 +6,6 @@ static constexpr const int PROJECT_VERSION_MAJOR = 0;
 static constexpr const int PROJECT_VERSION_MINOR = 3;
 static constexpr const int PROJECT_VERSION_PATCH = 0;
 static constexpr const char * PROJECT_VERSION = "0.3.0";
-static constexpr const char * PROJECT_GIT_HASH = "e464870";
+static constexpr const char * PROJECT_GIT_HASH = "03286c7";
 }
 #endif // VERSION_H
diff --git a/src/PTQ/PTQ.cpp b/src/PTQ/PTQ.cpp
index 25e5f20..fe2aef4 100644
--- a/src/PTQ/PTQ.cpp
+++ b/src/PTQ/PTQ.cpp
@@ -111,18 +111,27 @@ bool insertRoundBelowProducer(std::shared_ptr<Node> node,std::shared_ptr<GraphVi
         node->addChild(roundNode, 0, 0);
         graphView->add(roundNode);
     }
+    roundNode->attributes()->addAttr("isProducerRounding",0.0);
     return true;
 }
 bool insertScalingBelowProducer(std::shared_ptr<Node> node,double sf, std::shared_ptr<GraphView> graphView)
 {
+    if(node->attributes()->hasAttr("isProducerRounding"))
+    {
+        //In this case we 'bump' the node to the one above him (an actual ProducerScaling)
+        // because the round node is not usable (only used when SSA is enabled)
+        node = node->getParent(0);
+    }
     if(node->attributes()->hasAttr("isProducerScaling"))
     {
+        // We accumulate the multiples scaling factors by multiplying the SF of the ProducerScaling node 
+        // (adding new nodes each time would make the graph unusable)
         multiplyScalingFactor(node,sf);
         return true;
     }
     if(node->type() != "Producer")
     {
-        Log::warn(" Cannot apply a scaling factor on a node which is not a producer", node->type());
+        Log::warn(" Cannot apply a scaling factor on a node which is not a producer on a node of type {} whose name is {}", node->type(),node->name());
         return false;
     }
     std::string scalingNodeName = makeUniqueName(node->name() + "_ProducerScaling", graphView);
@@ -1126,6 +1135,7 @@ void performSingleShiftApproximation(std::shared_ptr<GraphView> graphView, bool
 
             //std::shared_ptr<Tensor> weightTensor = getWeightTensor(node);
             //rescaleTensor(weightTensor, ratio);
+            Log::warn("A\n");
             insertScalingBelowProducer(node->getParent(1),ratio,graphView);
             if (!noQuant)
                 insertRoundBelowProducer(node->getParent(1),graphView);
@@ -1134,6 +1144,7 @@ void performSingleShiftApproximation(std::shared_ptr<GraphView> graphView, bool
             {
                 //std::shared_ptr<Tensor> biasTensor = getBiasTensor(node);
                 //rescaleTensor(biasTensor, ratio);
+                Log::warn("B\n");
                 insertScalingBelowProducer(node->getParent(2),ratio,graphView);
 
                 if (!noQuant)
diff --git a/src/operator/PTQMetaOps.cpp b/src/operator/PTQMetaOps.cpp
index facfed2..105d4e8 100644
--- a/src/operator/PTQMetaOps.cpp
+++ b/src/operator/PTQMetaOps.cpp
@@ -75,8 +75,8 @@ static std::shared_ptr<Node> getSubNode(std::shared_ptr<GraphView> graphView, st
 
 void updateScalingFactor(std::shared_ptr<Node> metaOpNode, double scalingFactor)
 {
-    if(metaOpNode->type() != "Quantizer")
-        Log::warn(" Cannot update the scaling factor on Node of type {}", metaOpNode->type());
+    if(metaOpNode->type() != "Scaling" && metaOpNode->type() != "Quantizer")
+        Log::warn("Cannot update the scaling factor on Node of type {}", metaOpNode->type());
 
     std::shared_ptr<Tensor> scalingFactorTensor = std::make_shared<Tensor>(Array1D<double, 1> {scalingFactor});
 
@@ -85,15 +85,15 @@ void updateScalingFactor(std::shared_ptr<Node> metaOpNode, double scalingFactor)
     std::shared_ptr<Node> mulNode = getSubNode(metaOp->getMicroGraph(), "Mul");
 
     if (!mulNode)
-        Log::warn(" Invalid PTQ MetaOperator, no Mul node found inside ! ");
+        Log::warn("Invalid PTQ MetaOperator, no Mul node found inside ! ");
 
     mulNode->input(1).first->getOperator()->setOutput(0, scalingFactorTensor);
 }
 
 double getScalingFactor(std::shared_ptr<Node> MetaOpNode)
 {
-    if (MetaOpNode->type() != "Quantizer") {
-        Log::warn(" Cannot get the scaling factor on Node of type {}", MetaOpNode->type());
+    if (MetaOpNode->type() != "Scaling" && MetaOpNode->type() != "Quantizer") {
+        Log::warn("Cannot get the scaling factor on Node of type {}", MetaOpNode->type());
         return 0;
     }
 
@@ -102,7 +102,7 @@ double getScalingFactor(std::shared_ptr<Node> MetaOpNode)
     std::shared_ptr<Node> mulNode = getSubNode(metaOp->getMicroGraph(), "Mul");
 
     if (!mulNode) {
-        Log::warn(" Invalid PTQ MetaOperator, no Mul found inside node of type {}", MetaOpNode->type());
+        Log::warn("Invalid PTQ MetaOperator, no Mul found inside node of type {}", MetaOpNode->type());
         return 0;
     }
 
@@ -117,7 +117,7 @@ double getScalingFactor(std::shared_ptr<Node> MetaOpNode)
 void setClipRange(std::shared_ptr<Node> quantizerNode, double min, double max)
 {
     if (quantizerNode->type() != "Quantizer") {
-        Log::warn(" Cannot set the clipping range on Node of type {}", quantizerNode->type());
+        Log::warn("Cannot set the clipping range on Node of type {}", quantizerNode->type());
         return;
     }
 
@@ -126,7 +126,7 @@ void setClipRange(std::shared_ptr<Node> quantizerNode, double min, double max)
     std::shared_ptr<Node> clipNode = getSubNode(metaOp->getMicroGraph(), "Clip");
 
     if (!clipNode) {
-        Log::warn(" Invalid PTQ MetaOperator, no Clip found inside node of type {}", quantizerNode->type());
+        Log::warn("Invalid PTQ MetaOperator, no Clip found inside node of type {}", quantizerNode->type());
         return;
     }
 
-- 
GitLab


From a749505df0e1632345a5ddfb2fd6f38436ab9f83 Mon Sep 17 00:00:00 2001
From: Noam ZERAH <noam.zerah@cea.fr>
Date: Tue, 21 Jan 2025 14:15:26 +0000
Subject: [PATCH 12/26] Fixing bug related to the lower result in
 resnet (switching the network to float64 solved it)

---
 include/aidge/quantization_version.h |  2 +-
 src/PTQ/CLE.cpp                      |  1 +
 src/PTQ/PTQ.cpp                      | 22 +---------------------
 3 files changed, 3 insertions(+), 22 deletions(-)

diff --git a/include/aidge/quantization_version.h b/include/aidge/quantization_version.h
index 37853e3..2e53dfc 100644
--- a/include/aidge/quantization_version.h
+++ b/include/aidge/quantization_version.h
@@ -6,6 +6,6 @@ static constexpr const int PROJECT_VERSION_MAJOR = 0;
 static constexpr const int PROJECT_VERSION_MINOR = 3;
 static constexpr const int PROJECT_VERSION_PATCH = 0;
 static constexpr const char * PROJECT_VERSION = "0.3.0";
-static constexpr const char * PROJECT_GIT_HASH = "03286c7";
+static constexpr const char * PROJECT_GIT_HASH = "01880af";
 }
 #endif // VERSION_H
diff --git a/src/PTQ/CLE.cpp b/src/PTQ/CLE.cpp
index d0383eb..d47a2c2 100644
--- a/src/PTQ/CLE.cpp
+++ b/src/PTQ/CLE.cpp
@@ -14,6 +14,7 @@
 #include "aidge/quantization/PTQ/PTQ.hpp"
 
 #include "aidge/graph/GraphView.hpp"
+
 #include "aidge/scheduler/SequentialScheduler.hpp"
 #include "aidge/scheduler/Scheduler.hpp"
 #include "aidge/utils/Log.hpp"
diff --git a/src/PTQ/PTQ.cpp b/src/PTQ/PTQ.cpp
index fe2aef4..60326e8 100644
--- a/src/PTQ/PTQ.cpp
+++ b/src/PTQ/PTQ.cpp
@@ -363,7 +363,6 @@ static std::shared_ptr<Node> getFirstNode(std::shared_ptr<GraphView> graphView)
 void prepareNetwork(std::shared_ptr<GraphView> graphView)
 {
     removeFlatten(graphView);
-
     sanitizeNodeNames(graphView);
 
     bool containsBatchNorm = false;
@@ -972,30 +971,23 @@ void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_
         if (isAffine(node))
         {
             // Rescale the weight tensor
-
             std::shared_ptr<Tensor> weightTensor = getWeightTensor(node);
-            //rescaleTensor(weightTensor, signedMax);
             insertScalingBelowProducer(node->getParent(1),signedMax,graphView);
 
             if (!noQuant)
                 insertRoundBelowProducer(node->getParent(1),graphView);
-                //roundTensor(weightTensor);
 
             // Rescale the bias tensor
-
             if (nodeHasBias(node))  
             {
                 bool inputIsUnsigned = signMap[node->name()].first;
                 double rescaling = inputIsUnsigned ? unsignedMax * signedMax : signedMax * signedMax;
-                
-
+            
                 std::shared_ptr<Tensor> biasTensor = getBiasTensor(node);
-                //rescaleTensor(biasTensor, rescaling);
                 insertScalingBelowProducer(node->getParent(2),rescaling,graphView);
 
                 if (!noQuant)
                     insertRoundBelowProducer(node->getParent(2),graphView);
-                    //roundTensor(biasTensor);
             }
 
             // Compensate the rescaling using the next Scaling node
@@ -1133,18 +1125,12 @@ void performSingleShiftApproximation(std::shared_ptr<GraphView> graphView, bool
 
             double ratio = base / approx;
 
-            //std::shared_ptr<Tensor> weightTensor = getWeightTensor(node);
-            //rescaleTensor(weightTensor, ratio);
-            Log::warn("A\n");
             insertScalingBelowProducer(node->getParent(1),ratio,graphView);
             if (!noQuant)
                 insertRoundBelowProducer(node->getParent(1),graphView);
 
             if (nodeHasBias(node))
             {
-                //std::shared_ptr<Tensor> biasTensor = getBiasTensor(node);
-                //rescaleTensor(biasTensor, ratio);
-                Log::warn("B\n");
                 insertScalingBelowProducer(node->getParent(2),ratio,graphView);
 
                 if (!noQuant)
@@ -1289,10 +1275,4 @@ void clearBiases(std::shared_ptr<GraphView> graphView)
     }
 }
 
-void devPTQ(std::shared_ptr<GraphView> graphView) 
-{
-    for (std::shared_ptr<Node> node : graphView->getNodes())
-        fmt::println(" UUU : {}", node->name());
-}
-
 }
-- 
GitLab


From 5ec65431e486d00adb4ca7ac432786a0b7467858 Mon Sep 17 00:00:00 2001
From: Noam ZERAH <noam.zerah@cea.fr>
Date: Wed, 22 Jan 2025 10:27:01 +0000
Subject: [PATCH 13/26] Rebasing on dev

---
 include/aidge/quantization_version.h |  2 +-
 src/PTQ/CLE.cpp                      |  9 +--------
 src/PTQ/PTQ.cpp                      | 22 +---------------------
 3 files changed, 3 insertions(+), 30 deletions(-)

diff --git a/include/aidge/quantization_version.h b/include/aidge/quantization_version.h
index 2e53dfc..5a7e98b 100644
--- a/include/aidge/quantization_version.h
+++ b/include/aidge/quantization_version.h
@@ -6,6 +6,6 @@ static constexpr const int PROJECT_VERSION_MAJOR = 0;
 static constexpr const int PROJECT_VERSION_MINOR = 3;
 static constexpr const int PROJECT_VERSION_PATCH = 0;
 static constexpr const char * PROJECT_VERSION = "0.3.0";
-static constexpr const char * PROJECT_GIT_HASH = "01880af";
+static constexpr const char * PROJECT_GIT_HASH = "a749505";
 }
 #endif // VERSION_H
diff --git a/src/PTQ/CLE.cpp b/src/PTQ/CLE.cpp
index d47a2c2..52e4ec0 100644
--- a/src/PTQ/CLE.cpp
+++ b/src/PTQ/CLE.cpp
@@ -45,7 +45,7 @@ static void rescaleTensor(std::shared_ptr<Tensor> tensor, double scaling)
     mulOp.setDataType(tensor->dataType());
     mulOp.setBackend(tensor->backend());
 
-    std::shared_ptr<Aidge::Tensor> scalingTensor = std::make_shared<Aidge::Tensor>(Aidge::Array1D<float, 1> {scaling});
+    std::shared_ptr<Aidge::Tensor> scalingTensor = std::make_shared<Aidge::Tensor>(Aidge::Array1D<double, 1> {scaling});
     scalingTensor->setDataType(tensor->dataType());
     scalingTensor->setBackend(tensor->backend());
 
@@ -131,11 +131,6 @@ void crossLayerEqualization(std::shared_ptr<GraphView> graphView, double targetD
         {
             std::shared_ptr<Node> n1 = affineNodeVector[i];
             std::shared_ptr<Node> n2 = affineNodeVector[i+1];
-            std::cout << "CLE\n";
-            std::cout << "node name is: " << n1->name() << std::endl;
-            std::cout << "node name is: " << n2->name() << std::endl;
-            std::cout << "node parent name is: " << n1->name() << std::endl;
-            std::cout << "node parent name is: " << n2->name() << std::endl;
 
             std::shared_ptr<Aidge::Tensor> n1localTensor, n2localTensor;
             if(n1->getParent(1)->attributes()->hasAttr("isProducerScaling"))
@@ -160,8 +155,6 @@ void crossLayerEqualization(std::shared_ptr<GraphView> graphView, double targetD
 
             double r1 = getTensorAbsoluteMax(n1localTensor);
             double r2 = getTensorAbsoluteMax(n2localTensor);
-            std::cout << "valeur: " << r1 <<std::endl;
-            std::cout << "valeur: " << r2 <<std::endl;
 
 
             double s1 = std::sqrt(r1 * r2) / r1;
diff --git a/src/PTQ/PTQ.cpp b/src/PTQ/PTQ.cpp
index 60326e8..108be02 100644
--- a/src/PTQ/PTQ.cpp
+++ b/src/PTQ/PTQ.cpp
@@ -26,11 +26,8 @@
 #include "aidge/operator/ReLU.hpp"
 #include "aidge/operator/BatchNorm.hpp"
 #include "aidge/operator/Conv.hpp"
-
 #include "aidge/operator/ArgMax.hpp"
-#include "aidge/operator/Abs.hpp"
 #include "aidge/operator/Reshape.hpp"
-#include "aidge/operator/Round.hpp"
 
 
 #include "aidge/recipes/Recipes.hpp"
@@ -64,7 +61,7 @@ static int getInputIndex(std::shared_ptr<Node> node, std::shared_ptr<Node> paren
 
 void multiplyScalingFactor(std::shared_ptr<Aidge::Node> node,double coeff)
 {
-    if(node->type() == "Mul" && node->attributes()->hasAttr("isProducerScaling"))
+    if(node->type() == "Mul" && (node->attributes()->hasAttr("isProducerScaling") || node->attributes()->hasAttr("isScaling")))
     {
         auto scalingFactorTensor = std::static_pointer_cast<OperatorTensor>(node->getOperator())->getInput(1);
         std::shared_ptr<Tensor> fallback;
@@ -194,23 +191,6 @@ bool checkArchitecture(std::shared_ptr<GraphView> graphView)
     return true;
 }
 
-void multiplyScalingFactor(std::shared_ptr<Aidge::Node> node,double coeff)
-{
-    if(node->type() == "Mul" && node->attributes()->hasAttr("isScaling"))
-    {
-        auto scalingFactorTensor = std::static_pointer_cast<OperatorTensor>(node->getOperator())->getInput(1);
-        std::shared_ptr<Tensor> fallback;
-        const Tensor& localTensor = scalingFactorTensor->refCastFrom(fallback, DataType::Float64, "cpu");
-        double previousScalingFactor = localTensor.get<double>(0);
-        std::shared_ptr<Tensor> finalTensor = std::make_shared<Tensor>(Array1D<double, 1> {previousScalingFactor * coeff});
-        node->input(1).first->getOperator()->setOutput(0, finalTensor);
-    }
-    else
-    {
-        Log::warn(" Cannot update the scaling factor on Node of type {} with no scaling tag", node->type());
-    }
-}
-
 static void rescaleTensor(std::shared_ptr<Tensor> tensor, float scaling)
 {
     auto mulOp = Mul_Op();
-- 
GitLab


From c374ce49cd3a60cab4521c1fb4b10abc8d1e6f43 Mon Sep 17 00:00:00 2001
From: Noam ZERAH <noam.zerah@cea.fr>
Date: Wed, 22 Jan 2025 13:06:51 +0000
Subject: [PATCH 14/26] Replacing Log::warn with AIDGE_ASSERT to make the code
 safer

---
 include/aidge/quantization/PTQ/PTQ.hpp |  21 ++++-
 include/aidge/quantization_version.h   |   2 +-
 src/PTQ/PTQ.cpp                        | 110 +++++++++----------------
 3 files changed, 61 insertions(+), 72 deletions(-)

diff --git a/include/aidge/quantization/PTQ/PTQ.hpp b/include/aidge/quantization/PTQ/PTQ.hpp
index 74a49c8..e1ef529 100644
--- a/include/aidge/quantization/PTQ/PTQ.hpp
+++ b/include/aidge/quantization/PTQ/PTQ.hpp
@@ -66,7 +66,26 @@ namespace Aidge {
      * @return The scheduled vector of nodes
      */
     std::vector<std::shared_ptr<Node>> retrieveNodeVector(std::shared_ptr<GraphView> graphView, bool newSchedule = true, bool verbose = false);
-    bool insertScalingBelowProducer(std::shared_ptr<Node> node,double sf, std::shared_ptr<GraphView> graphView);
+    
+    /**
+     * @brief Inserts a scaling node below the given producer node in the graph view. 
+     *        If the node is already a producer scaling node, it accumulates the scaling factor by multiplyins its value directly.
+     *
+     * @param node A shared pointer to the producer node where the scaling node will be inserted (below).
+     * @param scalingFactor The scaling factor to apply.
+     * @param graphView A shared pointer to the graph view in which the nodes are located.
+     * @return True if the scaling node was successfully inserted or the scaling factor was accumulated; False otherwise.
+     */
+    bool insertScalingBelowProducer(std::shared_ptr<Node> node, double scalingFactor, std::shared_ptr<GraphView> graphView);
+
+    /**
+     * @brief Inserts a rounding node below the given producer (also below its ows producerScaling) node in the graph view. 
+     *
+     * @param node A shared pointer to the producer node where the rounding node will be inserted.
+     * @param graphView A shared pointer to the graph view in which the nodes are located.
+     * @return True if the rounding node was successfully inserted; False otherwise.
+     */
+    bool insertRoundBelowProducer(std::shared_ptr<Node> node, std::shared_ptr<GraphView> graphView);
 
     /**
      * @brief Determine whether an input GraphView can be quantized or not.
diff --git a/include/aidge/quantization_version.h b/include/aidge/quantization_version.h
index 5a7e98b..9b4e3de 100644
--- a/include/aidge/quantization_version.h
+++ b/include/aidge/quantization_version.h
@@ -6,6 +6,6 @@ static constexpr const int PROJECT_VERSION_MAJOR = 0;
 static constexpr const int PROJECT_VERSION_MINOR = 3;
 static constexpr const int PROJECT_VERSION_PATCH = 0;
 static constexpr const char * PROJECT_VERSION = "0.3.0";
-static constexpr const char * PROJECT_GIT_HASH = "a749505";
+static constexpr const char * PROJECT_GIT_HASH = "5ec6543";
 }
 #endif // VERSION_H
diff --git a/src/PTQ/PTQ.cpp b/src/PTQ/PTQ.cpp
index 108be02..bda0ae1 100644
--- a/src/PTQ/PTQ.cpp
+++ b/src/PTQ/PTQ.cpp
@@ -61,57 +61,39 @@ static int getInputIndex(std::shared_ptr<Node> node, std::shared_ptr<Node> paren
 
 void multiplyScalingFactor(std::shared_ptr<Aidge::Node> node,double coeff)
 {
-    if(node->type() == "Mul" && (node->attributes()->hasAttr("isProducerScaling") || node->attributes()->hasAttr("isScaling")))
-    {
-        auto scalingFactorTensor = std::static_pointer_cast<OperatorTensor>(node->getOperator())->getInput(1);
-        std::shared_ptr<Tensor> fallback;
-        const Tensor& localTensor = scalingFactorTensor->refCastFrom(fallback, DataType::Float64, "cpu");
-        double previousScalingFactor = localTensor.get<double>(0);
-        std::shared_ptr<Tensor> finalTensor = std::make_shared<Tensor>(Array1D<double, 1> {previousScalingFactor * coeff});
-        node->input(1).first->getOperator()->setOutput(0, finalTensor);
-    }
-    else
-    {
-        Log::warn(" Cannot update the scaling factor on Node of type {} with no scaling tag", node->type());
-    }
+    AIDGE_ASSERT(node->type() == "Mul" && (node->attributes()->hasAttr("isProducerScaling") || node->attributes()->hasAttr("isScaling")),
+    "Cannot update the scaling factor on Node of type {} with no scaling tag",node->type());
+    auto scalingFactorTensor = std::static_pointer_cast<OperatorTensor>(node->getOperator())->getInput(1);
+    std::shared_ptr<Tensor> fallback;
+    const Tensor& localTensor = scalingFactorTensor->refCastFrom(fallback, DataType::Float64, "cpu");
+    double previousScalingFactor = localTensor.get<double>(0);
+    std::shared_ptr<Tensor> finalTensor = std::make_shared<Tensor>(Array1D<double, 1> {previousScalingFactor * coeff});
+    node->input(1).first->getOperator()->setOutput(0, finalTensor);
 }
 bool insertRoundBelowProducer(std::shared_ptr<Node> node,std::shared_ptr<GraphView> graphView)
 {
     std::shared_ptr<Aidge::Node> roundNode = Round(node->name() + "_Round");
     roundNode->getOperator()->setDataType(DataType::Float64); // getDataType(parentNode)
     roundNode->getOperator()->setBackend("cpu");
-
-    if (node->getChildren().size() > 0)
-    {
-        // SCALING NODE INSERTION
+    AIDGE_ASSERT(node->getChildren().size() > 0, "The producer node must have at least one child to insert a scaling node.");    
+    std::vector<std::shared_ptr<Node>> nextNodes = node->getChildren(0); 
+    std::vector<int> inputIndices(nextNodes.size());
+    for (std::size_t i = 0; i < nextNodes.size(); i++)
+        inputIndices[i] = getInputIndex(nextNodes[i], node);
         
-        // We always have one output from Affine and Add nodes, but possibly multiple childs
-        std::vector<std::shared_ptr<Node>> nextNodes = node->getChildren(0); 
+    for (std::shared_ptr<Node> nextNode : nextNodes)
+        node->removeChild(nextNode, 0);
 
-        // For each node in nextNodes store the connexion index
-        std::vector<int> inputIndices(nextNodes.size());
-        for (std::size_t i = 0; i < nextNodes.size(); i++)
-            inputIndices[i] = getInputIndex(nextNodes[i], node);
-            
-        for (std::shared_ptr<Node> nextNode : nextNodes)
-            node->removeChild(nextNode, 0);
-
-        node->addChild(roundNode, 0, 0);
+    node->addChild(roundNode, 0, 0);
 
-        for (std::size_t i = 0; i < nextNodes.size(); i++)
-            roundNode->addChild(nextNodes[i], 0, inputIndices[i]);
-            graphView->add(roundNode);
-    }
-    else
-    {
-        Log::warn("Unusual producer ");
-        node->addChild(roundNode, 0, 0);
+    for (std::size_t i = 0; i < nextNodes.size(); i++)
+        roundNode->addChild(nextNodes[i], 0, inputIndices[i]);
         graphView->add(roundNode);
-    }
+
     roundNode->attributes()->addAttr("isProducerRounding",0.0);
     return true;
 }
-bool insertScalingBelowProducer(std::shared_ptr<Node> node,double sf, std::shared_ptr<GraphView> graphView)
+bool insertScalingBelowProducer(std::shared_ptr<Node> node,double scalingFactor, std::shared_ptr<GraphView> graphView)
 {
     if(node->attributes()->hasAttr("isProducerRounding"))
     {
@@ -123,55 +105,39 @@ bool insertScalingBelowProducer(std::shared_ptr<Node> node,double sf, std::share
     {
         // We accumulate the multiples scaling factors by multiplying the SF of the ProducerScaling node 
         // (adding new nodes each time would make the graph unusable)
-        multiplyScalingFactor(node,sf);
+        multiplyScalingFactor(node,scalingFactor);
         return true;
     }
-    if(node->type() != "Producer")
-    {
-        Log::warn(" Cannot apply a scaling factor on a node which is not a producer on a node of type {} whose name is {}", node->type(),node->name());
-        return false;
-    }
+    AIDGE_ASSERT(node->type() == "Producer","Cannot apply a scaling factor on node of type: {} which is not a producer", node->type());
     std::string scalingNodeName = makeUniqueName(node->name() + "_ProducerScaling", graphView);
     
     std::shared_ptr<Aidge::Node> scalingNode = Mul(scalingNodeName);
     scalingNode->attributes()->addAttr("isProducerScaling",0.0);
     
-    std::shared_ptr<Tensor> scalingFactorTensor = std::make_shared<Tensor>(Array1D<double, 1> {sf});
+    std::shared_ptr<Tensor> scalingFactorTensor = std::make_shared<Tensor>(Array1D<double, 1> {scalingFactor});
     std::shared_ptr<Node> scalingFactorProducer = addProducer(scalingNode, 1, {1}, "Factor"); 
     scalingFactorProducer->getOperator()->setOutput(0, scalingFactorTensor);
     graphView->add(scalingFactorProducer);
     
     scalingNode->getOperator()->setDataType(DataType::Float64); // getDataType(parentNode)
     scalingNode->getOperator()->setBackend("cpu");
+    AIDGE_ASSERT(node->getChildren().size() > 0, "The producer node must have at least one child to insert a scaling node.");
+    std::vector<std::shared_ptr<Node>> nextNodes = node->getChildren(0); 
 
-    if (node->getChildren().size() > 0)
-    {
-        // SCALING NODE INSERTION
+    // For each node in nextNodes store the connexion index
+    std::vector<int> inputIndices(nextNodes.size());
+    for (std::size_t i = 0; i < nextNodes.size(); i++)
+        inputIndices[i] = getInputIndex(nextNodes[i], node);
         
-        // We always have one output from Affine and Add nodes, but possibly multiple childs
-        std::vector<std::shared_ptr<Node>> nextNodes = node->getChildren(0); 
+    for (std::shared_ptr<Node> nextNode : nextNodes)
+        node->removeChild(nextNode, 0);
 
-        // For each node in nextNodes store the connexion index
-        std::vector<int> inputIndices(nextNodes.size());
-        for (std::size_t i = 0; i < nextNodes.size(); i++)
-            inputIndices[i] = getInputIndex(nextNodes[i], node);
-            
-        for (std::shared_ptr<Node> nextNode : nextNodes)
-            node->removeChild(nextNode, 0);
+    node->addChild(scalingNode, 0, 0);
 
-        node->addChild(scalingNode, 0, 0);
+    for (std::size_t i = 0; i < nextNodes.size(); i++)
+        scalingNode->addChild(nextNodes[i], 0, inputIndices[i]);
 
-        for (std::size_t i = 0; i < nextNodes.size(); i++)
-            scalingNode->addChild(nextNodes[i], 0, inputIndices[i]);
-
-        graphView->add(scalingNode);
-    }
-    else
-    {
-        Log::warn("Unusual producer ");
-        node->addChild(scalingNode, 0, 0);
-        graphView->add(scalingNode);
-    }
+    graphView->add(scalingNode);
     return true;
 }
 
@@ -1254,5 +1220,9 @@ void clearBiases(std::shared_ptr<GraphView> graphView)
         }
     }
 }
-
+void devPTQ(std::shared_ptr<GraphView> graphView) 
+{
+    for (std::shared_ptr<Node> node : graphView->getNodes())
+        Log::debug(" UUU : {}", node->name());
+}
 }
-- 
GitLab


From f255dcad5d34ec2f62eacd52301c7b9377ef4b29 Mon Sep 17 00:00:00 2001
From: Noam ZERAH <noam.zerah@cea.fr>
Date: Wed, 22 Jan 2025 14:36:02 +0000
Subject: [PATCH 15/26] Changing the CLE to fit with the new method of
 ProducerScaling

---
 include/aidge/quantization_version.h |  2 +-
 src/PTQ/CLE.cpp                      | 38 ++++++++++------------------
 2 files changed, 14 insertions(+), 26 deletions(-)

diff --git a/include/aidge/quantization_version.h b/include/aidge/quantization_version.h
index 9b4e3de..eba0eab 100644
--- a/include/aidge/quantization_version.h
+++ b/include/aidge/quantization_version.h
@@ -6,6 +6,6 @@ static constexpr const int PROJECT_VERSION_MAJOR = 0;
 static constexpr const int PROJECT_VERSION_MINOR = 3;
 static constexpr const int PROJECT_VERSION_PATCH = 0;
 static constexpr const char * PROJECT_VERSION = "0.3.0";
-static constexpr const char * PROJECT_GIT_HASH = "5ec6543";
+static constexpr const char * PROJECT_GIT_HASH = "c374ce4";
 }
 #endif // VERSION_H
diff --git a/src/PTQ/CLE.cpp b/src/PTQ/CLE.cpp
index 52e4ec0..eb5ca7a 100644
--- a/src/PTQ/CLE.cpp
+++ b/src/PTQ/CLE.cpp
@@ -94,6 +94,16 @@ static double getTensorAbsoluteMax(std::shared_ptr<Tensor> tensor)
 
     return flatTensor->get<double>(maxIndex);
 }
+//Function used to extraxt the local tensor (from a ProducerScalingNode)
+std::shared_ptr<Aidge::Tensor> getLocalTensor(std::shared_ptr<Node> node) {
+    if (node->getParent(1)->attributes()->hasAttr("isProducerScaling")) {
+        std::shared_ptr<Aidge::OperatorTensor> operatorTensor = std::static_pointer_cast<OperatorTensor>(node->getParent(1)->getOperator());
+        operatorTensor->forward();// We need the forward pass to compute the scaled value of the Tensor
+        return operatorTensor->getOutput(0);
+    } else {
+        return getWeightTensor(node);
+    }
+}
 
 void crossLayerEqualization(std::shared_ptr<GraphView> graphView, double targetDelta)
 {
@@ -132,39 +142,17 @@ void crossLayerEqualization(std::shared_ptr<GraphView> graphView, double targetD
             std::shared_ptr<Node> n1 = affineNodeVector[i];
             std::shared_ptr<Node> n2 = affineNodeVector[i+1];
 
-            std::shared_ptr<Aidge::Tensor> n1localTensor, n2localTensor;
-            if(n1->getParent(1)->attributes()->hasAttr("isProducerScaling"))
-            {
-                std::static_pointer_cast<OperatorTensor>(n1->getParent(1)->getOperator())->getOutput(0)->print();
-                n1localTensor = std::static_pointer_cast<OperatorTensor>(n1->getParent(1)->getOperator())->getOutput(0);
-            }
-            else
-            {
-                n1localTensor = getWeightTensor(n1);
-            }
-
-            if(n2->getParent(1)->attributes()->hasAttr("isProducerScaling"))
-            {
-                n2localTensor = std::static_pointer_cast<OperatorTensor>(n2->getParent(1)->getOperator())->getOutput(0);
-
-            }
-            else
-            {
-                n2localTensor = getWeightTensor(n2);
-            }
-
+            std::shared_ptr<Aidge::Tensor> n1localTensor = getLocalTensor(n1);
+            std::shared_ptr<Aidge::Tensor> n2localTensor = getLocalTensor(n2);
+            
             double r1 = getTensorAbsoluteMax(n1localTensor);
             double r2 = getTensorAbsoluteMax(n2localTensor);
 
-
             double s1 = std::sqrt(r1 * r2) / r1;
             double s2 = std::sqrt(r1 * r2) / r2;
 
-            //rescaleTensor(getWeightTensor(n1), s1);
             insertScalingBelowProducer(n1->getParent(1),s1,graphView);
-            //rescaleTensor(getWeightTensor(n2), s2);
             insertScalingBelowProducer(n2->getParent(1),s2,graphView);
-            //rescaleTensor(getBiasTensor(n1), s1);
             insertScalingBelowProducer(n1->getParent(2),s1,graphView);
 
             double rangeDelta = std::abs(r1 - r2);
-- 
GitLab


From f0f9e607707966ca796dfcb4636a1354dc5568c0 Mon Sep 17 00:00:00 2001
From: Noam ZERAH <noam.zerah@cea.fr>
Date: Thu, 23 Jan 2025 11:37:34 +0000
Subject: [PATCH 16/26] Minor refactoring of PTQ.cpp (deleting deprecated
 functions)

---
 src/PTQ/PTQ.cpp | 34 +---------------------------------
 1 file changed, 1 insertion(+), 33 deletions(-)

diff --git a/src/PTQ/PTQ.cpp b/src/PTQ/PTQ.cpp
index bda0ae1..28bd587 100644
--- a/src/PTQ/PTQ.cpp
+++ b/src/PTQ/PTQ.cpp
@@ -75,7 +75,7 @@ bool insertRoundBelowProducer(std::shared_ptr<Node> node,std::shared_ptr<GraphVi
     std::shared_ptr<Aidge::Node> roundNode = Round(node->name() + "_Round");
     roundNode->getOperator()->setDataType(DataType::Float64); // getDataType(parentNode)
     roundNode->getOperator()->setBackend("cpu");
-    AIDGE_ASSERT(node->getChildren().size() > 0, "The producer node must have at least one child to insert a scaling node.");    
+    AIDGE_ASSERT(node->getChildren().size() > 0, "The producer node must have at least one child to insert a rounding node.");    
     std::vector<std::shared_ptr<Node>> nextNodes = node->getChildren(0); 
     std::vector<int> inputIndices(nextNodes.size());
     for (std::size_t i = 0; i < nextNodes.size(); i++)
@@ -157,38 +157,6 @@ bool checkArchitecture(std::shared_ptr<GraphView> graphView)
     return true;
 }
 
-static void rescaleTensor(std::shared_ptr<Tensor> tensor, float scaling)
-{
-    auto mulOp = Mul_Op();
-    mulOp.setDataType(tensor->dataType());
-    mulOp.setBackend(tensor->backend());
-
-    std::shared_ptr<Aidge::Tensor> scalingTensor = std::make_shared<Aidge::Tensor>(Aidge::Array1D<float, 1> {scaling});
-    scalingTensor->setDataType(tensor->dataType());
-    scalingTensor->setBackend(tensor->backend());
-
-    mulOp.associateInput(0, tensor);
-    mulOp.associateInput(1, scalingTensor);
-
-    mulOp.forward();
-    
-    auto outTensor = mulOp.getOutput(0);
-    *tensor = *outTensor;
-}
-
-static void roundTensor(std::shared_ptr<Tensor> tensor)
-{
-    auto roundOp = Round_Op();
-    roundOp.setDataType(tensor->dataType());
-    roundOp.setBackend(tensor->backend());
-
-    roundOp.associateInput(0, tensor);
-    roundOp.forward();
-    
-    auto outTensor = roundOp.getOutput(0);
-    *tensor = *outTensor;
-}
-
 // TODO : make the retreival of argmax values backend independant (refCastFrom)
 static double getTensorAbsoluteMax(std::shared_ptr<Tensor> tensor)
 {
-- 
GitLab


From 487718d2508ffe898b253bab61ce583341401813 Mon Sep 17 00:00:00 2001
From: Noam ZERAH <noam.zerah@cea.fr>
Date: Thu, 23 Jan 2025 12:08:34 +0000
Subject: [PATCH 17/26] Refactoring the code to add the function
 insertNodeBetween to easily insert a node between 2 already connected nodes

---
 include/aidge/quantization_version.h |  2 +-
 src/PTQ/PTQ.cpp                      | 79 +++++++++++-----------------
 2 files changed, 33 insertions(+), 48 deletions(-)

diff --git a/include/aidge/quantization_version.h b/include/aidge/quantization_version.h
index eba0eab..909ab28 100644
--- a/include/aidge/quantization_version.h
+++ b/include/aidge/quantization_version.h
@@ -6,6 +6,6 @@ static constexpr const int PROJECT_VERSION_MAJOR = 0;
 static constexpr const int PROJECT_VERSION_MINOR = 3;
 static constexpr const int PROJECT_VERSION_PATCH = 0;
 static constexpr const char * PROJECT_VERSION = "0.3.0";
-static constexpr const char * PROJECT_GIT_HASH = "c374ce4";
+static constexpr const char * PROJECT_GIT_HASH = "f0f9e60";
 }
 #endif // VERSION_H
diff --git a/src/PTQ/PTQ.cpp b/src/PTQ/PTQ.cpp
index 28bd587..92049a2 100644
--- a/src/PTQ/PTQ.cpp
+++ b/src/PTQ/PTQ.cpp
@@ -70,25 +70,42 @@ void multiplyScalingFactor(std::shared_ptr<Aidge::Node> node,double coeff)
     std::shared_ptr<Tensor> finalTensor = std::make_shared<Tensor>(Array1D<double, 1> {previousScalingFactor * coeff});
     node->input(1).first->getOperator()->setOutput(0, finalTensor);
 }
+/* Util function to insert a node below another one already connected */
+void insertNodeBetween(std::shared_ptr<Node> parent, 
+                       std::shared_ptr<Node> newNode, 
+                       std::shared_ptr<GraphView> graphView) 
+{
+    // Checking the parents always have at least 1 children
+    AIDGE_ASSERT(parent->getChildren().size() > 0, "The parent node must have at least one child to insert a new node.");
+
+    // Retrieve children connection indexes
+    std::vector<std::shared_ptr<Node>> nextNodes = parent->getChildren(0);
+    std::vector<int> inputIndices(nextNodes.size());
+    for (std::size_t i = 0; i < nextNodes.size(); i++) {
+        inputIndices[i] = getInputIndex(nextNodes[i], parent);
+    }
+
+    // Disconnect childs from parent
+    for (std::shared_ptr<Node> nextNode : nextNodes) {
+        parent->removeChild(nextNode, 0);
+    }
+
+    // Insert the new node between the child and the parent
+    parent->addChild(newNode, 0, 0);
+    for (std::size_t i = 0; i < nextNodes.size(); i++) {
+        newNode->addChild(nextNodes[i], 0, inputIndices[i]);
+    }
+
+    graphView->add(newNode);
+}
+
 bool insertRoundBelowProducer(std::shared_ptr<Node> node,std::shared_ptr<GraphView> graphView)
 {
     std::shared_ptr<Aidge::Node> roundNode = Round(node->name() + "_Round");
     roundNode->getOperator()->setDataType(DataType::Float64); // getDataType(parentNode)
     roundNode->getOperator()->setBackend("cpu");
-    AIDGE_ASSERT(node->getChildren().size() > 0, "The producer node must have at least one child to insert a rounding node.");    
-    std::vector<std::shared_ptr<Node>> nextNodes = node->getChildren(0); 
-    std::vector<int> inputIndices(nextNodes.size());
-    for (std::size_t i = 0; i < nextNodes.size(); i++)
-        inputIndices[i] = getInputIndex(nextNodes[i], node);
-        
-    for (std::shared_ptr<Node> nextNode : nextNodes)
-        node->removeChild(nextNode, 0);
 
-    node->addChild(roundNode, 0, 0);
-
-    for (std::size_t i = 0; i < nextNodes.size(); i++)
-        roundNode->addChild(nextNodes[i], 0, inputIndices[i]);
-        graphView->add(roundNode);
+    insertNodeBetween(node,roundNode,graphView);
 
     roundNode->attributes()->addAttr("isProducerRounding",0.0);
     return true;
@@ -121,23 +138,9 @@ bool insertScalingBelowProducer(std::shared_ptr<Node> node,double scalingFactor,
     
     scalingNode->getOperator()->setDataType(DataType::Float64); // getDataType(parentNode)
     scalingNode->getOperator()->setBackend("cpu");
-    AIDGE_ASSERT(node->getChildren().size() > 0, "The producer node must have at least one child to insert a scaling node.");
-    std::vector<std::shared_ptr<Node>> nextNodes = node->getChildren(0); 
-
-    // For each node in nextNodes store the connexion index
-    std::vector<int> inputIndices(nextNodes.size());
-    for (std::size_t i = 0; i < nextNodes.size(); i++)
-        inputIndices[i] = getInputIndex(nextNodes[i], node);
-        
-    for (std::shared_ptr<Node> nextNode : nextNodes)
-        node->removeChild(nextNode, 0);
 
-    node->addChild(scalingNode, 0, 0);
+    insertNodeBetween(node, scalingNode, graphView);
 
-    for (std::size_t i = 0; i < nextNodes.size(); i++)
-        scalingNode->addChild(nextNodes[i], 0, inputIndices[i]);
-
-    graphView->add(scalingNode);
     return true;
 }
 
@@ -374,26 +377,8 @@ void insertScalingNodes(std::shared_ptr<GraphView> graphView)
 
             if (parentNode->getChildren().size() > 0)
             {
-                // SCALING NODE INSERTION
-                
-                // We always have one output from Affine and Add nodes, but possibly multiple childs
-                std::vector<std::shared_ptr<Node>> nextNodes = parentNode->getChildren(0); 
-
-                // For each node in nextNodes store the connexion index
-                std::vector<int> inputIndices(nextNodes.size());
-                for (std::size_t i = 0; i < nextNodes.size(); i++)
-                    inputIndices[i] = getInputIndex(nextNodes[i], parentNode);
-                    
-                for (std::shared_ptr<Node> nextNode : nextNodes)
-                    parentNode->removeChild(nextNode, 0);
-
-                parentNode->addChild(scalingNode, 0, 0);
-
-                for (std::size_t i = 0; i < nextNodes.size(); i++)
-                    scalingNode->addChild(nextNodes[i], 0, inputIndices[i]);
-
+                insertNodeBetween(parentNode,scalingNode,graphView);
                 graphView->add(scalingFactorProducer);
-                graphView->add(scalingNode);
             }
             else
             {
-- 
GitLab


From 00f86582c8a9512175348b2c89449d2c2fff3104 Mon Sep 17 00:00:00 2001
From: Noam ZERAH <noam.zerah@cea.fr>
Date: Tue, 28 Jan 2025 15:17:17 +0000
Subject: [PATCH 18/26] Add the real quantization pipeline (allows a
 network to be fully cast to Int32 for now)

---
 src/PTQ/PTQ.cpp | 121 +++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 115 insertions(+), 6 deletions(-)

diff --git a/src/PTQ/PTQ.cpp b/src/PTQ/PTQ.cpp
index 92049a2..bab8465 100644
--- a/src/PTQ/PTQ.cpp
+++ b/src/PTQ/PTQ.cpp
@@ -21,6 +21,7 @@
 #include "aidge/scheduler/Scheduler.hpp"
 #include "aidge/utils/Log.hpp"
 
+#include "aidge/operator/BitShift.hpp"
 #include "aidge/operator/Producer.hpp"
 #include "aidge/operator/Mul.hpp"
 #include "aidge/operator/ReLU.hpp"
@@ -28,6 +29,7 @@
 #include "aidge/operator/Conv.hpp"
 #include "aidge/operator/ArgMax.hpp"
 #include "aidge/operator/Reshape.hpp"
+#include "aidge/operator/Cast.hpp"
 
 
 #include "aidge/recipes/Recipes.hpp"
@@ -58,6 +60,23 @@ static int getInputIndex(std::shared_ptr<Node> node, std::shared_ptr<Node> paren
         index++;
     return index;
 }
+/*Insert a node inside a graph*/
+bool insertNode(std::shared_ptr<Aidge::Node> baseNode, std::shared_ptr<Aidge::Node> insertNode, std::shared_ptr<GraphView> graphView)
+{
+    std::vector<std::shared_ptr<Node>> nextNodes = baseNode->getChildren(0); 
+    std::vector<int> inputIndices(nextNodes.size());
+    for (std::size_t i = 0; i < nextNodes.size(); i++)
+        inputIndices[i] = getInputIndex(nextNodes[i], baseNode);
+        
+    for (std::shared_ptr<Node> nextNode : nextNodes)
+        baseNode->removeChild(nextNode, 0);
+
+    baseNode->addChild(insertNode, 0, 0);
+
+    for (std::size_t i = 0; i < nextNodes.size(); i++)
+        insertNode->addChild(nextNodes[i], 0, inputIndices[i]);
+    graphView->add(insertNode);
+}
 
 void multiplyScalingFactor(std::shared_ptr<Aidge::Node> node,double coeff)
 {
@@ -76,9 +95,12 @@ void insertNodeBetween(std::shared_ptr<Node> parent,
                        std::shared_ptr<GraphView> graphView) 
 {
     // Checking the parents always have at least 1 children
-    AIDGE_ASSERT(parent->getChildren().size() > 0, "The parent node must have at least one child to insert a new node.");
-
-    // Retrieve children connection indexes
+    if(parent->getChildren().size() == 0)
+    {
+        parent->addChild(newNode, 0, 0);
+        graphView->add(newNode);
+        return;
+    }
     std::vector<std::shared_ptr<Node>> nextNodes = parent->getChildren(0);
     std::vector<int> inputIndices(nextNodes.size());
     for (std::size_t i = 0; i < nextNodes.size(); i++) {
@@ -99,6 +121,78 @@ void insertNodeBetween(std::shared_ptr<Node> parent,
     graphView->add(newNode);
 }
 
+bool castQuantizedGraph(std::shared_ptr<GraphView> &graphView, Aidge::DataType targetType, bool singleShift)
+{
+    std::vector<std::shared_ptr<Node>> nodeVector;
+    SequentialScheduler scheduler(graphView);
+    scheduler.resetScheduling();
+    scheduler.generateScheduling();
+    nodeVector = scheduler.getStaticScheduling();
+    for (std::shared_ptr<Node> node : nodeVector)
+    {
+        if (node->type() == "Round" && node->attributes()->hasAttr("isProducerRounding"))
+        {
+            std::shared_ptr<Aidge::Node> castNode =  Cast(targetType,node->name() + "_Cast");/*!!*/ /*Change Name (it keeps the round inside)*/
+            castNode->getOperator()->setDataType(targetType); /*!!*/ /*Set DataType on cast?*/
+            castNode->getOperator()->setBackend("cpu");
+            insertNodeBetween(node,castNode,graphView);
+            castNode->attributes()->addAttr("isProducerCasting",0.0);
+            node->getOperator()->setDataType(DataType::Float64);
+
+        }
+        if(node->type() == "Quantizer")
+        {
+            if(singleShift)
+            {
+                double scalingFactor = getScalingFactor(node);
+                int shift = std::log2(scalingFactor);
+                BitShift_Op::BitShiftDirection direction = BitShift_Op::BitShiftDirection::left;
+                if(shift < 0 )
+                {
+                    direction = BitShift_Op::BitShiftDirection::right;
+                    shift = -shift;
+                }
+                Log::warn("Valeur de dÃ©calage est : {}, orignale est {} for node {}",scalingFactor,(int)std::log2(scalingFactor),node->name());
+                std::shared_ptr<Node> bitshiftNode = BitShift(direction,node->name()+"BitShiftQuantizer");
+                std::shared_ptr<Tensor> bitshiftTensor = std::make_shared<Tensor>(Array1D<int, 1> {shift});
+                std::shared_ptr<Node> bitshiftProducer = addProducer(bitshiftNode, 1, {1}, "ScalingFactor"); 
+                bitshiftProducer->getOperator()->setOutput(0, bitshiftTensor);
+
+                bitshiftNode->getOperator()->setDataType(targetType); // getDataType(parentNode)
+                bitshiftNode->getOperator()->setBackend("cpu");
+                graphView->add(bitshiftProducer);
+                graphView->add(bitshiftNode);
+                graphView->replace({node}, {bitshiftProducer,bitshiftNode});
+                graphView->updateInputsOutputs();
+
+            }
+            else
+            {
+                std::shared_ptr<Aidge::Node> castPreNode =  Cast(DataType::Float64,node->name() + "_PreCast");/*!!*/ /*Change Name (it keeps the round inside)*/
+                castPreNode->getOperator()->setBackend("cpu");
+                insertNodeBetween(node->getParent(0),castPreNode,graphView);
+                castPreNode->attributes()->addAttr("isCasting",0.0);
+                castPreNode->getOperator()->setDataType(DataType::Float64); /*!!*/ /*Set DataType on cast?*/
+
+                std::shared_ptr<Aidge::Node> castPostNode =  Cast(targetType,node->name() + "_PostCast");/*!!*/ /*Change Name (it keeps the round inside)*/
+                castPostNode->getOperator()->setBackend("cpu");
+                insertNodeBetween(node,castPostNode,graphView);
+                castPostNode->attributes()->addAttr("isCasting",0.0);
+                castPostNode->getOperator()->setDataType(targetType); /*!!*/ /*Set DataType on cast?*/
+            }
+        }
+        if (node->type() != "Producer" &&
+        node->type() != "Quantizer" &&
+        !node->attributes()->hasAttr("isProducerRounding") && 
+        !node->attributes()->hasAttr("isProducerScaling")) 
+        { 
+            Log::warn("Node set to int is of type: {} and name is {}",node->type(),node->name()); 
+            node->getOperator()->setDataType(targetType);
+        }   
+    }
+    //scheduler.forward();
+    return true;
+}
 bool insertRoundBelowProducer(std::shared_ptr<Node> node,std::shared_ptr<GraphView> graphView)
 {
     std::shared_ptr<Aidge::Node> roundNode = Round(node->name() + "_Round");
@@ -1015,6 +1109,8 @@ void performSingleShiftApproximation(std::shared_ptr<GraphView> graphView, bool
         if (isAffine(node) || (node->type() == "Mul" && node->attributes()->hasAttr("isCompensation")))
         {
             std::shared_ptr<Node> scalingNode = (*node->getChildren().begin());
+            if(scalingNode->attributes()->hasAttr("isCasting"))
+                scalingNode = (*node->getChildren().begin());
 
             double base = getScalingFactor(scalingNode);
 
@@ -1025,14 +1121,14 @@ void performSingleShiftApproximation(std::shared_ptr<GraphView> graphView, bool
             double ratio = base / approx;
 
             insertScalingBelowProducer(node->getParent(1),ratio,graphView);
-            if (!noQuant)
+            if (!noQuant && !node->getParent(1)->attributes()->hasAttr("isProducerRounding"))
                 insertRoundBelowProducer(node->getParent(1),graphView);
 
             if (nodeHasBias(node))
             {
                 insertScalingBelowProducer(node->getParent(2),ratio,graphView);
 
-                if (!noQuant)
+                if (!noQuant && !node->getParent(1)->attributes()->hasAttr("isProducerRounding"))
                     insertRoundBelowProducer(node->getParent(2),graphView);
             }
         }
@@ -1077,7 +1173,8 @@ static void printRanges(std::shared_ptr<GraphView> graphView, std::map<std::stri
             Log::debug("{} range = {}",node->name(),valueRanges[node->name()]);
 }
 
-void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits, std::vector<std::shared_ptr<Tensor>> inputDataSet, Clipping clippingMode, bool noQuant, bool optimizeSigns, bool singleShift, bool useCuda, bool verbose)
+void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits, std::vector<std::shared_ptr<Tensor>> inputDataSet,
+ Clipping clippingMode, bool noQuant, bool optimizeSigns, bool singleShift, bool useCuda, bool verbose)
 {
     Log::info(" === QUANT PTQ 0.2.21 === ");
 
@@ -1124,6 +1221,18 @@ void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits,
         Log::info(" Performing the Single-Shift approximation ...");
         performSingleShiftApproximation(graphView, noQuant);
     }
+    if(true)    /*!!*/  /*Give a name to CAST BOOLEAN*/
+    {
+        AIDGE_ASSERT(!noQuant,"Cannot cast operators with the noQuant(Fake Quantization) flag set to true!")
+        Log::info("Starting to cast operators into the desired type ...");
+        castQuantizedGraph(graphView,DataType::Int32,singleShift);
+    }
+    else
+    {
+        setupDataType(graphView, inputDataSet, initialDataType);
+    }
+    //Mandatory to handle all of the newly added connections!
+    graphView->updateInputsOutputs();
 
     if (verbose)
         printScalingFactors(graphView);
-- 
GitLab


From ebe0fb421524b5ee9b25db3ff7524709569845ed Mon Sep 17 00:00:00 2001
From: Noam ZERAH <noam.zerah@cea.fr>
Date: Fri, 31 Jan 2025 13:54:11 +0000
Subject: [PATCH 19/26] Full int32 with bitshift pipeline

---
 include/aidge/quantization/PTQ/PTQ.hpp |   3 +-
 python_binding/pybind_PTQ.cpp          |   8 +-
 src/PTQ/PTQ.cpp                        | 119 ++++++++++++++-----------
 3 files changed, 76 insertions(+), 54 deletions(-)

diff --git a/include/aidge/quantization/PTQ/PTQ.hpp b/include/aidge/quantization/PTQ/PTQ.hpp
index e1ef529..3a35017 100644
--- a/include/aidge/quantization/PTQ/PTQ.hpp
+++ b/include/aidge/quantization/PTQ/PTQ.hpp
@@ -166,7 +166,8 @@ namespace Aidge {
      * @param singleShift Whether to convert the scaling factors into powers of two. If true the approximations are compensated using the previous nodes weights. 
      * @param verbose Whether to print internal informations about the quantization process.
      */
-    void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits, std::vector<std::shared_ptr<Tensor>> inputDataSet, Clipping clippingMode, bool applyRounding, bool optimizeSigns, bool singleShift, bool useCuda, bool verbose);
+    void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits, std::vector<std::shared_ptr<Tensor>> inputDataSet,
+     Clipping clippingMode, DataType targetType, bool noQuant, bool optimizeSigns, bool singleShift, bool useCuda,bool foldGraph ,bool verbose);
 
     /**
      * @brief Compute the weight ranges of every affine node. Provided for debugging purposes.
diff --git a/python_binding/pybind_PTQ.cpp b/python_binding/pybind_PTQ.cpp
index 61a3cb9..290d59d 100644
--- a/python_binding/pybind_PTQ.cpp
+++ b/python_binding/pybind_PTQ.cpp
@@ -13,7 +13,7 @@
 #include <pybind11/stl.h>
 
 #include <string>
-
+#include "aidge/operator/PTQMetaOps.hpp"
 #include "aidge/quantization/PTQ/Clipping.hpp"
 #include "aidge/quantization/PTQ/CLE.hpp"
 #include "aidge/quantization/PTQ/PTQ.hpp"
@@ -39,6 +39,8 @@ void init_PTQ(py::module &m) {
     :rtype: bool
     )mydelimiter");
 
+    m.def("quantizer",&Quantizer,py::arg("sf"),py::arg("min"),py::arg("max"),py::arg("name"));
+
     m.def("insert_scaling_nodes", &insertScalingNodes, py::arg("network"),
     R"mydelimiter(
     Insert a scaling node after each affine node of the GraphView.
@@ -100,7 +102,9 @@ void init_PTQ(py::module &m) {
     :type verbose: bool
     )mydelimiter");
 
-    m.def("quantize_network", &quantizeNetwork ,py::arg("network"), py::arg("nb_bits"), py::arg("input_dataset"), py::arg("clipping_mode") = Clipping::MAX , py::arg("no_quantization") = true, py::arg("optimize_signs") = false, py::arg("single_shift") = false,  py::arg("use_cuda") = false, py::arg("verbose") = false,
+    m.def("quantize_network", &quantizeNetwork ,py::arg("network"), py::arg("nb_bits"), py::arg("input_dataset"), 
+    py::arg("clipping_mode") = Clipping::MAX ,py::arg("target_type") = DataType::Float64 ,py::arg("no_quantization") = true, py::arg("optimize_signs") = false,
+    py::arg("single_shift") = false,  py::arg("use_cuda") = false, py::arg("fold_graph") = true, py::arg("verbose") = false,
     R"mydelimiter(
     Main quantization routine. Performs every step of the quantization pipeline.
     :param network: The GraphView to be quantized.
diff --git a/src/PTQ/PTQ.cpp b/src/PTQ/PTQ.cpp
index bab8465..069a3d8 100644
--- a/src/PTQ/PTQ.cpp
+++ b/src/PTQ/PTQ.cpp
@@ -60,23 +60,6 @@ static int getInputIndex(std::shared_ptr<Node> node, std::shared_ptr<Node> paren
         index++;
     return index;
 }
-/*Insert a node inside a graph*/
-bool insertNode(std::shared_ptr<Aidge::Node> baseNode, std::shared_ptr<Aidge::Node> insertNode, std::shared_ptr<GraphView> graphView)
-{
-    std::vector<std::shared_ptr<Node>> nextNodes = baseNode->getChildren(0); 
-    std::vector<int> inputIndices(nextNodes.size());
-    for (std::size_t i = 0; i < nextNodes.size(); i++)
-        inputIndices[i] = getInputIndex(nextNodes[i], baseNode);
-        
-    for (std::shared_ptr<Node> nextNode : nextNodes)
-        baseNode->removeChild(nextNode, 0);
-
-    baseNode->addChild(insertNode, 0, 0);
-
-    for (std::size_t i = 0; i < nextNodes.size(); i++)
-        insertNode->addChild(nextNodes[i], 0, inputIndices[i]);
-    graphView->add(insertNode);
-}
 
 void multiplyScalingFactor(std::shared_ptr<Aidge::Node> node,double coeff)
 {
@@ -85,6 +68,7 @@ void multiplyScalingFactor(std::shared_ptr<Aidge::Node> node,double coeff)
     auto scalingFactorTensor = std::static_pointer_cast<OperatorTensor>(node->getOperator())->getInput(1);
     std::shared_ptr<Tensor> fallback;
     const Tensor& localTensor = scalingFactorTensor->refCastFrom(fallback, DataType::Float64, "cpu");
+    
     double previousScalingFactor = localTensor.get<double>(0);
     std::shared_ptr<Tensor> finalTensor = std::make_shared<Tensor>(Array1D<double, 1> {previousScalingFactor * coeff});
     node->input(1).first->getOperator()->setOutput(0, finalTensor);
@@ -121,26 +105,36 @@ void insertNodeBetween(std::shared_ptr<Node> parent,
     graphView->add(newNode);
 }
 
-bool castQuantizedGraph(std::shared_ptr<GraphView> &graphView, Aidge::DataType targetType, bool singleShift)
+void applyConstFold(std::shared_ptr<GraphView> &graphView)
 {
-    std::vector<std::shared_ptr<Node>> nodeVector;
-    SequentialScheduler scheduler(graphView);
-    scheduler.resetScheduling();
-    scheduler.generateScheduling();
-    nodeVector = scheduler.getStaticScheduling();
+    for (const std::shared_ptr<Node> node : graphView->getNodes())
+    {
+        if (node->type() == "Producer" )
+        {
+            const auto& producer = std::static_pointer_cast<Producer_Op>(node->getOperator());
+            producer->constant() = true;
+        }
+    }
+    constantFolding(graphView);
+}
+
+bool castQuantizedGraph(std::shared_ptr<GraphView> &graphView, Aidge::DataType targetType, bool singleShift, bool MP/*Rename*/)
+{
+    //We need a deepcopy of the graphs nodes since we will replace some nodes
+    std::vector<std::shared_ptr<Node>> nodeVector(graphView->getNodes().begin(), graphView->getNodes().end());
+
     for (std::shared_ptr<Node> node : nodeVector)
     {
         if (node->type() == "Round" && node->attributes()->hasAttr("isProducerRounding"))
         {
             std::shared_ptr<Aidge::Node> castNode =  Cast(targetType,node->name() + "_Cast");/*!!*/ /*Change Name (it keeps the round inside)*/
-            castNode->getOperator()->setDataType(targetType); /*!!*/ /*Set DataType on cast?*/
-            castNode->getOperator()->setBackend("cpu");
+            castNode->getOperator()->setDataType(targetType);
+            castNode->getOperator()->setBackend(node->getOperator()->backend());
             insertNodeBetween(node,castNode,graphView);
             castNode->attributes()->addAttr("isProducerCasting",0.0);
             node->getOperator()->setDataType(DataType::Float64);
-
         }
-        if(node->type() == "Quantizer")
+        else if(node->type() == "Quantizer")
         {
             if(singleShift)
             {
@@ -152,45 +146,51 @@ bool castQuantizedGraph(std::shared_ptr<GraphView> &graphView, Aidge::DataType t
                     direction = BitShift_Op::BitShiftDirection::right;
                     shift = -shift;
                 }
-                Log::warn("Valeur de dÃ©calage est : {}, orignale est {} for node {}",scalingFactor,(int)std::log2(scalingFactor),node->name());
-                std::shared_ptr<Node> bitshiftNode = BitShift(direction,node->name()+"BitShiftQuantizer");
+                std::shared_ptr<Node> bitshiftNode = BitShift(direction,node->name()+"_BitShift_Quantizer");
                 std::shared_ptr<Tensor> bitshiftTensor = std::make_shared<Tensor>(Array1D<int, 1> {shift});
                 std::shared_ptr<Node> bitshiftProducer = addProducer(bitshiftNode, 1, {1}, "ScalingFactor"); 
                 bitshiftProducer->getOperator()->setOutput(0, bitshiftTensor);
+                bitshiftProducer->getOperator()->setDataType(DataType::Int32); 
 
-                bitshiftNode->getOperator()->setDataType(targetType); // getDataType(parentNode)
-                bitshiftNode->getOperator()->setBackend("cpu");
+
+                bitshiftNode->getOperator()->setDataType(targetType); 
+                bitshiftNode->getOperator()->setBackend(node->getOperator()->backend());
                 graphView->add(bitshiftProducer);
                 graphView->add(bitshiftNode);
                 graphView->replace({node}, {bitshiftProducer,bitshiftNode});
                 graphView->updateInputsOutputs();
 
             }
-            else
+            else //If single shift is not enabled we keep using the Float Quantizer Metaoperator so we need to cast before and after each quantizer
             {
-                std::shared_ptr<Aidge::Node> castPreNode =  Cast(DataType::Float64,node->name() + "_PreCast");/*!!*/ /*Change Name (it keeps the round inside)*/
-                castPreNode->getOperator()->setBackend("cpu");
-                insertNodeBetween(node->getParent(0),castPreNode,graphView);
-                castPreNode->attributes()->addAttr("isCasting",0.0);
-                castPreNode->getOperator()->setDataType(DataType::Float64); /*!!*/ /*Set DataType on cast?*/
+                //we need this check since we dont want to add a second PRECAST 
+                if(!node->getParent(0)->attributes()->hasAttr("isCasting"))
+                {
+                    std::shared_ptr<Aidge::Node> castPreNode =  Cast(DataType::Float64,node->name() + "_PreCast");/*!!*/ /*Change Name (it keeps the round inside)*/
+                    castPreNode->getOperator()->setBackend(node->getOperator()->backend());
+                    node->addParent(castPreNode,0);
+                    castPreNode->attributes()->addAttr("isCasting",0.0);
+                    castPreNode->getOperator()->setDataType(DataType::Float64); 
+                }
 
                 std::shared_ptr<Aidge::Node> castPostNode =  Cast(targetType,node->name() + "_PostCast");/*!!*/ /*Change Name (it keeps the round inside)*/
-                castPostNode->getOperator()->setBackend("cpu");
+                castPostNode->getOperator()->setBackend(node->getOperator()->backend());
                 insertNodeBetween(node,castPostNode,graphView);
                 castPostNode->attributes()->addAttr("isCasting",0.0);
-                castPostNode->getOperator()->setDataType(targetType); /*!!*/ /*Set DataType on cast?*/
+                castPostNode->getOperator()->setDataType(targetType); 
             }
         }
-        if (node->type() != "Producer" &&
-        node->type() != "Quantizer" &&
-        !node->attributes()->hasAttr("isProducerRounding") && 
+        else if (node->type() != "Producer" &&
         !node->attributes()->hasAttr("isProducerScaling")) 
-        { 
-            Log::warn("Node set to int is of type: {} and name is {}",node->type(),node->name()); 
+        {              
             node->getOperator()->setDataType(targetType);
+            if(isAffine(node))
+            {
+                node->getOperator()->setDataType(DataType::Int32);
+                //node->getParent(2)->getOperator()->setDataType(DataType::Int32);
+            }
         }   
     }
-    //scheduler.forward();
     return true;
 }
 bool insertRoundBelowProducer(std::shared_ptr<Node> node,std::shared_ptr<GraphView> graphView)
@@ -220,7 +220,7 @@ bool insertScalingBelowProducer(std::shared_ptr<Node> node,double scalingFactor,
         return true;
     }
     AIDGE_ASSERT(node->type() == "Producer","Cannot apply a scaling factor on node of type: {} which is not a producer", node->type());
-    std::string scalingNodeName = makeUniqueName(node->name() + "_ProducerScaling", graphView);
+    std::string scalingNodeName = makeUniqueName(node->name() + "_Producer_Scaling", graphView);
     
     std::shared_ptr<Aidge::Node> scalingNode = Mul(scalingNodeName);
     scalingNode->attributes()->addAttr("isProducerScaling",0.0);
@@ -1174,7 +1174,7 @@ static void printRanges(std::shared_ptr<GraphView> graphView, std::map<std::stri
 }
 
 void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits, std::vector<std::shared_ptr<Tensor>> inputDataSet,
- Clipping clippingMode, bool noQuant, bool optimizeSigns, bool singleShift, bool useCuda, bool verbose)
+ Clipping clippingMode, DataType targetType,bool noQuant, bool optimizeSigns, bool singleShift, bool useCuda, bool foldGraph ,bool verbose)
 {
     Log::info(" === QUANT PTQ 0.2.21 === ");
 
@@ -1221,17 +1221,36 @@ void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits,
         Log::info(" Performing the Single-Shift approximation ...");
         performSingleShiftApproximation(graphView, noQuant);
     }
-    if(true)    /*!!*/  /*Give a name to CAST BOOLEAN*/
+    if(targetType != DataType::Float64)    /*!!*/  /*Give a name to CAST BOOLEAN*/
     {
         AIDGE_ASSERT(!noQuant,"Cannot cast operators with the noQuant(Fake Quantization) flag set to true!")
         Log::info("Starting to cast operators into the desired type ...");
-        castQuantizedGraph(graphView,DataType::Int32,singleShift);
+        castQuantizedGraph(graphView,DataType::Int32,singleShift,true);
+        for (auto h :graphView->getNodes())
+        {
+            if(h->name() == "resnetv15_conv0_fwd_1" || h->name() == "fc0_Gemm_1" )
+            {
+                    std::shared_ptr<Aidge::Node> castPreNode =  Cast(DataType::Int32,h->name() + "_AJOUT");
+                    castPreNode->getOperator()->setBackend(h->getOperator()->backend());
+                    castPreNode->getOperator()->setDataType(DataType::Int32);
+                    castPreNode->addChild(h,0,0);
+                    graphView->add(castPreNode);
+            }
+        }
     }
     else
     {
         setupDataType(graphView, inputDataSet, initialDataType);
     }
+    
     //Mandatory to handle all of the newly added connections!
+   // graphView->updateInputsOutputs();
+
+    if(true)
+    {
+        Log::info("Applying constant folding recipe to the graph ...");
+        applyConstFold(graphView);
+    }
     graphView->updateInputsOutputs();
 
     if (verbose)
@@ -1240,8 +1259,6 @@ void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits,
     //Log::debug(" === SCALINGS (BEFORE CAST) ===");
     //printScalingFactors(graphView);
 
-    setupDataType(graphView, inputDataSet, initialDataType);
-
     if (useCuda)
         graphView->setBackend("cuda");
 
-- 
GitLab


From 2be85f14c8c5aa73cd9e037a07ff8912404e1a24 Mon Sep 17 00:00:00 2001
From: Noam ZERAH <noam.zerah@cea.fr>
Date: Wed, 5 Feb 2025 16:34:49 +0000
Subject: [PATCH 20/26] Adding Fully functional Cast to the desired type in the
 PTQ pipeline

---
 include/aidge/operator/PTQMetaOps.hpp |  14 ++++
 src/PTQ/PTQ.cpp                       | 113 +++++++++-----------------
 src/operator/PTQMetaOps.cpp           |  59 ++++++++++++--
 3 files changed, 106 insertions(+), 80 deletions(-)

diff --git a/include/aidge/operator/PTQMetaOps.hpp b/include/aidge/operator/PTQMetaOps.hpp
index 22fb71e..58571e0 100644
--- a/include/aidge/operator/PTQMetaOps.hpp
+++ b/include/aidge/operator/PTQMetaOps.hpp
@@ -37,6 +37,20 @@ namespace Aidge {
 /// @return A shared pointer to an instance of the meta-operator node.
 std::shared_ptr<Aidge::Node> Quantizer(double scalingFactor, double clipMin, double clipMax, const std::string& name);
 
+/// @brief IntQuantizer acts as an extension of the Quantizer meta-operator, enabling seamless integration 
+///        into computation graphs with a data type other than Float while preserving floating-point precision.
+/// 
+/// This operator modifies the provided Quantizer by inserting explicit casting operations before and after 
+/// the quantization process. It first casts the input to Float64, applies the quantization steps (Mul, Clip, Round), 
+/// and then casts the result back to the target data type. This ensures compatibility with integer-based computation graphs 
+/// while maintaining the precision of floating-point operations.
+///
+/// @param oldQuantizer A shared pointer to the existing Quantizer node that will be adapted.
+/// @param targetType The target data type to which the final output should be cast after the quantization process.
+/// @param name The name of the meta-operator node created.
+/// @return A shared pointer to a new instance of the modified meta-operator node.
+std::shared_ptr<Node> IntQuantizer(std::shared_ptr<Node> oldQuantizer, DataType targetType, const std::string& name);
+
 /// @brief Updates the scaling factor of a PTQ meta-operator node, allowing for dynamic adjustment of the scaling parameter.
 /// This function sets a new scaling factor for a specified meta-operator node, modifying the scalar applied in the [Mul] operation.
 /// The meta-operator node must be a PTQ-specific operator, such as a Quantizer or Scaling node.
diff --git a/src/PTQ/PTQ.cpp b/src/PTQ/PTQ.cpp
index 069a3d8..a9ac176 100644
--- a/src/PTQ/PTQ.cpp
+++ b/src/PTQ/PTQ.cpp
@@ -117,8 +117,8 @@ void applyConstFold(std::shared_ptr<GraphView> &graphView)
     }
     constantFolding(graphView);
 }
-
-bool castQuantizedGraph(std::shared_ptr<GraphView> &graphView, Aidge::DataType targetType, bool singleShift, bool MP/*Rename*/)
+//Add a condition to insert Cast Node to cast User Input Data into the desired type
+bool castQuantizedGraph(std::shared_ptr<GraphView> &graphView, Aidge::DataType targetType, bool singleShift)
 {
     //We need a deepcopy of the graphs nodes since we will replace some nodes
     std::vector<std::shared_ptr<Node>> nodeVector(graphView->getNodes().begin(), graphView->getNodes().end());
@@ -127,7 +127,7 @@ bool castQuantizedGraph(std::shared_ptr<GraphView> &graphView, Aidge::DataType t
     {
         if (node->type() == "Round" && node->attributes()->hasAttr("isProducerRounding"))
         {
-            std::shared_ptr<Aidge::Node> castNode =  Cast(targetType,node->name() + "_Cast");/*!!*/ /*Change Name (it keeps the round inside)*/
+            std::shared_ptr<Aidge::Node> castNode =  Cast(targetType,node->name() + "_Cast");
             castNode->getOperator()->setDataType(targetType);
             castNode->getOperator()->setBackend(node->getOperator()->backend());
             insertNodeBetween(node,castNode,graphView);
@@ -138,6 +138,7 @@ bool castQuantizedGraph(std::shared_ptr<GraphView> &graphView, Aidge::DataType t
         {
             if(singleShift)
             {
+                //If single shift is enabled we must replace each Quantizer by a bitShift
                 double scalingFactor = getScalingFactor(node);
                 int shift = std::log2(scalingFactor);
                 BitShift_Op::BitShiftDirection direction = BitShift_Op::BitShiftDirection::left;
@@ -150,8 +151,7 @@ bool castQuantizedGraph(std::shared_ptr<GraphView> &graphView, Aidge::DataType t
                 std::shared_ptr<Tensor> bitshiftTensor = std::make_shared<Tensor>(Array1D<int, 1> {shift});
                 std::shared_ptr<Node> bitshiftProducer = addProducer(bitshiftNode, 1, {1}, "ScalingFactor"); 
                 bitshiftProducer->getOperator()->setOutput(0, bitshiftTensor);
-                bitshiftProducer->getOperator()->setDataType(DataType::Int32); 
-
+                bitshiftProducer->getOperator()->setDataType(targetType); 
 
                 bitshiftNode->getOperator()->setDataType(targetType); 
                 bitshiftNode->getOperator()->setBackend(node->getOperator()->backend());
@@ -161,34 +161,17 @@ bool castQuantizedGraph(std::shared_ptr<GraphView> &graphView, Aidge::DataType t
                 graphView->updateInputsOutputs();
 
             }
-            else //If single shift is not enabled we keep using the Float Quantizer Metaoperator so we need to cast before and after each quantizer
+            else //If single shift is not enabled we keep using the alternative Int Quantizer (which cast the data before and after the regular Quantizer Operations) 
             {
-                //we need this check since we dont want to add a second PRECAST 
-                if(!node->getParent(0)->attributes()->hasAttr("isCasting"))
-                {
-                    std::shared_ptr<Aidge::Node> castPreNode =  Cast(DataType::Float64,node->name() + "_PreCast");/*!!*/ /*Change Name (it keeps the round inside)*/
-                    castPreNode->getOperator()->setBackend(node->getOperator()->backend());
-                    node->addParent(castPreNode,0);
-                    castPreNode->attributes()->addAttr("isCasting",0.0);
-                    castPreNode->getOperator()->setDataType(DataType::Float64); 
-                }
-
-                std::shared_ptr<Aidge::Node> castPostNode =  Cast(targetType,node->name() + "_PostCast");/*!!*/ /*Change Name (it keeps the round inside)*/
-                castPostNode->getOperator()->setBackend(node->getOperator()->backend());
-                insertNodeBetween(node,castPostNode,graphView);
-                castPostNode->attributes()->addAttr("isCasting",0.0);
-                castPostNode->getOperator()->setDataType(targetType); 
+                std::shared_ptr<Node> newQuantizer = IntQuantizer(node,targetType,node->name());
+                newQuantizer->getOperator()->setBackend(node->getOperator()->backend());
+                graphView->replace({node},{newQuantizer});
             }
         }
         else if (node->type() != "Producer" &&
         !node->attributes()->hasAttr("isProducerScaling")) 
         {              
             node->getOperator()->setDataType(targetType);
-            if(isAffine(node))
-            {
-                node->getOperator()->setDataType(DataType::Int32);
-                //node->getParent(2)->getOperator()->setDataType(DataType::Int32);
-            }
         }   
     }
     return true;
@@ -358,9 +341,9 @@ std::vector<std::shared_ptr<Node>> retrieveNodeVector(std::shared_ptr<GraphView>
 
     if (verbose) 
     {
-        Log::info("NB OF NODES = {}", nodeVector.size());
+        Log::notice("NB OF NODES = {}", nodeVector.size());
         for (std::shared_ptr<Node> node : nodeVector)
-            Log::info("{} {}", node->type(), node->name());
+            Log::notice("{} {}", node->type(), node->name());
     }
 
     return nodeVector;    
@@ -419,8 +402,8 @@ void insertResidualNodes(std::shared_ptr<GraphView> graphView)
                 if (parentIsForking)
                 {
                     // temporary verbose ...
-                    Log::info(" ### found residual branch at index {}", i);
-                    Log::info(" ### inserting multiplicative node ...");
+                    Log::notice(" ### found residual branch at index {}", i);
+                    Log::notice(" ### inserting multiplicative node ...");
 
                     std::string residualNodeName = makeUniqueName(parentNode->name() + "_Res", graphView);
                     std::shared_ptr<Node> residualNode = Mul(residualNodeName);
@@ -476,7 +459,7 @@ void insertScalingNodes(std::shared_ptr<GraphView> graphView)
             }
             else
             {
-                // Log::info(" last node reached ! ");
+                // Log::notice(" last node reached ! ");
                 parentNode->addChild(scalingNode, 0, 0);
                 graphView->add(scalingFactorProducer);
                 graphView->add(scalingNode);
@@ -654,7 +637,7 @@ std::map<std::string, double> computeRanges(std::shared_ptr<GraphView> graphView
 
     for (std::shared_ptr<Tensor> sample : inputDataSet)
     {
-        //Log::info(" IT : {}", it++);
+        //Log::notice(" IT : {}", it++);
 
         // Inference ...
 
@@ -797,7 +780,7 @@ void normalizeActivations(std::shared_ptr<GraphView> graphView, std::map<std::st
                 double rescaling = mergingNodeScaling / maxScaling;
 
                 std::shared_ptr<Node> scalingNode = getPreviousScalingNode(mergingNode);
-                //Log::info(" SCALING NODE : {} {}", scalingNode->type(), scalingNode->name());
+                //Log::notice(" SCALING NODE : {} {}", scalingNode->type(), scalingNode->name());
                 
                 multiplyScalingFactor(scalingNode,rescaling) ;          
             }
@@ -913,9 +896,9 @@ std::map<std::string, std::pair<bool, bool>> computeSignMap(std::shared_ptr<Grap
 
     if (verbose)
     {
-        Log::info(" === SIGN MAP === ");
+        Log::notice(" === SIGN MAP === ");
         for (std::shared_ptr<Node> node : nodeVector)
-            Log::info(" {}{} | {}", static_cast<int>(signMap[node->name()].first), static_cast<int>(signMap[node->name()].second), node->name());
+            Log::notice(" {}{} | {}", static_cast<int>(signMap[node->name()].first), static_cast<int>(signMap[node->name()].second), node->name());
     }
 
     // SANITY CHECK (TEMPORARY)
@@ -1137,12 +1120,12 @@ void performSingleShiftApproximation(std::shared_ptr<GraphView> graphView, bool
 
 static void printScalingFactors(std::shared_ptr<GraphView> graphView)
 {
-    Log::info(" === SCALING FACTORS === ");
+    Log::notice(" === SCALING FACTORS === ");
     for (auto node : retrieveNodeVector(graphView))
         if (node->attributes()->hasAttr("isScaling") || node->type() == "Quantizer")
         {
             double scalingFactor = getScalingFactor(node);
-            Log::info(" {:.6f} ({})", scalingFactor, node->name());
+            Log::notice(" {:.6f} ({})", scalingFactor, node->name());
         }
 }
 
@@ -1176,7 +1159,7 @@ static void printRanges(std::shared_ptr<GraphView> graphView, std::map<std::stri
 void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits, std::vector<std::shared_ptr<Tensor>> inputDataSet,
  Clipping clippingMode, DataType targetType,bool noQuant, bool optimizeSigns, bool singleShift, bool useCuda, bool foldGraph ,bool verbose)
 {
-    Log::info(" === QUANT PTQ 0.2.21 === ");
+    Log::notice(" === QUANT PTQ 0.2.21 === ");
 
     graphView->setBackend("cpu");
 
@@ -1186,90 +1169,74 @@ void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits,
     if (!checkArchitecture(graphView))
         return;
 
-    Log::info(" Preparing the network for the PTQ ... ");
+    Log::notice(" Preparing the network for the PTQ ... ");
     prepareNetwork(graphView);
 
-    Log::info(" Inserting the scaling nodes ...");
+    Log::notice(" Inserting the scaling nodes ...");
     insertScalingNodes(graphView);
 
     crossLayerEqualization(graphView);
-    Log::info(" Normalizing the parameters ...");
+    Log::notice(" Normalizing the parameters ...");
     normalizeParameters(graphView);
 
-    Log::info(" Computing the value ranges ...");
+    Log::notice(" Computing the value ranges ...");
     std::map<std::string, double> valueRanges = computeRanges(graphView, inputDataSet, true, useCuda);
 
     //Log:debug("=== RANGES (BEFORE ADJUST) ===");
     //printRanges(graphView, valueRanges);
 
-    Log::info(" Optimizing the clipping values ...");
+    Log::notice(" Optimizing the clipping values ...");
     valueRanges = adjustRanges(clippingMode, valueRanges, nbBits, graphView, inputDataSet, useCuda, verbose);
 
     //Log:debug("=== RANGES (AFTER ADJUST) ===");
     //printRanges(graphView, valueRanges);
-    Log::info(" Normalizing the activations ...");
+    Log::notice(" Normalizing the activations ...");
     normalizeActivations(graphView, valueRanges);
 
-    Log::info(" Quantizing the normalized network ...");
+    Log::notice(" Quantizing the normalized network ...");
     quantizeNormalizedNetwork(graphView, nbBits, noQuant, optimizeSigns, verbose);
 
     if (singleShift)
     {
-        Log::info( " Inserting the compensation nodes ...");
+        Log::notice( " Inserting the compensation nodes ...");
         insertCompensationNodes(graphView, nbBits);
 
-        Log::info(" Performing the Single-Shift approximation ...");
+        Log::notice(" Performing the Single-Shift approximation ...");
         performSingleShiftApproximation(graphView, noQuant);
     }
-    if(targetType != DataType::Float64)    /*!!*/  /*Give a name to CAST BOOLEAN*/
+
+    if(targetType != DataType::Float64) //!!  Adapt the condition 
     {
         AIDGE_ASSERT(!noQuant,"Cannot cast operators with the noQuant(Fake Quantization) flag set to true!")
-        Log::info("Starting to cast operators into the desired type ...");
-        castQuantizedGraph(graphView,DataType::Int32,singleShift,true);
-        for (auto h :graphView->getNodes())
-        {
-            if(h->name() == "resnetv15_conv0_fwd_1" || h->name() == "fc0_Gemm_1" )
-            {
-                    std::shared_ptr<Aidge::Node> castPreNode =  Cast(DataType::Int32,h->name() + "_AJOUT");
-                    castPreNode->getOperator()->setBackend(h->getOperator()->backend());
-                    castPreNode->getOperator()->setDataType(DataType::Int32);
-                    castPreNode->addChild(h,0,0);
-                    graphView->add(castPreNode);
-            }
-        }
+        Log::notice("Starting to cast operators into the desired type ...");
+        castQuantizedGraph(graphView,DataType::Int32,singleShift);
     }
     else
     {
         setupDataType(graphView, inputDataSet, initialDataType);
     }
-    
-    //Mandatory to handle all of the newly added connections!
-   // graphView->updateInputsOutputs();
 
-    if(true)
+    if(foldGraph)
     {
-        Log::info("Applying constant folding recipe to the graph ...");
+        Log::notice("Applying constant folding recipe to the graph ...");
         applyConstFold(graphView);
     }
+    //Mandatory to handle all of the newly added connections!
     graphView->updateInputsOutputs();
 
     if (verbose)
         printScalingFactors(graphView);
 
-    //Log::debug(" === SCALINGS (BEFORE CAST) ===");
-    //printScalingFactors(graphView);
 
     if (useCuda)
         graphView->setBackend("cuda");
 
-    //Log::debug(" === SCALINGS (AFTER CAST) ===");
-    //printScalingFactors(graphView);
-
-    Log::info(" Reseting the scheduler ...");
+    Log::notice(" Reseting the scheduler ...");
     SequentialScheduler scheduler(graphView);
     scheduler.resetScheduling();
 
-    Log::info(" Network is quantized !");
+    Log::notice(" Network is quantized !");
+
 }
 
 std::map<std::string, double> getWeightRanges(std::shared_ptr<GraphView> graphView)
diff --git a/src/operator/PTQMetaOps.cpp b/src/operator/PTQMetaOps.cpp
index 105d4e8..f3ddbcf 100644
--- a/src/operator/PTQMetaOps.cpp
+++ b/src/operator/PTQMetaOps.cpp
@@ -19,6 +19,7 @@
 #include "aidge/operator/Clip.hpp"
 #include "aidge/operator/Mul.hpp"
 #include "aidge/operator/Round.hpp"
+#include "aidge/operator/Cast.hpp"
 
 #include "aidge/graph/Node.hpp"
 #include "aidge/graph/OpArgs.hpp"
@@ -34,6 +35,15 @@
 
 namespace Aidge 
 {
+static std::shared_ptr<Node> getSubNode(std::shared_ptr<GraphView> graphView, std::string nodeType)
+{
+    std::shared_ptr<Node> mulNode = nullptr;
+    for(std::shared_ptr<Node> node : graphView->getNodes())
+        if (node->type() == nodeType)
+            mulNode = node;
+
+    return mulNode;
+}
 
 std::shared_ptr<Node> Quantizer(double scalingFactor, double clipMin, double clipMax, const std::string& name)
 {
@@ -61,16 +71,51 @@ std::shared_ptr<Node> Quantizer(double scalingFactor, double clipMin, double cli
     return metaopNode; 
 }
 
-static std::shared_ptr<Node> getSubNode(std::shared_ptr<GraphView> graphView, std::string nodeType)
+std::shared_ptr<Node> IntQuantizer(std::shared_ptr<Node> oldQuantizer, DataType targetType, const std::string& name)
 {
-    std::shared_ptr<Node> mulNode = nullptr;
-    for(std::shared_ptr<Node> node : graphView->getNodes())
-        if (node->type() == nodeType)
-            mulNode = node;
+    double scalingFactor = getScalingFactor(oldQuantizer);
 
-    return mulNode;
-}
+    std::shared_ptr<MetaOperator_Op> metaOp = std::static_pointer_cast<MetaOperator_Op> (oldQuantizer->getOperator());
+    std::shared_ptr<Node> oldclipNode = getSubNode(metaOp->getMicroGraph(), "Clip");
+
+    if (!oldclipNode) {
+    Log::warn("Invalid PTQ MetaOperator, no Clip found inside node of type {}", oldQuantizer->type());
+        return nullptr;
+    }
+    std::shared_ptr<Clip_Op> clipOp = std::static_pointer_cast<Clip_Op>(oldclipNode->getOperator());
+
+    std::shared_ptr<Node> castPreNode =  Cast(DataType::Float64,((!name.empty()) ? name + "_PreCast" : ""));
 
+    std::shared_ptr<Node> mulNode =  Mul((!name.empty()) ? name + "_MulIQuant" : "");
+    std::shared_ptr<Node> roundNode = Round((!name.empty()) ? name + "_IRoundQuant" : "");
+    std::shared_ptr<Node> clipNode = Clip((!name.empty()) ? name + "_IClipQuant" : "", clipOp->min(), clipOp->max());
+
+    std::shared_ptr<Node> castPostNode =  Cast(targetType,((!name.empty()) ? name + "_PostCast" : ""));
+
+    // connect the scaling factor producer
+
+    castPreNode->getOperator()->setDataType(DataType::Float64);
+    mulNode->getOperator()->setDataType(DataType::Float64);
+    roundNode->getOperator()->setDataType(DataType::Float64);
+    clipNode->getOperator()->setDataType(DataType::Float64);
+
+    castPostNode->getOperator()->setDataType(targetType);
+
+    std::shared_ptr<Tensor> scalingFactorTensor = std::make_shared<Tensor>(Array1D<double, 1> {scalingFactor});
+    std::shared_ptr<Node> scalingFactorProducer = addProducer<1>(mulNode, 1, {1}, "ScalingFactor"); 
+    scalingFactorProducer->getOperator()->setOutput(0, scalingFactorTensor);
+    
+    // create the metaop graph
+
+    std::shared_ptr<GraphView> graphView = Sequential({castPreNode, mulNode, roundNode, clipNode, castPostNode});
+    std::shared_ptr<GraphView> connectedGraphView = getConnectedGraphView(mulNode); // XXX why not use the graphView ???
+
+    // return the metaop 
+
+    std::shared_ptr<Node> metaopNode = MetaOperator("IntQuantizer", connectedGraphView, {}, name); // XXX alternative prototype
+
+    return metaopNode; 
+}
 
 
 void updateScalingFactor(std::shared_ptr<Node> metaOpNode, double scalingFactor)
-- 
GitLab


From 9289d0c48b0683080b53a9551a7d01f031cbafc4 Mon Sep 17 00:00:00 2001
From: Noam ZERAH <noam.zerah@cea.fr>
Date: Mon, 17 Feb 2025 09:22:53 +0000
Subject: [PATCH 21/26] Changing the way we decide whether to apply a real cast
 to the PTQ or not

---
 src/PTQ/PTQ.cpp | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/src/PTQ/PTQ.cpp b/src/PTQ/PTQ.cpp
index a9ac176..d445624 100644
--- a/src/PTQ/PTQ.cpp
+++ b/src/PTQ/PTQ.cpp
@@ -1157,7 +1157,7 @@ static void printRanges(std::shared_ptr<GraphView> graphView, std::map<std::stri
 }
 
 void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits, std::vector<std::shared_ptr<Tensor>> inputDataSet,
- Clipping clippingMode, DataType targetType,bool noQuant, bool optimizeSigns, bool singleShift, bool useCuda, bool foldGraph ,bool verbose)
+ Clipping clippingMode, DataType targetType,bool noQuant, bool optimizeSigns, bool singleShift, bool useCuda, bool foldGraph, bool verbose)
 {
     Log::notice(" === QUANT PTQ 0.2.21 === ");
 
@@ -1195,7 +1195,7 @@ void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits,
 
     Log::notice(" Quantizing the normalized network ...");
     quantizeNormalizedNetwork(graphView, nbBits, noQuant, optimizeSigns, verbose);
-
+    
     if (singleShift)
     {
         Log::notice( " Inserting the compensation nodes ...");
@@ -1204,8 +1204,7 @@ void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits,
         Log::notice(" Performing the Single-Shift approximation ...");
         performSingleShiftApproximation(graphView, noQuant);
     }
-
-    if(targetType != DataType::Float64) //!!  Adapt the condition 
+    if(targetType != DataType::Float64 && targetType != DataType::Float32 && targetType != DataType::Float16) 
     {
         AIDGE_ASSERT(!noQuant,"Cannot cast operators with the noQuant(Fake Quantization) flag set to true!")
         Log::notice("Starting to cast operators into the desired type ...");
@@ -1213,7 +1212,7 @@ void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits,
     }
     else
     {
-        setupDataType(graphView, inputDataSet, initialDataType);
+        setupDataType(graphView, inputDataSet, targetType);
     }
 
     if(foldGraph)
-- 
GitLab


From f891fc6051aa0d5e5be33084a47405ad7839458d Mon Sep 17 00:00:00 2001
From: Noam ZERAH <noam.zerah@cea.fr>
Date: Mon, 17 Feb 2025 13:44:54 +0000
Subject: [PATCH 22/26] Adding BitShiftQuantizer MetaOperator that clips the
 value after the bitshift, during the inference with int32 (and ssa)

---
 include/aidge/operator/PTQMetaOps.hpp | 14 +++++++++
 src/PTQ/PTQ.cpp                       | 33 +++++---------------
 src/operator/PTQMetaOps.cpp           | 45 +++++++++++++++++++++++++++
 3 files changed, 67 insertions(+), 25 deletions(-)

diff --git a/include/aidge/operator/PTQMetaOps.hpp b/include/aidge/operator/PTQMetaOps.hpp
index 58571e0..a65e4d5 100644
--- a/include/aidge/operator/PTQMetaOps.hpp
+++ b/include/aidge/operator/PTQMetaOps.hpp
@@ -51,6 +51,20 @@ std::shared_ptr<Aidge::Node> Quantizer(double scalingFactor, double clipMin, dou
 /// @return A shared pointer to a new instance of the modified meta-operator node.
 std::shared_ptr<Node> IntQuantizer(std::shared_ptr<Node> oldQuantizer, DataType targetType, const std::string& name);
 
+/// @brief BitShiftQuantizer is an integer replacement for the Quantizer meta-operator, in which the multiplication 
+///        by the scaling factor is replaced by a bit shift (used when the single-shift approximation is enabled).
+/// 
+/// Instead of the quantization steps of the Quantizer (Mul, Clip, Round), this meta-operator applies a BitShift followed 
+/// by a Clip. The shift amount is the base-2 logarithm of the scaling factor of the provided Quantizer, truncated to an 
+/// integer: a left shift when it is positive, a right shift when it is negative. The Clip reuses the clipping range of 
+/// the provided Quantizer. The BitShift, the Clip and the shift amount producer are all set to the target data type.
+///
+/// @param oldQuantizer A shared pointer to the existing Quantizer node that will be adapted.
+/// @param targetType The data type assigned to the operators of the created meta-operator.
+/// @param name The name of the meta-operator node created.
+/// @return A shared pointer to the new meta-operator node, or nullptr if the provided Quantizer contains no Clip node.
+std::shared_ptr<Node> BitShiftQuantizer(std::shared_ptr<Node> oldQuantizer, DataType targetType, const std::string& name);
+
 /// @brief Updates the scaling factor of a PTQ meta-operator node, allowing for dynamic adjustment of the scaling parameter.
 /// This function sets a new scaling factor for a specified meta-operator node, modifying the scalar applied in the [Mul] operation.
 /// The meta-operator node must be a PTQ-specific operator, such as a Quantizer or Scaling node.
diff --git a/src/PTQ/PTQ.cpp b/src/PTQ/PTQ.cpp
index d445624..f9a34ad 100644
--- a/src/PTQ/PTQ.cpp
+++ b/src/PTQ/PTQ.cpp
@@ -138,34 +138,16 @@ bool castQuantizedGraph(std::shared_ptr<GraphView> &graphView, Aidge::DataType t
         {
             if(singleShift)
             {
-                //If single shift is enabled we must replace each Quantizer by a bitShift
-                double scalingFactor = getScalingFactor(node);
-                int shift = std::log2(scalingFactor);
-                BitShift_Op::BitShiftDirection direction = BitShift_Op::BitShiftDirection::left;
-                if(shift < 0 )
-                {
-                    direction = BitShift_Op::BitShiftDirection::right;
-                    shift = -shift;
-                }
-                std::shared_ptr<Node> bitshiftNode = BitShift(direction,node->name()+"_BitShift_Quantizer");
-                std::shared_ptr<Tensor> bitshiftTensor = std::make_shared<Tensor>(Array1D<int, 1> {shift});
-                std::shared_ptr<Node> bitshiftProducer = addProducer(bitshiftNode, 1, {1}, "ScalingFactor"); 
-                bitshiftProducer->getOperator()->setOutput(0, bitshiftTensor);
-                bitshiftProducer->getOperator()->setDataType(targetType); 
-
-                bitshiftNode->getOperator()->setDataType(targetType); 
-                bitshiftNode->getOperator()->setBackend(node->getOperator()->backend());
-                graphView->add(bitshiftProducer);
-                graphView->add(bitshiftNode);
-                graphView->replace({node}, {bitshiftProducer,bitshiftNode});
-                graphView->updateInputsOutputs();
+                std::shared_ptr<Node> newBitShiftQuantizer = BitShiftQuantizer(node,targetType,node->name()+"_BitShift_Quantizer");
+                newBitShiftQuantizer->getOperator()->setBackend(node->getOperator()->backend());
+                graphView->replace({node},{newBitShiftQuantizer});
 
             }
             else //If single shift is not enabled we keep using the alternative Int Quantizer (which cast the data before and after the regular Quantizer Operations) 
             {
-                std::shared_ptr<Node> newQuantizer = IntQuantizer(node,targetType,node->name());
-                newQuantizer->getOperator()->setBackend(node->getOperator()->backend());
-                graphView->replace({node},{newQuantizer});
+                std::shared_ptr<Node> newIntQuantizer = IntQuantizer(node,targetType,node->name());
+                newIntQuantizer->getOperator()->setBackend(node->getOperator()->backend());
+                graphView->replace({node},{newIntQuantizer});
             }
         }
         else if (node->type() != "Producer" &&
@@ -1206,9 +1188,10 @@ void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits,
     }
     if(targetType != DataType::Float64 && targetType != DataType::Float32 && targetType != DataType::Float16) 
     {
-        AIDGE_ASSERT(!noQuant,"Cannot cast operators with the noQuant(Fake Quantization) flag set to true!")
+        AIDGE_ASSERT(!noQuant,"Cannot cast operators with the noQuant (Fake Quantization) flag set to true!")
         Log::notice("Starting to cast operators into the desired type ...");
         castQuantizedGraph(graphView,DataType::Int32,singleShift);
+        // Method to set all InputTensor as nullptr
     }
     else
     {
diff --git a/src/operator/PTQMetaOps.cpp b/src/operator/PTQMetaOps.cpp
index f3ddbcf..6b44366 100644
--- a/src/operator/PTQMetaOps.cpp
+++ b/src/operator/PTQMetaOps.cpp
@@ -20,6 +20,7 @@
 #include "aidge/operator/Mul.hpp"
 #include "aidge/operator/Round.hpp"
 #include "aidge/operator/Cast.hpp"
+#include "aidge/operator/BitShift.hpp"
 
 #include "aidge/graph/Node.hpp"
 #include "aidge/graph/OpArgs.hpp"
@@ -70,7 +71,51 @@ std::shared_ptr<Node> Quantizer(double scalingFactor, double clipMin, double cli
 
     return metaopNode; 
 }
+std::shared_ptr<Node> BitShiftQuantizer(std::shared_ptr<Node> oldQuantizer, DataType targetType, const std::string& name)
+{
+    double scalingFactor = getScalingFactor(oldQuantizer);
+
+    std::shared_ptr<MetaOperator_Op> metaOp = std::static_pointer_cast<MetaOperator_Op> (oldQuantizer->getOperator());
+    std::shared_ptr<Node> oldclipNode = getSubNode(metaOp->getMicroGraph(), "Clip");
+
+    if (!oldclipNode) {
+    Log::warn("Invalid PTQ MetaOperator, no Clip found inside node of type {}", oldQuantizer->type());
+        return nullptr;
+    }
+
+    std::shared_ptr<Clip_Op> clipOp = std::static_pointer_cast<Clip_Op>(oldclipNode->getOperator());
+    int shift = std::log2(scalingFactor);
+    BitShift_Op::BitShiftDirection direction = BitShift_Op::BitShiftDirection::left;
+
+    if(shift < 0 )
+    {
+        direction = BitShift_Op::BitShiftDirection::right;
+        shift = -shift;
+    }
 
+    std::shared_ptr<Node> bitShiftNode = BitShift(direction,(!name.empty()) ? name + "_MulIQuant" : "");
+    std::shared_ptr<Node> clipNode = Clip((!name.empty()) ? name + "_IClipQuant" : "", clipOp->min(), clipOp->max());
+
+    std::shared_ptr<Tensor> bitshiftTensor = std::make_shared<Tensor>(Array1D<int, 1> {shift});
+    std::shared_ptr<Node> bitshiftProducer = addProducer(bitShiftNode, 1, {1}, "ScalingFactor"); 
+    bitshiftProducer->getOperator()->setOutput(0, bitshiftTensor);
+    bitshiftProducer->getOperator()->setDataType(targetType); 
+
+    // connect the scaling factor producer
+
+    bitShiftNode->getOperator()->setDataType(targetType);
+    clipNode->getOperator()->setDataType(targetType);
+    
+    // create the metaop graph
+
+    std::shared_ptr<GraphView> graphView = Sequential({bitShiftNode,clipNode});
+    std::shared_ptr<GraphView> connectedGraphView = getConnectedGraphView(bitShiftNode); // XXX why not use the graphView ???
+
+    // return the metaop 
+    std::shared_ptr<Node> metaopNode = MetaOperator("BitShiftQuantizer", connectedGraphView, {}, name); // XXX alternative prototype
+
+    return metaopNode; 
+}
 std::shared_ptr<Node> IntQuantizer(std::shared_ptr<Node> oldQuantizer, DataType targetType, const std::string& name)
 {
     double scalingFactor = getScalingFactor(oldQuantizer);
-- 
GitLab


From 35e30401735eea7109c45c1636ea2ee735c6d4c5 Mon Sep 17 00:00:00 2001
From: Noam ZERAH <noam.zerah@cea.fr>
Date: Mon, 17 Feb 2025 15:53:42 +0000
Subject: [PATCH 23/26] updating gitignore

---
 .gitignore | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/.gitignore b/.gitignore
index ba5c593..c64cbb5 100644
--- a/.gitignore
+++ b/.gitignore
@@ -5,8 +5,10 @@
 build*/
 install*/
 include/aidge/backend/quantization_version.h
+include/aidge/quantization_version.h
 
-# VSCode
+
+# VSCode
 .vscode
 
 # Python
-- 
GitLab


From b627ea1fc1e85155a18f077c09d42207f22c4bd2 Mon Sep 17 00:00:00 2001
From: Noam ZERAH <noam.zerah@cea.fr>
Date: Thu, 20 Feb 2025 10:54:31 +0000
Subject: [PATCH 24/26] useCuda no longer sets the graphView backend to cuda at the end

---
 src/PTQ/PTQ.cpp | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/src/PTQ/PTQ.cpp b/src/PTQ/PTQ.cpp
index f9a34ad..1c279e5 100644
--- a/src/PTQ/PTQ.cpp
+++ b/src/PTQ/PTQ.cpp
@@ -1191,7 +1191,6 @@ void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits,
         AIDGE_ASSERT(!noQuant,"Cannot cast operators with the noQuant (Fake Quantization) flag set to true!")
         Log::notice("Starting to cast operators into the desired type ...");
         castQuantizedGraph(graphView,DataType::Int32,singleShift);
-        // Method to set all InputTensor as nullptr
     }
     else
     {
@@ -1206,12 +1205,18 @@ void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits,
     //Mandatory to handle all of the newly added connections!
     graphView->updateInputsOutputs();
 
+    //reset input nodes
+    /*for(Aidge::NodePtr input_node : graphView->inputNodes())
+    {
+        std::static_pointer_cast<OperatorTensor>(input_node->getOperator())->resetInput()
+    }*/
+
     if (verbose)
         printScalingFactors(graphView);
 
 
     if (useCuda)
-        graphView->setBackend("cuda");
+        { /* graphView->setBackend("cuda"); */ } // backend switch disabled on purpose; the explicit empty body stops this 'if' from guarding the next statement
 
     Log::notice(" Reseting the scheduler ...");
     SequentialScheduler scheduler(graphView);
-- 
GitLab


From 280506d04656bdad56445813d9f9f02c295b8a57 Mon Sep 17 00:00:00 2001
From: Noam ZERAH <noam.zerah@cea.fr>
Date: Thu, 20 Feb 2025 13:58:59 +0000
Subject: [PATCH 25/26] Adding quantization tag to producers of BitShift and
 Compensation nodes in the graph to ease their use in the export

---
 src/PTQ/PTQ.cpp             | 1 +
 src/operator/PTQMetaOps.cpp | 4 +++-
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/PTQ/PTQ.cpp b/src/PTQ/PTQ.cpp
index 1c279e5..c2bc0e2 100644
--- a/src/PTQ/PTQ.cpp
+++ b/src/PTQ/PTQ.cpp
@@ -1052,6 +1052,7 @@ static void insertCompensationNodes(std::shared_ptr<GraphView> graphView, std::u
             coeffProducer->getOperator()->setOutput(0, coeffTensor);
 
             coeffProducer->getOperator()->setDataType(DataType::Float64);
+            coeffProducer->attributes()->addAttr("quantization.ptq.CompensationCoeff",signedMax);
             coeffProducer->getOperator()->setBackend("cpu"); 
 
             graphView->add(coeffProducer); // needed ?
diff --git a/src/operator/PTQMetaOps.cpp b/src/operator/PTQMetaOps.cpp
index 6b44366..fb73664 100644
--- a/src/operator/PTQMetaOps.cpp
+++ b/src/operator/PTQMetaOps.cpp
@@ -97,8 +97,10 @@ std::shared_ptr<Node> BitShiftQuantizer(std::shared_ptr<Node> oldQuantizer, Data
     std::shared_ptr<Node> clipNode = Clip((!name.empty()) ? name + "_IClipQuant" : "", clipOp->min(), clipOp->max());
 
     std::shared_ptr<Tensor> bitshiftTensor = std::make_shared<Tensor>(Array1D<int, 1> {shift});
-    std::shared_ptr<Node> bitshiftProducer = addProducer(bitShiftNode, 1, {1}, "ScalingFactor"); 
+    std::shared_ptr<Node> bitshiftProducer = addProducer(bitShiftNode, 1, {1}, "ScalingFactor");
+     
     bitshiftProducer->getOperator()->setOutput(0, bitshiftTensor);
+    bitshiftProducer->attributes()->addAttr("quantization.ptq.ShiftAmount",shift);
     bitshiftProducer->getOperator()->setDataType(targetType); 
 
     // connect the scaling factor producer
-- 
GitLab


From 5a53de588bbadfa839ddf747384431d151c66308 Mon Sep 17 00:00:00 2001
From: Noam ZERAH <noam.zerah@cea.fr>
Date: Mon, 24 Feb 2025 13:34:57 +0000
Subject: [PATCH 26/26] Adding the script ptq_ts.py to prevent regression in
 the PTQ pipeline

---
 scripts/PTQ/ptq_ts.py | 135 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 135 insertions(+)
 create mode 100644 scripts/PTQ/ptq_ts.py

diff --git a/scripts/PTQ/ptq_ts.py b/scripts/PTQ/ptq_ts.py
new file mode 100644
index 0000000..b836a7b
--- /dev/null
+++ b/scripts/PTQ/ptq_ts.py
@@ -0,0 +1,135 @@
+import unittest
+import re
+import numpy as np
+import gzip
+import aidge_core
+import aidge_onnx
+import os
+import copy
+import aidge_backend_cpu
+import aidge_quantization
+import sys
+import concurrent.futures
+
+aidge_core.Log.set_console_level(aidge_core.Level.Error)
+
+SIGMA = 0.05  # Tolerance on accuracy, in percentage points
+
+def print_in_color(text, color_code):
+    print(f"\033[{color_code}m{text}\033[0m")
+
+def run_model_test(model_name, expected_values, use_multithreading, asset_path, model_path):
+    NB_SAMPLES = 1000
+    NB_BITS = 4
+    CLIPPING = aidge_quantization.Clipping.MSE
+    VERBOSE = False
+
+    results = []
+
+    samples = np.load(gzip.GzipFile(asset_path + '/mnist_samples.npy.gz', "r"))
+    labels = np.load(gzip.GzipFile(asset_path + '/mnist_labels.npy.gz', "r"))
+
+    def load_model():
+        model = aidge_onnx.load_onnx(model_path + '/' + model_name + ".onnx", verbose=False)
+        aidge_core.remove_flatten(model)
+        model.set_datatype(aidge_core.dtype.float32)
+        model.set_backend("cpu")
+        return model
+
+    aidge_model = load_model()
+    scheduler = aidge_core.SequentialScheduler(aidge_model)
+
+    def propagate(model, scheduler, sample):
+        sample = np.reshape(sample, (1, 1, 28, 28))
+        input_tensor = aidge_core.Tensor(sample)
+        scheduler.forward(True, [input_tensor])
+        output_node = model.get_output_nodes().pop()
+        output_tensor = output_node.get_operator().get_output(0)
+        return np.array(output_tensor)
+
+    def compute_accuracy(model, samples, labels):
+        acc = sum(labels[i] == np.argmax(propagate(model, scheduler, x)) for i, x in enumerate(samples))
+        return acc / len(samples)
+
+    base_accuracy = compute_accuracy(aidge_model, samples[:NB_SAMPLES], labels)
+    if abs(base_accuracy * 100 - expected_values[0]) >= SIGMA:
+        results.append(f"âŒ [ERROR] Baseline accuracy mismatch for {model_name}: Expected {expected_values[0]}, got {base_accuracy * 100:.2f}")
+    else:
+        results.append(f"âœ… Baseline accuracy for {model_name}: Expected {expected_values[0]}, got {base_accuracy * 100:.2f}")
+
+    quant_model = load_model()
+    tensors = [aidge_core.Tensor(np.reshape(sample, (1, 1, 28, 28))) for sample in samples[:NB_SAMPLES]]
+    aidge_quantization.quantize_network(quant_model, NB_BITS, tensors, CLIPPING, aidge_core.dtype.float64, False, True, False, VERBOSE)
+    scheduler = aidge_core.SequentialScheduler(quant_model)
+
+    scaling = 2**(NB_BITS - 1) - 1
+    samples = samples * scaling
+
+    quant_accuracy = compute_accuracy(quant_model, samples[:NB_SAMPLES], labels)
+    if abs(quant_accuracy * 100 - expected_values[1]) >= SIGMA:
+        results.append(f"âŒ [ERROR] Quantized accuracy mismatch for {model_name}: Expected {expected_values[1]}, got {quant_accuracy * 100:.2f}")
+    else:
+        results.append(f"âœ… Quantized accuracy for {model_name}: Expected {expected_values[1]}, got {quant_accuracy * 100:.2f}")
+
+    # Quantization with the single-shift approximation
+    quant_model_ss = load_model()
+    aidge_quantization.quantize_network(quant_model_ss, NB_BITS, tensors, CLIPPING, aidge_core.dtype.float64, False, True, True, VERBOSE)
+    scheduler = aidge_core.SequentialScheduler(quant_model_ss)
+    quant_accuracy_ss = compute_accuracy(quant_model_ss, samples[:NB_SAMPLES], labels)
+
+    if abs(quant_accuracy_ss * 100 - expected_values[2]) >= SIGMA:
+        results.append(f"âŒ [ERROR] Quantized Single Shift Approximation accuracy mismatch for {model_name}: Expected {expected_values[2]}, got {quant_accuracy_ss * 100:.2f}")
+    else:
+        results.append(f"âœ… Quantized Single Shift Approximation accuracy for {model_name}: Expected {expected_values[2]}, got {quant_accuracy_ss * 100:.2f}")
+
+    return model_name, results
+
+def run_quantization_test(use_multithreading,model_path,asset_path):
+    EXPECTED_RESULTS = {
+        "MiniResNet": (95.4, 94.5, 94.7),
+        "ConvNet": (97.9, 97.7, 97.4),
+        "BranchNetV4": (93.8, 93.2, 93.7),
+        "TestNet": (95.5, 94.2, 94.2),
+        "MLP": (94.7, 94.2, 93.3)
+    }
+
+    all_results = []
+
+    if use_multithreading:
+        with concurrent.futures.ProcessPoolExecutor() as executor:
+            futures = {executor.submit(run_model_test, model, values, use_multithreading,asset_path,model_path): model for model, values in EXPECTED_RESULTS.items()}
+
+            for future in concurrent.futures.as_completed(futures):
+                model_name = futures[future]
+                try:
+                    model_name, results = future.result()
+                    all_results.append((model_name, results))
+                except Exception as exc:
+                    all_results.append((model_name, [f"âŒ [ERROR] {model_name} test failed with exception: {exc}"]))
+    else:
+        for model, values in EXPECTED_RESULTS.items():
+            try:
+                model_name, results = run_model_test(model, values, use_multithreading,asset_path,model_path)
+                all_results.append((model_name, results))
+            except Exception as exc:
+                all_results.append((model, [f"âŒ [ERROR] {model} test failed with exception: {exc}"]))
+
+    os.system("clear")
+    for model_name, results in all_results:
+        print(f"Results for {model_name}:")
+        for result in results:
+            if "âŒ [ERROR]" in result:
+                print_in_color(result, 31)
+            else:
+                print_in_color(result, 32)
+        print()
+
+if __name__ == "__main__":
+    import argparse  # CLI entry point: parse the options, then run every model test.
+    parser = argparse.ArgumentParser(description="Run quantization tests.")
+    parser.add_argument("-j", action="store_true", help="Run the model tests in parallel (process pool)")
+    parser.add_argument("--models_path", type=str, default="/data1/is156025/nz280189/sbx/Models", help="Path to models directory (default: %(default)s)")
+    parser.add_argument("--asset_path", type=str, default="/data1/is156025/nz280189/sbx/assets", help="Path to assets directory (default: %(default)s)")
+    args = parser.parse_args()
+    # "-j" is forwarded as use_multithreading, which selects the ProcessPoolExecutor branch of run_quantization_test.
+    run_quantization_test(use_multithreading=args.j,model_path = args.models_path, asset_path = args.asset_path)
-- 
GitLab