From 616bc9b95666ba1d4303c75813436c2eaf8575c4 Mon Sep 17 00:00:00 2001
From: thibault allenet <thibault.allenet@cea.fr>
Date: Mon, 9 Dec 2024 16:40:56 +0000
Subject: [PATCH 01/60] Fix Optimized Signs

---
 src/PTQ/PTQ.cpp | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/src/PTQ/PTQ.cpp b/src/PTQ/PTQ.cpp
index 76fe8f2..4f88aed 100644
--- a/src/PTQ/PTQ.cpp
+++ b/src/PTQ/PTQ.cpp
@@ -699,8 +699,12 @@ std::map<std::string, std::pair<bool, bool>> computeSignMap(std::shared_ptr<Grap
         {
             // These nodes always have a single parent
             std::shared_ptr<Node> parent = node->getParent(0); 
-            signMap[node->name()].first = signMap[parent->name()].second;
-            signMap[node->name()].second = signMap[node->name()].first;
+            if (parent)
+            {
+                signMap[node->name()].first = signMap[parent->name()].second;
+                signMap[node->name()].second = signMap[node->name()].first;
+            }
+            
         }
     }
 
-- 
GitLab


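A note on the fix above: computeSignMap dereferenced parent->name()
unconditionally, which is undefined behaviour for nodes placed at the
graph input, since those have no parent at index 0. A minimal,
self-contained sketch of the guarded propagation rule; the Node struct
and the propagateSign helper below are simplified stand-ins for
illustration, not the real Aidge API:

    #include <cstddef>
    #include <map>
    #include <memory>
    #include <string>
    #include <utility>
    #include <vector>

    // Simplified stand-in for Aidge::Node, just enough to show the guard.
    struct Node {
        std::string name_;
        std::vector<std::shared_ptr<Node>> parents_;
        const std::string& name() const { return name_; }
        std::shared_ptr<Node> getParent(std::size_t i) const {
            return (i < parents_.size()) ? parents_[i] : nullptr;
        }
    };

    // Propagate the parent's output sign to a single-parent node.
    // signMap maps a node name to its {input sign, output sign} pair.
    void propagateSign(const std::shared_ptr<Node>& node,
                       std::map<std::string, std::pair<bool, bool>>& signMap)
    {
        std::shared_ptr<Node> parent = node->getParent(0);
        if (parent) { // graph inputs have no parent: keep the default signs
            signMap[node->name()].first  = signMap[parent->name()].second;
            signMap[node->name()].second = signMap[node->name()].first;
        }
    }
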
From e780f3a40c4bb35663282499885e36f0fb65c1e2 Mon Sep 17 00:00:00 2001
From: bhalimi <benjamin.halimi@cea.fr>
Date: Tue, 17 Dec 2024 15:40:19 +0000
Subject: [PATCH 02/60] Fix the PTQ for float64 support and multi-output
 handling

---
 include/aidge/quantization/PTQ/Clipping.hpp   |   8 +-
 include/aidge/quantization/PTQ/PTQ.hpp        |   8 +-
 include/aidge/quantization/PTQ/PTQMetaOps.hpp |  10 +-
 src/PTQ/Clipping.cpp                          |  69 +++---
 src/PTQ/PTQ.cpp                               | 202 +++++++++++-------
 src/PTQ/PTQMetaOps.cpp                        |  37 ++--
 6 files changed, 198 insertions(+), 136 deletions(-)

diff --git a/include/aidge/quantization/PTQ/Clipping.hpp b/include/aidge/quantization/PTQ/Clipping.hpp
index 08a0b0a..d0622f4 100644
--- a/include/aidge/quantization/PTQ/Clipping.hpp
+++ b/include/aidge/quantization/PTQ/Clipping.hpp
@@ -36,7 +36,7 @@ namespace Aidge
      * @param inputDataSet The input dataset, consisting of a vector of input samples.
      * @return A map associating each node name with its corresponding activation histogram.
      */
-    std::map<std::string, std::vector<int>> computeHistograms(std::map<std::string, float> valueRanges, int nbBins, std::shared_ptr<GraphView> graphView, std::vector<std::shared_ptr<Tensor>> inputDataSet, bool useCuda);
+    std::map<std::string, std::vector<int>> computeHistograms(std::map<std::string, double> valueRanges, int nbBins, std::shared_ptr<GraphView> graphView, std::vector<std::shared_ptr<Tensor>> inputDataSet, bool useCuda);
 
     /**
      * @brief Given an input activation histogram, compute the optimal clipping value in the sense of the Lp norm.
@@ -45,7 +45,7 @@ namespace Aidge
      * @param exponent: The exponent of the Lp norm (e.g. 2 for the MSE).
      * @return The optimal clipping value.
      */
-    float computeMEClipping(std::vector<int> histogram, std::uint8_t nbBits, float exponent);
+    double computeMEClipping(std::vector<int> histogram, std::uint8_t nbBits, double exponent);
 
     /**
      * @brief Given an input activation histogram, compute the optimal clipping value in the sense of the KL divergence.
@@ -53,7 +53,7 @@ namespace Aidge
      * @param nbBits: The quantization number of bits.
      * @return The optimal clipping value.
      */
-    float computeKLClipping(std::vector<int> histogram, std::uint8_t nbBits);
+    double computeKLClipping(std::vector<int> histogram, std::uint8_t nbBits);
 
     /**
      * @brief Return a corrected map of the provided activation ranges. 
@@ -67,7 +67,7 @@ namespace Aidge
      * @param verbose Whether to print the clipping values or not.
      * @return The corrected map associating each provided node to its clipped range.
      */
-    std::map<std::string, float> adjustRanges(Clipping clippingMode, std::map<std::string, float> valueRanges, std::uint8_t nbBits, std::shared_ptr<GraphView> graphView, std::vector<std::shared_ptr<Tensor>> inputDataSet, bool useCuda, bool verbose);
+    std::map<std::string, double> adjustRanges(Clipping clippingMode, std::map<std::string, double> valueRanges, std::uint8_t nbBits, std::shared_ptr<GraphView> graphView, std::vector<std::shared_ptr<Tensor>> inputDataSet, bool useCuda, bool verbose);
 
 }
 
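The header above moves the clipping API to double precision end to end.
A hedged usage sketch, calling only the functions declared here; the
wrapper function and the 8-bit width are illustrative assumptions:

    #include <cstdint>
    #include <vector>
    #include "aidge/quantization/PTQ/Clipping.hpp"

    void exampleClipping(const std::vector<int>& histogram)
    {
        const std::uint8_t nbBits = 8;

        // Exponent 2.0 selects the MSE criterion mentioned in the docstring.
        double mseClip = Aidge::computeMEClipping(histogram, nbBits, 2.0);

        // KL-divergence based clipping over the same histogram.
        double klClip = Aidge::computeKLClipping(histogram, nbBits);

        (void) mseClip;
        (void) klClip;
    }
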
diff --git a/include/aidge/quantization/PTQ/PTQ.hpp b/include/aidge/quantization/PTQ/PTQ.hpp
index 52d83d6..d2b8b7f 100644
--- a/include/aidge/quantization/PTQ/PTQ.hpp
+++ b/include/aidge/quantization/PTQ/PTQ.hpp
@@ -31,7 +31,7 @@ namespace Aidge {
     /**
      * @brief Set of the types of the nodes which do not affect the PTQ process
      */
-    static const std::set<std::string> seamlessNodeTypes({"LeakyReLU", "Pad2D", "MaxPooling2D", "AvgPooling2D", "PaddedMaxPooling2D", "PaddedAvgPooling2D", "GlobalAveragePooling", "Reshape", "Transpose", "Gather"});
+    static const std::set<std::string> seamlessNodeTypes({"LeakyReLU", "Pad2D", "MaxPooling2D", "AvgPooling2D", "PaddedMaxPooling2D", "PaddedAvgPooling2D", "GlobalAveragePooling", "Reshape", "Transpose", "Gather", "Resize"});
 
     /**
      * @brief Set of the types of the nodes that merge multiple branches into one
@@ -98,7 +98,7 @@ namespace Aidge {
      * @param scalingNodesOnly Whether to restrict the retrieval of the ranges to scaling nodes only or not.
      * @return A map associating each affine node name with its corresponding output range.
      */
-    std::map<std::string, float> computeRanges(std::shared_ptr<GraphView> graphView, std::vector<std::shared_ptr<Tensor>> inputDataSet, bool scalingNodesOnly, bool useCuda);
+    std::map<std::string, double> computeRanges(std::shared_ptr<GraphView> graphView, std::vector<std::shared_ptr<Tensor>> inputDataSet, bool scalingNodesOnly, bool useCuda);
 
     /**
      * @brief Normalize the activations of each affine node so that they fit in the [-1:1] range.
@@ -106,7 +106,7 @@ namespace Aidge {
      * @param graphView The GraphView containing the affine nodes.
      * @param valueRanges The node output value ranges computed over the calibration dataset. 
      */
-    void normalizeActivations(std::shared_ptr<GraphView> graphView, std::map<std::string, float> valueRanges);
+    void normalizeActivations(std::shared_ptr<GraphView> graphView, std::map<std::string, double> valueRanges);
 
     /**
      * @brief For each node, compute the sign of its input and output values. 
@@ -145,7 +145,7 @@ namespace Aidge {
      * @param graphView The GraphView containing the affine nodes.
      * @return A map associating each affine node name with its corresponding weight range.
      */
-    std::map<std::string, float> getWeightRanges(std::shared_ptr<GraphView> graphView);
+    std::map<std::string, double> getWeightRanges(std::shared_ptr<GraphView> graphView);
 
     /**
      * @brief Clear the affine nodes' biases. Provided for debugging purposes.
diff --git a/include/aidge/quantization/PTQ/PTQMetaOps.hpp b/include/aidge/quantization/PTQ/PTQMetaOps.hpp
index c4f2ac7..29bb7f2 100644
--- a/include/aidge/quantization/PTQ/PTQMetaOps.hpp
+++ b/include/aidge/quantization/PTQ/PTQMetaOps.hpp
@@ -33,7 +33,7 @@
 /// @param clip_max The maximum value for the clip operation.
 /// @param name The name of the meta-operator node created.
 /// @return A shared pointer to an instance of the meta-operator node.
-std::shared_ptr<Aidge::Node> Quantizer(float scalingFactor, float clip_min,float clip_max,const std::string& name);
+std::shared_ptr<Aidge::Node> Quantizer(double scalingFactor, double clip_min, double clip_max, const std::string& name);
 
 /// @brief The purpose of Scaling is to encapsulate the Mul operator and tag it as a PTQ node rather than a regular Mul operator.
 /// Therefore, this meta-operator consists solely of a [Mul] operation.
@@ -41,7 +41,7 @@ std::shared_ptr<Aidge::Node> Quantizer(float scalingFactor, float clip_min,float
 /// @param scalingFactor The scaling factor to apply to the input (a scalar to multiply the input with).
 /// @param name The name of the meta-operator node created.
 /// @return A shared pointer to an instance of the scaling node.
-std::shared_ptr<Aidge::Node> Scaling(float scalingFactor,const std::string& name = "");
+std::shared_ptr<Aidge::Node> Scaling(double scalingFactor, const std::string& name = "");
 
 /// @brief Updates the scaling factor of a PTQ meta-operator node, allowing for dynamic adjustment of the scaling parameter.
 /// This function sets a new scaling factor for a specified meta-operator node, modifying the scalar applied in the [Mul] operation.
@@ -50,7 +50,7 @@ std::shared_ptr<Aidge::Node> Scaling(float scalingFactor,const std::string& name
 /// @param MetaOpNode A shared pointer to the PTQ meta-operator node whose scaling factor will be updated.
 /// @param newScalingFactor The new scaling factor to apply to the meta-operator node.
 /// @return True if the scaling factor was successfully updated, false if the operation failed (e.g., if MetaOpNode is null or incompatible).
-bool updateScalingFactor(std::shared_ptr<Aidge::Node> MetaOpNode, float newScalingFactor);
+bool updateScalingFactor(std::shared_ptr<Aidge::Node> MetaOpNode, double newScalingFactor);
 
 /// @brief Retrieves the current scaling factor of a PTQ meta-operator node.
 /// This function returns the scaling factor associated with the specified PTQ meta-operator node, 
@@ -58,7 +58,7 @@ bool updateScalingFactor(std::shared_ptr<Aidge::Node> MetaOpNode, float newScali
 ///
 /// @param MetaOpNode A shared pointer to the PTQ meta-operator node whose scaling factor is being queried.
 /// @return The scaling factor currently applied to the meta-operator node, or -1 if the operation fails (e.g., if MetaOpNode is null or incompatible).
-float getScalingFactor(std::shared_ptr<Aidge::Node> MetaOpNode);
+double getScalingFactor(std::shared_ptr<Aidge::Node> MetaOpNode);
 
 /// @brief Sets the clip range for an existing Quantizer node by specifying minimum and maximum clipping values.
 /// This function modifies the clip range of a Quantizer node, allowing adjustment of the range within which values are clipped
@@ -69,6 +69,6 @@ float getScalingFactor(std::shared_ptr<Aidge::Node> MetaOpNode);
 /// @param min The minimum value for the clip range. Values below this will be clipped to this minimum.
 /// @param max The maximum value for the clip range. Values above this will be clipped to this maximum.
 /// @return True if the clip range was successfully set, false if the operation failed (e.g., if QuantizerNode is null).
-bool setClipRange(std::shared_ptr<Aidge::Node> QuantizerNode, float min, float max);
+bool setClipRange(std::shared_ptr<Aidge::Node> QuantizerNode, double min, double max);
 
 #endif /* AIDGE_QUANTIZATION_PTQ_PTQMETAOPS_H_ */
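
The meta-operator API above likewise switches to double precision. A
usage sketch of these declarations; the wrapper function, the node name
and the 8-bit width are illustrative assumptions, and the boolean
return values of the setters are ignored for brevity:

    #include <memory>
    #include "aidge/quantization/PTQ/PTQMetaOps.hpp"

    void exampleQuantizer()
    {
        const double signedMax = (1 << (8 - 1)) - 1; // 127 for 8 bits

        std::shared_ptr<Aidge::Node> quantizer =
            Quantizer(1.0, -(signedMax + 1), signedMax, "conv1_quantizer");

        double factor = getScalingFactor(quantizer);  // query the current factor
        updateScalingFactor(quantizer, factor * 0.5); // adjust it dynamically
        setClipRange(quantizer, -(signedMax + 1), signedMax);
    }
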
diff --git a/src/PTQ/Clipping.cpp b/src/PTQ/Clipping.cpp
index e001408..f8765f3 100644
--- a/src/PTQ/Clipping.cpp
+++ b/src/PTQ/Clipping.cpp
@@ -19,7 +19,7 @@
 namespace Aidge
 {
     
-std::map<std::string, std::vector<int>> computeHistograms(std::map<std::string, float> valueRanges, int nbBins, std::shared_ptr<GraphView> graphView, std::vector<std::shared_ptr<Tensor>> inputDataSet, bool useCuda)
+std::map<std::string, std::vector<int>> computeHistograms(std::map<std::string, double> valueRanges, int nbBins, std::shared_ptr<GraphView> graphView, std::vector<std::shared_ptr<Tensor>> inputDataSet, bool useCuda)
 {
     if (useCuda)
         graphView->setBackend("cuda");
@@ -72,7 +72,7 @@ std::map<std::string, std::vector<int>> computeHistograms(std::map<std::string,
             bool isInsideRanges = (valueRanges.find(node->name()) != valueRanges.end());
             if (isInsideRanges)
             {
-                float valueRange = valueRanges[node->name()];
+                double valueRange = valueRanges[node->name()];
 
                 std::shared_ptr<Operator> nodeOperator = node->getOperator();
                 std::shared_ptr<Tensor> valueTensor = std::static_pointer_cast<Tensor> (nodeOperator->getRawOutput(0));
@@ -80,15 +80,17 @@ std::map<std::string, std::vector<int>> computeHistograms(std::map<std::string,
                 if (useCuda)
                     valueTensor->setBackend("cpu");
 
-                float * castedTensor = static_cast<float *> (valueTensor->getImpl()->rawPtr());
+                double * castedTensor = static_cast<double *> (valueTensor->getImpl()->rawPtr());
 
                 std::vector<int> nodeHistogram = histograms[node->name()];
                 for(std::size_t i = 0; i < valueTensor->size(); i++)
                 {
-                    int bin = std::round(std::abs(castedTensor[i] / valueRange * nbBins));
+                    std::size_t bin = std::round(std::abs(castedTensor[i] / valueRange * nbBins));
+                    bin = std::min(bin, nodeHistogram.size() - 1);
                     nodeHistogram[bin]++;
                 }
-                histograms[node->name()] = nodeHistogram;
+
+                histograms[node->name()] = nodeHistogram;   
 
                 if (useCuda)
                     valueTensor->setBackend("cuda");
@@ -105,52 +107,52 @@ std::map<std::string, std::vector<int>> computeHistograms(std::map<std::string,
     return histograms;
 }
 
-float computeMEClipping(std::vector<int> histogram, std::uint8_t nbBits, float exponent)
+double computeMEClipping(std::vector<int> histogram, std::uint8_t nbBits, double exponent)
 {
     int nbBins = histogram.size();
     int nbIter = 100;
     int signedMax = (1 << (nbBits - 1)) - 1;
 
-    std::vector<float> clippingErrors;
+    std::vector<double> clippingErrors;
     for (int it = 1; it < nbIter; it++)
     {
         // Compute the rounding cost of this particular clipping ...
-        float accumulatedError = 0.0;
-        float clipping = it / static_cast<float> (nbIter);
+        double accumulatedError = 0.0;
+        double clipping = it / static_cast<double> (nbIter);
         for (int bin = 0; bin < nbBins; bin++)
         {
-            float value = (bin + 0.5) / nbBins;
-            float scaling = signedMax / clipping;
-            float rounded = std::round(value * scaling) / scaling;
-            float clipped = std::min(clipping, rounded);
+            double value = (bin + 0.5) / nbBins;
+            double scaling = signedMax / clipping;
+            double rounded = std::round(value * scaling) / scaling;
+            double clipped = std::min(clipping, rounded);
 
-            float approxError = std::abs(clipped - value);
+            double approxError = std::abs(clipped - value);
             accumulatedError += std::pow(approxError, exponent) * histogram[bin];
         }
         clippingErrors.push_back(accumulatedError);
     }
 
-    std::vector<float>::iterator it = std::min_element(clippingErrors.begin(), clippingErrors.end());
-    float bestClipping = static_cast<float> (std::distance(clippingErrors.begin(), it)) / static_cast<float> (nbIter);
+    std::vector<double>::iterator it = std::min_element(clippingErrors.begin(), clippingErrors.end());
+    double bestClipping = static_cast<double> (std::distance(clippingErrors.begin(), it)) / static_cast<double> (nbIter);
 
     return bestClipping;
 }
 
-float computeKLClipping(std::vector<int> refHistogram, std::uint8_t nbBits)
+double computeKLClipping(std::vector<int> refHistogram, std::uint8_t nbBits)
 {
     // KL Clipping
 
     int nbIter = 100;
     int signedMax = (1 << (nbBits - 1)) - 1;
 
-    float refNorm = 0;
+    double refNorm = 0;
     for (int n : refHistogram)
-        refNorm += static_cast<float> (n);
+        refNorm += static_cast<double> (n);
 
-    std::vector<float> clippingErrors;
+    std::vector<double> clippingErrors;
     for (int it = 1; it < nbIter; it++)
     {
-        float clipping = it / static_cast<float> (nbIter);
+        double clipping = it / static_cast<double> (nbIter);
 
         // Create the histogram for this particular clipping ...
 
@@ -160,7 +162,7 @@ float computeKLClipping(std::vector<int> refHistogram, std::uint8_t nbBits)
         
         for (std::size_t refBin = 0; refBin < refHistogram.size(); refBin++)
         {
-            float value = (static_cast<float> (refBin) + 0.5f) / static_cast<float> (refHistogram.size());
+            double value = (static_cast<double> (refBin) + 0.5) / static_cast<double> (refHistogram.size());
             int quantBin = std::floor(value / clipping * signedMax);
             quantBin = std::min(quantBin, signedMax-1);
             quantHistogram[quantBin] += refHistogram[refBin];
@@ -168,10 +170,10 @@ float computeKLClipping(std::vector<int> refHistogram, std::uint8_t nbBits)
         
         // Compute the mass of the histogram
 
-        float quantNorm = 0;
+        double quantNorm = 0;
         for (std::size_t refBin = 0; refBin < refHistogram.size(); refBin++)
         {
-            float value = (static_cast<float> (refBin) + 0.5f) / static_cast<float> (refHistogram.size());
+            double value = (static_cast<double> (refBin) + 0.5) / static_cast<double> (refHistogram.size());
             int quantBin = std::floor(value / clipping * signedMax);
             if (quantBin < static_cast<int> (quantHistogram.size()))
                 quantNorm += quantHistogram[quantBin];
@@ -179,15 +181,15 @@ float computeKLClipping(std::vector<int> refHistogram, std::uint8_t nbBits)
         
         // Compute the KL divergence
         
-        float accumulatedError = 0.0;
+        double accumulatedError = 0.0;
         for (std::size_t refBin = 0; refBin < refHistogram.size(); refBin++)
         {
-            float value = (static_cast<float> (refBin) + 0.5f) / static_cast<float> (refHistogram.size());
+            double value = (static_cast<double> (refBin) + 0.5) / static_cast<double> (refHistogram.size());
             int quantBin = std::floor(value / clipping * signedMax);
 
-            float p = static_cast<float> (refHistogram[refBin]) / refNorm;
-            float q = (quantBin < static_cast<int> (quantHistogram.size())) ?
-                static_cast<float> (quantHistogram[quantBin]) / quantNorm : 0; 
+            double p = static_cast<double> (refHistogram[refBin]) / refNorm;
+            double q = (quantBin < static_cast<int> (quantHistogram.size())) ?
+                static_cast<double> (quantHistogram[quantBin]) / quantNorm : 0; 
 
             if (p != 0 && q != 0)
                 accumulatedError += q * std::log(q / p);
@@ -196,16 +198,16 @@ float computeKLClipping(std::vector<int> refHistogram, std::uint8_t nbBits)
         clippingErrors.push_back(accumulatedError);
     }
 
-    std::vector<float>::iterator it = std::min_element(clippingErrors.begin() + 1, clippingErrors.end());
-    float bestClipping = static_cast<float> (std::distance(clippingErrors.begin(), it)) / static_cast<float> (nbIter);
+    std::vector<double>::iterator it = std::min_element(clippingErrors.begin() + 1, clippingErrors.end());
+    double bestClipping = static_cast<double> (std::distance(clippingErrors.begin(), it)) / static_cast<double> (nbIter);
 
     return bestClipping;
 }
 
 
-std::map<std::string, float> adjustRanges(Clipping clippingMode, std::map<std::string, float> valueRanges, std::uint8_t nbBits, std::shared_ptr<GraphView> graphView, std::vector<std::shared_ptr<Tensor>> inputDataSet, bool useCuda, bool verbose)
+std::map<std::string, double> adjustRanges(Clipping clippingMode, std::map<std::string, double> valueRanges, std::uint8_t nbBits, std::shared_ptr<GraphView> graphView, std::vector<std::shared_ptr<Tensor>> inputDataSet, bool useCuda, bool verbose)
 {
-    float clipping = 1.0f;
+    double clipping = 1.0;
 
     int nbBins = (1 << (nbBits + 4)) ; // XXX Enhance this !!!
 
@@ -213,6 +215,7 @@ std::map<std::string, float> adjustRanges(Clipping clippingMode, std::map<std::s
     {
         if (verbose)
             Log::info(" === CLIPPING VALUES === ");
+
         std::map<std::string, std::vector<int>> histograms = computeHistograms(valueRanges, nbBins, graphView, inputDataSet, useCuda);
 
         for (std::shared_ptr<Node> node : graphView->getNodes())
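
Besides the float-to-double switch, the histogram loop above gains a
clamp: values at (or within half a bin of) the recorded range used to
round to bin == nbBins, one past the last valid index. A minimal sketch
of the corrected binning; the helper name is hypothetical, and |value|
is assumed not to exceed valueRange:

    #include <algorithm>
    #include <cmath>
    #include <cstddef>
    #include <vector>

    void addToHistogram(std::vector<int>& histogram, double value, double valueRange)
    {
        const std::size_t nbBins = histogram.size();
        std::size_t bin = static_cast<std::size_t>(
            std::round(std::abs(value / valueRange * nbBins)));
        bin = std::min(bin, nbBins - 1); // clamp the value == valueRange edge case
        histogram[bin]++;
    }
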
diff --git a/src/PTQ/PTQ.cpp b/src/PTQ/PTQ.cpp
index 76fe8f2..bfc5e3f 100644
--- a/src/PTQ/PTQ.cpp
+++ b/src/PTQ/PTQ.cpp
@@ -66,20 +66,20 @@ bool checkArchitecture(std::shared_ptr<GraphView> graphView)
     return true;
 }
 
-static void fillTensor(std::shared_ptr<Tensor> tensor, float value)
+static void fillTensor(std::shared_ptr<Tensor> tensor, double value)
 {
     // Get the tensor data pointer
-    float * castedTensor = static_cast <float *> (tensor->getImpl()->rawPtr());
+    double * castedTensor = static_cast <double *> (tensor->getImpl()->rawPtr());
 
     // Fill the tensor
     for(std::size_t i = 0; i < tensor->size(); i++)
         castedTensor[i] = value;
 }
 
-static void rescaleTensor(std::shared_ptr<Tensor> tensor, float scaling)
+static void rescaleTensor(std::shared_ptr<Tensor> tensor, double scaling)
 {
     // Get the tensor data pointer
-    float * castedTensor = static_cast <float *> (tensor->getImpl()->rawPtr());
+    double * castedTensor = static_cast <double *> (tensor->getImpl()->rawPtr());
 
     // Rescale the tensor
     for(std::size_t i = 0; i < tensor->size(); i++)
@@ -89,20 +89,20 @@ static void rescaleTensor(std::shared_ptr<Tensor> tensor, float scaling)
 static void roundTensor(std::shared_ptr<Tensor> tensor)
 {
     // Get the tensor data pointer
-    float * castedTensor = static_cast <float *> (tensor->getImpl()->rawPtr());
+    double * castedTensor = static_cast <double *> (tensor->getImpl()->rawPtr());
 
     // Round the tensor
     for(std::size_t i = 0; i < tensor->size(); i++)
         castedTensor[i] = std::nearbyint(castedTensor[i]);//Round
 }
 
-static float getTensorAbsoluteMax(std::shared_ptr <Tensor> tensor)
+static double getTensorAbsoluteMax(std::shared_ptr <Tensor> tensor)
 {
     // Get the tensor data pointer and edit it
-    float * castedTensor = static_cast<float*>(tensor->getImpl()->rawPtr());
+    double * castedTensor = static_cast<double*>(tensor->getImpl()->rawPtr());
 
     // Get the tensor absolute max value
-    float maxValue = 0.0f;
+    double maxValue = 0.0;
     for(std::size_t i = 0; i < tensor->size(); ++i) {
         if(std::fabs(castedTensor[i]) > maxValue) {
             maxValue = std::fabs(castedTensor[i]);
@@ -187,6 +187,7 @@ void prepareNetwork(std::shared_ptr<GraphView> graphView)
 
     bool containsBatchNorm = false;
     std::vector<std::shared_ptr<Node>> nodeVector = retrieveNodeVector(graphView);
+
     for (std::shared_ptr<Node> node : nodeVector)
         if (node->type() == "BatchNorm")
         {
@@ -200,6 +201,12 @@ void prepareNetwork(std::shared_ptr<GraphView> graphView)
     popSoftMax(graphView);
 }
 
+// TODO : enhance this by modifying OperatorImpl in "core" ...
+static DataType getDataType(std::shared_ptr<Node> node)
+{
+    auto op = std::static_pointer_cast<OperatorTensor>(node->getOperator());
+    return op->getOutput(0)->dataType();
+}
 
 // XXX HERE : Branches containing only Seamless nodes should be considered as residual too !!!
 void insertResidualNodes(std::shared_ptr<GraphView> graphView)
@@ -217,6 +224,7 @@ void insertResidualNodes(std::shared_ptr<GraphView> graphView)
             {
                 std::shared_ptr<Node> parentNode = node->getParent(i);
                 bool parentIsForking = (parentNode->getChildren().size() > 1);
+
                 if (parentIsForking)
                 {
                     // temporary verbose ...
@@ -224,8 +232,9 @@ void insertResidualNodes(std::shared_ptr<GraphView> graphView)
                     Log::info(" ### inserting multiplicative node ...");
 
                     std::string residualNodeName = makeUniqueName(parentNode->name() + "_Res", graphView);
-                    std::shared_ptr<Node> residualNode = Scaling(1.0,residualNodeName);
-                    residualNode->getOperator()->setDataType(DataType::Float32);
+                    std::shared_ptr<Node> residualNode = Scaling(1.0, residualNodeName);
+
+                    residualNode->getOperator()->setDataType(DataType::Float64); //getDataType(parentNode)
                     residualNode->getOperator()->setBackend("cpu");
 
                     graphView->insertParent(node, residualNode, i, 0, 0);
@@ -255,7 +264,8 @@ void insertScalingNodes(std::shared_ptr<GraphView> graphView)
         {
             std::string scalingNodeName = makeUniqueName(parentNode->name() + "_Scaling", graphView);
             std::shared_ptr<Node> scalingNode = Scaling(1.0, scalingNodeName);
-            scalingNode->getOperator()->setDataType(DataType::Float32);
+
+            scalingNode->getOperator()->setDataType(DataType::Float64); // getDataType(parentNode)
             scalingNode->getOperator()->setBackend("cpu");
 
             if (parentNode->getChildren().size() > 0)
@@ -283,7 +293,8 @@ void insertScalingNodes(std::shared_ptr<GraphView> graphView)
             else
             {
                 // Log::info(" last node reached ! ");
-                graphView->addChild(scalingNode);
+                parentNode->addChild(scalingNode, 0, 0);
+                graphView->add(scalingNode);
             }
         }
     }
@@ -322,7 +333,7 @@ void normalizeParameters(std::shared_ptr<GraphView> graphView)
 
     std::vector<std::shared_ptr<Node>> nodeVector = retrieveNodeVector(graphView);
 
-    std::map<std::string, float> accumulatedRatios;
+    std::map<std::string, double> accumulatedRatios;
     for (std::shared_ptr<Node> node : nodeVector)
     {
         accumulatedRatios.insert(std::make_pair(node->name(), 1.0));
@@ -349,8 +360,8 @@ void normalizeParameters(std::shared_ptr<GraphView> graphView)
         {
             // Rescale the weight tensor
             std::shared_ptr<Tensor> weightTensor = getWeightTensor(node);
-            float scaling = getTensorAbsoluteMax(weightTensor);
-            float ratio = 1.0 / scaling;
+            double scaling = getTensorAbsoluteMax(weightTensor);
+            double ratio = 1.0 / scaling;
             rescaleTensor(weightTensor, ratio);
 
             // Accumulate the ratio
@@ -378,10 +389,10 @@ void normalizeParameters(std::shared_ptr<GraphView> graphView)
             std::vector<std::shared_ptr<Node>> mergingNodes = node->getParents();
 
             // Compute the max ratio ...
-            float maxRatio = 0;
+            double maxRatio = 0;
             for (std::shared_ptr<Node> mergingNode : mergingNodes)
             {
-                float merginNodeRatio = accumulatedRatios[mergingNode->name()];
+                double merginNodeRatio = accumulatedRatios[mergingNode->name()];
                 if (merginNodeRatio > maxRatio)
                     maxRatio = merginNodeRatio;
             }
@@ -391,12 +402,12 @@ void normalizeParameters(std::shared_ptr<GraphView> graphView)
             // Rescale the previous scaling Nodes
             for (std::shared_ptr<Node> mergingNode : mergingNodes)
             {
-                float mergingNodeRatio = accumulatedRatios[mergingNode->name()];
-                float rescaling = mergingNodeRatio / maxRatio;
+                double mergingNodeRatio = accumulatedRatios[mergingNode->name()];
+                double rescaling = mergingNodeRatio / maxRatio;
 
                 std::shared_ptr<Node> scalingNode = getPreviousScalingNode(mergingNode);
 
-                float scaling_factor = getScalingFactor(scalingNode);
+                double scaling_factor = getScalingFactor(scalingNode);
                 updateScalingFactor(scalingNode,scaling_factor / rescaling);
                 accumulatedRatios[mergingNode->name()] /= rescaling; // optional ...
             }
@@ -405,9 +416,9 @@ void normalizeParameters(std::shared_ptr<GraphView> graphView)
 }
 
 // XXX TODO : take care of the CUDA backend for this too !!!
-std::map<std::string, float> computeRanges(std::shared_ptr<GraphView> graphView, std::shared_ptr<Tensor> inputTensor, bool scalingNodesOnly)
+std::map<std::string, double> computeRanges(std::shared_ptr<GraphView> graphView, std::shared_ptr<Tensor> inputTensor, bool scalingNodesOnly)
 {
-    std::map<std::string, float> valueRanges;
+    std::map<std::string, double> valueRanges;
 
     SequentialScheduler scheduler(graphView);
     scheduler.resetScheduling();
@@ -425,7 +436,7 @@ std::map<std::string, float> computeRanges(std::shared_ptr<GraphView> graphView,
         {
             std::shared_ptr<Operator> nodeOperator = node->getOperator();
             std::shared_ptr<Tensor> valueTensor = std::static_pointer_cast<Tensor> (nodeOperator->getRawOutput(0));
-            float range = getTensorAbsoluteMax(valueTensor);
+            double range = getTensorAbsoluteMax(valueTensor);
 
             // Associate the value to the scaling node ...
             valueRanges.insert(std::make_pair(node->name(), range));
@@ -435,9 +446,9 @@ std::map<std::string, float> computeRanges(std::shared_ptr<GraphView> graphView,
     return valueRanges;
 }
 
-std::map<std::string, float> computeRanges(std::shared_ptr<GraphView> graphView, std::vector<std::shared_ptr<Tensor>> inputDataSet, bool scalingNodesOnly, bool useCuda)
+std::map<std::string, double> computeRanges(std::shared_ptr<GraphView> graphView, std::vector<std::shared_ptr<Tensor>> inputDataSet, bool scalingNodesOnly, bool useCuda)
 {
-    std::map<std::string, float> valueRanges;
+    std::map<std::string, double> valueRanges;
     std::set<std::shared_ptr<Node>> nodeSet = graphView->getNodes();
     
     // std::shared_ptr<Node> inputNode = getFirstNode(graphView);
@@ -467,7 +478,7 @@ std::map<std::string, float> computeRanges(std::shared_ptr<GraphView> graphView,
 
         // Gather the sample ranges ...
 
-        std::map<std::string, float> sampleRanges;
+        std::map<std::string, double> sampleRanges;
         for (std::shared_ptr<Node> node : nodeSet)
         {
             if ((scalingNodesOnly && (node->type() == "Scaling")) || (!scalingNodesOnly && (node->type() != "Producer")))
@@ -478,7 +489,7 @@ std::map<std::string, float> computeRanges(std::shared_ptr<GraphView> graphView,
                 if (useCuda)
                     valueTensor->setBackend("cpu");
 
-                float range = getTensorAbsoluteMax(valueTensor);
+                double range = getTensorAbsoluteMax(valueTensor);
 
                 // Associate the value to the scaling node ...
                 sampleRanges.insert(std::make_pair(node->name(), range));
@@ -510,7 +521,7 @@ std::map<std::string, float> computeRanges(std::shared_ptr<GraphView> graphView,
     return valueRanges;
 }
 
-void normalizeActivations(std::shared_ptr<GraphView> graphView, std::map<std::string, float> valueRanges)
+void normalizeActivations(std::shared_ptr<GraphView> graphView, std::map<std::string, double> valueRanges)
 {
     std::shared_ptr<Node> firstNode = getFirstNode(graphView);
 
@@ -518,7 +529,7 @@ void normalizeActivations(std::shared_ptr<GraphView> graphView, std::map<std::st
 
     std::vector<std::shared_ptr<Node>> nodeVector = retrieveNodeVector(graphView);
 
-    std::map<std::string, float> scalingFactors;
+    std::map<std::string, double> scalingFactors;
 
     for (std::shared_ptr<Node> node : nodeVector)
         scalingFactors.insert(std::make_pair(node->name(), 1.0));
@@ -549,12 +560,12 @@ void normalizeActivations(std::shared_ptr<GraphView> graphView, std::map<std::st
         {
             // retrieve the previous scaling factor ...
             std::shared_ptr<Node> prevNode = node->getParent(0);
-            float prevScalingFactor = scalingFactors[prevNode->name()];
+            double prevScalingFactor = scalingFactors[prevNode->name()];
 
             // ValueRanges must contain all the scaling nodes !!!
-            float scalingFactor = valueRanges[node->name()]; 
+            double scalingFactor = valueRanges[node->name()]; 
 
-            float scaling_factor = getScalingFactor(node);
+            double scaling_factor = getScalingFactor(node);
             updateScalingFactor(node, (scaling_factor) / (scalingFactor / prevScalingFactor));
 
             scalingFactors[node->name()] = scalingFactor;
@@ -579,10 +590,10 @@ void normalizeActivations(std::shared_ptr<GraphView> graphView, std::map<std::st
             std::vector<std::shared_ptr<Node>> mergingNodes = node->getParents();
 
             // Compute the max scaling ...
-            float maxScaling = 0;
+            double maxScaling = 0;
             for (std::size_t i = 0; i < mergingNodes.size(); i++)
             {
-                float merginNodeScaling = scalingFactors[mergingNodes[i]->name()];
+                double merginNodeScaling = scalingFactors[mergingNodes[i]->name()];
                 if (merginNodeScaling > maxScaling) {
                     maxScaling = merginNodeScaling;
                 }
@@ -592,12 +603,12 @@ void normalizeActivations(std::shared_ptr<GraphView> graphView, std::map<std::st
 
             for (std::shared_ptr<Node> mergingNode : mergingNodes)
             {
-                float mergingNodeScaling = scalingFactors[mergingNode->name()];
-                float rescaling = mergingNodeScaling / maxScaling;
+                double mergingNodeScaling = scalingFactors[mergingNode->name()];
+                double rescaling = mergingNodeScaling / maxScaling;
 
                 std::shared_ptr<Node> scalingNode = getPreviousScalingNode(mergingNode);
                 //Log::info(" SCALING NODE : {} {}", scalingNode->type(), scalingNode->name());
-                float scaling_factor = getScalingFactor(scalingNode);
+                double scaling_factor = getScalingFactor(scalingNode);
                 updateScalingFactor(scalingNode, scaling_factor * rescaling);                
             }
         }
@@ -735,8 +746,8 @@ void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_
         AIDGE_THROW_OR_ABORT(std::runtime_error,"Signs optimization can not be applied if network is not fully quantized ...");
     }
 
-    float signedMax = (1 << (nbBits - 1)) - 1;
-    float unsignedMax = (1 << nbBits) - 1;
+    double signedMax = (1 << (nbBits - 1)) - 1;
+    double unsignedMax = (1 << nbBits) - 1;
 
     std::map<std::string, std::pair<bool, bool>> signMap;
 
@@ -771,7 +782,7 @@ void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_
             if (nodeHasBias(node))  
             {
                 bool inputIsUnsigned = signMap[node->name()].first;
-                float rescaling = inputIsUnsigned ? unsignedMax * signedMax : signedMax * signedMax;
+                double rescaling = inputIsUnsigned ? unsignedMax * signedMax : signedMax * signedMax;
                 
 
                 std::shared_ptr<Tensor> biasTensor = getBiasTensor(node);
@@ -783,7 +794,7 @@ void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_
 
             // Compensate the rescaling using the next Scaling node
 
-            float rescaling = 1.0 / signedMax;
+            double rescaling = 1.0 / signedMax;
 
             bool inputIsUnsigned  = signMap[node->name()].first; 
             bool outputIsUnsigned = signMap[node->name()].second;
@@ -792,13 +803,13 @@ void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_
             rescaling *= outputIsUnsigned ? unsignedMax : signedMax;
             
             std::shared_ptr<Node> scalingNode = *(node->getChildren().begin()); // Assert if scalingNode is a Scaling ...
-            float scaling_factor = getScalingFactor(scalingNode);
+            double scaling_factor = getScalingFactor(scalingNode);
             updateScalingFactor(scalingNode, scaling_factor * rescaling);
         }
         
         if (isMerging(node))
         {
-            float rescaling = 1.0;
+            double rescaling = 1.0;
 
             bool inputIsUnsigned  = signMap[node->name()].first;
             bool outputIsUnsigned = signMap[node->name()].second;
@@ -808,9 +819,8 @@ void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_
 
             std::shared_ptr<Node> scalingNode = *(node->getChildren().begin()); // Assert if scalingNode is a Scaling ...
         
-
-            float scaling_factor = getScalingFactor(scalingNode);
-            updateScalingFactor(scalingNode,scaling_factor * rescaling);
+            double scaling_factor = getScalingFactor(scalingNode); // XXX bad naming
+            updateScalingFactor(scalingNode, scaling_factor * rescaling);
         }
         
         // Handle the Scaling Nodes ...
@@ -819,18 +829,19 @@ void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_
         {
             if (!noQuant) 
             {  
-                //[!!] replacement of Scaling Node by Quantizer
-                float currentSF = getScalingFactor(node);
+                // Replacement of Scaling Node by Quantizer
+                double currentSF = getScalingFactor(node); // XXX bad naming !
+
+                std::shared_ptr<Node> quantizerNode = Quantizer(currentSF, -(signedMax + 1), signedMax, node->name());
 
-                std::shared_ptr<Node> quantizerNode = Quantizer(currentSF, - (signedMax + 1), signedMax, node->name());
-                quantizerNode->getOperator()->setDataType(DataType::Float32);
+                quantizerNode->getOperator()->setDataType(DataType::Float64); // getDataType(parentNode)
                 quantizerNode->getOperator()->setBackend("cpu");
-                graphView->replace({node}, {quantizerNode});
 
+                graphView->replace({node}, {quantizerNode});
 
                 if (optimizeSigns)
                 {
-                    float rescaling = 1.0;
+                    double rescaling = 1.0;
 
                     bool inputIsUnsigned  = signMap[node->name()].first;
                     bool outputIsUnsigned = signMap[node->name()].second;
@@ -838,7 +849,7 @@ void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_
                     rescaling /= inputIsUnsigned  ? unsignedMax : signedMax;
                     rescaling *= outputIsUnsigned ? unsignedMax : signedMax;
 
-                    float scalingFactor = getScalingFactor(quantizerNode);
+                    double scalingFactor = getScalingFactor(quantizerNode);
                     updateScalingFactor(quantizerNode,scalingFactor * rescaling);
 
                     if(outputIsUnsigned)
@@ -854,7 +865,7 @@ void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_
 static void insertCompensationNodes(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits)
 {
     // XXX Use the signMap to increase the resolution when possible ...
-    float signedMax = (1 << (nbBits - 1)) - 1;    
+    double signedMax = (1 << (nbBits - 1)) - 1;    
 
     std::vector<std::shared_ptr<Node>> nodeVector = retrieveNodeVector(graphView);
 
@@ -874,7 +885,8 @@ static void insertCompensationNodes(std::shared_ptr<GraphView> graphView, std::u
 
                 std::string mulNodeName = makeUniqueName(node->name() + "_Mul", graphView);
                 std::shared_ptr<Node> mulNode = Mul(mulNodeName);
-                mulNode->getOperator()->setDataType(DataType::Float32);
+
+                mulNode->getOperator()->setDataType(DataType::Float64); // getDataType(parentNode)
                 mulNode->getOperator()->setBackend("cpu");
 
                 graphView->insertParent(node, mulNode, 0, 0, 0);
@@ -882,10 +894,11 @@ static void insertCompensationNodes(std::shared_ptr<GraphView> graphView, std::u
                 // create and insert the producer node
 
                 std::shared_ptr<Tensor> inputTensor = std::static_pointer_cast<Tensor> (mulNode->getOperator()->getRawInput(0));
-
                 std::shared_ptr<Tensor> coeffTensor = std::make_shared<Tensor>();
-                coeffTensor->setDataType(DataType::Float32);
-                coeffTensor->setBackend("cpu");            
+
+                coeffTensor->setDataType(DataType::Float64); // getDataType(parentNode)
+                coeffTensor->setBackend("cpu"); 
+
                 coeffTensor->resize(inputTensor->dims());
                 fillTensor(coeffTensor, 1); 
 
@@ -896,8 +909,8 @@ static void insertCompensationNodes(std::shared_ptr<GraphView> graphView, std::u
                 // rescale the coeffs and edit scaling factor
 
                 fillTensor(coeffTensor, signedMax);
-                float sf  = getScalingFactor(node);
-                updateScalingFactor(node,sf/signedMax);
+                double sf  = getScalingFactor(node); // XXX bad naming !
+                updateScalingFactor(node, sf/signedMax);
 
                 // TODO : double check this !!!
                 //std::cout << getTensorAbsoluteMax(coeffTensor) << std::endl;
@@ -906,9 +919,7 @@ static void insertCompensationNodes(std::shared_ptr<GraphView> graphView, std::u
     }
 }
 
-void 
-
-performSingleShiftApproximation(std::shared_ptr<GraphView> graphView, bool noQuant)
+void performSingleShiftApproximation(std::shared_ptr<GraphView> graphView, bool noQuant)
 {
     std::vector<std::shared_ptr<Node>> nodeVector = retrieveNodeVector(graphView);
 
@@ -919,13 +930,13 @@ performSingleShiftApproximation(std::shared_ptr<GraphView> graphView, bool noQua
         {
             std::shared_ptr<Node> scalingNode = (*node->getChildren().begin());
 
-            float base = getScalingFactor(scalingNode);
+            double base = getScalingFactor(scalingNode);
 
-            float approx = std::pow(2, std::ceil(std::log2(base)));
+            double approx = std::pow(2, std::ceil(std::log2(base)));
 
             updateScalingFactor(scalingNode,approx);
 
-            float ratio = base / approx;
+            double ratio = base / approx;
 
             std::shared_ptr<Tensor> weightTensor = getWeightTensor(node);
             rescaleTensor(weightTensor, ratio);
@@ -949,17 +960,46 @@ static void printScalingFactors(std::shared_ptr<GraphView> graphView)
     for (auto node : retrieveNodeVector(graphView))
         if (node->type() == "Scaling")
         {
-            float factor = getScalingFactor(node);
+            double factor = getScalingFactor(node);
             Log::info(" {:.6f} ({})", factor, node->name());
         }
 }
 
+/*
+std::string deduceBackend(std::shared_ptr<GraphView> graphView)
+{
+    std::string rootNodeBackend = graphView->getRootNode()->backend();
+    for (auto node : graphView->getNodes())
+        if (node->backend() != rootNodeBackend)
+            Log::warn(" Multiple backends detected, setting all nodes to {}", rootNodeBackend);
+}
+*/
+
+static void setupDataType(std::shared_ptr<GraphView> graphView, std::vector<std::shared_ptr<Tensor>> inputDataSet, DataType dataType)
+{
+    graphView->setDataType(dataType);
+
+    for (auto inputNode : graphView->inputNodes()) {
+        auto op = std::static_pointer_cast<OperatorTensor>(inputNode->getOperator());
+        auto inputTensor = op->getInput(0);
+        if (inputTensor)
+            inputTensor->setDataType(dataType);
+    }
+
+    for (auto tensor : inputDataSet)
+        tensor->setDataType(dataType);
+}
+
+
 void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits, std::vector<std::shared_ptr<Tensor>> inputDataSet, Clipping clippingMode, bool noQuant, bool optimizeSigns, bool singleShift, bool useCuda, bool verbose)
 {
     Log::info(" === QUANT PTQ 0.2.21 === ");
 
     graphView->setBackend("cpu");
 
+    DataType initialDataType = (inputDataSet[0])->dataType();
+    setupDataType(graphView, inputDataSet, DataType::Float64);
+
     if (!checkArchitecture(graphView))
         return;
 
@@ -975,8 +1015,22 @@ void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits,
     normalizeParameters(graphView);
 
     Log::info(" Computing the value ranges ...");
-    std::map<std::string, float> valueRanges = computeRanges(graphView, inputDataSet, true, useCuda);
+    std::map<std::string, double> valueRanges = computeRanges(graphView, inputDataSet, true, useCuda);
+
+    // XXX 
+/*
+    SequentialScheduler scheduler(graphView);
+    scheduler.resetScheduling();
+    scheduler.generateScheduling();
+
+    auto scheduling = scheduler.getStaticScheduling();
+    for (auto node : scheduling)
+        if (node->type() == "Scaling")
+            std::cout << node->name() << " range = " << valueRanges[node->name()] << std::endl;
 
+    std::cout << " RETURN " << std::endl;
+    return;
+*/
     Log::info(" Optimizing the clipping values ...");
     valueRanges = adjustRanges(clippingMode, valueRanges, nbBits, graphView, inputDataSet, useCuda, verbose);
 
@@ -992,32 +1046,34 @@ void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits,
         insertCompensationNodes(graphView, nbBits);
 
         Log::info(" Performing the Single-Shift approximation ...");
-        performSingleShiftApproximation(graphView,noQuant);
+        performSingleShiftApproximation(graphView, noQuant);
     }
     
     if (verbose)
         printScalingFactors(graphView);
 
-    Log::info(" Resetting the scheduler ...");
+    Log::info(" Resetting the scheduler ...");
     SequentialScheduler scheduler(graphView);
     scheduler.resetScheduling();
 
+    setupDataType(graphView, inputDataSet, initialDataType);
     if (useCuda)
         graphView->setBackend("cuda");
 
     Log::info(" Network is quantized !");
+
 }
 
-std::map<std::string, float> getWeightRanges(std::shared_ptr<GraphView> graphView)
+std::map<std::string, double> getWeightRanges(std::shared_ptr<GraphView> graphView)
 {
-    std::map<std::string, float> weightRanges;
+    std::map<std::string, double> weightRanges;
 
     for (std::shared_ptr<Node> node : graphView->getNodes())
     {
         if (isAffine(node))
         {
             std::shared_ptr<Tensor> weightTensor = getWeightTensor(node);
-            float range = getTensorAbsoluteMax(weightTensor);
+            double range = getTensorAbsoluteMax(weightTensor);
             weightRanges.insert(std::make_pair(node->name(), range));
         }
     }
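
A pattern that recurs in the quantization pass above is the sign-aware
rescaling: the integer range available to a tensor depends on whether
it is signed, so the scaling factor must compensate for the ratio
between the input and output ranges. A condensed sketch of that rule;
the helper name is hypothetical:

    #include <cstdint>

    double signRescaling(bool inputIsUnsigned, bool outputIsUnsigned, std::uint8_t nbBits)
    {
        const double signedMax   = (1 << (nbBits - 1)) - 1; // e.g. 127 for 8 bits
        const double unsignedMax = (1 << nbBits) - 1;       // e.g. 255 for 8 bits

        double rescaling = 1.0;
        rescaling /= inputIsUnsigned  ? unsignedMax : signedMax;
        rescaling *= outputIsUnsigned ? unsignedMax : signedMax;
        return rescaling;
    }
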
diff --git a/src/PTQ/PTQMetaOps.cpp b/src/PTQ/PTQMetaOps.cpp
index 89590cb..d2423d0 100644
--- a/src/PTQ/PTQMetaOps.cpp
+++ b/src/PTQ/PTQMetaOps.cpp
@@ -28,9 +28,9 @@
 #include "aidge/utils/Types.h"
 #include "aidge/operator/Identity.hpp"
 #include "aidge/data/Tensor.hpp"
-std::shared_ptr<Aidge::Node> Quantizer(float scalingFactor, float clip_min,float clip_max,const std::string& name)
+std::shared_ptr<Aidge::Node> Quantizer(double scalingFactor, double clip_min, double clip_max, const std::string& name)
 {
-    std::shared_ptr<Aidge::Tensor> ScalingFactorTensorAttached = std::make_shared<Aidge::Tensor>(Aidge::Array1D<float, 1>{scalingFactor});
+    std::shared_ptr<Aidge::Tensor> ScalingFactorTensorAttached = std::make_shared<Aidge::Tensor>(Aidge::Array1D<double, 1>{scalingFactor});
     std::shared_ptr<Aidge::Node> mul_node =  Aidge::Mul((!name.empty()) ? name + "_MulQuant" : "");
 
     std::shared_ptr<Aidge::Node> producer_scaling_factor = addProducer(mul_node,1,{1},"ScalingFactor"); 
@@ -48,27 +48,30 @@ std::shared_ptr<Aidge::Node> Quantizer(float scalingFactor, float clip_min,float
     return metaopNode; 
 }
 
-std::shared_ptr<Aidge::Node> Scaling(float scalingFactor,const std::string& name)
+std::shared_ptr<Aidge::Node> Scaling(double scalingFactor, const std::string& name)
 {
-    std::shared_ptr<Aidge::Tensor> ScalingFactorTensorAttached = std::make_shared<Aidge::Tensor>(Aidge::Array1D<float, 1>{scalingFactor});
-    
-    std::shared_ptr<Aidge::Node> mul_node = Aidge::Mul((!name.empty()) ? name + "_Scaling" : "");
-    
-    std::shared_ptr<Aidge::Node> producer_scaling_factor = addProducer(mul_node,1,{1},"ScalingFactor"); 
-    producer_scaling_factor->getOperator()->setOutput(0, ScalingFactorTensorAttached);
-    std::shared_ptr<Aidge::GraphView> graph = Aidge::Sequential({mul_node});
-    std::shared_ptr<Aidge::GraphView> connectedGV = getConnectedGraphView(mul_node);
-    Aidge::NodePtr metaopNode = MetaOperator("Scaling",connectedGV,{},name);
+    std::shared_ptr<Aidge::Tensor> scalingFactorTensor = std::make_shared<Aidge::Tensor>(Aidge::Array1D<double, 1> {scalingFactor});
+
+    std::shared_ptr<Aidge::Node> mulNode = Aidge::Mul((!name.empty()) ? name + "_Scaling" : "");
+
+    std::shared_ptr<Aidge::Node> scalingFactorProducer = addProducer(mulNode, 1, {1}, "ScalingFactor"); 
+    scalingFactorProducer->getOperator()->setOutput(0, scalingFactorTensor);
+
+    std::shared_ptr<Aidge::GraphView> graphView  = Aidge::Sequential({mulNode});
+    std::shared_ptr<Aidge::GraphView> connectedGraphView  = getConnectedGraphView(mulNode);
+
+    Aidge::NodePtr metaopNode = MetaOperator("Scaling", connectedGraphView, {}, name);
+
     return metaopNode;
 }
 
-bool updateScalingFactor(std::shared_ptr<Aidge::Node> MetaOpNode, float newScalingFactor)
+bool updateScalingFactor(std::shared_ptr<Aidge::Node> MetaOpNode, double newScalingFactor)
 {
     if(MetaOpNode->type() != "Scaling" && MetaOpNode->type() != "Quantizer")
     {
         AIDGE_ASSERT("Cannot use updatePTQMetaOpsScalingFactor on Node of type {}", MetaOpNode->type());
     }
-    std::shared_ptr<Aidge::Tensor> newScalingFactorTensorAttached = std::make_shared<Aidge::Tensor>(Aidge::Array1D<float, 1>{newScalingFactor});
+    std::shared_ptr<Aidge::Tensor> newScalingFactorTensorAttached = std::make_shared<Aidge::Tensor>(Aidge::Array1D<double, 1>{newScalingFactor});
     std::shared_ptr<Aidge::MetaOperator_Op> MetaOp = std::static_pointer_cast<Aidge::MetaOperator_Op>(MetaOpNode->getOperator());
     std::set<Aidge::NodePtr> Meta_Op_Node_List = MetaOp->getMicroGraph()->getNodes(); //List of Nodes inside PTQ Metaop Node
     for(std::shared_ptr<Aidge::Node> node : Meta_Op_Node_List)
@@ -82,7 +85,7 @@ bool updateScalingFactor(std::shared_ptr<Aidge::Node> MetaOpNode, float newScali
     AIDGE_ASSERT("Invalid PTQ MetaOperator, no Mul node found inside node of type {}",MetaOpNode->type());
     return false;
 }
-float getScalingFactor(std::shared_ptr<Aidge::Node> MetaOpNode)
+double getScalingFactor(std::shared_ptr<Aidge::Node> MetaOpNode)
 {
     if(MetaOpNode->type() != "Scaling"  && MetaOpNode->type() != "Quantizer")
     {
@@ -97,13 +100,13 @@ float getScalingFactor(std::shared_ptr<Aidge::Node> MetaOpNode)
             {
                     std::shared_ptr<Aidge::Data> MulInput1Data = node->input(1).first->getOperator()->getRawOutput(0);
                     void* RawInputScalingFactor = std::static_pointer_cast<Aidge::Tensor>(MulInput1Data)->getImpl()->rawPtr();
-                    return (*(static_cast<float*>(RawInputScalingFactor)));
+                    return (*(static_cast<double*>(RawInputScalingFactor)));
             }
     }
     AIDGE_ASSERT("Invalid PTQ MetaOperator, no Mul node found inside node of type {}",MetaOpNode->type());
     return -1;
 }
-bool setClipRange(std::shared_ptr<Aidge::Node> QuantizerNode,float min, float max)
+bool setClipRange(std::shared_ptr<Aidge::Node> QuantizerNode, double min, double max)
 {
     if(QuantizerNode->type() != "Quantizer")
     {
-- 
GitLab


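One detail of the patch above worth spelling out:
performSingleShiftApproximation replaces each scaling factor with the
nearest power of two at or above it, so that the multiplication can
later be implemented as a bit shift, and the residual ratio is folded
back into the weights to preserve the overall scale. A minimal sketch
of that decomposition, assuming a positive scaling factor; the struct
and helper names are hypothetical:

    #include <cmath>

    struct SingleShift {
        double approx;      // power-of-two scaling factor
        double weightRatio; // residual folded into the weights, in (0.5, 1]
    };

    SingleShift singleShiftApprox(double scalingFactor)
    {
        const double approx = std::pow(2.0, std::ceil(std::log2(scalingFactor)));
        return { approx, scalingFactor / approx };
    }
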
From 57239cf31424a7c5a8f0a5f5b6db2197d75655f4 Mon Sep 17 00:00:00 2001
From: Olivier BICHLER <olivier.bichler@cea.fr>
Date: Wed, 18 Dec 2024 12:10:45 +0100
Subject: [PATCH 03/60] Switch back to float32 except for Producer

---
 src/PTQ/PTQ.cpp | 70 ++++++++++++++++++-------------------------------
 1 file changed, 25 insertions(+), 45 deletions(-)

diff --git a/src/PTQ/PTQ.cpp b/src/PTQ/PTQ.cpp
index bfc5e3f..2641dde 100644
--- a/src/PTQ/PTQ.cpp
+++ b/src/PTQ/PTQ.cpp
@@ -52,7 +52,7 @@ bool isMerging(std::shared_ptr<Node> node)
 
 bool checkArchitecture(std::shared_ptr<GraphView> graphView)
 {
-    std::set<std::string> otherNodeTypes({"Flatten", "Softmax", "BatchNorm2D", "ReLU", "Producer"});
+    const std::set<std::string> otherNodeTypes({"Flatten", "Softmax", "BatchNorm2D", "ReLU", "Producer"});
 
     for (std::shared_ptr<Node> node : graphView->getNodes())
     {
@@ -76,10 +76,10 @@ static void fillTensor(std::shared_ptr<Tensor> tensor, double value)
         castedTensor[i] = value;
 }
 
-static void rescaleTensor(std::shared_ptr<Tensor> tensor, double scaling)
+static void rescaleTensor(std::shared_ptr<Tensor> tensor, float scaling)
 {
     // Get the tensor data pointer
-    double * castedTensor = static_cast <double *> (tensor->getImpl()->rawPtr());
+    float * castedTensor = static_cast <float *> (tensor->getImpl()->rawPtr());
 
     // Rescale the tensor
     for(std::size_t i = 0; i < tensor->size(); i++)
@@ -89,20 +89,20 @@ static void rescaleTensor(std::shared_ptr<Tensor> tensor, double scaling)
 static void roundTensor(std::shared_ptr<Tensor> tensor)
 {
     // Get the tensor data pointer
-    double * castedTensor = static_cast <double *> (tensor->getImpl()->rawPtr());
+    float * castedTensor = static_cast <float *> (tensor->getImpl()->rawPtr());
 
     // Round the tensor
     for(std::size_t i = 0; i < tensor->size(); i++)
         castedTensor[i] = std::nearbyint(castedTensor[i]);//Round
 }
 
-static double getTensorAbsoluteMax(std::shared_ptr <Tensor> tensor)
+static float getTensorAbsoluteMax(std::shared_ptr <Tensor> tensor)
 {
     // Get the tensor data pointer and edit it
-    double * castedTensor = static_cast<double*>(tensor->getImpl()->rawPtr());
+    float * castedTensor = static_cast<float*>(tensor->getImpl()->rawPtr());
 
     // Get the tensor absolute max value
-    double maxValue = 0.0;
+    float maxValue = 0.0f;
     for(std::size_t i = 0; i < tensor->size(); ++i) {
         if(std::fabs(castedTensor[i]) > maxValue) {
             maxValue = std::fabs(castedTensor[i]);
@@ -186,7 +186,7 @@ void prepareNetwork(std::shared_ptr<GraphView> graphView)
     removeFlatten(graphView);
 
     bool containsBatchNorm = false;
-    std::vector<std::shared_ptr<Node>> nodeVector = retrieveNodeVector(graphView);
+    const std::vector<std::shared_ptr<Node>> nodeVector = retrieveNodeVector(graphView);
 
     for (std::shared_ptr<Node> node : nodeVector)
         if (node->type() == "BatchNorm")
@@ -213,7 +213,7 @@ void insertResidualNodes(std::shared_ptr<GraphView> graphView)
 {
     // TODO: double check this ...
 
-    std::vector<std::shared_ptr<Node>> nodeVector = retrieveNodeVector(graphView);
+    const std::vector<std::shared_ptr<Node>> nodeVector = retrieveNodeVector(graphView);
 
     for (std::shared_ptr<Node> node : nodeVector)
     {
@@ -234,7 +234,7 @@ void insertResidualNodes(std::shared_ptr<GraphView> graphView)
                     std::string residualNodeName = makeUniqueName(parentNode->name() + "_Res", graphView);
                     std::shared_ptr<Node> residualNode = Scaling(1.0, residualNodeName);
 
-                    residualNode->getOperator()->setDataType(DataType::Float64); //getDataType(parentNode)
+                    residualNode->getOperator()->setDataType(DataType::Float32); //getDataType(parentNode)
                     residualNode->getOperator()->setBackend("cpu");
 
                     graphView->insertParent(node, residualNode, i, 0, 0);
@@ -256,16 +256,16 @@ void insertScalingNodes(std::shared_ptr<GraphView> graphView)
 {
     insertResidualNodes(graphView);
 
-    std::set<std::shared_ptr<Node>> nodeSet = graphView->getNodes();
+    const std::set<std::shared_ptr<Node>> nodeSet = graphView->getNodes();
 
     for (std::shared_ptr<Node> parentNode : nodeSet)
     {
         if (isAffine(parentNode) || isMerging(parentNode))
         {
-            std::string scalingNodeName = makeUniqueName(parentNode->name() + "_Scaling", graphView);
+            const std::string scalingNodeName = makeUniqueName(parentNode->name() + "_Scaling", graphView);
             std::shared_ptr<Node> scalingNode = Scaling(1.0, scalingNodeName);
 
-            scalingNode->getOperator()->setDataType(DataType::Float64); // getDataType(parentNode)
+            scalingNode->getOperator()->setDataType(DataType::Float32); // getDataType(parentNode)
             scalingNode->getOperator()->setBackend("cpu");
 
             if (parentNode->getChildren().size() > 0)
@@ -273,7 +273,7 @@ void insertScalingNodes(std::shared_ptr<GraphView> graphView)
                 // SCALING NODE INSERTION
                 
                 // We always have one output from Affine and Add nodes, but possibly multiple children
-                std::vector<std::shared_ptr<Node>> nextNodes = parentNode->getChildren(0); 
+                const std::vector<std::shared_ptr<Node>> nextNodes = parentNode->getChildren(0); 
 
                 // For each node in nextNodes store the connection index
                 std::vector<int> inputIndices(nextNodes.size());
@@ -331,7 +331,7 @@ void normalizeParameters(std::shared_ptr<GraphView> graphView)
 {
     // CREATE THE ACCUMULATED RATIO MAP ///////////////////////////////////////
 
-    std::vector<std::shared_ptr<Node>> nodeVector = retrieveNodeVector(graphView);
+    const std::vector<std::shared_ptr<Node>> nodeVector = retrieveNodeVector(graphView);
 
     std::map<std::string, double> accumulatedRatios;
     for (std::shared_ptr<Node> node : nodeVector)
@@ -429,7 +429,7 @@ std::map<std::string, double> computeRanges(std::shared_ptr<GraphView> graphView
 
     // Gather ranges ...
 
-    std::set<std::shared_ptr<Node>> nodeSet = graphView->getNodes();
+    const std::set<std::shared_ptr<Node>> nodeSet = graphView->getNodes();
     for (std::shared_ptr<Node> node : nodeSet)
     {
         if ((scalingNodesOnly && (node->type() == "Scaling")) || (!scalingNodesOnly && (node->type() != "Producer")))
@@ -449,7 +449,7 @@ std::map<std::string, double> computeRanges(std::shared_ptr<GraphView> graphView
 std::map<std::string, double> computeRanges(std::shared_ptr<GraphView> graphView, std::vector<std::shared_ptr<Tensor>> inputDataSet, bool scalingNodesOnly, bool useCuda)
 {
     std::map<std::string, double> valueRanges;
-    std::set<std::shared_ptr<Node>> nodeSet = graphView->getNodes();
+    const std::set<std::shared_ptr<Node>> nodeSet = graphView->getNodes();
     
     // std::shared_ptr<Node> inputNode = getFirstNode(graphView);
 
@@ -527,7 +527,7 @@ void normalizeActivations(std::shared_ptr<GraphView> graphView, std::map<std::st
 
     // CREATE THE SCALING FACTOR MAP //////////////////////////////////////////
 
-    std::vector<std::shared_ptr<Node>> nodeVector = retrieveNodeVector(graphView);
+    const std::vector<std::shared_ptr<Node>> nodeVector = retrieveNodeVector(graphView);
 
     std::map<std::string, double> scalingFactors;
 
@@ -628,7 +628,7 @@ std::map<std::string, std::pair<bool, bool>> computeSignMap(std::shared_ptr<Grap
 
     // ITERATE OVER THE GRAPH
 
-    std::vector<std::shared_ptr<Node>> nodeVector = retrieveNodeVector(graphView);
+    const std::vector<std::shared_ptr<Node>> nodeVector = retrieveNodeVector(graphView);
 
     for (std::shared_ptr<Node> node : nodeVector)
     {
@@ -672,7 +672,7 @@ std::map<std::string, std::pair<bool, bool>> computeSignMap(std::shared_ptr<Grap
        
         if (isMerging(node))
         {
-            std::vector<std::shared_ptr<Node>> parentNodes = node->getParents(); 
+            const std::vector<std::shared_ptr<Node>> parentNodes = node->getParents(); 
 
             bool allParentAreSigned = true;
             bool allParentAreUnsigned = true;
@@ -763,7 +763,7 @@ void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_
 
     // ITERATE OVER THE GRAPH /////////////////////////////////////////////////
 
-    std::vector<std::shared_ptr<Node>> nodeVector = retrieveNodeVector(graphView);
+    const std::vector<std::shared_ptr<Node>> nodeVector = retrieveNodeVector(graphView);
 
     for (std::shared_ptr<Node> node : nodeVector)
     {
@@ -834,7 +834,7 @@ void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_
 
                 std::shared_ptr<Node> quantizerNode = Quantizer(currentSF, -(signedMax + 1), signedMax, node->name());
 
-                quantizerNode->getOperator()->setDataType(DataType::Float64); // getDataType(parentNode)
+                quantizerNode->getOperator()->setDataType(DataType::Float32); // getDataType(parentNode)
                 quantizerNode->getOperator()->setBackend("cpu");
 
                 graphView->replace({node}, {quantizerNode});
@@ -867,7 +867,7 @@ static void insertCompensationNodes(std::shared_ptr<GraphView> graphView, std::u
     // XXX Use the signMap to increase the resolution when possible ...
     double signedMax = (1 << (nbBits - 1)) - 1;    
 
-    std::vector<std::shared_ptr<Node>> nodeVector = retrieveNodeVector(graphView);
+    const std::vector<std::shared_ptr<Node>> nodeVector = retrieveNodeVector(graphView);
 
     for (std::shared_ptr<Node> node : nodeVector)
     {
@@ -886,7 +886,7 @@ static void insertCompensationNodes(std::shared_ptr<GraphView> graphView, std::u
                 std::string mulNodeName = makeUniqueName(node->name() + "_Mul", graphView);
                 std::shared_ptr<Node> mulNode = Mul(mulNodeName);
 
-                mulNode->getOperator()->setDataType(DataType::Float64); // getDataType(parentNode)
+                mulNode->getOperator()->setDataType(DataType::Float32); // getDataType(parentNode)
                 mulNode->getOperator()->setBackend("cpu");
 
                 graphView->insertParent(node, mulNode, 0, 0, 0);
@@ -921,7 +921,7 @@ static void insertCompensationNodes(std::shared_ptr<GraphView> graphView, std::u
 
 void performSingleShiftApproximation(std::shared_ptr<GraphView> graphView, bool noQuant)
 {
-    std::vector<std::shared_ptr<Node>> nodeVector = retrieveNodeVector(graphView);
+    const std::vector<std::shared_ptr<Node>> nodeVector = retrieveNodeVector(graphView);
 
     for (std::shared_ptr<Node> node : nodeVector)
     {
@@ -975,31 +975,12 @@ std::string deduceBackend(std::shared_ptr<GraphView> graphView)
 }
 */
 
-static void setupDataType(std::shared_ptr<GraphView> graphView, std::vector<std::shared_ptr<Tensor>> inputDataSet, DataType dataType)
-{
-    graphView->setDataType(dataType);
-
-    for (auto inputNode : graphView->inputNodes()) {
-        auto op = std::static_pointer_cast<OperatorTensor>(inputNode->getOperator());
-        auto inputTensor = op->getInput(0);
-        if (inputTensor)
-            inputTensor->setDataType(dataType);
-    }
-
-    for (auto tensor : inputDataSet)
-        tensor->setDataType(dataType);
-}
-
-
 void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits, std::vector<std::shared_ptr<Tensor>> inputDataSet, Clipping clippingMode, bool noQuant, bool optimizeSigns, bool singleShift, bool useCuda, bool verbose)
 {
     Log::info(" === QUANT PTQ 0.2.21 === ");
 
     graphView->setBackend("cpu");
 
-    DataType initialDataType = (inputDataSet[0])->dataType();
-    setupDataType(graphView, inputDataSet, DataType::Float64);
-
     if (!checkArchitecture(graphView))
         return;
 
@@ -1056,7 +1037,6 @@ void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits,
     SequentialScheduler scheduler(graphView);
     scheduler.resetScheduling();
 
-    setupDataType(graphView, inputDataSet, initialDataType);
     if (useCuda)
         graphView->setBackend("cuda");
 
-- 
GitLab
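
The float/double churn in this patch and its revert below comes down to one invariant: the static_cast applied to getImpl()->rawPtr() is only valid when the cast type matches the tensor's actual DataType; otherwise every value read through the pointer is garbage. Below is a defensive sketch of the absolute-max helper that dispatches on the element type before casting; the function name and the dispatch are illustrative additions built only from calls already used in PTQ.cpp (dataType(), size(), getImpl()->rawPtr()), and assume <cmath> and <algorithm> are included:

    // Sketch only: check the element type before casting the raw pointer,
    // so the helper stays correct for both Float32 and Float64 tensors.
    static double getTensorAbsoluteMaxChecked(std::shared_ptr<Tensor> tensor)
    {
        const std::size_t size = tensor->size();
        if (tensor->dataType() == DataType::Float64) {
            const double * data = static_cast<double *>(tensor->getImpl()->rawPtr());
            double maxValue = 0.0;
            for (std::size_t i = 0; i < size; ++i)
                maxValue = std::max(maxValue, std::fabs(data[i]));
            return maxValue;
        }
        // Fall back to Float32, the only other type used by this pass.
        const float * data = static_cast<float *>(tensor->getImpl()->rawPtr());
        float maxValue = 0.0f;
        for (std::size_t i = 0; i < size; ++i)
            maxValue = std::max(maxValue, std::fabs(data[i]));
        return static_cast<double>(maxValue);
    }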


From fcb167c4c23733ddc200f8e4eccfb52e7188cc65 Mon Sep 17 00:00:00 2001
From: Olivier BICHLER <olivier.bichler@cea.fr>
Date: Wed, 18 Dec 2024 14:12:21 +0000
Subject: [PATCH 04/60] Revert "Switch back to float32 except for Producer"

This reverts commit 57239cf31424a7c5a8f0a5f5b6db2197d75655f4
---
 src/PTQ/PTQ.cpp | 70 +++++++++++++++++++++++++++++++------------------
 1 file changed, 45 insertions(+), 25 deletions(-)

diff --git a/src/PTQ/PTQ.cpp b/src/PTQ/PTQ.cpp
index 2641dde..bfc5e3f 100644
--- a/src/PTQ/PTQ.cpp
+++ b/src/PTQ/PTQ.cpp
@@ -52,7 +52,7 @@ bool isMerging(std::shared_ptr<Node> node)
 
 bool checkArchitecture(std::shared_ptr<GraphView> graphView)
 {
-    const std::set<std::string> otherNodeTypes({"Flatten", "Softmax", "BatchNorm2D", "ReLU", "Producer"});
+    std::set<std::string> otherNodeTypes({"Flatten", "Softmax", "BatchNorm2D", "ReLU", "Producer"});
 
     for (std::shared_ptr<Node> node : graphView->getNodes())
     {
@@ -76,10 +76,10 @@ static void fillTensor(std::shared_ptr<Tensor> tensor, double value)
         castedTensor[i] = value;
 }
 
-static void rescaleTensor(std::shared_ptr<Tensor> tensor, float scaling)
+static void rescaleTensor(std::shared_ptr<Tensor> tensor, double scaling)
 {
     // Get the tensor data pointer
-    float * castedTensor = static_cast <float *> (tensor->getImpl()->rawPtr());
+    double * castedTensor = static_cast <double *> (tensor->getImpl()->rawPtr());
 
     // Rescale the tensor
     for(std::size_t i = 0; i < tensor->size(); i++)
@@ -89,20 +89,20 @@ static void rescaleTensor(std::shared_ptr<Tensor> tensor, float scaling)
 static void roundTensor(std::shared_ptr<Tensor> tensor)
 {
     // Get the tensor data pointer
-    float * castedTensor = static_cast <float *> (tensor->getImpl()->rawPtr());
+    double * castedTensor = static_cast <double *> (tensor->getImpl()->rawPtr());
 
     // Rescale the tensor
     for(std::size_t i = 0; i < tensor->size(); i++)
         castedTensor[i] = std::nearbyint(castedTensor[i]);//Round
 }
 
-static float getTensorAbsoluteMax(std::shared_ptr <Tensor> tensor)
+static double getTensorAbsoluteMax(std::shared_ptr <Tensor> tensor)
 {
     // Get the tensor data pointer and edit it
-    float * castedTensor = static_cast<float*>(tensor->getImpl()->rawPtr());
+    double * castedTensor = static_cast<double*>(tensor->getImpl()->rawPtr());
 
     // Get the tensor absolute max value
-    float maxValue = 0.0f;
+    double maxValue = 0.0;
     for(std::size_t i = 0; i < tensor->size(); ++i) {
         if(std::fabs(castedTensor[i]) > maxValue) {
             maxValue = std::fabs(castedTensor[i]);
@@ -186,7 +186,7 @@ void prepareNetwork(std::shared_ptr<GraphView> graphView)
     removeFlatten(graphView);
 
     bool containsBatchNorm = false;
-    const std::vector<std::shared_ptr<Node>> nodeVector = retrieveNodeVector(graphView);
+    std::vector<std::shared_ptr<Node>> nodeVector = retrieveNodeVector(graphView);
 
     for (std::shared_ptr<Node> node : nodeVector)
         if (node->type() == "BatchNorm")
@@ -213,7 +213,7 @@ void insertResidualNodes(std::shared_ptr<GraphView> graphView)
 {
     // TODO: double check this ...
 
-    const std::vector<std::shared_ptr<Node>> nodeVector = retrieveNodeVector(graphView);
+    std::vector<std::shared_ptr<Node>> nodeVector = retrieveNodeVector(graphView);
 
     for (std::shared_ptr<Node> node : nodeVector)
     {
@@ -234,7 +234,7 @@ void insertResidualNodes(std::shared_ptr<GraphView> graphView)
                     std::string residualNodeName = makeUniqueName(parentNode->name() + "_Res", graphView);
                     std::shared_ptr<Node> residualNode = Scaling(1.0, residualNodeName);
 
-                    residualNode->getOperator()->setDataType(DataType::Float32); //getDataType(parentNode)
+                    residualNode->getOperator()->setDataType(DataType::Float64); //getDataType(parentNode)
                     residualNode->getOperator()->setBackend("cpu");
 
                     graphView->insertParent(node, residualNode, i, 0, 0);
@@ -256,16 +256,16 @@ void insertScalingNodes(std::shared_ptr<GraphView> graphView)
 {
     insertResidualNodes(graphView);
 
-    const std::set<std::shared_ptr<Node>> nodeSet = graphView->getNodes();
+    std::set<std::shared_ptr<Node>> nodeSet = graphView->getNodes();
 
     for (std::shared_ptr<Node> parentNode : nodeSet)
     {
         if (isAffine(parentNode) || isMerging(parentNode))
         {
-            const std::string scalingNodeName = makeUniqueName(parentNode->name() + "_Scaling", graphView);
+            std::string scalingNodeName = makeUniqueName(parentNode->name() + "_Scaling", graphView);
             std::shared_ptr<Node> scalingNode = Scaling(1.0, scalingNodeName);
 
-            scalingNode->getOperator()->setDataType(DataType::Float32); // getDataType(parentNode)
+            scalingNode->getOperator()->setDataType(DataType::Float64); // getDataType(parentNode)
             scalingNode->getOperator()->setBackend("cpu");
 
             if (parentNode->getChildren().size() > 0)
@@ -273,7 +273,7 @@ void insertScalingNodes(std::shared_ptr<GraphView> graphView)
                 // SCALING NODE INSERTION
                 
                 // We always have one output from Affine and Add nodes, but possibly multiple childs
-                const std::vector<std::shared_ptr<Node>> nextNodes = parentNode->getChildren(0); 
+                std::vector<std::shared_ptr<Node>> nextNodes = parentNode->getChildren(0); 
 
                 // For each node in nextNodes store the connexion index
                 std::vector<int> inputIndices(nextNodes.size());
@@ -331,7 +331,7 @@ void normalizeParameters(std::shared_ptr<GraphView> graphView)
 {
     // CREATE THE ACCUMULATED RATIO MAP ///////////////////////////////////////
 
-    const std::vector<std::shared_ptr<Node>> nodeVector = retrieveNodeVector(graphView);
+    std::vector<std::shared_ptr<Node>> nodeVector = retrieveNodeVector(graphView);
 
     std::map<std::string, double> accumulatedRatios;
     for (std::shared_ptr<Node> node : nodeVector)
@@ -429,7 +429,7 @@ std::map<std::string, double> computeRanges(std::shared_ptr<GraphView> graphView
 
     // Gather ranges ...
 
-    const std::set<std::shared_ptr<Node>> nodeSet = graphView->getNodes();
+    std::set<std::shared_ptr<Node>> nodeSet = graphView->getNodes();
     for (std::shared_ptr<Node> node : nodeSet)
     {
         if ((scalingNodesOnly && (node->type() == "Scaling")) || (!scalingNodesOnly && (node->type() != "Producer")))
@@ -449,7 +449,7 @@ std::map<std::string, double> computeRanges(std::shared_ptr<GraphView> graphView
 std::map<std::string, double> computeRanges(std::shared_ptr<GraphView> graphView, std::vector<std::shared_ptr<Tensor>> inputDataSet, bool scalingNodesOnly, bool useCuda)
 {
     std::map<std::string, double> valueRanges;
-    const std::set<std::shared_ptr<Node>> nodeSet = graphView->getNodes();
+    std::set<std::shared_ptr<Node>> nodeSet = graphView->getNodes();
     
     // std::shared_ptr<Node> inputNode = getFirstNode(graphView);
 
@@ -527,7 +527,7 @@ void normalizeActivations(std::shared_ptr<GraphView> graphView, std::map<std::st
 
     // CREATE THE SCALING FACTOR MAP //////////////////////////////////////////
 
-    const std::vector<std::shared_ptr<Node>> nodeVector = retrieveNodeVector(graphView);
+    std::vector<std::shared_ptr<Node>> nodeVector = retrieveNodeVector(graphView);
 
     std::map<std::string, double> scalingFactors;
 
@@ -628,7 +628,7 @@ std::map<std::string, std::pair<bool, bool>> computeSignMap(std::shared_ptr<Grap
 
     // ITERATE OVER THE GRAPH
 
-    const std::vector<std::shared_ptr<Node>> nodeVector = retrieveNodeVector(graphView);
+    std::vector<std::shared_ptr<Node>> nodeVector = retrieveNodeVector(graphView);
 
     for (std::shared_ptr<Node> node : nodeVector)
     {
@@ -672,7 +672,7 @@ std::map<std::string, std::pair<bool, bool>> computeSignMap(std::shared_ptr<Grap
        
         if (isMerging(node))
         {
-            const std::vector<std::shared_ptr<Node>> parentNodes = node->getParents(); 
+            std::vector<std::shared_ptr<Node>> parentNodes = node->getParents(); 
 
             bool allParentAreSigned = true;
             bool allParentAreUnsigned = true;
@@ -763,7 +763,7 @@ void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_
 
     // ITERATE OVER THE GRAPH /////////////////////////////////////////////////
 
-    const std::vector<std::shared_ptr<Node>> nodeVector = retrieveNodeVector(graphView);
+    std::vector<std::shared_ptr<Node>> nodeVector = retrieveNodeVector(graphView);
 
     for (std::shared_ptr<Node> node : nodeVector)
     {
@@ -834,7 +834,7 @@ void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_
 
                 std::shared_ptr<Node> quantizerNode = Quantizer(currentSF, -(signedMax + 1), signedMax, node->name());
 
-                quantizerNode->getOperator()->setDataType(DataType::Float32); // getDataType(parentNode)
+                quantizerNode->getOperator()->setDataType(DataType::Float64); // getDataType(parentNode)
                 quantizerNode->getOperator()->setBackend("cpu");
 
                 graphView->replace({node}, {quantizerNode});
@@ -867,7 +867,7 @@ static void insertCompensationNodes(std::shared_ptr<GraphView> graphView, std::u
     // XXX Use the signMap to increase the resolution when possible ...
     double signedMax = (1 << (nbBits - 1)) - 1;    
 
-    const std::vector<std::shared_ptr<Node>> nodeVector = retrieveNodeVector(graphView);
+    std::vector<std::shared_ptr<Node>> nodeVector = retrieveNodeVector(graphView);
 
     for (std::shared_ptr<Node> node : nodeVector)
     {
@@ -886,7 +886,7 @@ static void insertCompensationNodes(std::shared_ptr<GraphView> graphView, std::u
                 std::string mulNodeName = makeUniqueName(node->name() + "_Mul", graphView);
                 std::shared_ptr<Node> mulNode = Mul(mulNodeName);
 
-                mulNode->getOperator()->setDataType(DataType::Float32); // getDataType(parentNode)
+                mulNode->getOperator()->setDataType(DataType::Float64); // getDataType(parentNode)
                 mulNode->getOperator()->setBackend("cpu");
 
                 graphView->insertParent(node, mulNode, 0, 0, 0);
@@ -921,7 +921,7 @@ static void insertCompensationNodes(std::shared_ptr<GraphView> graphView, std::u
 
 void performSingleShiftApproximation(std::shared_ptr<GraphView> graphView, bool noQuant)
 {
-    const std::vector<std::shared_ptr<Node>> nodeVector = retrieveNodeVector(graphView);
+    std::vector<std::shared_ptr<Node>> nodeVector = retrieveNodeVector(graphView);
 
     for (std::shared_ptr<Node> node : nodeVector)
     {
@@ -975,12 +975,31 @@ std::string deduceBackend(std::shared_ptr<GraphView> graphView)
 }
 */
 
+static void setupDataType(std::shared_ptr<GraphView> graphView, std::vector<std::shared_ptr<Tensor>> inputDataSet, DataType dataType)
+{
+    graphView->setDataType(dataType);
+
+    for (auto inputNode : graphView->inputNodes()) {
+        auto op = std::static_pointer_cast<OperatorTensor>(inputNode->getOperator());
+        auto inputTensor = op->getInput(0);
+        if (inputTensor)
+            inputTensor->setDataType(dataType);
+    }
+
+    for (auto tensor : inputDataSet)
+        tensor->setDataType(dataType);
+}
+
+
 void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits, std::vector<std::shared_ptr<Tensor>> inputDataSet, Clipping clippingMode, bool noQuant, bool optimizeSigns, bool singleShift, bool useCuda, bool verbose)
 {
     Log::info(" === QUANT PTQ 0.2.21 === ");
 
     graphView->setBackend("cpu");
 
+    DataType initialDataType = (inputDataSet[0])->dataType();
+    setupDataType(graphView, inputDataSet, DataType::Float64);
+
     if (!checkArchitecture(graphView))
         return;
 
@@ -1037,6 +1056,7 @@ void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits,
     SequentialScheduler scheduler(graphView);
     scheduler.resetScheduling();
 
+    setupDataType(graphView, inputDataSet, initialDataType);
     if (useCuda)
         graphView->setBackend("cuda");
 
-- 
GitLab
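
The pattern this revert restores is the double-precision calibration round-trip: record the incoming data type, force the whole graph and the calibration samples to Float64 while ranges and scaling factors are computed, then hand everything back in the caller's original precision. A condensed sketch of that round-trip, using only the calls visible in this patch (setupDataType, dataType()); calibrate() is a hypothetical placeholder for the PTQ body, not a real function:

    static void runWithFloat64(std::shared_ptr<GraphView> graphView,
                               std::vector<std::shared_ptr<Tensor>> inputDataSet)
    {
        // Remember the user's data type before the pass rewrites it.
        const DataType initialDataType = inputDataSet[0]->dataType();

        // Calibrate in double precision to keep the scaling factors exact.
        setupDataType(graphView, inputDataSet, DataType::Float64);
        calibrate(graphView, inputDataSet); // hypothetical placeholder

        // Restore the precision the caller provided.
        setupDataType(graphView, inputDataSet, initialDataType);
    }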


From f8be53be11be7efcfae5bfef3caa8533098d0bbf Mon Sep 17 00:00:00 2001
From: bhalimi <benjamin.halimi@cea.fr>
Date: Fri, 20 Dec 2024 12:33:50 +0000
Subject: [PATCH 05/60] fix the scaling factor getter

---
 src/PTQ/PTQMetaOps.cpp | 20 ++++++++++++++------
 1 file changed, 14 insertions(+), 6 deletions(-)

diff --git a/src/PTQ/PTQMetaOps.cpp b/src/PTQ/PTQMetaOps.cpp
index d2423d0..69b5dd4 100644
--- a/src/PTQ/PTQMetaOps.cpp
+++ b/src/PTQ/PTQMetaOps.cpp
@@ -28,6 +28,9 @@
 #include "aidge/utils/Types.h"
 #include "aidge/operator/Identity.hpp"
 #include "aidge/data/Tensor.hpp"
+#include "aidge/operator/OperatorTensor.hpp"
+
+
 std::shared_ptr<Aidge::Node> Quantizer(double scalingFactor, double clip_min,double clip_max,const std::string& name)
 {
     std::shared_ptr<Aidge::Tensor> ScalingFactorTensorAttached = std::make_shared<Aidge::Tensor>(Aidge::Array1D<double, 1>{scalingFactor});
@@ -96,12 +99,17 @@ double getScalingFactor(std::shared_ptr<Aidge::Node> MetaOpNode)
     std::set<Aidge::NodePtr> Meta_Op_Node_List = MetaOp->getMicroGraph()->getNodes(); //List of Nodes inside PTQ Metaop Node
     for(std::shared_ptr<Aidge::Node> node : Meta_Op_Node_List)
     {
-            if(node->type() == "Mul")
-            {
-                    std::shared_ptr<Aidge::Data> MulInput1Data = node->input(1).first->getOperator()->getRawOutput(0);
-                    void* RawInputScalingFactor = std::static_pointer_cast<Aidge::Tensor>(MulInput1Data)->getImpl()->rawPtr();
-                    return (*(static_cast<double*>(RawInputScalingFactor)));
-            }
+        if(node->type() == "Mul")
+        {
+            //auto tensor = std::static_pointer_cast<Aidge::OperatorTensor>(node->getOperator())->getInput(1);
+            //bool useFloat = tensor->dataType() == Aidge::DataType::Float32;
+            //return useFloat ? tensor->get<float>(0) : tensor->get<double>(0);
+
+            auto tensor = std::static_pointer_cast<Aidge::OperatorTensor>(node->getOperator())->getInput(1);
+            std::shared_ptr<Aidge::Tensor> fallback;
+            const Aidge::Tensor& scalingFactorTensor = tensor->refCastFrom(fallback, Aidge::DataType::Float64, "cpu"); 
+            return scalingFactorTensor.get<double>(0);
+        } 
     }
     AIDGE_ASSERT("Invalid PTQ MetaOperator, no Mul node found inside node of type {}",MetaOpNode->type());
     return -1;
-- 
GitLab
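
The fix replaces a raw-pointer read, which silently returned garbage whenever the producer tensor was not Float64, with refCastFrom, which materializes a Float64 CPU view of the tensor whatever its current data type and backend. The same dtype-agnostic scalar read works anywhere a single value must be pulled out of a tensor; a minimal helper sketched from the calls in this patch (the helper itself is illustrative, not part of the patch):

    // Read element 0 of a tensor as a double, regardless of its data type
    // and backend, by going through a temporary Float64 CPU view.
    static double readScalar(std::shared_ptr<Aidge::Tensor> tensor)
    {
        std::shared_ptr<Aidge::Tensor> fallback;
        const Aidge::Tensor& view = tensor->refCastFrom(fallback, Aidge::DataType::Float64, "cpu");
        return view.get<double>(0);
    }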


From e854b9768c6078c2b4e2dac7b74b9d9c267027cf Mon Sep 17 00:00:00 2001
From: bhalimi <benjamin.halimi@cea.fr>
Date: Fri, 20 Dec 2024 12:35:22 +0000
Subject: [PATCH 06/60] fix the histogram bin computation

---
 src/PTQ/Clipping.cpp | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/src/PTQ/Clipping.cpp b/src/PTQ/Clipping.cpp
index f8765f3..57ad7a8 100644
--- a/src/PTQ/Clipping.cpp
+++ b/src/PTQ/Clipping.cpp
@@ -132,9 +132,10 @@ double computeMEClipping(std::vector<int> histogram, std::uint8_t nbBits, double
         clippingErrors.push_back(accumulatedError);
     }
 
-    std::vector<double>::iterator it = std::min_element(clippingErrors.begin(), clippingErrors.end());
-    double bestClipping = static_cast<double> (std::distance(clippingErrors.begin(), it)) / static_cast<double> (nbIter);
-
+    std::vector<double>::iterator it = std::min_element(clippingErrors.begin() + 1, clippingErrors.end()); 
+    int bestBin = static_cast<int> (std::distance(clippingErrors.begin(), it)) + 1;
+    double bestClipping = static_cast<double> (bestBin) / static_cast<double> (nbIter);
+    
     return bestClipping;
 }
 
@@ -199,7 +200,8 @@ double computeKLClipping(std::vector<int> refHistogram, std::uint8_t nbBits)
     }
 
     std::vector<double>::iterator it = std::min_element(clippingErrors.begin() + 1, clippingErrors.end());
-    double bestClipping = static_cast<double> (std::distance(clippingErrors.begin(), it)) / static_cast<double> (nbIter);
+    int bestBin = static_cast<int> (std::distance(clippingErrors.begin(), it)) + 1;
+    double bestClipping = (static_cast<double> (bestBin)) / static_cast<double> (nbIter);
 
     return bestClipping;
 }
-- 
GitLab
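
Both hunks fix the same index bookkeeping: the search now starts at begin() + 1, since bin 0 always yields a trivial minimum, but std::distance must still be measured from begin() to get an absolute index, and the extra + 1 re-aligns that index with the bin numbering used when the errors were accumulated. A standalone toy program (illustrative values, not PTQ data) showing the sub-range search and index recovery:

    #include <algorithm>
    #include <iostream>
    #include <vector>

    int main()
    {
        // Toy clipping-error curve; index 0 is excluded from the search.
        std::vector<double> clippingErrors {0.0, 4.0, 1.5, 3.0};
        int nbIter = 4; // one error entry per candidate clipping bin

        // Search from the second element, but measure from begin() so the
        // index is absolute; the + 1 mirrors the bin offset in the patch.
        auto it = std::min_element(clippingErrors.begin() + 1, clippingErrors.end());
        int bestBin = static_cast<int>(std::distance(clippingErrors.begin(), it)) + 1;

        double bestClipping = static_cast<double>(bestBin) / static_cast<double>(nbIter);
        std::cout << "best bin = " << bestBin << ", clipping = " << bestClipping << "\n";
        // Prints: best bin = 3, clipping = 0.75
        return 0;
    }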


From f82754691c9848c28c9cf1ccf57803b1bdf2ca84 Mon Sep 17 00:00:00 2001
From: bhalimi <benjamin.halimi@cea.fr>
Date: Fri, 20 Dec 2024 12:39:48 +0000
Subject: [PATCH 07/60] remove commented code

---
 src/PTQ/PTQ.cpp        | 58 ++++++++++++++++++++----------------------
 src/PTQ/PTQMetaOps.cpp |  4 ---
 2 files changed, 28 insertions(+), 34 deletions(-)

diff --git a/src/PTQ/PTQ.cpp b/src/PTQ/PTQ.cpp
index bfc5e3f..ffd5044 100644
--- a/src/PTQ/PTQ.cpp
+++ b/src/PTQ/PTQ.cpp
@@ -925,7 +925,7 @@ void performSingleShiftApproximation(std::shared_ptr<GraphView> graphView, bool
 
     for (std::shared_ptr<Node> node : nodeVector)
     {
-        //Use A meatoperator of type Scaling of MulCompensation instead
+        // Use a metaoperator of type Scaling or MulCompensation instead
         if (isAffine(node) || (node->type() == "Mul"))
         {
             std::shared_ptr<Node> scalingNode = (*node->getChildren().begin());
@@ -958,23 +958,13 @@ static void printScalingFactors(std::shared_ptr<GraphView> graphView)
 {
     Log::info(" === SCALING FACTORS === ");
     for (auto node : retrieveNodeVector(graphView))
-        if (node->type() == "Scaling")
+        if (node->type() == "Scaling" || node->type() == "Quantizer")
         {
             double factor = getScalingFactor(node);
             Log::info(" {:.6f} ({})", factor, node->name());
         }
 }
 
-/*
-std::string deduceBackend(std::shared_ptr<GraphView> graphView)
-{
-    std::string rootNodeBackend = graphView->getRootNode()->backend();
-    for (auto node : graphView->getNodes())
-        if (node->backend() != rootNodeBackend)
-            log::warn(" Multiple backend detected, setting all nodes to {}")
-}
-*/
-
 static void setupDataType(std::shared_ptr<GraphView> graphView, std::vector<std::shared_ptr<Tensor>> inputDataSet, DataType dataType)
 {
     graphView->setDataType(dataType);
@@ -990,6 +980,17 @@ static void setupDataType(std::shared_ptr<GraphView> graphView, std::vector<std:
         tensor->setDataType(dataType);
 }
 
+static void printRanges(std::shared_ptr<GraphView> graphView, std::map<std::string, double> valueRanges)
+{
+    SequentialScheduler scheduler(graphView);
+    scheduler.resetScheduling();
+    scheduler.generateScheduling();
+
+    auto scheduling = scheduler.getStaticScheduling();
+    for (auto node : scheduling)
+        if (node->type() == "Scaling")
+            std::cout << node->name() << " range = " << valueRanges[node->name()] << std::endl;
+}
 
 void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits, std::vector<std::shared_ptr<Tensor>> inputDataSet, Clipping clippingMode, bool noQuant, bool optimizeSigns, bool singleShift, bool useCuda, bool verbose)
 {
@@ -1017,23 +1018,15 @@ void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits,
     Log::info(" Computing the value ranges ...");
     std::map<std::string, double> valueRanges = computeRanges(graphView, inputDataSet, true, useCuda);
 
-    // XXX 
-/*
-    SequentialScheduler scheduler(graphView);
-    scheduler.resetScheduling();
-    scheduler.generateScheduling();
-
-    auto scheduling = scheduler.getStaticScheduling();
-    for (auto node : scheduling)
-        if (node->type() == "Scaling")
-            std::cout << node->name() << " range = " << valueRanges[node->name()] << std::endl;
+    //std::cout << " === RANGES (BEFORE ADJUST) ===" << std::endl;
+    //printRanges(graphView, valueRanges);
 
-    std::cout << " RETURN " << std::endl;
-    return;
-*/
     Log::info(" Optimizing the clipping values ...");
     valueRanges = adjustRanges(clippingMode, valueRanges, nbBits, graphView, inputDataSet, useCuda, verbose);
 
+    //std::cout << " === RANGES (AFTER ADJUST) ===" << std::endl;
+    //printRanges(graphView, valueRanges);
+
     Log::info(" Normalizing the activations ...");
     normalizeActivations(graphView, valueRanges);
 
@@ -1048,20 +1041,25 @@ void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits,
         Log::info(" Performing the Single-Shift approximation ...");
         performSingleShiftApproximation(graphView, noQuant);
     }
-    
+
     if (verbose)
         printScalingFactors(graphView);
 
-    Log::info(" Reseting the scheduler ...");
-    SequentialScheduler scheduler(graphView);
-    scheduler.resetScheduling();
+    //std::cout << " === SCALINGS (BEFORE CAST) ===" << std::endl;
+    //printScalingFactors(graphView);
 
     setupDataType(graphView, inputDataSet, initialDataType);
     if (useCuda)
         graphView->setBackend("cuda");
 
-    Log::info(" Network is quantized !");
+    //std::cout << " === SCALINGS (AFTER CAST) ===" << std::endl;
+    //printScalingFactors(graphView);
+
+    Log::info(" Reseting the scheduler ...");
+    SequentialScheduler scheduler(graphView);
+    scheduler.resetScheduling();
 
+    Log::info(" Network is quantized !");
 }
 
 std::map<std::string, double> getWeightRanges(std::shared_ptr<GraphView> graphView)
diff --git a/src/PTQ/PTQMetaOps.cpp b/src/PTQ/PTQMetaOps.cpp
index 69b5dd4..d2bc184 100644
--- a/src/PTQ/PTQMetaOps.cpp
+++ b/src/PTQ/PTQMetaOps.cpp
@@ -101,10 +101,6 @@ double getScalingFactor(std::shared_ptr<Aidge::Node> MetaOpNode)
     {
         if(node->type() == "Mul")
         {
-            //auto tensor = std::static_pointer_cast<Aidge::OperatorTensor>(node->getOperator())->getInput(1);
-            //bool useFloat = tensor->dataType() == Aidge::DataType::Float32;
-            //return useFloat ? tensor->get<float>(0) : tensor->get<double>(0);
-
             auto tensor = std::static_pointer_cast<Aidge::OperatorTensor>(node->getOperator())->getInput(1);
             std::shared_ptr<Aidge::Tensor> fallback;
             const Aidge::Tensor& scalingFactorTensor = tensor->refCastFrom(fallback, Aidge::DataType::Float64, "cpu"); 
-- 
GitLab
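
The printRanges helper added here doubles as a template for any schedule-ordered diagnostic: reset and regenerate the scheduling, then walk the static schedule so the printout follows execution order rather than the unordered node set. A trimmed variant (illustrative, mirroring the helper above) that just dumps the execution order:

    // Log the nodes of a graph in execution order (sketch based on the
    // printRanges helper introduced in this patch).
    static void printExecutionOrder(std::shared_ptr<GraphView> graphView)
    {
        SequentialScheduler scheduler(graphView);
        scheduler.resetScheduling();
        scheduler.generateScheduling();

        for (auto node : scheduler.getStaticScheduling())
            Log::info(" {} ({})", node->name(), node->type());
    }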


From 115852970bb7386305cc10b866dc75c6ff4b3e4b Mon Sep 17 00:00:00 2001
From: bhalimi <benjamin.halimi@cea.fr>
Date: Fri, 20 Dec 2024 16:03:48 +0000
Subject: [PATCH 08/60] improve code quality

---
 include/aidge/quantization/PTQ/PTQMetaOps.hpp |  12 +-
 python_binding/pybind_PTQ.cpp                 |   2 +-
 src/PTQ/PTQ.cpp                               |  42 ++---
 src/PTQ/PTQMetaOps.cpp                        | 166 ++++++++++--------
 4 files changed, 123 insertions(+), 99 deletions(-)

diff --git a/include/aidge/quantization/PTQ/PTQMetaOps.hpp b/include/aidge/quantization/PTQ/PTQMetaOps.hpp
index 29bb7f2..62fac87 100644
--- a/include/aidge/quantization/PTQ/PTQMetaOps.hpp
+++ b/include/aidge/quantization/PTQ/PTQMetaOps.hpp
@@ -25,6 +25,8 @@
 #include "aidge/graph/OpArgs.hpp" // Sequential
 #include "aidge/operator/MetaOperator.hpp"
 
+namespace Aidge {
+
 /// @brief Quantizer acts as a meta-operator to handle scaling operations in the PTQ, replacing the Scaling Operator.
 /// This operator is composed of a sequence of [Mul] -> [Clip] -> [Round] operations.
 ///
@@ -33,7 +35,7 @@
 /// @param clip_max The maximum value for the clip operation.
 /// @param name The name of the meta-operator node created.
 /// @return A shared pointer to an instance of the meta-operator node.
-std::shared_ptr<Aidge::Node> Quantizer(double scalingFactor, double clip_min,double clip_max,const std::string& name);
+std::shared_ptr<Aidge::Node> Quantizer(double scalingFactor, double clipMin, double clipMax, const std::string& name);
 
 /// @brief The purpose of Scaling is to encapsulate the Mul operator and tag it as a PTQ node rather than a regular Mul operator.
 /// Therefore, this meta-operator consists solely of a [Mul] operation.
@@ -41,7 +43,7 @@ std::shared_ptr<Aidge::Node> Quantizer(double scalingFactor, double clip_min,dou
 /// @param scalingFactor The scaling factor to apply to the input (a scalar to multiply the input with).
 /// @param name The name of the meta-operator node created.
 /// @return A shared pointer to an instance of the scaling node.
-std::shared_ptr<Aidge::Node> Scaling(double scalingFactor,const std::string& name = "");
+std::shared_ptr<Aidge::Node> Scaling(double scalingFactor, const std::string& name = "");
 
 /// @brief Updates the scaling factor of a PTQ meta-operator node, allowing for dynamic adjustment of the scaling parameter.
 /// This function sets a new scaling factor for a specified meta-operator node, modifying the scalar applied in the [Mul] operation.
@@ -50,7 +52,7 @@ std::shared_ptr<Aidge::Node> Scaling(double scalingFactor,const std::string& nam
 /// @param MetaOpNode A shared pointer to the PTQ meta-operator node whose scaling factor will be updated.
 /// @param newScalingFactor The new scaling factor to apply to the meta-operator node.
-/// @return True if the scaling factor was successfully updated, false if the operation failed (e.g., if MetaOpNode is null or incompatible).
-bool updateScalingFactor(std::shared_ptr<Aidge::Node> MetaOpNode, double newScalingFactor);
+void updateScalingFactor(std::shared_ptr<Aidge::Node> MetaOpNode, double newScalingFactor);
 
 /// @brief Retrieves the current scaling factor of a PTQ meta-operator node.
 /// This function returns the scaling factor associated with the specified PTQ meta-operator node, 
@@ -69,6 +71,8 @@ double getScalingFactor(std::shared_ptr<Aidge::Node> MetaOpNode);
 /// @param min The minimum value for the clip range. Values below this will be clipped to this minimum.
 /// @param max The maximum value for the clip range. Values above this will be clipped to this maximum.
-/// @return True if the clip range was successfully set, false if the operation failed (e.g., if QuantizerNode is null).
-bool setClipRange(std::shared_ptr<Aidge::Node> QuantizerNode, double min, double max);
+void setClipRange(std::shared_ptr<Aidge::Node> QuantizerNode, double min, double max);
+
+}
 
 #endif /* AIDGE_QUANTIZATION_PTQ_PTQMETAOPS_H_ */
diff --git a/python_binding/pybind_PTQ.cpp b/python_binding/pybind_PTQ.cpp
index 73b217d..195c0bf 100644
--- a/python_binding/pybind_PTQ.cpp
+++ b/python_binding/pybind_PTQ.cpp
@@ -220,7 +220,7 @@ void init_PTQ(py::module &m) {
     :type network: :py:class:`aidge_core.GraphView`    
     )mydelimiter");
 
-    m.def("prepare_network", &prepareNetwork, py::arg("network"), "prepare the network fo the PTQ");
+    m.def("prepare_network", &prepareNetwork, py::arg("network"), "prepare the network for the PTQ");
 
 }
 
diff --git a/src/PTQ/PTQ.cpp b/src/PTQ/PTQ.cpp
index ffd5044..4c5d1d1 100644
--- a/src/PTQ/PTQ.cpp
+++ b/src/PTQ/PTQ.cpp
@@ -407,8 +407,9 @@ void normalizeParameters(std::shared_ptr<GraphView> graphView)
 
                 std::shared_ptr<Node> scalingNode = getPreviousScalingNode(mergingNode);
 
-                double scaling_factor = getScalingFactor(scalingNode);
-                updateScalingFactor(scalingNode,scaling_factor / rescaling);
+                double currScalingFactor = getScalingFactor(scalingNode);
+                updateScalingFactor(scalingNode, currScalingFactor / rescaling);
+
                 accumulatedRatios[mergingNode->name()] /= rescaling; // optional ...
             }
         }
@@ -565,8 +566,8 @@ void normalizeActivations(std::shared_ptr<GraphView> graphView, std::map<std::st
             // ValueRanges must contains all the scaling nodes !!!
             double scalingFactor = valueRanges[node->name()]; 
 
-            double scaling_factor = getScalingFactor(node);
-            updateScalingFactor(node, (scaling_factor) / (scalingFactor / prevScalingFactor));
+            double currScalingFactor = getScalingFactor(node);
+            updateScalingFactor(node, currScalingFactor / (scalingFactor / prevScalingFactor));
 
             scalingFactors[node->name()] = scalingFactor;
 
@@ -608,8 +609,9 @@ void normalizeActivations(std::shared_ptr<GraphView> graphView, std::map<std::st
 
                 std::shared_ptr<Node> scalingNode = getPreviousScalingNode(mergingNode);
                 //Log::info(" SCALING NODE : {} {}", scalingNode->type(), scalingNode->name());
-                double scaling_factor = getScalingFactor(scalingNode);
-                updateScalingFactor(scalingNode, scaling_factor * rescaling);                
+
+                double currScalingFactor = getScalingFactor(scalingNode);
+                updateScalingFactor(scalingNode, currScalingFactor * rescaling);                
             }
         }
     }
@@ -803,8 +805,9 @@ void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_
             rescaling *= outputIsUnsigned ? unsignedMax : signedMax;
             
             std::shared_ptr<Node> scalingNode = *(node->getChildren().begin()); // Assert if scalingNode is a Scaling ...
-            double scaling_factor = getScalingFactor(scalingNode);
-            updateScalingFactor(scalingNode, scaling_factor * rescaling);
+
+            double currScalingFactor = getScalingFactor(scalingNode);
+            updateScalingFactor(scalingNode, currScalingFactor * rescaling);
         }
         
         if (isMerging(node))
@@ -819,8 +822,8 @@ void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_
 
             std::shared_ptr<Node> scalingNode = *(node->getChildren().begin()); // Assert if scalingNode is a Scaling ...
         
-            double scaling_factor = getScalingFactor(scalingNode); // XXX bad naming
-            updateScalingFactor(scalingNode, scaling_factor * rescaling);
+            double currScalingFactor = getScalingFactor(scalingNode); // XXX bad naming
+            updateScalingFactor(scalingNode, currScalingFactor * rescaling);
         }
         
         // Handle the Scaling Nodes ...
@@ -829,11 +832,9 @@ void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_
         {
             if (!noQuant) 
             {  
-                // Replacement of Scaling Node by Quantizer
-                double currentSF = getScalingFactor(node); // XXX bad naming !
-
-                std::shared_ptr<Node> quantizerNode = Quantizer(currentSF, -(signedMax + 1), signedMax, node->name());
+                // Replace the Scaling Node by a Quantizer
 
+                std::shared_ptr<Node> quantizerNode = Quantizer(getScalingFactor(node), -(signedMax + 1), signedMax, node->name());
                 quantizerNode->getOperator()->setDataType(DataType::Float64); // getDataType(parentNode)
                 quantizerNode->getOperator()->setBackend("cpu");
 
@@ -849,8 +850,8 @@ void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_
                     rescaling /= inputIsUnsigned  ? unsignedMax : signedMax;
                     rescaling *= outputIsUnsigned ? unsignedMax : signedMax;
 
-                    double scalingFactor = getScalingFactor(quantizerNode);
-                    updateScalingFactor(quantizerNode,scalingFactor * rescaling);
+                    double currScalingFactor = getScalingFactor(quantizerNode);
+                    updateScalingFactor(quantizerNode, currScalingFactor * rescaling);
 
                     if(outputIsUnsigned)
                     {
@@ -909,8 +910,9 @@ static void insertCompensationNodes(std::shared_ptr<GraphView> graphView, std::u
                 // rescale the coeffs and edit scaling factor
 
                 fillTensor(coeffTensor, signedMax);
-                double sf  = getScalingFactor(node); // XXX bad naming !
-                updateScalingFactor(node, sf/signedMax);
+
+                double currScalingFactor = getScalingFactor(node); // XXX bad naming !
+                updateScalingFactor(node, currScalingFactor / signedMax);
 
                 // TODO : double check this !!!
                 //std::cout << getTensorAbsoluteMax(coeffTensor) << std::endl;
@@ -960,8 +962,8 @@ static void printScalingFactors(std::shared_ptr<GraphView> graphView)
     for (auto node : retrieveNodeVector(graphView))
         if (node->type() == "Scaling" || node->type() == "Quantizer")
         {
-            double factor = getScalingFactor(node);
-            Log::info(" {:.6f} ({})", factor, node->name());
+            double scalingFactor = getScalingFactor(node);
+            Log::info(" {:.6f} ({})", scalingFactor, node->name());
         }
 }
 
diff --git a/src/PTQ/PTQMetaOps.cpp b/src/PTQ/PTQMetaOps.cpp
index d2bc184..152a3b0 100644
--- a/src/PTQ/PTQMetaOps.cpp
+++ b/src/PTQ/PTQMetaOps.cpp
@@ -29,106 +29,124 @@
 #include "aidge/operator/Identity.hpp"
 #include "aidge/data/Tensor.hpp"
 #include "aidge/operator/OperatorTensor.hpp"
+#include "aidge/utils/Log.hpp"
 
 
-std::shared_ptr<Aidge::Node> Quantizer(double scalingFactor, double clip_min,double clip_max,const std::string& name)
+namespace Aidge 
 {
-    std::shared_ptr<Aidge::Tensor> ScalingFactorTensorAttached = std::make_shared<Aidge::Tensor>(Aidge::Array1D<double, 1>{scalingFactor});
-    std::shared_ptr<Aidge::Node> mul_node =  Aidge::Mul((!name.empty()) ? name + "_MulQuant" : "");
 
-    std::shared_ptr<Aidge::Node> producer_scaling_factor = addProducer(mul_node,1,{1},"ScalingFactor"); 
-    producer_scaling_factor ->getOperator()->setOutput(0,ScalingFactorTensorAttached);
-    
-    std::shared_ptr<Aidge::Node> clip_node = Aidge::Clip((!name.empty()) ? name + "_ClipQuant" : "",clip_min,clip_max);
+std::shared_ptr<Node> Quantizer(double scalingFactor, double clipMin, double clipMax, const std::string& name)
+{
+    // create the nodes
+
+    std::shared_ptr<Node> mulNode =  Mul((!name.empty()) ? name + "_MulQuant" : "");
+    std::shared_ptr<Node> roundNode = Round((!name.empty()) ? name + "_RoundQuant" : "");
+    std::shared_ptr<Node> clipNode = Clip((!name.empty()) ? name + "_ClipQuant" : "", clipMin, clipMax);
+
+    // connect the scaling factor producer
+
+    std::shared_ptr<Tensor> scalingFactorTensor = std::make_shared<Tensor>(Array1D<double, 1> {scalingFactor});
+    std::shared_ptr<Node> scalingFactorProducer = addProducer(mulNode, 1, {1}, "ScalingFactor"); 
+    scalingFactorProducer->getOperator()->setOutput(0, scalingFactorTensor);
     
-    std::shared_ptr<Aidge::GraphView> graph = Aidge::Sequential({
-        mul_node,
-        Aidge::Round((!name.empty()) ? name + "_RoundQuant" : ""),
-        clip_node});
+    // create the metaop graph
+
+    std::shared_ptr<GraphView> graphView = Sequential({mulNode, roundNode, clipNode});
+    std::shared_ptr<GraphView> connectedGraphView = getConnectedGraphView(mulNode); // XXX why not use the graphView ???
+
+    // return the metaop 
+
+    std::shared_ptr<Node> metaopNode = MetaOperator("Quantizer", connectedGraphView, {}, name); // XXX alternative prototype
 
-    std::shared_ptr<Aidge::GraphView> connectedGV = getConnectedGraphView(mul_node);
-    std::shared_ptr<Aidge::Node> metaopNode = MetaOperator("Quantizer",connectedGV,{},name);
     return metaopNode; 
 }
 
-std::shared_ptr<Aidge::Node> Scaling(double scalingFactor,const std::string& name)
+std::shared_ptr<Node> Scaling(double scalingFactor, const std::string& name)
 {
-    std::shared_ptr<Aidge::Tensor> scalingFactorTensor = std::make_shared<Aidge::Tensor>(Aidge::Array1D<double, 1> {scalingFactor});
+    std::shared_ptr<Tensor> scalingFactorTensor = std::make_shared<Tensor>(Array1D<double, 1> {scalingFactor});
 
-    std::shared_ptr<Aidge::Node> mulNode = Aidge::Mul((!name.empty()) ? name + "_Scaling" : "");
+    std::shared_ptr<Node> mulNode = Mul((!name.empty()) ? name + "_Scaling" : "");
 
-    std::shared_ptr<Aidge::Node> scalingFactorProducer = addProducer(mulNode, 1, {1}, "ScalingFactor"); 
+    std::shared_ptr<Node> scalingFactorProducer = addProducer(mulNode, 1, {1}, "ScalingFactor"); 
     scalingFactorProducer->getOperator()->setOutput(0, scalingFactorTensor);
 
-    std::shared_ptr<Aidge::GraphView> graphView  = Aidge::Sequential({mulNode});
-    std::shared_ptr<Aidge::GraphView> connectedGraphView  = getConnectedGraphView(mulNode);
+    std::shared_ptr<GraphView> graphView  = Sequential({mulNode});
+    std::shared_ptr<GraphView> connectedGraphView  = getConnectedGraphView(mulNode);
 
-    Aidge::NodePtr metaopNode = MetaOperator("Scaling", connectedGraphView, {}, name);
+    NodePtr metaopNode = MetaOperator("Scaling", connectedGraphView, {}, name);
 
     return metaopNode;
 }
 
-bool updateScalingFactor(std::shared_ptr<Aidge::Node> MetaOpNode, double newScalingFactor)
+static std::shared_ptr<Node> getSubNode(std::shared_ptr<GraphView> graphView, std::string nodeType)
 {
-    if(MetaOpNode->type() != "Scaling" && MetaOpNode->type() != "Quantizer")
-    {
-        AIDGE_ASSERT("Cannot use updatePTQMetaOpsScalingFactor on Node of type {}", MetaOpNode->type());
-    }
-    std::shared_ptr<Aidge::Tensor> newScalingFactorTensorAttached = std::make_shared<Aidge::Tensor>(Aidge::Array1D<double, 1>{newScalingFactor});
-    std::shared_ptr<Aidge::MetaOperator_Op> MetaOp = std::static_pointer_cast<Aidge::MetaOperator_Op>(MetaOpNode->getOperator());
-    std::set<Aidge::NodePtr> Meta_Op_Node_List = MetaOp->getMicroGraph()->getNodes(); //List of Nodes inside PTQ Metaop Node
-    for(std::shared_ptr<Aidge::Node> node : Meta_Op_Node_List)
-    {
-            if(node->type() == "Mul")
-            {
-                node->input(1).first->getOperator()->setOutput(0, newScalingFactorTensorAttached);
-                return true;
-            }
-    }
-    AIDGE_ASSERT("Invalid PTQ MetaOperator, no Mul node found inside node of type {}",MetaOpNode->type());
-    return false;
+    std::shared_ptr<Node> mulNode = nullptr;
+    for(std::shared_ptr<Node> node : graphView->getNodes())
+        if (node->type() == nodeType)
+            mulNode = node;
+
+    return mulNode;
 }
-double getScalingFactor(std::shared_ptr<Aidge::Node> MetaOpNode)
+
+void updateScalingFactor(std::shared_ptr<Node> metaOpNode, double scalingFactor)
 {
-    if(MetaOpNode->type() != "Scaling"  && MetaOpNode->type() != "Quantizer")
-    {
-        AIDGE_ASSERT("Cannot use getPTQMetaOpsScalingFactor on Node of type {}",MetaOpNode->type());
-        return -1;
+    if (metaOpNode->type() != "Scaling" && metaOpNode->type() != "Quantizer") {
+        Log::warn(" Cannot update the scaling factor on Node of type {}", metaOpNode->type());
+        return;
+    }
+
+    std::shared_ptr<Tensor> scalingFactorTensor = std::make_shared<Tensor>(Array1D<double, 1> {scalingFactor});
+
+    std::shared_ptr<MetaOperator_Op> metaOp = std::static_pointer_cast<MetaOperator_Op>(metaOpNode->getOperator());
+    
+    std::shared_ptr<Node> mulNode = getSubNode(metaOp->getMicroGraph(), "Mul");
+
+    if (!mulNode) {
+        Log::warn(" Invalid PTQ MetaOperator, no Mul node found inside ! ");
+        return;
+    }
+
+    mulNode->input(1).first->getOperator()->setOutput(0, scalingFactorTensor);
+}
+
+double getScalingFactor(std::shared_ptr<Node> MetaOpNode)
+{
+    if (MetaOpNode->type() != "Scaling" && MetaOpNode->type() != "Quantizer") {
+        Log::warn(" Cannot get the scaling factor on Node of type {}", MetaOpNode->type());
+        return 0;
     }
-    std::shared_ptr<Aidge::MetaOperator_Op> MetaOp = std::static_pointer_cast<Aidge::MetaOperator_Op>(MetaOpNode->getOperator());
-    std::set<Aidge::NodePtr> Meta_Op_Node_List = MetaOp->getMicroGraph()->getNodes(); //List of Nodes inside PTQ Metaop Node
-    for(std::shared_ptr<Aidge::Node> node : Meta_Op_Node_List)
-    {
-        if(node->type() == "Mul")
-        {
-            auto tensor = std::static_pointer_cast<Aidge::OperatorTensor>(node->getOperator())->getInput(1);
-            std::shared_ptr<Aidge::Tensor> fallback;
-            const Aidge::Tensor& scalingFactorTensor = tensor->refCastFrom(fallback, Aidge::DataType::Float64, "cpu"); 
-            return scalingFactorTensor.get<double>(0);
-        } 
+
+    std::shared_ptr<MetaOperator_Op> metaOp = std::static_pointer_cast<MetaOperator_Op>(MetaOpNode->getOperator());
+    
+    std::shared_ptr<Node> mulNode = getSubNode(metaOp->getMicroGraph(), "Mul");
+
+    if (!mulNode) {
+        Log::warn(" Invalid PTQ MetaOperator, no Mul found inside node of type {}", MetaOpNode->type());
+        return 0;
     }
-    AIDGE_ASSERT("Invalid PTQ MetaOperator, no Mul node found inside node of type {}",MetaOpNode->type());
-    return -1;
+
+    auto scalingFactorTensor = std::static_pointer_cast<OperatorTensor>(mulNode->getOperator())->getInput(1);
+    std::shared_ptr<Tensor> fallback;
+    const Tensor& localTensor = scalingFactorTensor->refCastFrom(fallback, DataType::Float64, "cpu"); 
+    
+    return localTensor.get<double>(0);
 }
-bool setClipRange(std::shared_ptr<Aidge::Node> QuantizerNode,double min, double max)
+
+
+void setClipRange(std::shared_ptr<Node> quantizerNode, double min, double max)
 {
-    if(QuantizerNode->type() != "Quantizer")
-    {
-        AIDGE_ASSERT("Cannot use setQuantizerClipRange on Node of type {}",QuantizerNode->type());
-        return false;
+    if (quantizerNode->type() != "Quantizer") {
+        Log::warn(" Cannot set the clipping range on Node of type {}", quantizerNode->type());
+        return;
     }
-    std::shared_ptr<Aidge::MetaOperator_Op> MetaOp = std::static_pointer_cast<Aidge::MetaOperator_Op>(QuantizerNode->getOperator());
-    std::set<Aidge::NodePtr> Meta_Op_Node_List = MetaOp->getMicroGraph()->getNodes(); //List of Node inside  
-    for(std::shared_ptr<Aidge::Node> node : Meta_Op_Node_List)
-    {
-            if(node->type() == "Clip")
-            {
-                std::shared_ptr<Aidge::Clip_Op> Clip_Node_Op = std::static_pointer_cast<Aidge::Clip_Op>(node->getOperator());
-                Clip_Node_Op->max() = max;
-                Clip_Node_Op->min() = min;
-                return true;
-            }
+
+    std::shared_ptr<MetaOperator_Op> metaOp = std::static_pointer_cast<MetaOperator_Op> (quantizerNode->getOperator());
+
+    std::shared_ptr<Node> clipNode = getSubNode(metaOp->getMicroGraph(), "Clip");
+
+    if (!clipNode) {
+        Log::warn(" Invalid PTQ MetaOperator, no Clip found inside node of type {}", quantizerNode->type());
+        return;
     }
-    AIDGE_ASSERT("Invalid MetaOperator Quantizer, no clip node found inside Node of type {}",QuantizerNode->type());
-    return false;
+
+    std::shared_ptr<Clip_Op> clipOp = std::static_pointer_cast<Clip_Op>(clipNode->getOperator());
+    clipOp->max() = max;
+    clipOp->min() = min;
+}
 }
\ No newline at end of file
-- 
GitLab
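
Taken together, the refactored helpers form a small, void-returning API for manipulating the PTQ meta-operators. A minimal usage sketch, assuming only the declarations from PTQMetaOps.hpp shown above; the scale and clip values are arbitrary examples:

    #include <memory>
    #include "aidge/quantization/PTQ/PTQMetaOps.hpp"

    int main()
    {
        // Build an 8-bit signed quantizer: x -> clip(round(0.05 * x), -128, 127).
        std::shared_ptr<Aidge::Node> quantizer = Aidge::Quantizer(0.05, -128.0, 127.0, "q0");

        // Rescale in place: the helper rewrites the internal Mul producer.
        Aidge::updateScalingFactor(quantizer, Aidge::getScalingFactor(quantizer) * 2.0);

        // Tighten the clipping range: the helper rewrites the internal Clip bounds.
        Aidge::setClipRange(quantizer, -64.0, 63.0);

        return 0;
    }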


From 579d9320830d8353bc216d56c93b5f6385f082e2 Mon Sep 17 00:00:00 2001
From: cmoineau <cyril.moineau@cea.fr>
Date: Wed, 11 Dec 2024 10:08:53 +0000
Subject: [PATCH 09/60] Update quantization with
 https://gitlab.eclipse.org/eclipse/aidge/aidge_core/-/merge_requests/277

---
 .gitignore                                    |  3 +-
 CMakeLists.txt                                | 26 +++++++++++++
 include/aidge/quantization_version.h          | 11 ++++++
 .../sys_info/QuantizationVersionInfo.hpp      | 38 +++++++++++++++++++
 include/aidge/version.h.in                    | 11 ++++++
 pyproject.toml                                | 18 +++++----
 python_binding/pybind_Quantization.cpp        |  6 ++-
 .../pybind_QuantizationVersionInfo.cpp        | 11 ++++++
 setup.cfg                                     |  3 ++
 9 files changed, 115 insertions(+), 12 deletions(-)
 create mode 100644 include/aidge/quantization_version.h
 create mode 100644 include/aidge/utils/sys_info/QuantizationVersionInfo.hpp
 create mode 100644 include/aidge/version.h.in
 create mode 100644 python_binding/utils/sys_info/pybind_QuantizationVersionInfo.cpp
 create mode 100644 setup.cfg

diff --git a/.gitignore b/.gitignore
index 18f1583..ba5c593 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,17 +4,16 @@
 # C++ Build
 build*/
 install*/
+include/aidge/backend/quantization_version.h
 
 # VSCode
 .vscode
 
 # Python
-aidge_quantization/_version.py
 *.so
 __pycache__
 *.pyc
 *.egg-info
-aidge_quantization/_version.py
 wheelhouse/*
 
 # Mermaid
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 905a2a2..7a2b168 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -5,13 +5,39 @@ set(CXX_STANDARD 14)
 file(STRINGS "${CMAKE_SOURCE_DIR}/version.txt" version)
 file(STRINGS "${CMAKE_SOURCE_DIR}/project_name.txt" project)
 
+# Parse version.txt to retrieve Major, Minor and Patch
+string(REGEX MATCH "([0-9]+)\\.([0-9]+)\\.([0-9]+)" _ MATCHES ${version})
+set(PROJECT_VERSION_MAJOR ${CMAKE_MATCH_1})
+set(PROJECT_VERSION_MINOR ${CMAKE_MATCH_2})
+set(PROJECT_VERSION_PATCH ${CMAKE_MATCH_3})
+
+# Retrieve latest git commit
+execute_process(
+    COMMAND git rev-parse --short HEAD
+    WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}
+    OUTPUT_VARIABLE GIT_COMMIT_HASH
+    OUTPUT_STRIP_TRAILING_WHITESPACE
+    ERROR_QUIET
+)
+
 message(STATUS "Project name: ${project}")
 message(STATUS "Project version: ${version}")
+message(STATUS "Latest git commit: ${GIT_COMMIT_HASH}")
+
+message(STATUS "Creating ${CMAKE_CURRENT_SOURCE_DIR}/include/aidge/quantization_version.h")
 
 project(${project}
         VERSION ${version}
         DESCRIPTION "Quantization methods for the Aidge framework."
         LANGUAGES CXX)
+# Note: Calling configure_file later in the code causes the version variables to be lost...
+# I tried to set them in the internal cache but it failed.
+# The current code works, but there might be a scope issue.
+# Generate version.h file from config file version.h.in
+configure_file(
+    "${CMAKE_CURRENT_SOURCE_DIR}/include/aidge/version.h.in"
+    "${CMAKE_CURRENT_SOURCE_DIR}/include/aidge/quantization_version.h"
+)
 
 # Note : project name is {project} and python module name is also {project}
 set(module_name _${project}) # target name
diff --git a/include/aidge/quantization_version.h b/include/aidge/quantization_version.h
new file mode 100644
index 0000000..546263a
--- /dev/null
+++ b/include/aidge/quantization_version.h
@@ -0,0 +1,11 @@
+#ifndef VERSION_H
+#define VERSION_H
+
+namespace Aidge {
+static constexpr const int PROJECT_VERSION_MAJOR = 0;
+static constexpr const int PROJECT_VERSION_MINOR = 2;
+static constexpr const int PROJECT_VERSION_PATCH = 0;
+static constexpr const char * PROJECT_VERSION = "0.2.0";
+static constexpr const char * PROJECT_GIT_HASH = "f50c860";
+}
+#endif // VERSION_H
diff --git a/include/aidge/utils/sys_info/QuantizationVersionInfo.hpp b/include/aidge/utils/sys_info/QuantizationVersionInfo.hpp
new file mode 100644
index 0000000..6b4deb8
--- /dev/null
+++ b/include/aidge/utils/sys_info/QuantizationVersionInfo.hpp
@@ -0,0 +1,38 @@
+#ifndef AIDGE_UTILS_SYS_INFO_QUANTIZATION_VERSION_INFO_H
+#define AIDGE_UTILS_SYS_INFO_QUANTIZATION_VERSION_INFO_H
+
+#include "aidge/utils/Log.hpp"
+#include "aidge/quantization_version.h"
+
+namespace Aidge {
+
+constexpr inline const char * getQuantizationProjectVersion(){
+    return PROJECT_VERSION;
+}
+
+constexpr inline const char * getQuantizationGitHash(){
+    return PROJECT_GIT_HASH;
+}
+
+inline void showQuantizationVersion() {
+    Log::info("Aidge quantization: {} ({}), {} {}", getQuantizationProjectVersion(), getQuantizationGitHash(), __DATE__, __TIME__);
+        // Compiler version
+    #if defined(__clang__)
+    /* Clang/LLVM. ---------------------------------------------- */
+        Log::info("Clang/LLVM compiler version: {}.{}.{}\n", __clang_major__ , __clang_minor__, __clang_patchlevel__);
+    #elif defined(__ICC) || defined(__INTEL_COMPILER)
+    /* Intel ICC/ICPC. ------------------------------------------ */
+        Log::info("Intel ICC/ICPC compiler version: {}\n", __INTEL_COMPILER);
+    #elif defined(__GNUC__) || defined(__GNUG__)
+    /* GNU GCC/G++. --------------------------------------------- */
+        Log::info("GNU GCC/G++ compiler version: {}.{}.{}", __GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__);
+    #elif defined(_MSC_VER)
+    /* Microsoft Visual Studio. --------------------------------- */
+        Log::info("Microsoft Visual Studio compiler version: {}\n", _MSC_VER);
+    #else
+        Log::info("Unknown compiler\n");
+    #endif
+
+}
+}  // namespace Aidge
+#endif  // AIDGE_UTILS_SYS_INFO_QUANTIZATION_VERSION_INFO_H
diff --git a/include/aidge/version.h.in b/include/aidge/version.h.in
new file mode 100644
index 0000000..4b876f6
--- /dev/null
+++ b/include/aidge/version.h.in
@@ -0,0 +1,11 @@
+#ifndef VERSION_H
+#define VERSION_H
+
+namespace Aidge {
+static constexpr const int PROJECT_VERSION_MAJOR = @PROJECT_VERSION_MAJOR@;
+static constexpr const int PROJECT_VERSION_MINOR = @PROJECT_VERSION_MINOR@;
+static constexpr const int PROJECT_VERSION_PATCH = @PROJECT_VERSION_PATCH@;
+static constexpr const char * PROJECT_VERSION = "@PROJECT_VERSION_MAJOR@.@PROJECT_VERSION_MINOR@.@PROJECT_VERSION_PATCH@";
+static constexpr const char * PROJECT_GIT_HASH = "@GIT_COMMIT_HASH@";
+}
+#endif // VERSION_H
diff --git a/pyproject.toml b/pyproject.toml
index fc745eb..deb91c7 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -11,18 +11,24 @@ classifiers = [
     "Development Status :: 2 - Pre-Alpha",
     "Programming Language :: Python :: 3"
     ]
-dynamic = ["version"] # defined in tool.setuptools_scm
-# version="1"
+dynamic = ["version"] # defined by pbr
 
 [build-system]
 requires = [
     "setuptools>=64",
-    "setuptools_scm[toml]==7.1.0",
     "cmake>=3.15.3.post1",
-    "toml"
+    "toml",
+    "pbr"
 ]
 build-backend = "setuptools.build_meta"
 
+[project.urls]
+Homepage = "https://www.deepgreen.ai/en/platform"
+Documentation = "https://eclipse-aidge.readthedocs.io/en/latest/"
+Repository = "https://gitlab.eclipse.org/eclipse/aidge/aidge_quantization"
+Issues = "https://gitlab.eclipse.org/eclipse/aidge/aidge_quantization/-/issues/"
+Changelog = "https://gitlab.eclipse.org/eclipse/aidge/aidge_quantization/-/releases"
+
 #####################################################
 # SETUPTOOLS
 [tool.setuptools]
@@ -35,10 +41,6 @@ exclude = [
     "aidge_quantization.unit_tests.assets"
 ]  # exclude packages matching these glob patterns (empty by default)
 
-# SETUPTOOLS_SCM
-[tool.setuptools_scm]
-write_to = "aidge_quantization/_version.py"
-
 #####################################################
 # CIBUILDWHEEL
 [tool.cibuildwheel]
diff --git a/python_binding/pybind_Quantization.cpp b/python_binding/pybind_Quantization.cpp
index cd18cf8..7ac344d 100644
--- a/python_binding/pybind_Quantization.cpp
+++ b/python_binding/pybind_Quantization.cpp
@@ -20,7 +20,7 @@
 
 namespace py = pybind11;
 
-namespace Aidge 
+namespace Aidge
 {
 
 // operators
@@ -35,8 +35,9 @@ void init_QAT_FixedQ(py::module &m);
 void init_QAT_LSQ(py::module &m);
 void init_QuantRecipes(py::module &m);
 
+void init_QuantizationVersionInfo(py::module &m);
 
-PYBIND11_MODULE(aidge_quantization, m) 
+PYBIND11_MODULE(aidge_quantization, m)
 {
     init_FixedQ(m);
     init_LSQ(m);
@@ -47,6 +48,7 @@ PYBIND11_MODULE(aidge_quantization, m)
     init_QAT_FixedQ(m);
     init_QAT_LSQ(m);
     init_QuantRecipes(m);
+    init_QuantizationVersionInfo(m);
 }
 
 } // namespace Aidge
diff --git a/python_binding/utils/sys_info/pybind_QuantizationVersionInfo.cpp b/python_binding/utils/sys_info/pybind_QuantizationVersionInfo.cpp
new file mode 100644
index 0000000..abed12b
--- /dev/null
+++ b/python_binding/utils/sys_info/pybind_QuantizationVersionInfo.cpp
@@ -0,0 +1,11 @@
+#include <pybind11/pybind11.h>
+#include "aidge/utils/sys_info/QuantizationVersionInfo.hpp"
+
+namespace py = pybind11;
+namespace Aidge {
+void init_QuantizationVersionInfo(py::module& m){
+    m.def("show_version", &showQuantizationVersion);
+    m.def("get_project_version", &getQuantizationProjectVersion);
+    m.def("get_git_hash", &getQuantizationGitHash);
+}
+}
diff --git a/setup.cfg b/setup.cfg
new file mode 100644
index 0000000..aa0f227
--- /dev/null
+++ b/setup.cfg
@@ -0,0 +1,3 @@
+# pbr file
+[metadata]
+version = file: version.txt
-- 
GitLab
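
For reference, the version reporting added by this patch is callable from both C++ and the new Python bindings. A minimal C++ sketch (assuming the CMake-configured version.h is on the include path):

    #include "aidge/utils/sys_info/QuantizationVersionInfo.hpp"

    int main() {
        // Logs "Aidge quantization: <version> (<git hash>)" along with the
        // build date and the detected compiler version.
        Aidge::showQuantizationVersion();
        return 0;
    }

On the Python side the same entry points are exposed as aidge_quantization.show_version(), get_project_version() and get_git_hash().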


From d3798ad61a45abdbf67238fe1d749b58d98e6464 Mon Sep 17 00:00:00 2001
From: bhalimi <benjamin.halimi@cea.fr>
Date: Fri, 3 Jan 2025 16:10:53 +0000
Subject: [PATCH 10/60] set the LSQ op backward kernels to gradient
 accumulation mode

---
 .../aidge/backend/cpu/operator/LSQImpl_kernels.hpp  | 12 ++++++------
 src/backend/cuda/operator/LSQImpl_CUDA_kernels.cu   | 13 ++++++++-----
 2 files changed, 14 insertions(+), 11 deletions(-)

diff --git a/include/aidge/backend/cpu/operator/LSQImpl_kernels.hpp b/include/aidge/backend/cpu/operator/LSQImpl_kernels.hpp
index ddb8209..1ed05e2 100644
--- a/include/aidge/backend/cpu/operator/LSQImpl_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/LSQImpl_kernels.hpp
@@ -67,16 +67,16 @@ void LSQImpl_cpu_backward_kernel(const std::size_t inputLength,
         const GI fullPrecScale_4 = input[4*i+3] / stepSize[0];
         /*****************Features Gradient Computation********************/
         // STE method is simply applied
-        grad_input[4*i] = grad_output[4*i]*((fullPrecScale_1 <= static_cast<GI>(range.first)) ? GI(0.0) :
+        grad_input[4*i] += grad_output[4*i]*((fullPrecScale_1 <= static_cast<GI>(range.first)) ? GI(0.0) :
                                                           (fullPrecScale_1 >= static_cast<GI>(range.second)) ? GI(0.0) :
                                                           GI(1.0));
-        grad_input[4*i+1] = grad_output[4*i+1]*((fullPrecScale_2 <= static_cast<GI>(range.first)) ? GI(0.0) :
+        grad_input[4*i+1] += grad_output[4*i+1]*((fullPrecScale_2 <= static_cast<GI>(range.first)) ? GI(0.0) :
                                                               (fullPrecScale_2 >= static_cast<GI>(range.second)) ? GI(0.0) :
                                                               GI(1.0));
-        grad_input[4*i+2] = grad_output[4*i+2]*((fullPrecScale_3 <= static_cast<GI>(range.first)) ? GI(0.0) :
+        grad_input[4*i+2] += grad_output[4*i+2]*((fullPrecScale_3 <= static_cast<GI>(range.first)) ? GI(0.0) :
                                                               (fullPrecScale_3 >= static_cast<GI>(range.second)) ? GI(0.0) :
                                                               GI(1.0));
-        grad_input[4*i+3] = grad_output[4*i+3]*((fullPrecScale_4 <= static_cast<GI>(range.first)) ? GI(0.0) :
+        grad_input[4*i+3] += grad_output[4*i+3]*((fullPrecScale_4 <= static_cast<GI>(range.first)) ? GI(0.0) :
                                                               (fullPrecScale_4 >= static_cast<GI>(range.second)) ? GI(0.0) :
                                                               GI(1.0));
 
@@ -105,7 +105,7 @@ void LSQImpl_cpu_backward_kernel(const std::size_t inputLength,
     // Process remaining
     for(unsigned int i=inputLength-inputLength%4; i<inputLength; ++i) {
         const GI fullPrecScale = input[i] / stepSize[0];
-        grad_input[i] = grad_output[i]*((fullPrecScale <= static_cast<GI>(range.first)) ? GI(0.0) :
+        grad_input[i] += grad_output[i]*((fullPrecScale <= static_cast<GI>(range.first)) ? GI(0.0) :
                                         (fullPrecScale >= static_cast<GI>(range.second)) ? GI(0.0) :
                                         GI(1.0));
         GI qData = fullPrecScale;
@@ -117,7 +117,7 @@ void LSQImpl_cpu_backward_kernel(const std::size_t inputLength,
 
     const GI gradScaleFactor = static_cast<GI>(1.0f / std::sqrt(inputLength * range.second));
     // 3rd: Multiply Step Size gradient with scale factor
-    grad_stepSize[0] = diffStepSize * gradScaleFactor;
+    grad_stepSize[0] += diffStepSize * gradScaleFactor;
 }
 
 
diff --git a/src/backend/cuda/operator/LSQImpl_CUDA_kernels.cu b/src/backend/cuda/operator/LSQImpl_CUDA_kernels.cu
index 0d54909..96065e4 100644
--- a/src/backend/cuda/operator/LSQImpl_CUDA_kernels.cu
+++ b/src/backend/cuda/operator/LSQImpl_CUDA_kernels.cu
@@ -84,10 +84,11 @@ __global__ void LSQImpl_cuda_backward_kernel_(const std::size_t inputLength,
 
         const GI fullPrecScale = input[i] / stepSize[0];
         /*****************************Data/Weights Gradient Computation************************/
-        // STE method is simply apply:
-        grad_input[i] = grad_output[i]*(  (fullPrecScale <= static_cast<GI>(range.first)) ? GI(0.0) :
-                                            (fullPrecScale >= static_cast<GI>(range.second)) ? GI(0.0) :
-                                                    GI(1.0)); 
+        // STE method is simply applied:
+        // (we accumulate the gradient instead of replacing it)
+        grad_input[i] += grad_output[i] * ((fullPrecScale <= static_cast<GI>(range.first))  ? GI(0.0) :
+                                           (fullPrecScale >= static_cast<GI>(range.second)) ? GI(0.0) :
+                                            GI(1.0)); 
 
         /*****************************Step Size Gradient Computation*************************/
         GI qData = fullPrecScale;
@@ -142,7 +143,9 @@ void Aidge::LSQImpl_cuda_backward_kernel(const std::size_t inputLength,
     // for simplicity and foolproof-ness
     thrust::device_ptr<GI> grad_workspacePtr(grad_workspace);
     thrust::device_ptr<GI> grad_stepSizePtr(grad_stepSize);
-    grad_stepSizePtr[0] = thrust::reduce(grad_workspacePtr, grad_workspacePtr + inputLength, GI(0.0));
+
+    // We accumulate the stepSize gradient instead of replacing it 
+    grad_stepSizePtr[0] += thrust::reduce(grad_workspacePtr, grad_workspacePtr + inputLength, GI(0.0));
 
     //printf(" step grad = %f \n", (float) grad_stepSizePtr[0]);
 
-- 
GitLab
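
Note that switching the kernels from "=" to "+=" changes their contract: gradient buffers are accumulated into rather than overwritten, so a caller must zero them before a fresh backward pass. A minimal sketch of that convention, using hypothetical buffer names rather than the Aidge API:

    #include <algorithm>
    #include <cstddef>
    #include <vector>

    // Sketch of a backward step under the accumulation convention.
    void backwardStep(std::vector<float>& gradInput,
                      const std::vector<float>& gradOutput,
                      bool freshPass) {
        if (freshPass)
            std::fill(gradInput.begin(), gradInput.end(), 0.0f); // caller zeroes
        for (std::size_t i = 0; i < gradInput.size(); ++i)
            gradInput[i] += gradOutput[i]; // the kernel only ever accumulates
    }

This is what allows gradients to be accumulated over several micro-batches without extra copies.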


From 73e899eb733850400ec8df7896171663e225d0ca Mon Sep 17 00:00:00 2001
From: bhalimi <benjamin.halimi@cea.fr>
Date: Wed, 8 Jan 2025 10:32:06 +0000
Subject: [PATCH 11/60] improve tensor manipulation routines + enhance
 insertCompensationNodes

---
 src/PTQ/CLE.cpp     |  73 +++++++++++++++++------
 src/PTQ/PTQ.cpp     | 137 ++++++++++++++++++++++++++------------------
 src/QAT/QAT_LSQ.cpp |   9 +--
 3 files changed, 138 insertions(+), 81 deletions(-)

diff --git a/src/PTQ/CLE.cpp b/src/PTQ/CLE.cpp
index 1d5ccc7..2c6e374 100644
--- a/src/PTQ/CLE.cpp
+++ b/src/PTQ/CLE.cpp
@@ -19,6 +19,12 @@
 #include "aidge/utils/Log.hpp"
 #include "aidge/operator/OperatorTensor.hpp"
 
+#include "aidge/operator/Mul.hpp"
+#include "aidge/operator/ArgMax.hpp"
+#include "aidge/operator/Abs.hpp"
+#include "aidge/operator/Reshape.hpp"
+#include "aidge/operator/Round.hpp"
+
 namespace Aidge
 {
 
@@ -34,27 +40,58 @@ static std::shared_ptr<Tensor> getBiasTensor(std::shared_ptr<Node> node)
 
 static void rescaleTensor(std::shared_ptr<Tensor> tensor, float scaling)
 {
-    // Get the tensor data pointer
-    float * castedTensor = static_cast <float *> (tensor->getImpl()->rawPtr());
-
-    // Rescale the tensor
-    for(std::size_t i = 0; i < tensor->size(); i++)
-        castedTensor[i] *= scaling;
+    auto mulOp = Mul_Op();
+    mulOp.setDataType(tensor->dataType());
+    mulOp.setBackend(tensor->backend());
+
+    std::shared_ptr<Aidge::Tensor> scalingTensor = std::make_shared<Aidge::Tensor>(Aidge::Array1D<float, 1> {scaling});
+    scalingTensor->setDataType(tensor->dataType());
+    scalingTensor->setBackend(tensor->backend());
+
+    mulOp.associateInput(0, tensor);
+    mulOp.associateInput(1, scalingTensor);
+
+    mulOp.forward();
+    
+    auto outTensor = mulOp.getOutput(0);
+    *tensor = *outTensor;
+    //tensor->copyCast(*outTensor);
 }
 
-static float getTensorAbsoluteMax(std::shared_ptr <Tensor> tensor)
+// TODO : make the retrieval of argmax values backend independent (refCastFrom)
+static double getTensorAbsoluteMax(std::shared_ptr<Tensor> tensor)
 {
-    // Get the tensor data pointer and edit it
-    float * castedTensor = static_cast<float*>(tensor->getImpl()->rawPtr());
-
-    // Get the tensor absolute max value
-    float maxValue = 0.0f;
-    for(std::size_t i = 0; i < tensor->size(); ++i) {
-        if(std::fabs(castedTensor[i]) > maxValue) {
-            maxValue = std::fabs(castedTensor[i]);
-        }
-    }
-    return maxValue;
+    // get the abs tensor
+
+    std::shared_ptr<Tensor> absTensor = std::make_shared<Tensor>(tensor->abs());
+
+    // flatten the abs tensor
+
+    std::int64_t nbElement = tensor->size();
+
+    auto reshapeOp = Reshape_Op({nbElement});
+    reshapeOp.setDataType(tensor->dataType());
+    reshapeOp.setBackend(tensor->backend());
+
+    reshapeOp.associateInput(0, absTensor);
+    reshapeOp.forward();
+    std::shared_ptr<Tensor> flatTensor = reshapeOp.getOutput(0);
+
+    // Get the argmax
+
+    auto argmaxOp = ArgMax_Op(0, true, false);
+    argmaxOp.setDataType(tensor->dataType());
+    argmaxOp.setBackend(tensor->backend());
+
+    argmaxOp.associateInput(0, flatTensor);
+    argmaxOp.forward();
+    std::shared_ptr<Tensor> argmaxTensor = argmaxOp.getOutput(0);
+
+    // Return the max
+
+    int maxIndex = std::round(argmaxTensor->get<double>(0));
+
+    return flatTensor->get<double>(maxIndex);
 }
 
 void crossLayerEqualization(std::shared_ptr<GraphView> graphView, float targetDelta)
diff --git a/src/PTQ/PTQ.cpp b/src/PTQ/PTQ.cpp
index 54b95cb..54d645e 100644
--- a/src/PTQ/PTQ.cpp
+++ b/src/PTQ/PTQ.cpp
@@ -28,6 +28,12 @@
 #include "aidge/operator/BatchNorm.hpp"
 #include "aidge/operator/Conv.hpp"
 
+#include "aidge/operator/ArgMax.hpp"
+#include "aidge/operator/Abs.hpp"
+#include "aidge/operator/Reshape.hpp"
+#include "aidge/operator/Round.hpp"
+
+
 #include "aidge/recipes/Recipes.hpp"
 #include "aidge/recipes/QuantRecipes.hpp"
 
@@ -66,51 +72,75 @@ bool checkArchitecture(std::shared_ptr<GraphView> graphView)
     return true;
 }
 
-static void fillTensor(std::shared_ptr<Tensor> tensor, double value)
+static void rescaleTensor(std::shared_ptr<Tensor> tensor, float scaling)
 {
-    // Get the tensor data pointer
-    double * castedTensor = static_cast <double *> (tensor->getImpl()->rawPtr());
+    auto mulOp = Mul_Op();
+    mulOp.setDataType(tensor->dataType());
+    mulOp.setBackend(tensor->backend());
 
-    // Fill the tensor
-    for(std::size_t i = 0; i < tensor->size(); i++)
-        castedTensor[i] = value;
-}
+    std::shared_ptr<Aidge::Tensor> scalingTensor = std::make_shared<Aidge::Tensor>(Aidge::Array1D<float, 1> {scaling});
+    scalingTensor->setDataType(tensor->dataType());
+    scalingTensor->setBackend(tensor->backend());
 
-static void rescaleTensor(std::shared_ptr<Tensor> tensor, double scaling)
-{
-    // Get the tensor data pointer
-    double * castedTensor = static_cast <double *> (tensor->getImpl()->rawPtr());
+    mulOp.associateInput(0, tensor);
+    mulOp.associateInput(1, scalingTensor);
 
-    // Rescale the tensor
-    for(std::size_t i = 0; i < tensor->size(); i++)
-        castedTensor[i] *= scaling;
+    mulOp.forward();
+    
+    auto outTensor = mulOp.getOutput(0);
+    *tensor = *outTensor;
 }
 
 static void roundTensor(std::shared_ptr<Tensor> tensor)
 {
-    // Get the tensor data pointer
-    double * castedTensor = static_cast <double *> (tensor->getImpl()->rawPtr());
+    auto roundOp = Round_Op();
+    roundOp.setDataType(tensor->dataType());
+    roundOp.setBackend(tensor->backend());
 
-    // Rescale the tensor
-    for(std::size_t i = 0; i < tensor->size(); i++)
-        castedTensor[i] = std::nearbyint(castedTensor[i]);//Round
+    roundOp.associateInput(0, tensor);
+    roundOp.forward();
+    
+    auto outTensor = roundOp.getOutput(0);
+    *tensor = *outTensor;
 }
 
-static double getTensorAbsoluteMax(std::shared_ptr <Tensor> tensor)
+// TODO : make the retrieval of argmax values backend independent (refCastFrom)
+static double getTensorAbsoluteMax(std::shared_ptr<Tensor> tensor)
 {
-    // Get the tensor data pointer and edit it
-    double * castedTensor = static_cast<double*>(tensor->getImpl()->rawPtr());
-
-    // Get the tensor absolute max value
-    double maxValue = 0.0f;
-    for(std::size_t i = 0; i < tensor->size(); ++i) {
-        if(std::fabs(castedTensor[i]) > maxValue) {
-            maxValue = std::fabs(castedTensor[i]);
-        }
-    }
-    return maxValue;
+    // get the abs tensor
+
+    std::shared_ptr<Tensor> absTensor = std::make_shared<Tensor>(tensor->abs());
+
+    // flatten the abs tensor
+
+    std::int64_t nbElement = tensor->size();
+
+    auto reshapeOp = Reshape_Op({nbElement});
+    reshapeOp.setDataType(tensor->dataType());
+    reshapeOp.setBackend(tensor->backend());
+
+    reshapeOp.associateInput(0, absTensor);
+    reshapeOp.forward();
+    std::shared_ptr<Tensor> flatTensor = reshapeOp.getOutput(0);
+
+    // Get the argmax
+
+    auto argmaxOp = ArgMax_Op(0, true, false);
+    argmaxOp.setDataType(tensor->dataType());
+    argmaxOp.setBackend(tensor->backend());
+
+    argmaxOp.associateInput(0, flatTensor);
+    argmaxOp.forward();
+    std::shared_ptr<Tensor> argmaxTensor = argmaxOp.getOutput(0);
+
+    // Return the max
+
+    int maxIndex = std::round(argmaxTensor->get<double>(0));
+
+    return flatTensor->get<double>(maxIndex);
 }
 
+
 // TODO : pass nodeVector by reference ...
 static std::vector<std::shared_ptr<Node>> removeMatchingNodes(std::vector<std::shared_ptr<Node>> nodeVector, std::string nodeType)
 {
@@ -876,50 +906,42 @@ static void insertCompensationNodes(std::shared_ptr<GraphView> graphView, std::u
 
     for (std::shared_ptr<Node> node : nodeVector)
     {
-        // A merging node is always followed by a scaling node at this point ...
+        // A merging node is always followed by a Quantizer node at this point
 
         if (node->type() == "Quantizer")
         {   
+            // check if the Quantizer is a residual one, and insert a compensation node if so ...
+
             bool prevNodeIsForking = ((node->getParent(0))->getChildren().size() > 1);
             bool prevNodeIsAffine = isAffine(node->getParent(0));
             bool insertNode = prevNodeIsForking || !prevNodeIsAffine;
 
             if (insertNode)
             {
-                // create and insert the multplicative node
+                // create and insert the multiplicative node before the Quantizer
 
                 std::string mulNodeName = makeUniqueName(node->name() + "_Mul", graphView);
                 std::shared_ptr<Node> mulNode = Mul(mulNodeName);
-
                 mulNode->getOperator()->setDataType(DataType::Float64); // getDataType(parentNode)
                 mulNode->getOperator()->setBackend("cpu");
 
                 graphView->insertParent(node, mulNode, 0, 0, 0);
 
-                // create and insert the producer node
-
-                std::shared_ptr<Tensor> inputTensor = std::static_pointer_cast<Tensor> (mulNode->getOperator()->getRawInput(0));
-                std::shared_ptr<Tensor> coeffTensor = std::make_shared<Tensor>();
+                // Add the coeff producer to the multiplier node
 
-                coeffTensor->setDataType(DataType::Float64); // getDataType(parentNode)
-                coeffTensor->setBackend("cpu"); 
+                std::shared_ptr<Node> coeffProducer = addProducer(mulNode, 1, {1}, ""); 
+                std::shared_ptr<Tensor> coeffTensor = std::make_shared<Tensor>(Array1D<double, 1> {signedMax});
+                coeffProducer->getOperator()->setOutput(0, coeffTensor);
 
-                coeffTensor->resize(inputTensor->dims());
-                fillTensor(coeffTensor, 1); 
+                coeffProducer->getOperator()->setDataType(DataType::Float64);
+                coeffProducer->getOperator()->setBackend("cpu"); 
 
-                std::shared_ptr<Node> producerNode = Producer(coeffTensor, makeUniqueName("coeff", graphView));
-                producerNode->addChild(mulNode);
-                graphView->add(producerNode);
+                graphView->add(coeffProducer); // needed ?
 
-                // rescale the coeffs and edit scaling factor
+                // Adapt the scaling factor value accordingly
 
-                fillTensor(coeffTensor, signedMax);
-
-                double currScalingFactor = getScalingFactor(node); // XXX bad naming !
+                double currScalingFactor = getScalingFactor(node); 
                 updateScalingFactor(node, currScalingFactor / signedMax);
-
-                // TODO : double check this !!!
-                //std::cout << getTensorAbsoluteMax(coeffTensor) << std::endl;
             }
         }
     }
@@ -931,7 +953,8 @@ void performSingleShiftApproximation(std::shared_ptr<GraphView> graphView, bool
 
     for (std::shared_ptr<Node> node : nodeVector)
     {
-        // Use A meatoperator of type Scaling of MulCompensation instead
+        // TODO : use Compensation nodes instead of Mul nodes
+
         if (isAffine(node) || (node->type() == "Mul"))
         {
             std::shared_ptr<Node> scalingNode = (*node->getChildren().begin());
@@ -940,7 +963,7 @@ void performSingleShiftApproximation(std::shared_ptr<GraphView> graphView, bool
 
             double approx = std::pow(2, std::ceil(std::log2(base)));
 
-            updateScalingFactor(scalingNode,approx);
+            updateScalingFactor(scalingNode, approx);
 
             double ratio = base / approx;
 
@@ -954,7 +977,7 @@ void performSingleShiftApproximation(std::shared_ptr<GraphView> graphView, bool
                 std::shared_ptr<Tensor> biasTensor = getBiasTensor(node);
                 rescaleTensor(biasTensor, ratio);
                 if (!noQuant)
-                roundTensor(biasTensor);
+                    roundTensor(biasTensor);
             }
         }
     }
@@ -1058,8 +1081,8 @@ void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits,
     if (useCuda)
         graphView->setBackend("cuda");
 
-    //std::cout << " === SCALINGS (AFTER CAST) ===" << std::endl;
-    //printScalingFactors(graphView);
+    std::cout << " === SCALINGS (AFTER CAST) ===" << std::endl;
+    printScalingFactors(graphView);
 
     Log::info(" Reseting the scheduler ...");
     SequentialScheduler scheduler(graphView);
diff --git a/src/QAT/QAT_LSQ.cpp b/src/QAT/QAT_LSQ.cpp
index 38c8182..4b23eba 100644
--- a/src/QAT/QAT_LSQ.cpp
+++ b/src/QAT/QAT_LSQ.cpp
@@ -89,19 +89,16 @@ void QuantLSQ::insertQuantizers(std::shared_ptr<GraphView> graphView, size_t nbB
 static float getTensorAbsMean(std::shared_ptr<Tensor> tensor)
 {
     auto backend = tensor->backend();
+
     if (backend == "cuda")
         tensor->setBackend("cpu");
 
-    float acc = 0;
-    float* castedTensor = static_cast<float *> (tensor->getImpl()->rawPtr());
-    for(std::size_t i = 0; i < tensor->size(); i++)
-        acc += std::abs(castedTensor[i]);
-    acc /= static_cast<float> (tensor->size());
+    float value = (*tensor).abs().mean().get<float>(0);
 
     if (backend == "cuda")
         tensor->setBackend("cuda");
 
-    return acc;
+    return value;
 }
 
 static std::map<std::string, float> collectInputStats(std::shared_ptr<GraphView> graphView, std::shared_ptr<Tensor> calibrationData, bool useCuda)
-- 
GitLab
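
The single-shift approximation at the end of this patch replaces each scaling factor by the power of two immediately above it, then folds the residual ratio back into the weights and biases so the overall function is unchanged. A worked instance of the arithmetic (illustrative values):

    #include <cmath>
    #include <cstdio>

    int main() {
        double base = 0.30;                                       // original factor
        double approx = std::pow(2, std::ceil(std::log2(base))); // 0.5 = 2^-1
        double ratio = base / approx;                             // 0.6
        // The node keeps 'approx' (a pure bit-shift at integer inference
        // time) while the weights are rescaled by 'ratio'.
        std::printf("approx = %g, ratio = %g\n", approx, ratio);
        return 0;
    }

The compensation nodes inserted above play a similar bookkeeping role for residual Quantizers: the Mul coefficient is set to signedMax while the scaling factor is divided by it, leaving the product untouched.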


From eae59717221edebb8db5555be182af957af87e3e Mon Sep 17 00:00:00 2001
From: bhalimi <benjamin.halimi@cea.fr>
Date: Wed, 8 Jan 2025 10:37:27 +0000
Subject: [PATCH 12/60] comment out verbose output

---
 src/PTQ/PTQ.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/PTQ/PTQ.cpp b/src/PTQ/PTQ.cpp
index 54d645e..2b50f37 100644
--- a/src/PTQ/PTQ.cpp
+++ b/src/PTQ/PTQ.cpp
@@ -1081,8 +1081,8 @@ void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits,
     if (useCuda)
         graphView->setBackend("cuda");
 
-    std::cout << " === SCALINGS (AFTER CAST) ===" << std::endl;
-    printScalingFactors(graphView);
+    //std::cout << " === SCALINGS (AFTER CAST) ===" << std::endl;
+    //printScalingFactors(graphView);
 
     Log::info(" Reseting the scheduler ...");
     SequentialScheduler scheduler(graphView);
-- 
GitLab


From 85791eabedd373ee5c1b57a39d95beb48bc0bc32 Mon Sep 17 00:00:00 2001
From: bhalimi <benjamin.halimi@cea.fr>
Date: Wed, 8 Jan 2025 16:27:21 +0000
Subject: [PATCH 13/60] minor change

---
 src/PTQ/PTQ.cpp | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/PTQ/PTQ.cpp b/src/PTQ/PTQ.cpp
index 2b50f37..88e7ac8 100644
--- a/src/PTQ/PTQ.cpp
+++ b/src/PTQ/PTQ.cpp
@@ -215,6 +215,8 @@ void prepareNetwork(std::shared_ptr<GraphView> graphView)
 {
     removeFlatten(graphView);
 
+    sanitizeNodeNames(graphView);
+
     bool containsBatchNorm = false;
     std::vector<std::shared_ptr<Node>> nodeVector = retrieveNodeVector(graphView);
 
@@ -1078,6 +1080,7 @@ void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits,
     //printScalingFactors(graphView);
 
     setupDataType(graphView, inputDataSet, initialDataType);
+
     if (useCuda)
         graphView->setBackend("cuda");
 
-- 
GitLab


From 45e8db898ea6e35ccdd9f549bde983414f457495 Mon Sep 17 00:00:00 2001
From: bhalimi <benjamin.halimi@cea.fr>
Date: Mon, 13 Jan 2025 13:01:34 +0000
Subject: [PATCH 14/60] rework the LSQ code

---
 include/aidge/quantization/QAT/QAT_LSQ.hpp |  18 +-
 python_binding/pybind_QAT_LSQ.cpp          |   5 +-
 src/QAT/QAT_LSQ.cpp                        | 204 +++++++--------------
 3 files changed, 77 insertions(+), 150 deletions(-)

diff --git a/include/aidge/quantization/QAT/QAT_LSQ.hpp b/include/aidge/quantization/QAT/QAT_LSQ.hpp
index 4970be0..d7d03ca 100644
--- a/include/aidge/quantization/QAT/QAT_LSQ.hpp
+++ b/include/aidge/quantization/QAT/QAT_LSQ.hpp
@@ -20,22 +20,14 @@ namespace Aidge {
 namespace QuantLSQ {
 
 /**
- * @brief Insert the LSQ quantizer nodes in a given GraphView
- * @param graphView The GraphView containing the graph to quantize.
+ * @brief Given a GraphView with parameters properly initialized, insert
+ * the LSQ quantizer nodes, and set up the adjustment of their step-sizes.
+ * @param graphView The GraphView containing the network to quantize.
  * @param nbBits Number of quantization bits.
- * @param span Fixed output span of the quantizers.
  */
-void insertQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits, float step_size);
+void setupQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits);
 
-/**
- * @brief Given a GraphView with parameters properly initialized and some calibration data,
- * insert the LSQ quantizer nodes, and adjust their step-sizes.
- * @param graphView The GraphView containing the graph to quantize.
- * @param nbBits Number of quantization bits.
- * @param calibrationData Calibration data used to adjust the spans.
- * @param scale Multiplicative constant applied to the spans.
- */
-void insertAndInitQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits, std::shared_ptr<Tensor> calibrationData);
+void devLSQ(std::shared_ptr<Tensor> tensor);
 
 }
 }
diff --git a/python_binding/pybind_QAT_LSQ.cpp b/python_binding/pybind_QAT_LSQ.cpp
index 206985e..0b9fcc2 100644
--- a/python_binding/pybind_QAT_LSQ.cpp
+++ b/python_binding/pybind_QAT_LSQ.cpp
@@ -23,8 +23,9 @@ void init_QAT_LSQ(py::module &m) {
 
     auto mQuantLSQ = m.def_submodule("lsq");
 
-    mQuantLSQ.def("insert_quantizers", &QuantLSQ::insertQuantizers, py::arg("network"), py::arg("nb_bits"), py::arg("step_size"));
+    mQuantLSQ.def("setup_quantizers", &QuantLSQ::setupQuantizers, py::arg("network"), py::arg("nb_bits"));
+
+    mQuantLSQ.def("dev_lsq", &QuantLSQ::devLSQ, py::arg("tensor"));
 
-    mQuantLSQ.def("insert_and_init_quantizers", &QuantLSQ::insertAndInitQuantizers, py::arg("network"), py::arg("nb_bits"), py::arg("calibration_data"));
 }
 } // namespace Aidge
diff --git a/src/QAT/QAT_LSQ.cpp b/src/QAT/QAT_LSQ.cpp
index 4b23eba..04f2027 100644
--- a/src/QAT/QAT_LSQ.cpp
+++ b/src/QAT/QAT_LSQ.cpp
@@ -23,7 +23,42 @@
 
 namespace Aidge {
 
-void QuantLSQ::insertQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits, float stepSize)
+static float getTensorAbsMean(std::shared_ptr<Tensor> tensor)
+{
+    auto valueTensor = (*tensor).abs().mean();
+    std::shared_ptr<Tensor> fallback;
+    const Tensor& localTensor = valueTensor.refCastFrom(fallback, DataType::Float32, "cpu");
+    return localTensor.get<float>(0);
+}
+
+// INIT THE STEP SIZE OF A QUANTIZER NODE
+
+static bool initStepSize(std::shared_ptr<Node> quantizer)
+{
+    const auto quantizerOp = std::static_pointer_cast<LSQ_Op>(quantizer->getOperator());
+
+    float inputAbsMean = getTensorAbsMean(quantizerOp->getInput(0));
+
+    float stepSize = 2.0f * (inputAbsMean / std::sqrt(quantizerOp->range().second));
+
+    auto stepSizeTensor = std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}}));
+
+    // XXX Manage backend here ?
+    stepSizeTensor->setBackend(quantizerOp->getInput(0)->backend());
+    stepSizeTensor->setDataType(quantizerOp->getInput(0)->dataType());
+
+    auto stepSizeProducer = quantizer->getParent(1);
+
+    stepSizeProducer->getOperator()->setOutput(0, stepSizeTensor);
+
+    std::cout << " [ INIT STEP SIZE = " << stepSize << " ] " << std::endl;
+
+    return false;
+}
+
+// INPUT QUANTIZERS INSERTION
+
+static void setupInputQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits)
 {
     const auto matches = SinglePassGraphMatching(graphView).match("(Conv2D#|FC#)");
 
@@ -34,177 +69,76 @@ void QuantLSQ::insertQuantizers(std::shared_ptr<GraphView> graphView, size_t nbB
         std::pair<int, int> signedRange = {-std::pow(2, nbBits - 1), std::pow(2, nbBits - 1) - 1};
         std::pair<int, int> unsignedRange = {0, std::pow(2, nbBits) - 1};
 
-        // INPUT QUANTIZERS INSERTION
+        // Create the input quantizer node
 
-        // TODO : double check this, and use createUniqueName()
-        auto inputQuantizerName = makeUniqueName(linearNode->name() + "_lsq_i", graphView);  
-        auto inputQuantizerNode = LSQ(signedRange, inputQuantizerName);
+        auto quantizerName = makeUniqueName(linearNode->name() + "_lsq_i", graphView);  
+        auto quantizerNode = LSQ(signedRange, quantizerName);
 
-        // Set the step size
+        // Init the step-size using the node call stack
 
-        auto inputStepSizeOp = inputQuantizerNode->getParent(1)->getOperator();
-        auto inputStepSizeTensor = std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}}));
-        inputStepSizeOp->setOutput(0, inputStepSizeTensor);
+        quantizerNode->addBeforeForward([quantizerNode](){ return initStepSize(quantizerNode); });
 
         // Absorb the ReLU when possible ...
 
-        // XXX is this safe ???
-        bool nodeHasParent = static_cast<bool> (linearNode->getParents()[0]); 
-        // bool nodeHasParent = (linearNode->getParents().size() != 0);
+        bool nodeHasParent = static_cast<bool> (linearNode->getParents()[0]);  // XXX is this safe ?
 
         if (nodeHasParent) {
             auto parentNode = linearNode->getParents()[0];
             if (parentNode->type() == "ReLU") {
-                auto inputQuantizerOp = std::static_pointer_cast<LSQ_Op> (inputQuantizerNode->getOperator());
-                inputQuantizerOp->range() = unsignedRange;
+                auto quantizerOp = std::static_pointer_cast<LSQ_Op> (quantizerNode->getOperator());
+                quantizerOp->range() = unsignedRange;
                 graphView->replace({parentNode}, {}); 
             }
         }
 
-        // We need to handle the case where the linear node is the first one ...
+        // Insert the quantizer in the graphView ...
+        // (We need to handle the case where the linear node is the first one)
 
         if (nodeHasParent) {
-            graphView->insertParent(linearNode, inputQuantizerNode, 0, 0, 0);
+            graphView->insertParent(linearNode, quantizerNode, 0, 0, 0);
         } else {
-            inputQuantizerNode->addChild(graphView);
-            graphView->add(inputQuantizerNode);
+            quantizerNode->addChild(graphView);
+            graphView->add(quantizerNode);
         }
-
-        // PARAM QUANTIZERS INSERTION
-
-        // TODO : double check this, and use createUniqueName()
-        auto paramQuantizerName = makeUniqueName(linearNode->name() + "_lsq_p", graphView);  
-        auto paramQuantizerNode = LSQ(signedRange, paramQuantizerName); 
-        graphView->insertParent(linearNode, paramQuantizerNode, 1, 0, 0);
-
-        // Set the step size
-
-        auto paramStepSizeOp = paramQuantizerNode->getParent(1)->getOperator();
-        auto paramStepSizeTensor = std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}}));
-        paramStepSizeOp->setOutput(0, paramStepSizeTensor);
     }
-
 }
 
-static float getTensorAbsMean(std::shared_ptr<Tensor> tensor)
-{
-    auto backend = tensor->backend();
-
-    if (backend == "cuda")
-        tensor->setBackend("cpu");
-
-    float value = (*tensor).abs().mean().get<float>(0);
-
-    if (backend == "cuda")
-        tensor->setBackend("cuda");
-
-    return value;
-}
+// PARAM QUANTIZERS INSERTION
 
-static std::map<std::string, float> collectInputStats(std::shared_ptr<GraphView> graphView, std::shared_ptr<Tensor> calibrationData, bool useCuda)
+static void setupParamQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits)
 {
-    // Propagate the calibration tensor
+    const auto matches = SinglePassGraphMatching(graphView).match("(Conv2D#|FC#)");
 
-    SequentialScheduler scheduler(graphView);
-    scheduler.resetScheduling();
-    scheduler.forward(true, {calibrationData});
+    std::pair<int, int> signedRange = {-std::pow(2, nbBits - 1), std::pow(2, nbBits - 1) - 1};
 
-    // Store the input tensor statistics
+    for (const auto& match : matches) 
+    {       
+        auto linearNode = match.graph->rootNode(); 
 
-    if (useCuda)
-        graphView->setBackend("cpu"); 
+        // TODO : double check this, and use createUniqueName()
+        auto quantizerName = makeUniqueName(linearNode->name() + "_lsq_p", graphView);  
+        auto quantizerNode = LSQ(signedRange, quantizerName); 
 
-    std::map<std::string, float> inputStats;
-    for (auto node : graphView->getNodes())
-    {
-        if (node->type() == "FC" || node->type() == "Conv2D") // TODO: use graph matching !!!
-        {
-            const auto op = std::static_pointer_cast<LSQ_Op>(node->getOperator());
-            float inputAbsMean = getTensorAbsMean(op->getInput(0));
-            inputStats.insert(std::make_pair(node->name(), inputAbsMean));
-            std::cout << node->name() << " -> " << inputAbsMean << std::endl;
-        }
-    }
+        // Init the step-size using the node's before-forward callback stack
 
-    if (useCuda)
-        graphView->setBackend("cuda");
+        quantizerNode->addBeforeForward([quantizerNode](){ return initStepSize(quantizerNode); });
 
-    return inputStats;
-}
+        // Insert the quantizer in the graphView
 
-static std::map<std::string, float> collectParamStats(std::shared_ptr<GraphView> graphView, bool useCuda)
-{
-    if (useCuda)
-        graphView->setBackend("cpu");
-
-    std::map<std::string, float> paramStats;
-    for (auto node : graphView->getNodes())
-    {
-        if (node->type() == "FC" || node->type() == "Conv2D") // TODO: use graph matching !!!
-        {
-            const auto op = std::static_pointer_cast<LSQ_Op>(node->getOperator());
-            float paramAbsMean = getTensorAbsMean(op->getInput(1));
-            paramStats.insert(std::make_pair(node->name(), paramAbsMean));
-            std::cout << node->name() << " -> " << paramAbsMean << std::endl;
-        }
+        graphView->insertParent(linearNode, quantizerNode, 1, 0, 0);
     }
-    
-    if (useCuda)
-        graphView->setBackend("cuda");
-
-    return paramStats;
 }
 
-static void adjustQuantizersStepSizes(std::shared_ptr<GraphView> graphView, std::map<std::string, float> inputStats, std::map<std::string, float> paramStats)
+void QuantLSQ::setupQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits)
 {
-    const auto matches = SinglePassGraphMatching(graphView).match("(Conv2D#|FC#)");
-
-    for (const auto& match : matches) 
-    {
-        auto linearNode = match.graph->rootNode();
-
-        // INPUT QUANTIZERS STEP-SIZES
-
-        auto inputQuantNode = linearNode->getParent(0);
-        auto inputQuantOp = std::static_pointer_cast<LSQ_Op>(inputQuantNode->getOperator());
-
-        float absMean = inputStats[linearNode->name()];
-        float stepSize = 2.0f * (absMean / std::sqrt(inputQuantOp->range().second));
-
-        auto inputStepSizeOp = inputQuantNode->getParent(1)->getOperator();
-        // XXX inputStepSizeOp->setOutput(0, std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}})));
-        auto inputStepSizeTensor = std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}}));
-        inputStepSizeOp->setOutput(0, inputStepSizeTensor);
-
-        // PARAM QUANTIZERS STEP-SIZES
-
-        auto paramQuantNode = linearNode->getParent(1);
-        auto paramQuantOp = std::static_pointer_cast<LSQ_Op>(paramQuantNode->getOperator());
-
-        absMean = paramStats[linearNode->name()];
-        stepSize = 2.0f * (absMean / std::sqrt(paramQuantOp->range().second));
-
-        auto paramStepSizeOp = paramQuantNode->getParent(1)->getOperator();
-        // XXX paramStepSizeOp->setOutput(0, std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}})));
-        auto paramStepSizeTensor = std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}}));
-        paramStepSizeOp->setOutput(0, paramStepSizeTensor);
-    }
+    setupInputQuantizers(graphView, nbBits);
+    setupParamQuantizers(graphView, nbBits);
 }
 
-void QuantLSQ::insertAndInitQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits, std::shared_ptr<Tensor> calibrationData)
+void QuantLSQ::devLSQ(std::shared_ptr<Tensor> tensor)
 {
-    bool useCuda = (calibrationData->backend() == "cuda");
-
-    // Collect the tensor statisics
-    auto inputStats = collectInputStats(graphView, calibrationData, useCuda);
-
-    auto paramStats = collectParamStats(graphView, useCuda);
-
-    // Insert the quantizers
-    insertQuantizers(graphView, nbBits, 1.0);
-
-    // Adjust the quantizers step-sizes
-    adjustQuantizersStepSizes(graphView, inputStats, paramStats);
+    float mean = (tensor->mean()).get<float> (0);
+    std::cout << " MEAN  = " << mean << std::endl;
 }
 
 }
\ No newline at end of file
-- 
GitLab
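
The step-size initialization wired in through addBeforeForward follows the usual LSQ heuristic: twice the mean absolute value of the tensor, divided by the square root of the upper quantization bound. A worked instance of the formula, with an assumed input statistic:

    #include <cmath>
    #include <cstdio>

    int main() {
        int nbBits = 8;
        double qMax = std::pow(2, nbBits - 1) - 1;  // 127 for a signed range
        double inputAbsMean = 0.5;                  // assumed E[|input|]
        double stepSize = 2.0 * inputAbsMean / std::sqrt(qMax);
        std::printf("step size = %f\n", stepSize);  // ~0.0887
        return 0;
    }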


From ccea932f276aad2ed919951693f7d7628cb02472 Mon Sep 17 00:00:00 2001
From: bhalimi <benjamin.halimi@cea.fr>
Date: Wed, 15 Jan 2025 13:18:27 +0000
Subject: [PATCH 15/60] set the CLE data types to double

---
 include/aidge/quantization/PTQ/CLE.hpp |  2 +-
 src/PTQ/CLE.cpp                        | 24 ++++++++++++------------
 2 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/include/aidge/quantization/PTQ/CLE.hpp b/include/aidge/quantization/PTQ/CLE.hpp
index d94b6e9..77eaf7f 100644
--- a/include/aidge/quantization/PTQ/CLE.hpp
+++ b/include/aidge/quantization/PTQ/CLE.hpp
@@ -30,7 +30,7 @@ namespace Aidge
      * @param graphView The GraphView to process.
      * @param targetDelta the stopping criterion (typical value : 0.01)
      */
-    void crossLayerEqualization(std::shared_ptr<GraphView> graphView, float targetDelta = 0.01);
+    void crossLayerEqualization(std::shared_ptr<GraphView> graphView, double targetDelta = 0.01);
 
 }
 
diff --git a/src/PTQ/CLE.cpp b/src/PTQ/CLE.cpp
index 1d5ccc7..2c81815 100644
--- a/src/PTQ/CLE.cpp
+++ b/src/PTQ/CLE.cpp
@@ -32,23 +32,23 @@ static std::shared_ptr<Tensor> getBiasTensor(std::shared_ptr<Node> node)
     return std::static_pointer_cast<OperatorTensor>(node->getOperator())->getInput(2);
 }
 
-static void rescaleTensor(std::shared_ptr<Tensor> tensor, float scaling)
+static void rescaleTensor(std::shared_ptr<Tensor> tensor, double scaling)
 {
     // Get the tensor data pointer
-    float * castedTensor = static_cast <float *> (tensor->getImpl()->rawPtr());
+    double * castedTensor = static_cast<double *> (tensor->getImpl()->rawPtr());
 
     // Rescale the tensor
     for(std::size_t i = 0; i < tensor->size(); i++)
         castedTensor[i] *= scaling;
 }
 
-static float getTensorAbsoluteMax(std::shared_ptr <Tensor> tensor)
+static double getTensorAbsoluteMax(std::shared_ptr <Tensor> tensor)
 {
     // Get the tensor data pointer and edit it
-    float * castedTensor = static_cast<float*>(tensor->getImpl()->rawPtr());
+    double * castedTensor = static_cast<double*> (tensor->getImpl()->rawPtr());
 
     // Get the tensor absolute max value
-    float maxValue = 0.0f;
+    double maxValue = 0.0;
     for(std::size_t i = 0; i < tensor->size(); ++i) {
         if(std::fabs(castedTensor[i]) > maxValue) {
             maxValue = std::fabs(castedTensor[i]);
@@ -57,7 +57,7 @@ static float getTensorAbsoluteMax(std::shared_ptr <Tensor> tensor)
     return maxValue;
 }
 
-void crossLayerEqualization(std::shared_ptr<GraphView> graphView, float targetDelta)
+void crossLayerEqualization(std::shared_ptr<GraphView> graphView, double targetDelta)
 {
     std::vector<std::shared_ptr<Node>> nodeVector = retrieveNodeVector(graphView);
 
@@ -79,7 +79,7 @@ void crossLayerEqualization(std::shared_ptr<GraphView> graphView, float targetDe
         if (isAffine(node))
             affineNodeVector.push_back(node);
 
-    float maxRangeDelta;
+    double maxRangeDelta;
 
     do 
     {
@@ -94,18 +94,18 @@ void crossLayerEqualization(std::shared_ptr<GraphView> graphView, float targetDe
             std::shared_ptr<Node> n1 = affineNodeVector[i];
             std::shared_ptr<Node> n2 = affineNodeVector[i+1];
 
-            float r1 = getTensorAbsoluteMax(getWeightTensor(n1));
-            float r2 = getTensorAbsoluteMax(getWeightTensor(n2));
+            double r1 = getTensorAbsoluteMax(getWeightTensor(n1));
+            double r2 = getTensorAbsoluteMax(getWeightTensor(n2));
 
-            float s1 = std::sqrt(r1 * r2) / r1;
-            float s2 = std::sqrt(r1 * r2) / r2;
+            double s1 = std::sqrt(r1 * r2) / r1;
+            double s2 = std::sqrt(r1 * r2) / r2;
 
             rescaleTensor(getWeightTensor(n1), s1);
             rescaleTensor(getWeightTensor(n2), s2);
 
             rescaleTensor(getBiasTensor(n1), s1);
 
-            float rangeDelta = std::abs(r1 - r2);
+            double rangeDelta = std::abs(r1 - r2);
             if (rangeDelta > maxRangeDelta)
                 maxRangeDelta = rangeDelta;
         }
-- 
GitLab
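
The equalization pass scales each pair of consecutive affine layers so that their weight ranges meet at the geometric mean sqrt(r1 * r2); since s1 * s2 = 1, the composition of the two layers is preserved. A worked instance with illustrative ranges:

    #include <cmath>
    #include <cstdio>

    int main() {
        double r1 = 4.0, r2 = 1.0;            // illustrative abs-max ranges
        double s1 = std::sqrt(r1 * r2) / r1;  // 0.5
        double s2 = std::sqrt(r1 * r2) / r2;  // 2.0
        // Both rescaled ranges land on sqrt(r1 * r2) = 2.
        std::printf("equalized: %g and %g\n", r1 * s1, r2 * s2);
        return 0;
    }

Each iteration shrinks the largest range gap, and the loop stops once maxRangeDelta falls below targetDelta.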


From ad2675740c411b36d73cb8f6fab3689eef739412 Mon Sep 17 00:00:00 2001
From: Olivier BICHLER <olivier.bichler@cea.fr>
Date: Fri, 17 Jan 2025 10:54:17 +0100
Subject: [PATCH 16/60] Hotfix

---
 include/aidge/operator/LSQ.hpp | 2 +-
 src/PTQ/PTQMetaOps.cpp         | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/include/aidge/operator/LSQ.hpp b/include/aidge/operator/LSQ.hpp
index 10ceb81..eb266bc 100644
--- a/include/aidge/operator/LSQ.hpp
+++ b/include/aidge/operator/LSQ.hpp
@@ -95,7 +95,7 @@ public:
  */
 inline std::shared_ptr<Node> LSQ(const std::pair<int, int>& range = {0, 255}, const std::string& name = "") {
     auto lsq = std::make_shared<Node>(std::make_shared<LSQ_Op>(range), name);
-    addProducer(lsq, 1, {1}, "ss");
+    addProducer<1>(lsq, 1, {1}, "ss");
     return lsq;
 }
 }
diff --git a/src/PTQ/PTQMetaOps.cpp b/src/PTQ/PTQMetaOps.cpp
index 152a3b0..527d853 100644
--- a/src/PTQ/PTQMetaOps.cpp
+++ b/src/PTQ/PTQMetaOps.cpp
@@ -46,7 +46,7 @@ std::shared_ptr<Node> Quantizer(double scalingFactor, double clipMin, double cli
     // connect the scaling factor producer
 
     std::shared_ptr<Tensor> scalingFactorTensor = std::make_shared<Tensor>(Array1D<double, 1> {scalingFactor});
-    std::shared_ptr<Node> scalingFactorProducer = addProducer(mulNode, 1, {1}, "ScalingFactor"); 
+    std::shared_ptr<Node> scalingFactorProducer = addProducer<1>(mulNode, 1, {1}, "ScalingFactor"); 
     scalingFactorProducer->getOperator()->setOutput(0, scalingFactorTensor);
     
     // create the metaop graph
@@ -67,7 +67,7 @@ std::shared_ptr<Node> Scaling(double scalingFactor, const std::string& name)
 
     std::shared_ptr<Node> mulNode = Mul((!name.empty()) ? name + "_Scaling" : "");
 
-    std::shared_ptr<Node> scalingFactorProducer = addProducer(mulNode, 1, {1}, "ScalingFactor"); 
+    std::shared_ptr<Node> scalingFactorProducer = addProducer<1>(mulNode, 1, {1}, "ScalingFactor"); 
     scalingFactorProducer->getOperator()->setOutput(0, scalingFactorTensor);
 
     std::shared_ptr<GraphView> graphView  = Sequential({mulNode});
-- 
GitLab


From 40863ab3ca0c489683b823fc0b163c582761eb89 Mon Sep 17 00:00:00 2001
From: Olivier BICHLER <olivier.bichler@cea.fr>
Date: Sun, 19 Jan 2025 14:07:53 +0100
Subject: [PATCH 17/60] Hotfix: removed std::cout

---
 src/PTQ/PTQ.cpp              | 4 ++--
 src/QAT/QAT_FixedQ.cpp       | 6 +++---
 src/QAT/QAT_LSQ.cpp          | 4 ++--
 src/recipes/QuantRecipes.cpp | 2 +-
 4 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/src/PTQ/PTQ.cpp b/src/PTQ/PTQ.cpp
index 54b95cb..0e26313 100644
--- a/src/PTQ/PTQ.cpp
+++ b/src/PTQ/PTQ.cpp
@@ -995,7 +995,7 @@ static void printRanges(std::shared_ptr<GraphView> graphView, std::map<std::stri
     auto scheduling = scheduler.getStaticScheduling();
     for (auto node : scheduling)
         if (node->type() == "Scaling")
-            std::cout << node->name() << " range = " << valueRanges[node->name()] << std::endl;
+            fmt::println("{} range = {}", node->name(), valueRanges[node->name()]);
 }
 
 void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits, std::vector<std::shared_ptr<Tensor>> inputDataSet, Clipping clippingMode, bool noQuant, bool optimizeSigns, bool singleShift, bool useCuda, bool verbose)
@@ -1098,7 +1098,7 @@ void clearBiases(std::shared_ptr<GraphView> graphView)
 void devPTQ(std::shared_ptr<GraphView> graphView) 
 {
     for (std::shared_ptr<Node> node : graphView->getNodes())
-        std::cout << " UUU : " << node->name() << std::endl;   
+        fmt::println(" UUU : {}", node->name());
 }
 
 }
diff --git a/src/QAT/QAT_FixedQ.cpp b/src/QAT/QAT_FixedQ.cpp
index d22074f..9160b4a 100644
--- a/src/QAT/QAT_FixedQ.cpp
+++ b/src/QAT/QAT_FixedQ.cpp
@@ -91,7 +91,7 @@ static std::map<std::string, float> collectInputStats(std::shared_ptr<GraphView>
             const auto op = std::static_pointer_cast<FixedQ_Op>(node->getOperator());
             float inputStd = getTensorStd(op->getInput(0));
             inputStats.insert(std::make_pair(node->name(), inputStd));
-            std::cout << node->name() << " -> " << inputStd << std::endl;
+            fmt::println("{} -> {}", node->name(), inputStd);
         }
     }
 
@@ -108,7 +108,7 @@ static std::map<std::string, float> collectParamStats(std::shared_ptr<GraphView>
             const auto op = std::static_pointer_cast<FixedQ_Op>(node->getOperator());
             float paramStd = getTensorStd(op->getInput(1));
             paramStats.insert(std::make_pair(node->name(), paramStd));
-            std::cout << node->name() << " -> " << paramStd << std::endl;
+            fmt::println("{} -> {}", node->name(), paramStd);
         }
     }
     
@@ -156,7 +156,7 @@ void QuantFixedQ::devQAT(std::shared_ptr<GraphView> graphView)
     scheduler.generateScheduling();
     auto s = scheduler.getStaticScheduling();
     for (std::shared_ptr<Node> node : s)
-        std::cout << " name : " << node->name() << std::endl;
+        fmt::println(" name : {}", node->name());
 }
 
 }
\ No newline at end of file
diff --git a/src/QAT/QAT_LSQ.cpp b/src/QAT/QAT_LSQ.cpp
index 38c8182..9b51e84 100644
--- a/src/QAT/QAT_LSQ.cpp
+++ b/src/QAT/QAT_LSQ.cpp
@@ -125,7 +125,7 @@ static std::map<std::string, float> collectInputStats(std::shared_ptr<GraphView>
             const auto op = std::static_pointer_cast<LSQ_Op>(node->getOperator());
             float inputAbsMean = getTensorAbsMean(op->getInput(0));
             inputStats.insert(std::make_pair(node->name(), inputAbsMean));
-            std::cout << node->name() << " -> " << inputAbsMean << std::endl;
+            fmt::println("{} -> {}", node->name(), inputAbsMean);
         }
     }
 
@@ -148,7 +148,7 @@ static std::map<std::string, float> collectParamStats(std::shared_ptr<GraphView>
             const auto op = std::static_pointer_cast<LSQ_Op>(node->getOperator());
             float paramAbsMean = getTensorAbsMean(op->getInput(1));
             paramStats.insert(std::make_pair(node->name(), paramAbsMean));
-            std::cout << node->name() << " -> " << paramAbsMean << std::endl;
+            fmt::println("{} -> {}", node->name(), paramAbsMean);
         }
     }
     
diff --git a/src/recipes/QuantRecipes.cpp b/src/recipes/QuantRecipes.cpp
index 562948c..6e1dcdb 100644
--- a/src/recipes/QuantRecipes.cpp
+++ b/src/recipes/QuantRecipes.cpp
@@ -59,7 +59,7 @@ void insertBatchNormNodes(std::shared_ptr<GraphView> graphView)
         {
             std::shared_ptr<Conv_Op<2>> convOperator = std::static_pointer_cast<Conv_Op<2>> (parentNode->getOperator());
             int nb_channels = convOperator->getInput(1)->dims()[0];
-            std::cout << " NB CHANNELS = " << nb_channels << std::endl; // TODO : remove this ...
+            fmt::println(" NB CHANNELS = {}", nb_channels); // TODO : remove this ...
 
             std::string batchnormNodeName = makeUniqueName(parentNode->name() + "_BN", graphView);
             std::shared_ptr<Node> batchnormNode = BatchNorm<2>(nb_channels, 1e-5, 0.1, false, batchnormNodeName);
-- 
GitLab


From 7ad6bbf206ead01aa52a1279f8a807fa6f734f22 Mon Sep 17 00:00:00 2001
From: NAUD Maxence <maxence.naud@cea.fr>
Date: Tue, 21 Jan 2025 11:04:56 +0000
Subject: [PATCH 18/60] ADD: fmt as private library

---
 CMakeLists.txt | 13 +------------
 1 file changed, 1 insertion(+), 12 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 7a2b168..80c5ae7 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -85,17 +85,6 @@ endif()
 
 # ##############################################
 # Find system dependencies
-Include(FetchContent)
-
-FetchContent_Declare(
-    fmt
-    GIT_REPOSITORY https://github.com/fmtlib/fmt.git
-    GIT_TAG        10.2.1 # or a later release
-)
-
-set(FMT_SYSTEM_HEADERS ON)
-FetchContent_MakeAvailable(fmt)
-set_property(TARGET fmt PROPERTY POSITION_INDEPENDENT_CODE ON)
 
 if(CUDA)
     find_package(CUDAToolkit REQUIRED)
@@ -169,7 +158,7 @@ if (PYBIND)
 endif()
 
 # XXX HERE !!!
-target_link_libraries(${module_name} PUBLIC fmt::fmt)
+target_link_libraries(${module_name} PRIVATE fmt::fmt)
 target_compile_features(${module_name} PRIVATE cxx_std_14)
 
 target_compile_options(${module_name} PRIVATE
-- 
GitLab


From 98fe14506f1d5268e412c2a36e3a4c51878cff8b Mon Sep 17 00:00:00 2001
From: bhalimi <benjamin.halimi@cea.fr>
Date: Tue, 21 Jan 2025 12:28:19 +0000
Subject: [PATCH 19/60] rework the ReLU handling

---
 src/QAT/QAT_LSQ.cpp | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/src/QAT/QAT_LSQ.cpp b/src/QAT/QAT_LSQ.cpp
index 04f2027..f9ce554 100644
--- a/src/QAT/QAT_LSQ.cpp
+++ b/src/QAT/QAT_LSQ.cpp
@@ -82,13 +82,19 @@ static void setupInputQuantizers(std::shared_ptr<GraphView> graphView, size_t nb
 
         bool nodeHasParent = static_cast<bool> (linearNode->getParents()[0]);  // XXX is this safe ?
 
-        if (nodeHasParent) {
-            auto parentNode = linearNode->getParents()[0];
-            if (parentNode->type() == "ReLU") {
+        if (nodeHasParent) 
+        {
+            bool allParentsAreReLU = true;
+            for (auto parentNode : linearNode->getParents())
+                if (parentNode->type() != "ReLU")
+                    allParentsAreReLU = false;
+
+            if (allParentsAreReLU) {
                 auto quantizerOp = std::static_pointer_cast<LSQ_Op> (quantizerNode->getOperator());
                 quantizerOp->range() = unsignedRange;
-                graphView->replace({parentNode}, {}); 
             }
+
+            // TODO : remove the ReLUs when possible
         }
 
         // Insert the quantizer in the graphView ...
-- 
GitLab


From b76a4a55aa321f407c7f7813b02890c8afbc23ad Mon Sep 17 00:00:00 2001
From: bhalimi <benjamin.halimi@cea.fr>
Date: Tue, 21 Jan 2025 15:14:32 +0000
Subject: [PATCH 20/60] revert changes for debug

---
 include/aidge/quantization/QAT/QAT_LSQ.hpp |   6 +-
 python_binding/pybind_QAT_LSQ.cpp          |   4 +-
 src/QAT/QAT_LSQ.cpp                        | 199 ++++++++++++++++++++-
 3 files changed, 204 insertions(+), 5 deletions(-)

diff --git a/include/aidge/quantization/QAT/QAT_LSQ.hpp b/include/aidge/quantization/QAT/QAT_LSQ.hpp
index d7d03ca..979e823 100644
--- a/include/aidge/quantization/QAT/QAT_LSQ.hpp
+++ b/include/aidge/quantization/QAT/QAT_LSQ.hpp
@@ -25,9 +25,11 @@ namespace QuantLSQ {
  * @param graphView The GraphView containing the network to quantize.
  * @param nbBits Number of quantization bits.
  */
-void setupQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits);
 
-void devLSQ(std::shared_ptr<Tensor> tensor);
+//void setupQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits);
+//void devLSQ(std::shared_ptr<Tensor> tensor);
+
+void insertAndInitQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits, std::shared_ptr<Tensor> calibrationData);
 
 }
 }
diff --git a/python_binding/pybind_QAT_LSQ.cpp b/python_binding/pybind_QAT_LSQ.cpp
index 0b9fcc2..cb5b7f0 100644
--- a/python_binding/pybind_QAT_LSQ.cpp
+++ b/python_binding/pybind_QAT_LSQ.cpp
@@ -23,9 +23,11 @@ void init_QAT_LSQ(py::module &m) {
 
     auto mQuantLSQ = m.def_submodule("lsq");
 
+/*
     mQuantLSQ.def("setup_quantizers", &QuantLSQ::setupQuantizers, py::arg("network"), py::arg("nb_bits"));
-
     mQuantLSQ.def("dev_lsq", &QuantLSQ::devLSQ, py::arg("tensor"));
+*/
+    mQuantLSQ.def("insert_and_init_quantizers", &QuantLSQ::insertAndInitQuantizers, py::arg("network"), py::arg("nb_bits"), py::arg("calibration_data"));
 
 }
 } // namespace Aidge
diff --git a/src/QAT/QAT_LSQ.cpp b/src/QAT/QAT_LSQ.cpp
index f9ce554..e52bafb 100644
--- a/src/QAT/QAT_LSQ.cpp
+++ b/src/QAT/QAT_LSQ.cpp
@@ -21,6 +21,201 @@
 #include "aidge/graph/Matching.hpp"
 #include "aidge/recipes/QuantRecipes.hpp"
 
+
+namespace Aidge {
+
+static void insertQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits, float stepSize)
+{
+    const auto matches = SinglePassGraphMatching(graphView).match("(Conv2D#|FC#)");
+
+    for (const auto& match : matches) 
+    {
+        auto linearNode = match.graph->rootNode(); 
+
+        std::pair<int, int> signedRange = {-std::pow(2, nbBits - 1), std::pow(2, nbBits - 1) - 1};
+        std::pair<int, int> unsignedRange = {0, std::pow(2, nbBits) - 1};
+
+        // INPUT QUANTIZERS INSERTION
+
+        // TODO : double check this, and use createUniqueName()
+        auto inputQuantizerName = makeUniqueName(linearNode->name() + "_lsq_i", graphView);  
+        auto inputQuantizerNode = LSQ(signedRange, inputQuantizerName);
+
+        // Set the step size
+
+        auto inputStepSizeOp = inputQuantizerNode->getParent(1)->getOperator();
+        auto inputStepSizeTensor = std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}}));
+        inputStepSizeOp->setOutput(0, inputStepSizeTensor);
+
+        // Absorb the ReLU when possible ...
+
+        // XXX is this safe ???
+        bool nodeHasParent = static_cast<bool> (linearNode->getParents()[0]); 
+        // bool nodeHasParent = (linearNode->getParents().size() != 0);
+
+        if (nodeHasParent) {
+            auto parentNode = linearNode->getParents()[0];
+            if (parentNode->type() == "ReLU") {
+                auto inputQuantizerOp = std::static_pointer_cast<LSQ_Op> (inputQuantizerNode->getOperator());
+                inputQuantizerOp->range() = unsignedRange;
+                graphView->replace({parentNode}, {}); 
+            }
+        }
+
+        // We need to handle the case where the linear node is the first one ...
+
+        if (nodeHasParent) {
+            graphView->insertParent(linearNode, inputQuantizerNode, 0, 0, 0);
+        } else {
+            inputQuantizerNode->addChild(graphView);
+            graphView->add(inputQuantizerNode);
+        }
+
+        // PARAM QUANTIZERS INSERTION
+
+        // TODO : double check this, and use createUniqueName()
+        auto paramQuantizerName = makeUniqueName(linearNode->name() + "_lsq_p", graphView);  
+        auto paramQuantizerNode = LSQ(signedRange, paramQuantizerName); 
+        graphView->insertParent(linearNode, paramQuantizerNode, 1, 0, 0);
+
+        // Set the step size
+
+        auto paramStepSizeOp = paramQuantizerNode->getParent(1)->getOperator();
+        auto paramStepSizeTensor = std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}}));
+        paramStepSizeOp->setOutput(0, paramStepSizeTensor);
+    }
+
+}
+
+static float getTensorAbsMean(std::shared_ptr<Tensor> tensor)
+{
+    auto backend = tensor->backend();
+    if (backend == "cuda")
+        tensor->setBackend("cpu");
+
+    float acc = 0;
+    float* castedTensor = static_cast<float *> (tensor->getImpl()->rawPtr());
+    for(std::size_t i = 0; i < tensor->size(); i++)
+        acc += std::abs(castedTensor[i]);
+    acc /= static_cast<float> (tensor->size());
+
+    if (backend == "cuda")
+        tensor->setBackend("cuda");
+
+    return acc;
+}
+
+static std::map<std::string, float> collectInputStats(std::shared_ptr<GraphView> graphView, std::shared_ptr<Tensor> calibrationData, bool useCuda)
+{
+    // Propagate the calibration tensor
+
+    SequentialScheduler scheduler(graphView);
+    scheduler.resetScheduling();
+    scheduler.forward(true, {calibrationData});
+
+    // Store the input tensor statistics
+
+    if (useCuda)
+        graphView->setBackend("cpu"); 
+
+    std::map<std::string, float> inputStats;
+    for (auto node : graphView->getNodes())
+    {
+        if (node->type() == "FC" || node->type() == "Conv2D") // TODO: use graph matching !!!
+        {
+            const auto op = std::static_pointer_cast<LSQ_Op>(node->getOperator());
+            float inputAbsMean = getTensorAbsMean(op->getInput(0));
+            inputStats.insert(std::make_pair(node->name(), inputAbsMean));
+            std::cout << node->name() << " -> " << inputAbsMean << std::endl;
+        }
+    }
+
+    if (useCuda)
+        graphView->setBackend("cuda");
+
+    return inputStats;
+}
+
+static std::map<std::string, float> collectParamStats(std::shared_ptr<GraphView> graphView, bool useCuda)
+{
+    if (useCuda)
+        graphView->setBackend("cpu");
+
+    std::map<std::string, float> paramStats;
+    for (auto node : graphView->getNodes())
+    {
+        if (node->type() == "FC" || node->type() == "Conv2D") // TODO: use graph matching !!!
+        {
+            const auto op = std::static_pointer_cast<LSQ_Op>(node->getOperator());
+            float paramAbsMean = getTensorAbsMean(op->getInput(1));
+            paramStats.insert(std::make_pair(node->name(), paramAbsMean));
+            std::cout << node->name() << " -> " << paramAbsMean << std::endl;
+        }
+    }
+    
+    if (useCuda)
+        graphView->setBackend("cuda");
+
+    return paramStats;
+}
+
+static void adjustQuantizersStepSizes(std::shared_ptr<GraphView> graphView, std::map<std::string, float> inputStats, std::map<std::string, float> paramStats)
+{
+    const auto matches = SinglePassGraphMatching(graphView).match("(Conv2D#|FC#)");
+
+    for (const auto& match : matches) 
+    {
+        auto linearNode = match.graph->rootNode();
+
+        // INPUT QUANTIZERS STEP-SIZES
+
+        auto inputQuantNode = linearNode->getParent(0);
+        auto inputQuantOp = std::static_pointer_cast<LSQ_Op>(inputQuantNode->getOperator());
+
+        float absMean = inputStats[linearNode->name()];
+        float stepSize = 2.0f * (absMean / std::sqrt(inputQuantOp->range().second));
+
+        auto inputStepSizeOp = inputQuantNode->getParent(1)->getOperator();
+        // XXX inputStepSizeOp->setOutput(0, std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}})));
+        auto inputStepSizeTensor = std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}}));
+        inputStepSizeOp->setOutput(0, inputStepSizeTensor);
+
+        // PARAM QUANTIZERS STEP-SIZES
+
+        auto paramQuantNode = linearNode->getParent(1);
+        auto paramQuantOp = std::static_pointer_cast<LSQ_Op>(paramQuantNode->getOperator());
+
+        absMean = paramStats[linearNode->name()];
+        stepSize = 2.0f * (absMean / std::sqrt(paramQuantOp->range().second));
+
+        auto paramStepSizeOp = paramQuantNode->getParent(1)->getOperator();
+        // XXX paramStepSizeOp->setOutput(0, std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}})));
+        auto paramStepSizeTensor = std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}}));
+        paramStepSizeOp->setOutput(0, paramStepSizeTensor);
+    }
+}
+
+void QuantLSQ::insertAndInitQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits, std::shared_ptr<Tensor> calibrationData)
+{
+    bool useCuda = (calibrationData->backend() == "cuda");
+
+    // Collect the tensor statistics
+    auto inputStats = collectInputStats(graphView, calibrationData, useCuda);
+
+    auto paramStats = collectParamStats(graphView, useCuda);
+
+    // Insert the quantizers
+    insertQuantizers(graphView, nbBits, 1.0);
+
+    // Adjust the quantizers step-sizes
+    adjustQuantizersStepSizes(graphView, inputStats, paramStats);
+}
+}
+
+
+/*
+    XXX XXX XXX
+
 namespace Aidge {
 
 static float getTensorAbsMean(std::shared_ptr<Tensor> tensor)
@@ -146,5 +341,5 @@ void QuantLSQ::devLSQ(std::shared_ptr<Tensor> tensor)
     float mean = (tensor->mean()).get<float> (0);
     std::cout << " MEAN  = " << mean << std::endl;
 }
-
-}
\ No newline at end of file
+}
+*/
\ No newline at end of file
-- 
GitLab
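
The step-size initialization used throughout this patch follows the LSQ rule
stepSize = 2 * E|x| / sqrt(Qp), where Qp is the upper bound of the quantizer
range (range().second). Below is a minimal standalone sketch of that
arithmetic, assuming an 8-bit signed quantizer and a made-up calibration
abs-mean of 0.5; it is an illustration, not part of the patch.

    #include <cmath>
    #include <cstdio>

    int main()
    {
        const int nbBits = 8;
        const float Qp = std::pow(2.0f, nbBits - 1) - 1.0f;  // 127, i.e. range().second

        const float absMean = 0.5f;  // assumed E|x| measured during calibration

        // Same formula as insertQuantizers() / adjustQuantizersStepSizes()
        const float stepSize = 2.0f * (absMean / std::sqrt(Qp));

        std::printf("stepSize = %f\n", stepSize);  // ~0.0887
        return 0;
    }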


From d656e1eebfa339c992b7ae9adcd29f8e102bb016 Mon Sep 17 00:00:00 2001
From: bhalimi <benjamin.halimi@cea.fr>
Date: Wed, 22 Jan 2025 12:47:59 +0000
Subject: [PATCH 21/60] re-apply the LSQ changes

---
 include/aidge/quantization/QAT/QAT_LSQ.hpp |   6 +-
 python_binding/pybind_QAT_LSQ.cpp          |   6 +-
 src/QAT/QAT_LSQ.cpp                        | 258 ++++++++++-----------
 3 files changed, 133 insertions(+), 137 deletions(-)

diff --git a/include/aidge/quantization/QAT/QAT_LSQ.hpp b/include/aidge/quantization/QAT/QAT_LSQ.hpp
index 979e823..f33a7c6 100644
--- a/include/aidge/quantization/QAT/QAT_LSQ.hpp
+++ b/include/aidge/quantization/QAT/QAT_LSQ.hpp
@@ -26,10 +26,8 @@ namespace QuantLSQ {
  * @param nbBits Number of quantization bits.
  */
 
-//void setupQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits);
-//void devLSQ(std::shared_ptr<Tensor> tensor);
-
-void insertAndInitQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits, std::shared_ptr<Tensor> calibrationData);
+void setupQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits);
+void devLSQ(std::shared_ptr<Tensor> tensor);
 
 }
 }
diff --git a/python_binding/pybind_QAT_LSQ.cpp b/python_binding/pybind_QAT_LSQ.cpp
index cb5b7f0..0dd4267 100644
--- a/python_binding/pybind_QAT_LSQ.cpp
+++ b/python_binding/pybind_QAT_LSQ.cpp
@@ -23,11 +23,11 @@ void init_QAT_LSQ(py::module &m) {
 
     auto mQuantLSQ = m.def_submodule("lsq");
 
-/*
+
     mQuantLSQ.def("setup_quantizers", &QuantLSQ::setupQuantizers, py::arg("network"), py::arg("nb_bits"));
     mQuantLSQ.def("dev_lsq", &QuantLSQ::devLSQ, py::arg("tensor"));
-*/
-    mQuantLSQ.def("insert_and_init_quantizers", &QuantLSQ::insertAndInitQuantizers, py::arg("network"), py::arg("nb_bits"), py::arg("calibration_data"));
+
+    //mQuantLSQ.def("insert_and_init_quantizers", &QuantLSQ::insertAndInitQuantizers, py::arg("network"), py::arg("nb_bits"), py::arg("calibration_data"));
 
 }
 } // namespace Aidge
diff --git a/src/QAT/QAT_LSQ.cpp b/src/QAT/QAT_LSQ.cpp
index e52bafb..66e8ec7 100644
--- a/src/QAT/QAT_LSQ.cpp
+++ b/src/QAT/QAT_LSQ.cpp
@@ -21,6 +21,134 @@
 #include "aidge/graph/Matching.hpp"
 #include "aidge/recipes/QuantRecipes.hpp"
 
+namespace Aidge {
+
+static float getTensorAbsMean(std::shared_ptr<Tensor> tensor)
+{
+    auto valueTensor = (*tensor).abs().mean();
+    std::shared_ptr<Tensor> fallback;
+    const Tensor& localTensor = valueTensor.refCastFrom(fallback, DataType::Float32, "cpu");
+    return localTensor.get<float>(0);
+}
+
+// INIT THE STEP SIZE OF A QUANTIZER NODE
+
+static bool initStepSize(std::shared_ptr<Node> quantizer)
+{
+    const auto quantizerOp = std::static_pointer_cast<LSQ_Op>(quantizer->getOperator());
+
+    float inputAbsMean = getTensorAbsMean(quantizerOp->getInput(0));
+
+    float stepSize = 2.0f * (inputAbsMean / std::sqrt(quantizerOp->range().second));
+
+    auto stepSizeTensor = std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}}));
+
+    // XXX Manage backend here ?
+    stepSizeTensor->setBackend(quantizerOp->getInput(0)->backend());
+    stepSizeTensor->setDataType(quantizerOp->getInput(0)->dataType());
+
+    auto stepSizeProducer = quantizer->getParent(1);
+
+    stepSizeProducer->getOperator()->setOutput(0, stepSizeTensor);
+
+    std::cout << " [ INIT STEP SIZE = " << stepSize << " ] " << std::endl;
+
+    return false;
+}
+
+// INPUT QUANTIZERS INSERTION
+
+static void setupInputQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits)
+{
+    const auto matches = SinglePassGraphMatching(graphView).match("(Conv2D#|FC#)");
+
+    for (const auto& match : matches) 
+    {
+        auto linearNode = match.graph->rootNode(); 
+
+        std::pair<int, int> signedRange = {-std::pow(2, nbBits - 1), std::pow(2, nbBits - 1) - 1};
+        std::pair<int, int> unsignedRange = {0, std::pow(2, nbBits) - 1};
+
+        // Create the input quantizer node
+
+        auto quantizerName = makeUniqueName(linearNode->name() + "_lsq_i", graphView);  
+        auto quantizerNode = LSQ(signedRange, quantizerName);
+
+        // Init the step-size using the node call stack
+
+        quantizerNode->addBeforeForward([quantizerNode](){ return initStepSize(quantizerNode); });
+
+        // Absorb the ReLU when possible ...
+
+        bool nodeHasParent = static_cast<bool> (linearNode->getParents()[0]);  // XXX is this safe ?
+
+        if (nodeHasParent) 
+        {
+            bool allParentsAreReLU = true;
+            for (auto parentNode : linearNode->getParents())
+                if (parentNode->type() != "ReLU")
+                    allParentsAreReLU = false;
+
+            if (allParentsAreReLU) {
+                auto quantizerOp = std::static_pointer_cast<LSQ_Op> (quantizerNode->getOperator());
+                quantizerOp->range() = unsignedRange;
+            }
+
+            // TODO : remove the ReLUs when possible
+        }
+
+        // Insert the quantizer in the graphView ...
+        // (We need to handle the case where the linear node is the first one)
+
+        if (nodeHasParent) {
+            graphView->insertParent(linearNode, quantizerNode, 0, 0, 0);
+        } else {
+            quantizerNode->addChild(graphView);
+            graphView->add(quantizerNode);
+        }
+    }
+}
+
+// PARAM QUANTIZERS INSERTION
+
+static void setupParamQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits)
+{
+    const auto matches = SinglePassGraphMatching(graphView).match("(Conv2D#|FC#)");
+
+    std::pair<int, int> signedRange = {-std::pow(2, nbBits - 1), std::pow(2, nbBits - 1) - 1};
+
+    for (const auto& match : matches) 
+    {       
+        auto linearNode = match.graph->rootNode(); 
+
+        // TODO : double check this, and use createUniqueName()
+        auto quantizerName = makeUniqueName(linearNode->name() + "_lsq_p", graphView);  
+        auto quantizerNode = LSQ(signedRange, quantizerName); 
+
+        // Init the step-size using the node call stack
+
+        quantizerNode->addBeforeForward([quantizerNode](){ return initStepSize(quantizerNode); });
+
+        // Insert the quantizer in the graphView
+
+        graphView->insertParent(linearNode, quantizerNode, 1, 0, 0);
+    }
+}
+
+void QuantLSQ::setupQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits)
+{
+    setupInputQuantizers(graphView, nbBits);
+    setupParamQuantizers(graphView, nbBits);
+}
+
+void QuantLSQ::devLSQ(std::shared_ptr<Tensor> tensor)
+{
+    float mean = (tensor->mean()).get<float> (0);
+    std::cout << " MEAN  = " << mean << std::endl;
+}
+}
+
+/*
 
 namespace Aidge {
 
@@ -212,134 +340,4 @@ void QuantLSQ::insertAndInitQuantizers(std::shared_ptr<GraphView> graphView, siz
 }
 }
 
-
-/*
-    XXX XXX XXX
-
-namespace Aidge {
-
-static float getTensorAbsMean(std::shared_ptr<Tensor> tensor)
-{
-    auto valueTensor = (*tensor).abs().mean();
-    std::shared_ptr<Tensor> fallback;
-    const Tensor& localTensor = valueTensor.refCastFrom(fallback, DataType::Float32, "cpu");
-    return localTensor.get<float>(0);
-}
-
-// INIT THE STEP SIZE OF A QUANTIZER NODE
-
-static bool initStepSize(std::shared_ptr<Node> quantizer)
-{
-    const auto quantizerOp = std::static_pointer_cast<LSQ_Op>(quantizer->getOperator());
-
-    float inputAbsMean = getTensorAbsMean(quantizerOp->getInput(0));
-
-    float stepSize = 2.0f * (inputAbsMean / std::sqrt(quantizerOp->range().second));
-
-    auto stepSizeTensor = std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}}));
-
-    // XXX Manage backend here ?
-    stepSizeTensor->setBackend(quantizerOp->getInput(0)->backend());
-    stepSizeTensor->setDataType(quantizerOp->getInput(0)->dataType());
-
-    auto stepSizeProducer = quantizer->getParent(1);
-
-    stepSizeProducer->getOperator()->setOutput(0, stepSizeTensor);
-
-    std::cout << " [ INIT STEP SIZE = " << stepSize << " ] " << std::endl;
-
-    return false;
-}
-
-// INPUT QUANTIZERS INSERTION
-
-static void setupInputQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits)
-{
-    const auto matches = SinglePassGraphMatching(graphView).match("(Conv2D#|FC#)");
-
-    for (const auto& match : matches) 
-    {
-        auto linearNode = match.graph->rootNode(); 
-
-        std::pair<int, int> signedRange = {-std::pow(2, nbBits - 1), std::pow(2, nbBits - 1) - 1};
-        std::pair<int, int> unsignedRange = {0, std::pow(2, nbBits) - 1};
-
-        // Create the input quantizer node
-
-        auto quantizerName = makeUniqueName(linearNode->name() + "_lsq_i", graphView);  
-        auto quantizerNode = LSQ(signedRange, quantizerName);
-
-        // Init the step-size using the node call stack
-
-        quantizerNode->addBeforeForward([quantizerNode](){ return initStepSize(quantizerNode); });
-
-        // Absorb the ReLU when possible ...
-
-        bool nodeHasParent = static_cast<bool> (linearNode->getParents()[0]);  // XXX is this safe ?
-
-        if (nodeHasParent) 
-        {
-            bool allParentsAreReLU = true;
-            for (auto parentNode : linearNode->getParents())
-                if (parentNode->type() != "ReLU")
-                    allParentsAreReLU = false;
-
-            if (allParentsAreReLU) {
-                auto quantizerOp = std::static_pointer_cast<LSQ_Op> (quantizerNode->getOperator());
-                quantizerOp->range() = unsignedRange;
-            }
-
-            // TODO : remove the ReLUs when possible
-        }
-
-        // Insert the quantizer in the graphView ...
-        // (We need to handle the case where the linear node is the first one)
-
-        if (nodeHasParent) {
-            graphView->insertParent(linearNode, quantizerNode, 0, 0, 0);
-        } else {
-            quantizerNode->addChild(graphView);
-            graphView->add(quantizerNode);
-        }
-    }
-}
-
-// PARAM QUANTIZERS INSERTION
-
-static void setupParamQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits)
-{
-    const auto matches = SinglePassGraphMatching(graphView).match("(Conv2D#|FC#)");
-
-    std::pair<int, int> signedRange = {-std::pow(2, nbBits - 1), std::pow(2, nbBits - 1) - 1};
-
-    for (const auto& match : matches) 
-    {       
-        auto linearNode = match.graph->rootNode(); 
-
-        // TODO : double check this, and use createUniqueName()
-        auto quantizerName = makeUniqueName(linearNode->name() + "_lsq_p", graphView);  
-        auto quantizerNode = LSQ(signedRange, quantizerName); 
-
-        // Init the step-size using the node call stack
-
-        quantizerNode->addBeforeForward([quantizerNode](){ return initStepSize(quantizerNode); });
-
-        // Insert the quantizer in the graphView
-
-        graphView->insertParent(linearNode, quantizerNode, 1, 0, 0);
-    }
-}
-
-void QuantLSQ::setupQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits)
-{
-    setupInputQuantizers(graphView, nbBits);
-    setupParamQuantizers(graphView, nbBits);
-}
-
-void QuantLSQ::devLSQ(std::shared_ptr<Tensor> tensor)
-{
-    float mean = (tensor->mean()).get<float> (0);
-    std::cout << " MEAN  = " << mean << std::endl;
-}
-}
 */
\ No newline at end of file
-- 
GitLab
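
The key change in this patch is that the step size is no longer computed by a
separate calibration pass: quantizerNode->addBeforeForward(...) registers a
hook so that initStepSize() runs once data is actually flowing and the
quantizer's input tensor exists. A rough mock of that callback mechanism
(MockNode and its members are invented for illustration; the real Node API
lives in aidge/graph/Node.hpp):

    #include <functional>
    #include <iostream>
    #include <utility>
    #include <vector>

    struct MockNode
    {
        std::vector<std::function<bool()>> beforeForwardHooks;

        void addBeforeForward(std::function<bool()> hook) {
            beforeForwardHooks.push_back(std::move(hook));
        }

        void forward() {
            // Hooks fire first: by now the quantizer's input tensor exists,
            // so its statistics can be measured before the op itself runs.
            for (auto& hook : beforeForwardHooks)
                hook();
            std::cout << "forward pass" << std::endl;
        }
    };

    int main()
    {
        MockNode quantizer;
        quantizer.addBeforeForward([]() {
            std::cout << "init step size from input stats" << std::endl;
            return false;  // mirrors initStepSize(), which returns false
        });
        quantizer.forward();  // hook runs, then the forward pass
        return 0;
    }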


From e3a715178125f72d648edeeb7aaafb1c6b0c5e87 Mon Sep 17 00:00:00 2001
From: bhalimi <benjamin.halimi@cea.fr>
Date: Mon, 27 Jan 2025 13:04:58 +0000
Subject: [PATCH 22/60] refactor the LSQ code

---
 include/aidge/quantization/QAT/QAT_LSQ.hpp |   1 -
 python_binding/pybind_QAT_LSQ.cpp          |   5 -
 src/QAT/QAT_LSQ.cpp                        | 235 +++------------------
 3 files changed, 30 insertions(+), 211 deletions(-)

diff --git a/include/aidge/quantization/QAT/QAT_LSQ.hpp b/include/aidge/quantization/QAT/QAT_LSQ.hpp
index f33a7c6..b9d8b33 100644
--- a/include/aidge/quantization/QAT/QAT_LSQ.hpp
+++ b/include/aidge/quantization/QAT/QAT_LSQ.hpp
@@ -27,7 +27,6 @@ namespace QuantLSQ {
  */
 
 void setupQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits);
-void devLSQ(std::shared_ptr<Tensor> tensor);
 
 }
 }
diff --git a/python_binding/pybind_QAT_LSQ.cpp b/python_binding/pybind_QAT_LSQ.cpp
index 0dd4267..4bba3b6 100644
--- a/python_binding/pybind_QAT_LSQ.cpp
+++ b/python_binding/pybind_QAT_LSQ.cpp
@@ -23,11 +23,6 @@ void init_QAT_LSQ(py::module &m) {
 
     auto mQuantLSQ = m.def_submodule("lsq");
 
-
     mQuantLSQ.def("setup_quantizers", &QuantLSQ::setupQuantizers, py::arg("network"), py::arg("nb_bits"));
-    mQuantLSQ.def("dev_lsq", &QuantLSQ::devLSQ, py::arg("tensor"));
-
-    //mQuantLSQ.def("insert_and_init_quantizers", &QuantLSQ::insertAndInitQuantizers, py::arg("network"), py::arg("nb_bits"), py::arg("calibration_data"));
-
 }
 } // namespace Aidge
diff --git a/src/QAT/QAT_LSQ.cpp b/src/QAT/QAT_LSQ.cpp
index 66e8ec7..80e8a05 100644
--- a/src/QAT/QAT_LSQ.cpp
+++ b/src/QAT/QAT_LSQ.cpp
@@ -21,25 +21,50 @@
 #include "aidge/graph/Matching.hpp"
 #include "aidge/recipes/QuantRecipes.hpp"
 
-namespace Aidge {
+
+namespace Aidge 
+{
 
 static float getTensorAbsMean(std::shared_ptr<Tensor> tensor)
 {
+    //std::cout << " GET TENSOR ABS MEAN " << std::endl;
     auto valueTensor = (*tensor).abs().mean();
     std::shared_ptr<Tensor> fallback;
     const Tensor& localTensor = valueTensor.refCastFrom(fallback, DataType::Float32, "cpu");
     return localTensor.get<float>(0);
 }
 
+static float getTensorStd(std::shared_ptr<Tensor> tensor)
+{
+    auto valueTensor = (*tensor);
+    
+    auto skewedTensor = valueTensor - valueTensor.mean();
+    auto squaredTensor = skewedTensor * skewedTensor;
+    auto varianceTensor = squaredTensor.mean();
+
+    std::shared_ptr<Tensor> fallback;
+    auto localTensor = varianceTensor.refCastFrom(fallback, DataType::Float32, "cpu");
+    
+    float variance = localTensor.get<float>(0);
+    return std::sqrt(variance);
+}
+
+
 // INIT THE STEP SIZE OF A QUANTIZER NODE
 
 static bool initStepSize(std::shared_ptr<Node> quantizer)
 {
     const auto quantizerOp = std::static_pointer_cast<LSQ_Op>(quantizer->getOperator());
 
-    float inputAbsMean = getTensorAbsMean(quantizerOp->getInput(0));
+    // This formula is the one proposed in the paper ...
+
+    // float inputAbsMean = getTensorAbsMean(quantizerOp->getInput(0));
+    // float stepSize = 2.0f * (inputAbsMean / std::sqrt(quantizerOp->range().second));
 
-    float stepSize = 2.0f * (inputAbsMean / std::sqrt(quantizerOp->range().second));
+    // .. but this formula seems to work better !!!
+
+    float inputStd = getTensorStd(quantizerOp->getInput(0));
+    float stepSize = 8.0f * (inputStd / (quantizerOp->range().second));
 
     auto stepSizeTensor = std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}}));
 
@@ -56,8 +81,6 @@ static bool initStepSize(std::shared_ptr<Node> quantizer)
     return false;
 }
 
-// INPUT QUANTIZERS INSERTION
-
 static void setupInputQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits)
 {
     const auto matches = SinglePassGraphMatching(graphView).match("(Conv2D#|FC#)");
@@ -137,207 +160,9 @@ static void setupParamQuantizers(std::shared_ptr<GraphView> graphView, size_t nb
 
 void QuantLSQ::setupQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits)
 {
+    sanitizeNodeNames(graphView);
     setupInputQuantizers(graphView, nbBits);
     setupParamQuantizers(graphView, nbBits);
 }
 
-void QuantLSQ::devLSQ(std::shared_ptr<Tensor> tensor)
-{
-    float mean = (tensor->mean()).get<float> (0);
-    std::cout << " MEAN  = " << mean << std::endl;
-}
-}
-
-/*
-
-namespace Aidge {
-
-static void insertQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits, float stepSize)
-{
-    const auto matches = SinglePassGraphMatching(graphView).match("(Conv2D#|FC#)");
-
-    for (const auto& match : matches) 
-    {
-        auto linearNode = match.graph->rootNode(); 
-
-        std::pair<int, int> signedRange = {-std::pow(2, nbBits - 1), std::pow(2, nbBits - 1) - 1};
-        std::pair<int, int> unsignedRange = {0, std::pow(2, nbBits) - 1};
-
-        // INPUT QUANTIZERS INSERTION
-
-        // TODO : double check this, and use createUniqueName()
-        auto inputQuantizerName = makeUniqueName(linearNode->name() + "_lsq_i", graphView);  
-        auto inputQuantizerNode = LSQ(signedRange, inputQuantizerName);
-
-        // Set the step size
-
-        auto inputStepSizeOp = inputQuantizerNode->getParent(1)->getOperator();
-        auto inputStepSizeTensor = std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}}));
-        inputStepSizeOp->setOutput(0, inputStepSizeTensor);
-
-        // Absorb the ReLU when possible ...
-
-        // XXX is this safe ???
-        bool nodeHasParent = static_cast<bool> (linearNode->getParents()[0]); 
-        // bool nodeHasParent = (linearNode->getParents().size() != 0);
-
-        if (nodeHasParent) {
-            auto parentNode = linearNode->getParents()[0];
-            if (parentNode->type() == "ReLU") {
-                auto inputQuantizerOp = std::static_pointer_cast<LSQ_Op> (inputQuantizerNode->getOperator());
-                inputQuantizerOp->range() = unsignedRange;
-                graphView->replace({parentNode}, {}); 
-            }
-        }
-
-        // We need to handle the case where the linear node is the first one ...
-
-        if (nodeHasParent) {
-            graphView->insertParent(linearNode, inputQuantizerNode, 0, 0, 0);
-        } else {
-            inputQuantizerNode->addChild(graphView);
-            graphView->add(inputQuantizerNode);
-        }
-
-        // PARAM QUANTIZERS INSERTION
-
-        // TODO : double check this, and use createUniqueName()
-        auto paramQuantizerName = makeUniqueName(linearNode->name() + "_lsq_p", graphView);  
-        auto paramQuantizerNode = LSQ(signedRange, paramQuantizerName); 
-        graphView->insertParent(linearNode, paramQuantizerNode, 1, 0, 0);
-
-        // Set the step size
-
-        auto paramStepSizeOp = paramQuantizerNode->getParent(1)->getOperator();
-        auto paramStepSizeTensor = std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}}));
-        paramStepSizeOp->setOutput(0, paramStepSizeTensor);
-    }
-
-}
-
-static float getTensorAbsMean(std::shared_ptr<Tensor> tensor)
-{
-    auto backend = tensor->backend();
-    if (backend == "cuda")
-        tensor->setBackend("cpu");
-
-    float acc = 0;
-    float* castedTensor = static_cast<float *> (tensor->getImpl()->rawPtr());
-    for(std::size_t i = 0; i < tensor->size(); i++)
-        acc += std::abs(castedTensor[i]);
-    acc /= static_cast<float> (tensor->size());
-
-    if (backend == "cuda")
-        tensor->setBackend("cuda");
-
-    return acc;
-}
-
-static std::map<std::string, float> collectInputStats(std::shared_ptr<GraphView> graphView, std::shared_ptr<Tensor> calibrationData, bool useCuda)
-{
-    // Propagate the calibration tensor
-
-    SequentialScheduler scheduler(graphView);
-    scheduler.resetScheduling();
-    scheduler.forward(true, {calibrationData});
-
-    // Store the input tensor statistics
-
-    if (useCuda)
-        graphView->setBackend("cpu"); 
-
-    std::map<std::string, float> inputStats;
-    for (auto node : graphView->getNodes())
-    {
-        if (node->type() == "FC" || node->type() == "Conv2D") // TODO: use graph matching !!!
-        {
-            const auto op = std::static_pointer_cast<LSQ_Op>(node->getOperator());
-            float inputAbsMean = getTensorAbsMean(op->getInput(0));
-            inputStats.insert(std::make_pair(node->name(), inputAbsMean));
-            std::cout << node->name() << " -> " << inputAbsMean << std::endl;
-        }
-    }
-
-    if (useCuda)
-        graphView->setBackend("cuda");
-
-    return inputStats;
-}
-
-static std::map<std::string, float> collectParamStats(std::shared_ptr<GraphView> graphView, bool useCuda)
-{
-    if (useCuda)
-        graphView->setBackend("cpu");
-
-    std::map<std::string, float> paramStats;
-    for (auto node : graphView->getNodes())
-    {
-        if (node->type() == "FC" || node->type() == "Conv2D") // TODO: use graph matching !!!
-        {
-            const auto op = std::static_pointer_cast<LSQ_Op>(node->getOperator());
-            float paramAbsMean = getTensorAbsMean(op->getInput(1));
-            paramStats.insert(std::make_pair(node->name(), paramAbsMean));
-            std::cout << node->name() << " -> " << paramAbsMean << std::endl;
-        }
-    }
-    
-    if (useCuda)
-        graphView->setBackend("cuda");
-
-    return paramStats;
-}
-
-static void adjustQuantizersStepSizes(std::shared_ptr<GraphView> graphView, std::map<std::string, float> inputStats, std::map<std::string, float> paramStats)
-{
-    const auto matches = SinglePassGraphMatching(graphView).match("(Conv2D#|FC#)");
-
-    for (const auto& match : matches) 
-    {
-        auto linearNode = match.graph->rootNode();
-
-        // INPUT QUANTIZERS STEP-SIZES
-
-        auto inputQuantNode = linearNode->getParent(0);
-        auto inputQuantOp = std::static_pointer_cast<LSQ_Op>(inputQuantNode->getOperator());
-
-        float absMean = inputStats[linearNode->name()];
-        float stepSize = 2.0f * (absMean / std::sqrt(inputQuantOp->range().second));
-
-        auto inputStepSizeOp = inputQuantNode->getParent(1)->getOperator();
-        // XXX inputStepSizeOp->setOutput(0, std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}})));
-        auto inputStepSizeTensor = std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}}));
-        inputStepSizeOp->setOutput(0, inputStepSizeTensor);
-
-        // PARAM QUANTIZERS STEP-SIZES
-
-        auto paramQuantNode = linearNode->getParent(1);
-        auto paramQuantOp = std::static_pointer_cast<LSQ_Op>(paramQuantNode->getOperator());
-
-        absMean = paramStats[linearNode->name()];
-        stepSize = 2.0f * (absMean / std::sqrt(paramQuantOp->range().second));
-
-        auto paramStepSizeOp = paramQuantNode->getParent(1)->getOperator();
-        // XXX paramStepSizeOp->setOutput(0, std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}})));
-        auto paramStepSizeTensor = std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}}));
-        paramStepSizeOp->setOutput(0, paramStepSizeTensor);
-    }
-}
-
-void QuantLSQ::insertAndInitQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits, std::shared_ptr<Tensor> calibrationData)
-{
-    bool useCuda = (calibrationData->backend() == "cuda");
-
-    // Collect the tensor statistics
-    auto inputStats = collectInputStats(graphView, calibrationData, useCuda);
-
-    auto paramStats = collectParamStats(graphView, useCuda);
-
-    // Insert the quantizers
-    insertQuantizers(graphView, nbBits, 1.0);
-
-    // Adjust the quantizers step-sizes
-    adjustQuantizersStepSizes(graphView, inputStats, paramStats);
-}
-}
-
-*/
\ No newline at end of file
+}
\ No newline at end of file
-- 
GitLab
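
initStepSize() now prefers 8 * std / Qp over the paper's 2 * E|x| / sqrt(Qp).
For a zero-mean Gaussian input, E|x| = sigma * sqrt(2/pi), so the two rules
can be compared directly; a hedged sketch of that comparison (the Gaussian
assumption is mine, not the patch's):

    #include <cmath>
    #include <cstdio>

    int main()
    {
        const float pi    = 3.14159265f;
        const float sigma = 1.0f;    // assumed input standard deviation
        const float Qp    = 127.0f;  // 8-bit signed upper bound

        const float absMean = sigma * std::sqrt(2.0f / pi);  // E|x| for a Gaussian

        const float paperStep = 2.0f * (absMean / std::sqrt(Qp));  // original rule
        const float stdStep   = 8.0f * (sigma / Qp);               // rule kept here

        // ~0.1416 vs ~0.0630 : the std-based rule starts from a grid roughly
        // twice as fine, which the comment in the patch says works better
        // in practice.
        std::printf("paper: %.4f  std-based: %.4f\n", paperStep, stdStep);
        return 0;
    }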


From 96e095d4f55962c3b7989a85abd8652d13956f2f Mon Sep 17 00:00:00 2001
From: bhalimi <benjamin.halimi@cea.fr>
Date: Mon, 27 Jan 2025 13:07:12 +0000
Subject: [PATCH 23/60] remove commented code

---
 src/backend/cuda/operator/LSQImpl.cpp | 20 +-------------------
 1 file changed, 1 insertion(+), 19 deletions(-)

diff --git a/src/backend/cuda/operator/LSQImpl.cpp b/src/backend/cuda/operator/LSQImpl.cpp
index c66bd8a..fa45f21 100644
--- a/src/backend/cuda/operator/LSQImpl.cpp
+++ b/src/backend/cuda/operator/LSQImpl.cpp
@@ -52,19 +52,6 @@ void Aidge::LSQImpl_cuda::backward() {
     std::shared_ptr<Tensor> gra_int1 = op_.getInput(1)->grad();
     std::shared_ptr<Tensor> gra_out0 = op_.getOutput(0)->grad();    
 
-    // XXX
-/*
-    size_t tmp;
-
-    cudaDeviceSetLimit(cudaLimitStackSize, 2048);
-    cudaDeviceGetLimit(&tmp, cudaLimitStackSize );
-    printf(" stack limit = %ld \n", tmp);
-
-    cudaDeviceSetLimit(cudaLimitMallocHeapSize, 100000000);
-    cudaDeviceGetLimit(&tmp, cudaLimitMallocHeapSize);
-    printf(" heap limit = %ld \n", tmp);
-*/
-
     if (gra_int0->size() > mWorkspaceSize) {
         // std::cout << " reallocation " << sizeof(gra_int0) << " " << gra_int0->size() << std::endl;
         if (mWorkspace != nullptr) {
@@ -87,12 +74,7 @@ void Aidge::LSQImpl_cuda::backward() {
         gra_int0->getImpl()->rawPtr(),
         gra_int1->getImpl()->rawPtr(),
         mWorkspace);
-/*
-    gra_int1->setBackend("cpu");
-    float *castedTensor = static_cast<float *> (gra_int1->getImpl()->rawPtr());
-    std::cout << castedTensor[0] << std::endl;
-    gra_int1->setBackend("cuda");
-*/
+
 }
 
 Aidge::LSQImpl_cuda::~LSQImpl_cuda() {
-- 
GitLab
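
The backward() body kept above uses a grow-only workspace: the buffer is
reallocated only when the gradient tensor outgrows the cached capacity, and
never shrunk. A sketch of that pattern with plain new[]/delete[] standing in
for cudaMalloc/cudaFree (Workspace is an illustrative stand-in for the
mWorkspace/mWorkspaceSize members):

    #include <cstddef>
    #include <cstdio>

    struct Workspace
    {
        char*       ptr  = nullptr;
        std::size_t size = 0;

        // Ensure at least `required` bytes; capacity never shrinks.
        void reserve(std::size_t required) {
            if (required > size) {
                delete[] ptr;               // real impl: cudaFree(mWorkspace)
                ptr  = new char[required];  // real impl: cudaMalloc(&mWorkspace, required)
                size = required;
                std::printf("reallocated to %zu bytes\n", required);
            }
        }

        ~Workspace() { delete[] ptr; }
    };

    int main()
    {
        Workspace ws;
        ws.reserve(1024);  // first call allocates
        ws.reserve(512);   // no-op: capacity already sufficient
        ws.reserve(4096);  // grows the buffer
        return 0;
    }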


From 474fe56eb058fb31ff26ed311a0fac015901eb73 Mon Sep 17 00:00:00 2001
From: bhalimi <benjamin.halimi@cea.fr>
Date: Mon, 27 Jan 2025 14:14:40 +0000
Subject: [PATCH 24/60] complete the PTQ float to double migration

---
 include/aidge/quantization/PTQ/CLE.hpp |  2 +-
 src/PTQ/CLE.cpp                        | 18 +++++++++---------
 src/PTQ/PTQ.cpp                        |  4 ++--
 3 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/include/aidge/quantization/PTQ/CLE.hpp b/include/aidge/quantization/PTQ/CLE.hpp
index d94b6e9..77eaf7f 100644
--- a/include/aidge/quantization/PTQ/CLE.hpp
+++ b/include/aidge/quantization/PTQ/CLE.hpp
@@ -30,7 +30,7 @@ namespace Aidge
      * @param graphView The GraphView to process.
      * @param targetDelta the stopping criterion (typical value : 0.01)
      */
-    void crossLayerEqualization(std::shared_ptr<GraphView> graphView, float targetDelta = 0.01);
+    void crossLayerEqualization(std::shared_ptr<GraphView> graphView, double targetDelta = 0.01);
 
 }
 
diff --git a/src/PTQ/CLE.cpp b/src/PTQ/CLE.cpp
index 2c6e374..aac0073 100644
--- a/src/PTQ/CLE.cpp
+++ b/src/PTQ/CLE.cpp
@@ -38,13 +38,13 @@ static std::shared_ptr<Tensor> getBiasTensor(std::shared_ptr<Node> node)
     return std::static_pointer_cast<OperatorTensor>(node->getOperator())->getInput(2);
 }
 
-static void rescaleTensor(std::shared_ptr<Tensor> tensor, float scaling)
+static void rescaleTensor(std::shared_ptr<Tensor> tensor, double scaling)
 {
     auto mulOp = Mul_Op();
     mulOp.setDataType(tensor->dataType());
     mulOp.setBackend(tensor->backend());
 
-    std::shared_ptr<Aidge::Tensor> scalingTensor = std::make_shared<Aidge::Tensor>(Aidge::Array1D<float, 1> {scaling});
+    std::shared_ptr<Aidge::Tensor> scalingTensor = std::make_shared<Aidge::Tensor>(Aidge::Array1D<double, 1> {scaling});
     scalingTensor->setDataType(tensor->dataType());
     scalingTensor->setBackend(tensor->backend());
 
@@ -94,7 +94,7 @@ static double getTensorAbsoluteMax(std::shared_ptr<Tensor> tensor)
     return flatTensor->get<double>(maxIndex);
 }
 
-void crossLayerEqualization(std::shared_ptr<GraphView> graphView, float targetDelta)
+void crossLayerEqualization(std::shared_ptr<GraphView> graphView, double targetDelta)
 {
     std::vector<std::shared_ptr<Node>> nodeVector = retrieveNodeVector(graphView);
 
@@ -116,7 +116,7 @@ void crossLayerEqualization(std::shared_ptr<GraphView> graphView, float targetDe
         if (isAffine(node))
             affineNodeVector.push_back(node);
 
-    float maxRangeDelta;
+    double maxRangeDelta;
 
     do 
     {
@@ -131,18 +131,18 @@ void crossLayerEqualization(std::shared_ptr<GraphView> graphView, float targetDe
             std::shared_ptr<Node> n1 = affineNodeVector[i];
             std::shared_ptr<Node> n2 = affineNodeVector[i+1];
 
-            float r1 = getTensorAbsoluteMax(getWeightTensor(n1));
-            float r2 = getTensorAbsoluteMax(getWeightTensor(n2));
+            double r1 = getTensorAbsoluteMax(getWeightTensor(n1));
+            double r2 = getTensorAbsoluteMax(getWeightTensor(n2));
 
-            float s1 = std::sqrt(r1 * r2) / r1;
-            float s2 = std::sqrt(r1 * r2) / r2;
+            double s1 = std::sqrt(r1 * r2) / r1;
+            double s2 = std::sqrt(r1 * r2) / r2;
 
             rescaleTensor(getWeightTensor(n1), s1);
             rescaleTensor(getWeightTensor(n2), s2);
 
             rescaleTensor(getBiasTensor(n1), s1);
 
-            float rangeDelta = std::abs(r1 - r2);
+            double rangeDelta = std::abs(r1 - r2);
             if (rangeDelta > maxRangeDelta)
                 maxRangeDelta = rangeDelta;
         }
diff --git a/src/PTQ/PTQ.cpp b/src/PTQ/PTQ.cpp
index 88e7ac8..3b156e7 100644
--- a/src/PTQ/PTQ.cpp
+++ b/src/PTQ/PTQ.cpp
@@ -72,13 +72,13 @@ bool checkArchitecture(std::shared_ptr<GraphView> graphView)
     return true;
 }
 
-static void rescaleTensor(std::shared_ptr<Tensor> tensor, float scaling)
+static void rescaleTensor(std::shared_ptr<Tensor> tensor, double scaling)
 {
     auto mulOp = Mul_Op();
     mulOp.setDataType(tensor->dataType());
     mulOp.setBackend(tensor->backend());
 
-    std::shared_ptr<Aidge::Tensor> scalingTensor = std::make_shared<Aidge::Tensor>(Aidge::Array1D<float, 1> {scaling});
+    std::shared_ptr<Aidge::Tensor> scalingTensor = std::make_shared<Aidge::Tensor>(Aidge::Array1D<double, 1> {scaling});
     scalingTensor->setDataType(tensor->dataType());
     scalingTensor->setBackend(tensor->backend());
 
-- 
GitLab
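
For reference, one CLE iteration from crossLayerEqualization() on made-up
ranges: both layers end up with the same weight range sqrt(r1 * r2), and
since s1 * s2 = 1 the composed function is unchanged for positively
homogeneous activations such as ReLU. A hedged numeric sketch:

    #include <cmath>
    #include <cstdio>

    int main()
    {
        double r1 = 0.5;  // abs-max of layer 1 weights (made up)
        double r2 = 8.0;  // abs-max of layer 2 weights (made up)

        // Same scaling rule as in crossLayerEqualization()
        double s1 = std::sqrt(r1 * r2) / r1;  // 4.0
        double s2 = std::sqrt(r1 * r2) / r2;  // 0.25

        std::printf("new r1 = %.2f, new r2 = %.2f, s1 * s2 = %.2f\n",
                    r1 * s1, r2 * s2, s1 * s2);  // 2.00, 2.00, 1.00
        return 0;
    }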


From d8ea1014323f4a8e8616132313df2bd155790067 Mon Sep 17 00:00:00 2001
From: bhalimi <benjamin.halimi@cea.fr>
Date: Mon, 27 Jan 2025 14:35:45 +0000
Subject: [PATCH 25/60] replace the couts with logs

---
 src/PTQ/CLE.cpp        |  4 ----
 src/PTQ/Clipping.cpp   |  2 +-
 src/PTQ/PTQ.cpp        | 13 ++++++-------
 src/QAT/QAT_FixedQ.cpp |  6 +++---
 src/QAT/QAT_LSQ.cpp    |  3 +--
 5 files changed, 11 insertions(+), 17 deletions(-)

diff --git a/src/PTQ/CLE.cpp b/src/PTQ/CLE.cpp
index aac0073..e6bcbc0 100644
--- a/src/PTQ/CLE.cpp
+++ b/src/PTQ/CLE.cpp
@@ -122,10 +122,6 @@ void crossLayerEqualization(std::shared_ptr<GraphView> graphView, double targetD
     {
         maxRangeDelta = 0.0;
         
-        //std::cout << " ----- " << std::endl;
-        //for (std::shared_ptr<Node> node : affineNodeVector)
-        //    std::cout << getTensorAbsoluteMax(getWeightTensor(node)) << std::endl;
-        
         for (size_t i = 0; i < (affineNodeVector.size() - 1); i++)
         {
             std::shared_ptr<Node> n1 = affineNodeVector[i];
diff --git a/src/PTQ/Clipping.cpp b/src/PTQ/Clipping.cpp
index 57ad7a8..66b0ab3 100644
--- a/src/PTQ/Clipping.cpp
+++ b/src/PTQ/Clipping.cpp
@@ -26,7 +26,7 @@ std::map<std::string, std::vector<int>> computeHistograms(std::map<std::string,
 
     std::shared_ptr<Node> firstNode = retrieveNodeVector(graphView)[0];
 
-    //std::cout << " COMPUTING HISTOGRAMS ... " << std::endl;
+    // Log::debug(" COMPUTING HISTOGRAMS ... ");
 
     std::map<std::string, std::vector<int>> histograms;
 
diff --git a/src/PTQ/PTQ.cpp b/src/PTQ/PTQ.cpp
index 3b156e7..073e5e0 100644
--- a/src/PTQ/PTQ.cpp
+++ b/src/PTQ/PTQ.cpp
@@ -987,7 +987,6 @@ void performSingleShiftApproximation(std::shared_ptr<GraphView> graphView, bool
 
 static void printScalingFactors(std::shared_ptr<GraphView> graphView)
 {
-    Log::info(" === SCALING FACTORS === ");
     for (auto node : retrieveNodeVector(graphView))
         if (node->type() == "Scaling" || node->type() == "Quantizer")
         {
@@ -1020,7 +1019,7 @@ static void printRanges(std::shared_ptr<GraphView> graphView, std::map<std::stri
     auto scheduling = scheduler.getStaticScheduling();
     for (auto node : scheduling)
         if (node->type() == "Scaling")
-            std::cout << node->name() << " range = " << valueRanges[node->name()] << std::endl;
+            Log::info(" {} range = {} ", node->name(), valueRanges[node->name()]);
 }
 
 void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits, std::vector<std::shared_ptr<Tensor>> inputDataSet, Clipping clippingMode, bool noQuant, bool optimizeSigns, bool singleShift, bool useCuda, bool verbose)
@@ -1049,13 +1048,13 @@ void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits,
     Log::info(" Computing the value ranges ...");
     std::map<std::string, double> valueRanges = computeRanges(graphView, inputDataSet, true, useCuda);
 
-    //std::cout << " === RANGES (BEFORE ADJUST) ===" << std::endl;
+    //Log::info(" === RANGES (BEFORE ADJUST) ===");
     //printRanges(graphView, valueRanges);
 
     Log::info(" Optimizing the clipping values ...");
     valueRanges = adjustRanges(clippingMode, valueRanges, nbBits, graphView, inputDataSet, useCuda, verbose);
 
-    //std::cout << " === RANGES (AFTER ADJUST) ===" << std::endl;
+    //Log::info(" === RANGES (AFTER ADJUST) ===");
     //printRanges(graphView, valueRanges);
 
     Log::info(" Normalizing the activations ...");
@@ -1076,7 +1075,7 @@ void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits,
     if (verbose)
         printScalingFactors(graphView);
 
-    //std::cout << " === SCALINGS (BEFORE CAST) ===" << std::endl;
+    //Log::info(" === SCALINGS (BEFORE CAST) ===");
     //printScalingFactors(graphView);
 
     setupDataType(graphView, inputDataSet, initialDataType);
@@ -1084,7 +1083,7 @@ void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits,
     if (useCuda)
         graphView->setBackend("cuda");
 
-    //std::cout << " === SCALINGS (AFTER CAST) ===" << std::endl;
+    //Log::info(" === SCALINGS (AFTER CAST) ===");
     //printScalingFactors(graphView);
 
     Log::info(" Reseting the scheduler ...");
@@ -1124,7 +1123,7 @@ void clearBiases(std::shared_ptr<GraphView> graphView)
 void devPTQ(std::shared_ptr<GraphView> graphView) 
 {
     for (std::shared_ptr<Node> node : graphView->getNodes())
-        std::cout << " UUU : " << node->name() << std::endl;   
+        Log::info(" UUU : {}", node->name());   
 }
 
 }
diff --git a/src/QAT/QAT_FixedQ.cpp b/src/QAT/QAT_FixedQ.cpp
index d22074f..6ada532 100644
--- a/src/QAT/QAT_FixedQ.cpp
+++ b/src/QAT/QAT_FixedQ.cpp
@@ -91,7 +91,7 @@ static std::map<std::string, float> collectInputStats(std::shared_ptr<GraphView>
             const auto op = std::static_pointer_cast<FixedQ_Op>(node->getOperator());
             float inputStd = getTensorStd(op->getInput(0));
             inputStats.insert(std::make_pair(node->name(), inputStd));
-            std::cout << node->name() << " -> " << inputStd << std::endl;
+            Log::info(" {} -> {} ", node->name(), inputStd);
         }
     }
 
@@ -108,7 +108,7 @@ static std::map<std::string, float> collectParamStats(std::shared_ptr<GraphView>
             const auto op = std::static_pointer_cast<FixedQ_Op>(node->getOperator());
             float paramStd = getTensorStd(op->getInput(1));
             paramStats.insert(std::make_pair(node->name(), paramStd));
-            std::cout << node->name() << " -> " << paramStd << std::endl;
+            Log::info(" {} -> {} ", node->name(), paramStd);
         }
     }
     
@@ -156,7 +156,7 @@ void QuantFixedQ::devQAT(std::shared_ptr<GraphView> graphView)
     scheduler.generateScheduling();
     auto s = scheduler.getStaticScheduling();
     for (std::shared_ptr<Node> node : s)
-        std::cout << " name : " << node->name() << std::endl;
+        Log::info(" name : {} ", node->name());
 }
 
 }
\ No newline at end of file
diff --git a/src/QAT/QAT_LSQ.cpp b/src/QAT/QAT_LSQ.cpp
index 80e8a05..0508fc7 100644
--- a/src/QAT/QAT_LSQ.cpp
+++ b/src/QAT/QAT_LSQ.cpp
@@ -27,7 +27,6 @@ namespace Aidge
 
 static float getTensorAbsMean(std::shared_ptr<Tensor> tensor)
 {
-    //std::cout << " GET TENSOR ABS MEAN " << std::endl;
     auto valueTensor = (*tensor).abs().mean();
     std::shared_ptr<Tensor> fallback;
     const Tensor& localTensor = valueTensor.refCastFrom(fallback, DataType::Float32, "cpu");
@@ -76,7 +75,7 @@ static bool initStepSize(std::shared_ptr<Node> quantizer)
 
     stepSizeProducer->getOperator()->setOutput(0, stepSizeTensor);
 
-    std::cout << " [ INIT STEP SIZE = " << stepSize << " ] " << std::endl;
+    Log::info(" [ INIT STEP SIZE = {} ] ", stepSize);
 
     return false;
 }
-- 
GitLab


From 06c57eaaa1136a17d9935a2368a6357ecbad1947 Mon Sep 17 00:00:00 2001
From: bhalimi <benjamin.halimi@cea.fr>
Date: Mon, 27 Jan 2025 14:53:30 +0000
Subject: [PATCH 26/60] minor change

---
 src/recipes/QuantRecipes.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/recipes/QuantRecipes.cpp b/src/recipes/QuantRecipes.cpp
index 562948c..7f01b24 100644
--- a/src/recipes/QuantRecipes.cpp
+++ b/src/recipes/QuantRecipes.cpp
@@ -58,11 +58,11 @@ void insertBatchNormNodes(std::shared_ptr<GraphView> graphView)
         if (parentNode->type() == "Conv2D")
         {
             std::shared_ptr<Conv_Op<2>> convOperator = std::static_pointer_cast<Conv_Op<2>> (parentNode->getOperator());
-            int nb_channels = convOperator->getInput(1)->dims()[0];
-            std::cout << " NB CHANNELS = " << nb_channels << std::endl; // TODO : remove this ...
+            int nbChannels = convOperator->getInput(1)->dims()[0];
+            Log::info(" NB CHANNELS = {} ", nbChannels);
 
             std::string batchnormNodeName = makeUniqueName(parentNode->name() + "_BN", graphView);
-            std::shared_ptr<Node> batchnormNode = BatchNorm<2>(nb_channels, 1e-5, 0.1, false, batchnormNodeName);
+            std::shared_ptr<Node> batchnormNode = BatchNorm<2>(nbChannels, 1e-5, 0.1, false, batchnormNodeName);
             batchnormNode->getOperator()->setDataType(DataType::Float32);
             batchnormNode->getOperator()->setBackend("cpu");
 
-- 
GitLab
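
nbChannels above is read from dims()[0] of the convolution's weight tensor;
assuming the usual [outChannels, inChannels, kH, kW] layout, that is the
number of output feature maps, which is exactly the per-channel parameter
count the inserted BatchNorm needs. A small sketch under that layout
assumption (the dims are made up):

    #include <cstdio>
    #include <vector>

    int main()
    {
        // A hypothetical 3x3 convolution mapping 16 input channels to 32 output ones.
        std::vector<int> weightDims = {32, 16, 3, 3};

        // Mirrors convOperator->getInput(1)->dims()[0] in insertBatchNormNodes()
        int nbChannels = weightDims[0];

        std::printf("BatchNorm<2>(%d, 1e-5, 0.1, false, name)\n", nbChannels);
        return 0;
    }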


From d9c551fd838a2783d4311294d0476348e2ba7cf3 Mon Sep 17 00:00:00 2001
From: bhalimi <benjamin.halimi@cea.fr>
Date: Mon, 27 Jan 2025 15:03:44 +0000
Subject: [PATCH 27/60] move the PTQMetaOps files

---
 include/aidge/{quantization/PTQ => operator}/PTQMetaOps.hpp | 0
 src/PTQ/PTQ.cpp                                             | 2 +-
 src/{PTQ => operator}/PTQMetaOps.cpp                        | 0
 3 files changed, 1 insertion(+), 1 deletion(-)
 rename include/aidge/{quantization/PTQ => operator}/PTQMetaOps.hpp (100%)
 rename src/{PTQ => operator}/PTQMetaOps.cpp (100%)

diff --git a/include/aidge/quantization/PTQ/PTQMetaOps.hpp b/include/aidge/operator/PTQMetaOps.hpp
similarity index 100%
rename from include/aidge/quantization/PTQ/PTQMetaOps.hpp
rename to include/aidge/operator/PTQMetaOps.hpp
diff --git a/src/PTQ/PTQ.cpp b/src/PTQ/PTQ.cpp
index 073e5e0..09b039f 100644
--- a/src/PTQ/PTQ.cpp
+++ b/src/PTQ/PTQ.cpp
@@ -12,7 +12,7 @@
 #include "aidge/quantization/PTQ/CLE.hpp"
 #include "aidge/quantization/PTQ/Clipping.hpp"
 #include "aidge/quantization/PTQ/PTQ.hpp"
-#include "aidge/quantization/PTQ/PTQMetaOps.hpp"
+#include "aidge/operator/PTQMetaOps.hpp"
 
 
 #include "aidge/data/Tensor.hpp"
diff --git a/src/PTQ/PTQMetaOps.cpp b/src/operator/PTQMetaOps.cpp
similarity index 100%
rename from src/PTQ/PTQMetaOps.cpp
rename to src/operator/PTQMetaOps.cpp
-- 
GitLab


From 7aff7e0fc383009a282c81153d7f3d72525b5d08 Mon Sep 17 00:00:00 2001
From: bhalimi <benjamin.halimi@cea.fr>
Date: Tue, 28 Jan 2025 10:06:53 +0000
Subject: [PATCH 28/60] fix an include

---
 src/operator/PTQMetaOps.cpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/operator/PTQMetaOps.cpp b/src/operator/PTQMetaOps.cpp
index 152a3b0..a079ed6 100644
--- a/src/operator/PTQMetaOps.cpp
+++ b/src/operator/PTQMetaOps.cpp
@@ -9,13 +9,12 @@
  *
  ********************************************************************************/
 
-#include "aidge/quantization/PTQ/PTQMetaOps.hpp"
+#include "aidge/operator/PTQMetaOps.hpp"
 
 #include <array>
 #include <memory>
 #include <utility>
 
-//Operator
 #include "aidge/operator/Clip.hpp"
 #include "aidge/operator/Mul.hpp"
 #include "aidge/operator/Round.hpp"
-- 
GitLab


From 5840b845139be68ed2837b34f7339d0382d08c08 Mon Sep 17 00:00:00 2001
From: Maxence Naud <maxence.naud@cea.fr>
Date: Wed, 29 Jan 2025 16:41:43 +0000
Subject: [PATCH 29/60] [Upd] standardization of some files

---
 aidge_quantization/unit_tests/test_ptq.py     | 22 +++---
 include/aidge/operator/FixedQ.hpp             | 43 ++++++------
 include/aidge/operator/LSQ.hpp                |  6 +-
 include/aidge/operator/SAT/DoReFa.hpp         | 67 ++++++++++++-------
 include/aidge/operator/SAT/TanhClamp.hpp      | 33 +++------
 include/aidge/quantization/PTQ/CLE.hpp        | 30 +++++----
 include/aidge/quantization/PTQ/Clipping.hpp   | 22 +++---
 include/aidge/quantization/PTQ/PTQ.hpp        | 27 ++++----
 include/aidge/quantization/PTQ/PTQMetaOps.hpp | 18 ++---
 include/aidge/quantization/QAT/QAT_FixedQ.hpp | 10 +--
 include/aidge/quantization/QAT/QAT_LSQ.hpp    | 20 +++---
 src/PTQ/CLE.cpp                               | 40 +++++++----
 src/PTQ/PTQMetaOps.cpp                        | 22 +++---
 src/operator/FixedQ.cpp                       | 20 ++++++
 src/operator/SAT/DoReFa.cpp                   | 33 +++++++--
 src/operator/SAT/TanhClamp.cpp                | 22 +++++-
 16 files changed, 259 insertions(+), 176 deletions(-)

diff --git a/aidge_quantization/unit_tests/test_ptq.py b/aidge_quantization/unit_tests/test_ptq.py
index dfdedd8..56080bf 100644
--- a/aidge_quantization/unit_tests/test_ptq.py
+++ b/aidge_quantization/unit_tests/test_ptq.py
@@ -21,7 +21,7 @@ ACCURACIES      = (95.4, 94.4)       # (97.9, 97.7)
 NB_BITS         = 4
 
 # --------------------------------------------------------------
-# UTILS 
+# UTILS
 # --------------------------------------------------------------
 
 def propagate(model, scheduler, sample):
@@ -50,7 +50,7 @@ def compute_accuracy(model, samples, labels):
 # --------------------------------------------------------------
 
 class test_ptq(unittest.TestCase):
-    
+
     def setUp(self):
 
         # load the samples / labels (numpy)
@@ -70,19 +70,20 @@ class test_ptq(unittest.TestCase):
     def tearDown(self):
         pass
 
-    
+
     def test_model(self):
 
         Log.set_console_level(Level.Info)
         # compute the base accuracy
         accuracy = compute_accuracy(self.model, self.samples[0:NB_SAMPLES], self.labels)
         self.assertAlmostEqual(accuracy * 100, ACCURACIES[0], msg='base accuracy does not meet the baseline !', delta=0.1)
-    
+
     def test_quant_model(self):
 
-        Log.set_console_level(Level.Info)
+        Log.set_console_level(Level.Debug)
 
         # create the calibration dataset
+
         tensors = []
         for sample in self.samples[0:NB_SAMPLES]:
             sample = prepare_sample(sample)
@@ -91,14 +92,13 @@ class test_ptq(unittest.TestCase):
 
         # quantize the model
 
-        
         aidge_quantization.quantize_network(
-            self.model, 
-            NB_BITS, 
-            tensors, 
-            clipping_mode=aidge_quantization.Clipping.MSE, 
+            self.model,
+            NB_BITS,
+            tensors,
+            clipping_mode=aidge_quantization.Clipping.MSE,
             no_quantization=False,
-            optimize_signs=True, 
+            optimize_signs=True,
             single_shift=False
         )
 
diff --git a/include/aidge/operator/FixedQ.hpp b/include/aidge/operator/FixedQ.hpp
index 96a52b4..3d46dcf 100644
--- a/include/aidge/operator/FixedQ.hpp
+++ b/include/aidge/operator/FixedQ.hpp
@@ -9,11 +9,12 @@
  *
  ********************************************************************************/
 
-#ifndef AIDGE_CORE_OPERATOR_FIXEDQ_H_
-#define AIDGE_CORE_OPERATOR_FIXEDQ_H_
+#ifndef AIDGE_QUANTIZATION_OPERATOR_FIXEDQ_H_
+#define AIDGE_QUANTIZATION_OPERATOR_FIXEDQ_H_
 
-#include <cassert>
+#include <cstddef>  // std::size_t
 #include <memory>
+#include <string>
 #include <vector>
 
 #include "aidge/backend/OperatorImpl.hpp"
@@ -21,8 +22,8 @@
 #include "aidge/operator/OperatorTensor.hpp"
 #include "aidge/utils/ErrorHandling.hpp"
 #include "aidge/utils/Registrar.hpp"
-#include "aidge/utils/Types.h"
 #include "aidge/utils/StaticAttributes.hpp"
+#include "aidge/utils/Types.h"
 
 
 namespace Aidge {
@@ -43,24 +44,20 @@ private:
 
 public:
 
-    FixedQ_Op(std::size_t nbBits, float span, bool isOutputUnsigned) :
-    OperatorTensor(Type, {InputCategory::Data}, 1),
-    mAttributes(std::make_shared<Attributes_>(attr<FixedQAttr::NbBits>(nbBits), attr<FixedQAttr::Span>(span), attr<FixedQAttr::IsOutputUnsigned>(isOutputUnsigned)))
+    FixedQ_Op(std::size_t nbBits = 8, float span = 4.0f, bool isOutputUnsigned = false) :
+      OperatorTensor(Type, {InputCategory::Data}, 1),
+      mAttributes(std::make_shared<Attributes_>(
+        attr<FixedQAttr::NbBits>(nbBits),
+        attr<FixedQAttr::Span>(span),
+        attr<FixedQAttr::IsOutputUnsigned>(isOutputUnsigned)))
     {}
 
     /**
-     * @brief Copy-constructor. Copy the operator attributes and its output tensor(s), but not its input tensors (the new operator has no input associated).
+     * @brief Copy-constructor. Copy the operator attributes and its output
+     * tensor(s), but not its input tensors (the new operator has no input associated).
      * @param op Operator to copy.
      */
-    FixedQ_Op(const FixedQ_Op& op)
-        : OperatorTensor(op),  mAttributes(op.mAttributes)
-    {
-        if (op.mImpl){
-            SET_IMPL_MACRO(FixedQ_Op, *this, op.backend());
-        }else{
-            mImpl = nullptr;
-        }
-    }
+    FixedQ_Op(const FixedQ_Op& op);
 
     /**
      * @brief Clone the operator using its copy-constructor.
@@ -88,14 +85,16 @@ public:
 
 };
 
-inline std::shared_ptr<Node> FixedQ(std::size_t nbBits = 8, float span = 4.0f, bool isOutputUnsigned = false, const std::string& name = "") {
-    return std::make_shared<Node>(std::make_shared<FixedQ_Op>(nbBits, span, isOutputUnsigned), name);
-}
-}
+std::shared_ptr<Node> FixedQ(std::size_t nbBits = 8,
+                            float span = 4.0f,
+                            bool isOutputUnsigned = false,
+                            const std::string& name = "");
+
+}  // namespace Aidge
 
 namespace {
 template <>
 const char* const EnumStrings<Aidge::FixedQAttr>::data[] = {"nb_bits", "span", "is_output_unsigned"};
 }
 
-#endif /* AIDGE_CORE_OPERATOR_FIXEDQ_H_ */
+#endif /* AIDGE_QUANTIZATION_OPERATOR_FIXEDQ_H_ */
diff --git a/include/aidge/operator/LSQ.hpp b/include/aidge/operator/LSQ.hpp
index eb266bc..970c476 100644
--- a/include/aidge/operator/LSQ.hpp
+++ b/include/aidge/operator/LSQ.hpp
@@ -9,8 +9,8 @@
  *
  ********************************************************************************/
 
-#ifndef AIDGE_CORE_OPERATOR_LSQ_H_
-#define AIDGE_CORE_OPERATOR_LSQ_H_
+#ifndef AIDGE_QUANTIZATION_OPERATOR_LSQ_H_
+#define AIDGE_QUANTIZATION_OPERATOR_LSQ_H_
 
 #include <cassert>
 #include <memory>
@@ -105,4 +105,4 @@ template <>
 const char *const EnumStrings<Aidge::LSQAttr>::data[] = {"range"};
 }
 
-#endif /* AIDGE_CORE_OPERATOR_LSQ_H_ */
+#endif /* AIDGE_QUANTIZATION_OPERATOR_LSQ_H_ */
diff --git a/include/aidge/operator/SAT/DoReFa.hpp b/include/aidge/operator/SAT/DoReFa.hpp
index 92ce167..d168c38 100644
--- a/include/aidge/operator/SAT/DoReFa.hpp
+++ b/include/aidge/operator/SAT/DoReFa.hpp
@@ -9,17 +9,15 @@
  *
  ********************************************************************************/
 
-#ifndef AIDGE_CORE_OPERATOR_DOREFA_H_
-#define AIDGE_CORE_OPERATOR_DOREFA_H_
+#ifndef AIDGE_QUANTIZATION_OPERATOR_SAT_DOREFA_H_
+#define AIDGE_QUANTIZATION_OPERATOR_SAT_DOREFA_H_
 
-#include <cassert>
 #include <memory>
 #include <vector>
 
 #include "aidge/backend/OperatorImpl.hpp"
 #include "aidge/graph/Node.hpp"
 #include "aidge/operator/OperatorTensor.hpp"
-#include "aidge/utils/ErrorHandling.hpp"
 #include "aidge/utils/Registrar.hpp"
 #include "aidge/utils/StaticAttributes.hpp"
 #include "aidge/utils/Types.h"
@@ -43,12 +41,17 @@ public:
     static const std::string Type;
 
 private:
-    using Attributes_ = StaticAttributes<DoReFaAttr, size_t, DoReFaMode>;
+    using Attributes_ = StaticAttributes<DoReFaAttr, std::size_t, DoReFaMode>;
     template <DoReFaAttr e> using attr = typename Attributes_::template attr<e>;
     const std::shared_ptr<Attributes_> mAttributes;
 
 public:
-    DoReFa_Op(size_t range = 255, DoReFaMode mode = DoReFaMode::Default)
+    /**
+     * @brief Constructor for DoReFa_Op
+     * @param range The quantization range (default: 255)
+     * @param mode The quantization mode (default: Default)
+     */
+    DoReFa_Op(std::size_t range = 255, DoReFaMode mode = DoReFaMode::Default)
       : OperatorTensor(Type, {InputCategory::Param}, 1),
         mAttributes(std::make_shared<Attributes_>(
             attr<DoReFaAttr::Range>(range),
@@ -59,30 +62,34 @@ public:
      * @brief Copy-constructor. Copy the operator attributes and its output tensor(s), but not its input tensors (the new operator has no input associated).
      * @param op Operator to copy.
      */
-    DoReFa_Op(const DoReFa_Op& op)
-        : OperatorTensor(op),
-          mAttributes(op.mAttributes)
-    {
-        if (op.mImpl){
-            SET_IMPL_MACRO(DoReFa_Op, *this, op.backend());
-        }else{
-            mImpl = nullptr;
-        }
-    }
+    DoReFa_Op(const DoReFa_Op& op);
 
     /**
      * @brief Clone the operator using its copy-constructor.
      * @see Operator::DoReFa_Op
+     * @return std::shared_ptr<Operator> A deep copy of the operator
      */
-    std::shared_ptr<Operator> clone() const override {
-        return std::make_shared<DoReFa_Op>(*this);
-    }
+    std::shared_ptr<Operator> clone() const override;
 
+    /**
+     * @brief Get available backends for this operator
+     * @return std::set<std::string> Set of supported backend names
+     */
     std::set<std::string> getAvailableBackends() const override final;
+
+    /**
+     * @brief Set the backend for this operator
+     * @param name Backend name
+     * @param device Device index (default: 0)
+     */
     void setBackend(const std::string& name, DeviceIdx_t device = 0) override final;
 
+    /**
+     * @brief Get operator attributes
+     * @return std::shared_ptr<Attributes> Shared pointer to operator attributes
+     */
     inline std::shared_ptr<Attributes> attributes() const override { return mAttributes; }
-    inline size_t& range() const noexcept { return mAttributes->getAttr<DoReFaAttr::Range>(); }
+    inline std::size_t& range() const noexcept { return mAttributes->getAttr<DoReFaAttr::Range>(); }
     inline DoReFaMode& mode() const noexcept { return mAttributes->getAttr<DoReFaAttr::Mode>(); }
 
     static const std::vector<std::string> getInputsName(){
@@ -93,10 +100,20 @@ public:
     }
 };
 
-inline std::shared_ptr<Node> DoReFa(size_t range = 255, DoReFaMode mode = DoReFaMode::Default, const std::string& name = "") {
-    return std::make_shared<Node>(std::make_shared<DoReFa_Op>(range, mode), name);
-}
-}
+/**
+ * @brief Factory function to create a DoReFa operator node
+ *
+ * @param range Quantization range (default: 255)
+ * @param mode Quantization mode (default: Default)
+ * @param name Node name (default: empty)
+ *
+ * @return std::shared_ptr<Node> Shared pointer to the created node
+ */
+std::shared_ptr<Node> DoReFa(std::size_t range = 255,
+                             DoReFaMode mode = DoReFaMode::Default,
+                             const std::string& name = "");
+
+}  // namespace Aidge
 
 namespace {
 template <>
@@ -106,4 +123,4 @@ template <>
 const char *const EnumStrings<Aidge::DoReFaMode>::data[] = {"default", "symmetric", "asymmetric", "full_range"};
 }
 
-#endif /* AIDGE_CORE_OPERATOR_DOREFA_H_ */
+#endif /* AIDGE_QUANTIZATION_OPERATOR_SAT_DOREFA_H_ */
diff --git a/include/aidge/operator/SAT/TanhClamp.hpp b/include/aidge/operator/SAT/TanhClamp.hpp
index def43b8..9d99d70 100644
--- a/include/aidge/operator/SAT/TanhClamp.hpp
+++ b/include/aidge/operator/SAT/TanhClamp.hpp
@@ -9,20 +9,18 @@
  *
  ********************************************************************************/
 
-#ifndef AIDGE_CORE_OPERATOR_TANHCLAMP_H_
-#define AIDGE_CORE_OPERATOR_TANHCLAMP_H_
+#ifndef AIDGE_QUANTIZATION_OPERATOR_SAT_TANHCLAMP_H_
+#define AIDGE_QUANTIZATION_OPERATOR_SAT_TANHCLAMP_H_
 
-#include <cassert>
 #include <memory>
+#include <set>
+#include <string>
 #include <vector>
 
 #include "aidge/backend/OperatorImpl.hpp"
 #include "aidge/graph/Node.hpp"
 #include "aidge/operator/OperatorTensor.hpp"
-#include "aidge/operator/Producer.hpp"
-#include "aidge/utils/ErrorHandling.hpp"
 #include "aidge/utils/Registrar.hpp"
-#include "aidge/utils/StaticAttributes.hpp"
 #include "aidge/utils/Types.h"
 
 namespace Aidge {
@@ -44,23 +42,13 @@ public:
      * @brief Copy-constructor. Copy the operator attributes and its output tensor(s), but not its input tensors (the new operator has no input associated).
      * @param op Operator to copy.
      */
-    TanhClamp_Op(const TanhClamp_Op& op)
-        : OperatorTensor(op)
-    {
-        if (op.mImpl){
-            SET_IMPL_MACRO(TanhClamp_Op, *this, op.backend());
-        }else{
-            mImpl = nullptr;
-        }
-    }
+    TanhClamp_Op(const TanhClamp_Op& op);
 
     /**
      * @brief Clone the operator using its copy-constructor.
      * @see Operator::TanhClamp_Op
      */
-    std::shared_ptr<Operator> clone() const override {
-        return std::make_shared<TanhClamp_Op>(*this);
-    }
+    std::shared_ptr<Operator> clone() const override;
 
     bool forwardDims(bool allowDataDependency = false) override final;
     std::set<std::string> getAvailableBackends() const override final;
@@ -75,9 +63,8 @@ public:
     }
 };
 
-inline std::shared_ptr<Node> TanhClamp(const std::string& name = "") {
-    return std::make_shared<Node>(std::make_shared<TanhClamp_Op>(), name);
-}
-}
+std::shared_ptr<Node> TanhClamp(const std::string& name = "");
+
+}  // namespace Aidge
 
-#endif /* AIDGE_CORE_OPERATOR_TANHCLAMP_H_ */
+#endif /* AIDGE_QUANTIZATION_OPERATOR_SAT_TANHCLAMP_H_ */
diff --git a/include/aidge/quantization/PTQ/CLE.hpp b/include/aidge/quantization/PTQ/CLE.hpp
index 77eaf7f..f4dc073 100644
--- a/include/aidge/quantization/PTQ/CLE.hpp
+++ b/include/aidge/quantization/PTQ/CLE.hpp
@@ -9,29 +9,33 @@
  *
  ********************************************************************************/
 
-#ifndef AIDGE_QUANTIZATION_PTQ_CLE_H_
-#define AIDGE_QUANTIZATION_PTQ_CLE_H_
+#ifndef AIDGE_QUANTIZATION_QUANTIZATION_PTQ_CLE_H_
+#define AIDGE_QUANTIZATION_QUANTIZATION_PTQ_CLE_H_
 
-//#include <cstdint>  
-//#include <map>
-//#include <memory>
-//#include <string>
-//#include <vector>
+#include <memory>
 
-#include "aidge/data/Tensor.hpp"
 #include "aidge/graph/GraphView.hpp"
 
 namespace Aidge
 {
 
     /**
-     * @brief Equalize the ranges of the nodes parameters by proceding iteratively. 
-     * Can only be applied to single branch networks (otherwise does not edit the graphView).
+     * @brief Equalize the ranges of the nodes' parameters by proceeding iteratively.
+     * Can only be applied to single-branch networks (otherwise does not edit the GraphView).
+     *
+     * Cross Layer Equalization (CLE) is used to balance the weights between consecutive
+     * layers to improve quantization performance. It works by iteratively scaling weights
+     * and biases of adjacent layers while preserving the overall function of the network.
+     *
+     * @note The operation modifies weights and biases in-place but preserves the mathematical
+     * function computed by the network.
+     *
      * @param graphView The GraphView to process.
-     * @param targetDelta the stopping criterion (typical value : 0.01)
+     * @param targetDelta the stopping criterion (typical value : 0.01). Smaller values lead
+     *                    to more precise equalization but may require more iterations.
      */
     void crossLayerEqualization(std::shared_ptr<GraphView> graphView, double targetDelta = 0.01);
 
-}
+}  // namespace Aidge
 
-#endif /* AIDGE_QUANTIZATION_PTQ_CLE_H_ */
\ No newline at end of file
+#endif /* AIDGE_QUANTIZATION_QUANTIZATION_PTQ_CLE_H_ */
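
For reference, the equalization step the doc comment above describes can be sketched as follows. This is a minimal standalone illustration assuming the usual geometric-mean formulation of CLE; the flat std::vector representation and the function name are illustrative, not the Aidge API.

    #include <algorithm>
    #include <cmath>
    #include <vector>

    // One CLE step between two consecutive affine layers: bring both weight
    // ranges to their geometric mean while preserving the composed function
    // (valid for positively homogeneous activations such as ReLU).
    void equalizePair(std::vector<double>& w1, std::vector<double>& b1,
                      std::vector<double>& w2)
    {
        auto absMax = [](const std::vector<double>& t) {
            double m = 0.0;
            for (double v : t)
                m = std::max(m, std::fabs(v));
            return m;
        };
        const double r1 = absMax(w1);
        const double r2 = absMax(w2);
        const double s  = std::sqrt(r1 * r2); // common target range

        for (double& v : w1) v *= s / r1; // rescale the first layer ...
        for (double& v : b1) v *= s / r1; // ... (the bias follows its weights)
        for (double& v : w2) v *= s / r2; // ... and inverse-rescale the second
    }

Since (s/r1) * (s/r2) = 1, the composition of the two layers is unchanged, which is the invariant the @note above refers to.
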
diff --git a/include/aidge/quantization/PTQ/Clipping.hpp b/include/aidge/quantization/PTQ/Clipping.hpp
index d0622f4..3f65c42 100644
--- a/include/aidge/quantization/PTQ/Clipping.hpp
+++ b/include/aidge/quantization/PTQ/Clipping.hpp
@@ -9,14 +9,14 @@
  *
  ********************************************************************************/
 
-#ifndef AIDGE_QUANTIZATION_PTQ_CLIP_H_
-#define AIDGE_QUANTIZATION_PTQ_CLIP_H_
+#ifndef AIDGE_QUANTIZATION_QUANTIZATION_PTQ_CLIP_H_
+#define AIDGE_QUANTIZATION_QUANTIZATION_PTQ_CLIP_H_
 
-//#include <cstdint>  
-//#include <map>
-//#include <memory>
-//#include <string>
-//#include <vector>
+#include <cstdint>  // std::uint8_t
+#include <map>
+#include <memory>
+#include <string>
+#include <vector>
 
 #include "aidge/data/Tensor.hpp"
 #include "aidge/graph/GraphView.hpp"
@@ -56,9 +56,9 @@ namespace Aidge
     double computeKLClipping(std::vector<int> histogram, std::uint8_t nbBits);
 
     /**
-     * @brief Return a corrected map of the provided activation ranges. 
-     * To do so compute the optimal clipping values for every node and multiply the input ranges by those values. 
-     * The method used to compute the clippings can be eihter 'MSE', 'AA', 'KL' or 'MAX'. 
+     * @brief Return a corrected map of the provided activation ranges.
+     * To do so, compute the optimal clipping values for every node and multiply the input ranges by those values.
+     * The method used to compute the clippings can be either 'MSE', 'AA', 'KL' or 'MAX'.
      * @param clippingMode The method used to compute the optimal clippings.
      * @param valueRanges The map associating each affine node to its output range.
      * @param nbBits The quantization number of bits.
@@ -71,5 +71,5 @@ namespace Aidge
 
 }
 
-#endif /* AIDGE_QUANTIZATION_PTQ_CLIP_H_ */
+#endif /* AIDGE_QUANTIZATION_QUANTIZATION_PTQ_CLIP_H_ */
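
The "multiply the input ranges by those values" step described in the doc comment above is a per-node scalar correction; a hypothetical sketch (adjustRanges and both map parameters are illustrative names, not the declared API):

    #include <map>
    #include <string>

    // Shrink each node's calibrated range by its optimal clipping ratio.
    std::map<std::string, double> adjustRanges(
        const std::map<std::string, double>& valueRanges,
        const std::map<std::string, double>& clippingRatios)
    {
        std::map<std::string, double> corrected;
        for (const auto& kv : valueRanges) {
            const auto it = clippingRatios.find(kv.first);
            const double ratio = (it != clippingRatios.end()) ? it->second : 1.0;
            corrected[kv.first] = kv.second * ratio;
        }
        return corrected;
    }
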
 
diff --git a/include/aidge/quantization/PTQ/PTQ.hpp b/include/aidge/quantization/PTQ/PTQ.hpp
index d2b8b7f..4fc38bc 100644
--- a/include/aidge/quantization/PTQ/PTQ.hpp
+++ b/include/aidge/quantization/PTQ/PTQ.hpp
@@ -9,16 +9,19 @@
  *
  ********************************************************************************/
 
-#ifndef AIDGE_QUANTIZATION_PTQ_PTQ_H_
-#define AIDGE_QUANTIZATION_PTQ_PTQ_H_
+#ifndef AIDGE_QUANTIZATION_QUANTIZATION_PTQ_PTQ_H_
+#define AIDGE_QUANTIZATION_QUANTIZATION_PTQ_PTQ_H_
 
-//#include <cstdint>  
-//#include <map>
-//#include <memory>
-//#include <string>
-//#include <vector>
+#include <cstdint>  // std::uint8_t
+#include <map>
+#include <memory>
+#include <set>
+#include <string>
+#include <utility>  // std::pair
+#include <vector>
 
 #include "aidge/data/Tensor.hpp"
+#include "aidge/quantization/PTQ/Clipping.hpp"
 #include "aidge/graph/GraphView.hpp"
 
 namespace Aidge {
@@ -104,12 +107,12 @@ namespace Aidge {
      * @brief Normalize the activations of each affine node so that they fit in the [-1:1] range.
      * This is done by reconfiguring the scaling nodes, as well as rescaling the weights and biases tensors.
      * @param graphView The GraphView containing the affine nodes.
-     * @param valueRanges The node output value ranges computed over the calibration dataset. 
+     * @param valueRanges The node output value ranges computed over the calibration dataset.
      */
     void normalizeActivations(std::shared_ptr<GraphView> graphView, std::map<std::string, double> valueRanges);
 
     /**
-     * @brief For each node, compute the sign of its input and output values. 
+     * @brief For each node, compute the sign of its input and output values.
      * The goal of the routine is to maximize the number of unsigned IOs in order to double the value resolution when possible.
      * @param graphView The GraphView to analyze.
      * @param verbose Whether to print the sign map or not.
@@ -135,7 +138,7 @@ namespace Aidge {
      * @param clippingMode: Type of the clipping optimization. Can be either 'MAX', 'MSE', 'AA' or 'KL'.
      * @param applyRounding Whether to apply the rounding operations or not.
      * @param optimizeSigns Whether to take account of the IO signs of the operators or not.
-     * @param singleShift Whether to convert the scaling factors into powers of two. If true the approximations are compensated using the previous nodes weights. 
+     * @param singleShift Whether to convert the scaling factors into powers of two. If true the approximations are compensated using the previous nodes weights.
      * @param verbose Whether to print internal informations about the quantization process.
      */
     void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits, std::vector<std::shared_ptr<Tensor>> inputDataSet, Clipping clippingMode, bool applyRounding, bool optimizeSigns, bool singleShift, bool useCuda, bool verbose);
@@ -157,8 +160,8 @@ namespace Aidge {
      * @brief Developement and test routine.
      * @param graphView The GraphView under test.
      */
-    void devPTQ(std::shared_ptr<GraphView> graphView); 
+    void devPTQ(std::shared_ptr<GraphView> graphView);
 }
 
-#endif /* AIDGE_QUANTIZATION_PTQ_PTQ_H_ */
+#endif /* AIDGE_QUANTIZATION_QUANTIZATION_PTQ_PTQ_H_ */
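
The singleShift option documented above replaces each scaling factor by a power of two, so the multiplication becomes a bit shift at inference time. A rough sketch of the decomposition, assuming a strictly positive factor (names are illustrative, not the Aidge implementation):

    #include <cmath>

    // Split a real scaling factor into a power-of-two shift and a residual
    // ratio; per the doc above, the residual is compensated by rescaling
    // the previous node's weights.
    void toSingleShift(double scalingFactor, int& shift, double& compensation)
    {
        shift = static_cast<int>(std::round(std::log2(scalingFactor)));
        const double powerOfTwo = std::ldexp(1.0, shift); // 2^shift
        compensation = scalingFactor / powerOfTwo;        // folded into weights
    }

By construction scalingFactor == compensation * 2^shift, so folding the compensation into the upstream weights leaves the network's outputs unchanged.
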
 
diff --git a/include/aidge/quantization/PTQ/PTQMetaOps.hpp b/include/aidge/quantization/PTQ/PTQMetaOps.hpp
index 62fac87..b9bad0d 100644
--- a/include/aidge/quantization/PTQ/PTQMetaOps.hpp
+++ b/include/aidge/quantization/PTQ/PTQMetaOps.hpp
@@ -8,22 +8,14 @@
  * SPDX-License-Identifier: EPL-2.0
  *
  ********************************************************************************/
-#ifndef AIDGE_QUANTIZATION_PTQ_PTQMETAOPS_H_
-#define AIDGE_QUANTIZATION_PTQ_PTQMETAOPS_H_
+#ifndef AIDGE_QUANTIZATION_QUANTIZATION_PTQ_PTQMETAOPS_H_
+#define AIDGE_QUANTIZATION_QUANTIZATION_PTQ_PTQMETAOPS_H_
 
-#include <array>
 #include <memory>
 #include <string>
-#include <utility>
-
-#include "aidge/operator/Clip.hpp"
-#include "aidge/operator/Mul.hpp"
-#include "aidge/operator/Round.hpp"
 
 #include "aidge/graph/GraphView.hpp"
 #include "aidge/graph/Node.hpp"
-#include "aidge/graph/OpArgs.hpp" // Sequential
-#include "aidge/operator/MetaOperator.hpp"
 
 namespace Aidge {
 
@@ -55,7 +47,7 @@ std::shared_ptr<Aidge::Node> Scaling(double scalingFactor, const std::string& na
 void updateScalingFactor(std::shared_ptr<Aidge::Node> MetaOpNode, double newScalingFactor);
 
 /// @brief Retrieves the current scaling factor of a PTQ meta-operator node.
-/// This function returns the scaling factor associated with the specified PTQ meta-operator node, 
+/// This function returns the scaling factor associated with the specified PTQ meta-operator node,
 /// allowing inspection of the current scalar applied in the [Mul] operation.
 ///
 /// @param MetaOpNode A shared pointer to the PTQ meta-operator node whose scaling factor is being queried.
@@ -66,7 +58,7 @@ double getScalingFactor(std::shared_ptr<Aidge::Node> MetaOpNode);
 /// This function modifies the clip range of a Quantizer node, allowing adjustment of the range within which values are clipped
 /// in the [Clip] operation of the Quantizer sequence.
 ///
-/// @param QuantizerNode A shared pointer to the Quantizer node whose clip range is being set. 
+/// @param QuantizerNode A shared pointer to the Quantizer node whose clip range is being set.
 /// This node should have been created using the Quantizer function.
 /// @param min The minimum value for the clip range. Values below this will be clipped to this minimum.
 /// @param max The maximum value for the clip range. Values above this will be clipped to this maximum.
@@ -75,4 +67,4 @@ void setClipRange(std::shared_ptr<Aidge::Node> QuantizerNode, double min, double
 
 }
 
-#endif /* AIDGE_QUANTIZATION_PTQ_PTQMETAOPS_H_ */
+#endif /* AIDGE_QUANTIZATION_QUANTIZATION_PTQ_PTQMETAOPS_H_ */
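
Functionally, the Quantizer meta-operator declared in this header chains a Mul, a Round and a Clip (see the Sequential({mulNode, roundNode, clipNode}) construction in the PTQMetaOps.cpp diff further down). A scalar sketch of the resulting arithmetic, for illustration only:

    #include <algorithm>
    #include <cmath>

    // What Quantizer(scalingFactor, clipMin, clipMax, name) computes per value.
    double quantize(double x, double scalingFactor, double clipMin, double clipMax)
    {
        const double scaled  = x * scalingFactor;             // [Mul]
        const double rounded = std::round(scaled);            // [Round]
        return std::min(std::max(rounded, clipMin), clipMax); // [Clip]
    }
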
diff --git a/include/aidge/quantization/QAT/QAT_FixedQ.hpp b/include/aidge/quantization/QAT/QAT_FixedQ.hpp
index ecbe742..6a2aa24 100644
--- a/include/aidge/quantization/QAT/QAT_FixedQ.hpp
+++ b/include/aidge/quantization/QAT/QAT_FixedQ.hpp
@@ -9,8 +9,10 @@
  *
  ********************************************************************************/
 
-#ifndef AIDGE_QUANTIZATION_QAT_FIXEDQ_H_
-#define AIDGE_QUANTIZATION_QAT_FIXEDQ_H_
+#ifndef AIDGE_QUANTIZATION_QUANTIZATION_QAT_FIXEDQ_H_
+#define AIDGE_QUANTIZATION_QUANTIZATION_QAT_FIXEDQ_H_
+
+#include <memory>
 
 #include "aidge/graph/Node.hpp"
 #include "aidge/graph/GraphView.hpp"
@@ -41,10 +43,10 @@ void insertAndInitQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits
  * @brief Developement and test routine.
  * @param graphView The GraphView under test.
  */
-void devQAT(std::shared_ptr<GraphView> graphView); 
+void devQAT(std::shared_ptr<GraphView> graphView);
 
 }
 }
 
-#endif /* AIDGE_QUANTIZATION_QAT_FIXEDQ_H_ */
+#endif /* AIDGE_QUANTIZATION_QUANTIZATION_QAT_FIXEDQ_H_ */
 
diff --git a/include/aidge/quantization/QAT/QAT_LSQ.hpp b/include/aidge/quantization/QAT/QAT_LSQ.hpp
index 4970be0..a44c71b 100644
--- a/include/aidge/quantization/QAT/QAT_LSQ.hpp
+++ b/include/aidge/quantization/QAT/QAT_LSQ.hpp
@@ -9,12 +9,14 @@
  *
  ********************************************************************************/
 
-#ifndef AIDGE_QUANTIZATION_QAT_LSQ_H_
-#define AIDGE_QUANTIZATION_QAT_LSQ_H_
+#ifndef AIDGE_QUANTIZATION_QUANTIZATION_QAT_LSQ_H_
+#define AIDGE_QUANTIZATION_QUANTIZATION_QAT_LSQ_H_
+
+#include <cstddef>  // std::size_t
+#include <memory>
 
-#include "aidge/graph/Node.hpp"
-#include "aidge/graph/GraphView.hpp"
 #include "aidge/data/Tensor.hpp"
+#include "aidge/graph/GraphView.hpp"
 
 namespace Aidge {
 namespace QuantLSQ {
@@ -25,7 +27,7 @@ namespace QuantLSQ {
  * @param nbBits Number of quantization bits.
  * @param span Fixed output span of the quantizers.
  */
-void insertQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits, float step_size);
+void insertQuantizers(std::shared_ptr<GraphView> graphView, std::size_t nbBits, float step_size);
 
 /**
  * @brief Given a GraphView with parameters properly initialized and some calibration data,
@@ -35,10 +37,10 @@ void insertQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits, float
  * @param calibrationData Calibration data used to adjust the spans.
  * @param scale Multiplicative constant applied to the spans.
  */
-void insertAndInitQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits, std::shared_ptr<Tensor> calibrationData);
+void insertAndInitQuantizers(std::shared_ptr<GraphView> graphView, std::size_t nbBits, std::shared_ptr<Tensor> calibrationData);
 
-}
-}
+}  // namespace QuantLSQ
+}  // namespace Aidge
 
-#endif /* AIDGE_QUANTIZATION_QAT_LSQ_H_ */
+#endif /* AIDGE_QUANTIZATION_QUANTIZATION_QAT_LSQ_H_ */
 
diff --git a/src/PTQ/CLE.cpp b/src/PTQ/CLE.cpp
index 2c81815..5265d9c 100644
--- a/src/PTQ/CLE.cpp
+++ b/src/PTQ/CLE.cpp
@@ -10,14 +10,19 @@
  ********************************************************************************/
 
 #include "aidge/quantization/PTQ/CLE.hpp"
+
+#include <cmath>    // std::abs, std::fabs, std::sqrt
+#include <cstddef>  // std::size_t
+#include <memory>
+#include <vector>
+
 #include "aidge/quantization/PTQ/Clipping.hpp"
-#include "aidge/quantization/PTQ/PTQ.hpp"
+#include "aidge/quantization/PTQ/PTQ.hpp"  // retrieveNodeVector
 
 #include "aidge/graph/GraphView.hpp"
-#include "aidge/scheduler/SequentialScheduler.hpp"
-#include "aidge/scheduler/Scheduler.hpp"
-#include "aidge/utils/Log.hpp"
+#include "aidge/graph/Node.hpp"
 #include "aidge/operator/OperatorTensor.hpp"
+#include "aidge/utils/Log.hpp"
 
 namespace Aidge
 {
@@ -42,13 +47,13 @@ static void rescaleTensor(std::shared_ptr<Tensor> tensor, double scaling)
         castedTensor[i] *= scaling;
 }
 
-static double getTensorAbsoluteMax(std::shared_ptr <Tensor> tensor)
+static double getTensorAbsoluteMax(std::shared_ptr<Tensor> tensor)
 {
     // Get the tensor data pointer and edit it
     double * castedTensor = static_cast<double*> (tensor->getImpl()->rawPtr());
 
     // Get the tensor absolute max value
-    double maxValue = 0.0f;
+    double maxValue = 0.0;
     for(std::size_t i = 0; i < tensor->size(); ++i) {
         if(std::fabs(castedTensor[i]) > maxValue) {
             maxValue = std::fabs(castedTensor[i]);
@@ -62,15 +67,14 @@ void crossLayerEqualization(std::shared_ptr<GraphView> graphView, double targetD
     std::vector<std::shared_ptr<Node>> nodeVector = retrieveNodeVector(graphView);
 
     // Check if the CLE can be applied ...
-
     for (std::shared_ptr<Node> node : nodeVector)
         if (node->getChildren().size() > 1)
         {
-            Log::info(" Network have multiple branches, skipping the CLE ... ");
+            Log::notice("Network has multiple branches, skipping the CLE ... ");
             return;
-        }    
+        }
 
-    Log::info(" Applying the Cross-Layer Equalization ... ");
+    Log::info("Applying the Cross-Layer Equalization ... ");
 
     // Get the vector of affine nodes
 
@@ -79,17 +83,22 @@ void crossLayerEqualization(std::shared_ptr<GraphView> graphView, double targetD
         if (isAffine(node))
             affineNodeVector.push_back(node);
 
+    if (affineNodeVector.empty()) {
+        Log::notice("No affine nodes found in the network. CLE cannot be applied.");
+        return;
+    }
     double maxRangeDelta;
+    int iteration = 0;
 
-    do 
+    do
     {
+        ++iteration;
         maxRangeDelta = 0.0;
-        
         //std::cout << " ----- " << std::endl;
         //for (std::shared_ptr<Node> node : affineNodeVector)
         //    std::cout << getTensorAbsoluteMax(getWeightTensor(node)) << std::endl;
-        
-        for (size_t i = 0; i < (affineNodeVector.size() - 1); i++)
+
+        for (std::size_t i = 0; i < (affineNodeVector.size() - 1); i++)
         {
             std::shared_ptr<Node> n1 = affineNodeVector[i];
             std::shared_ptr<Node> n2 = affineNodeVector[i+1];
@@ -111,6 +120,9 @@ void crossLayerEqualization(std::shared_ptr<GraphView> graphView, double targetD
         }
     }
     while (maxRangeDelta > targetDelta);
+
+    Log::notice("CLE completed after {} iterations. Final max range delta: {:.6f}",
+                iteration, maxRangeDelta);
 }
 
 }
\ No newline at end of file
diff --git a/src/PTQ/PTQMetaOps.cpp b/src/PTQ/PTQMetaOps.cpp
index 527d853..77018c2 100644
--- a/src/PTQ/PTQMetaOps.cpp
+++ b/src/PTQ/PTQMetaOps.cpp
@@ -11,8 +11,8 @@
 
 #include "aidge/quantization/PTQ/PTQMetaOps.hpp"
 
-#include <array>
 #include <memory>
+#include <string>
 #include <utility>
 
 //Operator
@@ -32,7 +32,7 @@
 #include "aidge/utils/Log.hpp"
 
 
-namespace Aidge 
+namespace Aidge
 {
 
 std::shared_ptr<Node> Quantizer(double scalingFactor, double clipMin, double clipMax, const std::string& name)
@@ -46,19 +46,19 @@ std::shared_ptr<Node> Quantizer(double scalingFactor, double clipMin, double cli
     // connect the scaling factor producer
 
     std::shared_ptr<Tensor> scalingFactorTensor = std::make_shared<Tensor>(Array1D<double, 1> {scalingFactor});
-    std::shared_ptr<Node> scalingFactorProducer = addProducer<1>(mulNode, 1, {1}, "ScalingFactor"); 
+    std::shared_ptr<Node> scalingFactorProducer = addProducer<1>(mulNode, 1, {1}, "ScalingFactor");
     scalingFactorProducer->getOperator()->setOutput(0, scalingFactorTensor);
-    
+
     // create the metaop graph
 
     std::shared_ptr<GraphView> graphView = Sequential({mulNode, roundNode, clipNode});
     std::shared_ptr<GraphView> connectedGraphView = getConnectedGraphView(mulNode); // XXX why not use the graphView ???
 
-    // return the metaop 
+    // return the metaop
 
     std::shared_ptr<Node> metaopNode = MetaOperator("Quantizer", connectedGraphView, {}, name); // XXX alternative prototype
 
-    return metaopNode; 
+    return metaopNode;
 }
 
 std::shared_ptr<Node> Scaling(double scalingFactor, const std::string& name)
@@ -67,7 +67,7 @@ std::shared_ptr<Node> Scaling(double scalingFactor, const std::string& name)
 
     std::shared_ptr<Node> mulNode = Mul((!name.empty()) ? name + "_Scaling" : "");
 
-    std::shared_ptr<Node> scalingFactorProducer = addProducer<1>(mulNode, 1, {1}, "ScalingFactor"); 
+    std::shared_ptr<Node> scalingFactorProducer = addProducer<1>(mulNode, 1, {1}, "ScalingFactor");
     scalingFactorProducer->getOperator()->setOutput(0, scalingFactorTensor);
 
     std::shared_ptr<GraphView> graphView  = Sequential({mulNode});
@@ -96,7 +96,7 @@ void updateScalingFactor(std::shared_ptr<Node> metaOpNode, double scalingFactor)
     std::shared_ptr<Tensor> scalingFactorTensor = std::make_shared<Tensor>(Array1D<double, 1> {scalingFactor});
 
     std::shared_ptr<MetaOperator_Op> metaOp = std::static_pointer_cast<MetaOperator_Op>(metaOpNode->getOperator());
-    
+
     std::shared_ptr<Node> mulNode = getSubNode(metaOp->getMicroGraph(), "Mul");
 
     if (!mulNode)
@@ -113,7 +113,7 @@ double getScalingFactor(std::shared_ptr<Node> MetaOpNode)
     }
 
     std::shared_ptr<MetaOperator_Op> metaOp = std::static_pointer_cast<MetaOperator_Op>(MetaOpNode->getOperator());
-    
+
     std::shared_ptr<Node> mulNode = getSubNode(metaOp->getMicroGraph(), "Mul");
 
     if (!mulNode) {
@@ -123,8 +123,8 @@ double getScalingFactor(std::shared_ptr<Node> MetaOpNode)
 
     auto scalingFactorTensor = std::static_pointer_cast<OperatorTensor>(mulNode->getOperator())->getInput(1);
     std::shared_ptr<Tensor> fallback;
-    const Tensor& localTensor = scalingFactorTensor->refCastFrom(fallback, DataType::Float64, "cpu"); 
-    
+    const Tensor& localTensor = scalingFactorTensor->refCastFrom(fallback, DataType::Float64, "cpu");
+
     return localTensor.get<double>(0);
 }
 
diff --git a/src/operator/FixedQ.cpp b/src/operator/FixedQ.cpp
index 8791740..9828ce9 100644
--- a/src/operator/FixedQ.cpp
+++ b/src/operator/FixedQ.cpp
@@ -20,6 +20,17 @@
 
 const std::string Aidge::FixedQ_Op::Type = "FixedQ";
 
+Aidge::FixedQ_Op::FixedQ_Op(const Aidge::FixedQ_Op& op)
+    : OperatorTensor(op),
+      mAttributes(op.mAttributes)
+{
+    if (op.mImpl){
+        SET_IMPL_MACRO(FixedQ_Op, *this, op.backend());
+    }else{
+        mImpl = nullptr;
+    }
+}
+
 std::set<std::string> Aidge::FixedQ_Op::getAvailableBackends() const {
     return Registrar<FixedQ_Op>::getKeys();
 }
@@ -28,3 +39,12 @@ void Aidge::FixedQ_Op::setBackend(const std::string& name, DeviceIdx_t device) {
     SET_IMPL_MACRO(FixedQ_Op, *this, name);
     mOutputs[0]->setBackend(name, device);
 }
+
+////////////////////////////////////////////////////////////////////////////////
+
+std::shared_ptr<Aidge::Node> Aidge::FixedQ(std::size_t nbBits,
+                            float span,
+                            bool isOutputUnsigned,
+                            const std::string& name) {
+    return std::make_shared<Node>(std::make_shared<FixedQ_Op>(nbBits, span, isOutputUnsigned), name);
+}
\ No newline at end of file
diff --git a/src/operator/SAT/DoReFa.cpp b/src/operator/SAT/DoReFa.cpp
index b6124ba..426e330 100644
--- a/src/operator/SAT/DoReFa.cpp
+++ b/src/operator/SAT/DoReFa.cpp
@@ -17,13 +17,38 @@
 #include "aidge/data/Tensor.hpp"
 #include "aidge/utils/Types.h"
 
-const std::string Aidge::DoReFa_Op::Type = "DoReFa";
+namespace Aidge {
 
-std::set<std::string> Aidge::DoReFa_Op::getAvailableBackends() const {
+const std::string DoReFa_Op::Type = "DoReFa";
+
+DoReFa_Op::DoReFa_Op(const DoReFa_Op& op)
+    : OperatorTensor(op),
+      mAttributes(op.mAttributes)
+{
+    if (op.mImpl) {
+        SET_IMPL_MACRO(DoReFa_Op, *this, op.backend());
+    } else {
+        mImpl = nullptr;
+    }
+}
+
+std::shared_ptr<Operator> DoReFa_Op::clone() const {
+    return std::make_shared<DoReFa_Op>(*this);
+}
+
+std::set<std::string> DoReFa_Op::getAvailableBackends() const {
     return Registrar<DoReFa_Op>::getKeys();
 }
 
-void Aidge::DoReFa_Op::setBackend(const std::string& name, DeviceIdx_t device) {
+void DoReFa_Op::setBackend(const std::string& name, DeviceIdx_t device) {
     SET_IMPL_MACRO(DoReFa_Op, *this, name);
     mOutputs[0]->setBackend(name, device);
-}
\ No newline at end of file
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+std::shared_ptr<Node> DoReFa(size_t range, DoReFaMode mode, const std::string& name) {
+    return std::make_shared<Node>(std::make_shared<DoReFa_Op>(range, mode), name);
+}
+
+}  // namespace Aidge
\ No newline at end of file
diff --git a/src/operator/SAT/TanhClamp.cpp b/src/operator/SAT/TanhClamp.cpp
index 2b8d63d..a03fc7d 100644
--- a/src/operator/SAT/TanhClamp.cpp
+++ b/src/operator/SAT/TanhClamp.cpp
@@ -20,6 +20,20 @@
 
 const std::string Aidge::TanhClamp_Op::Type = "TanhClamp";
 
+Aidge::TanhClamp_Op::TanhClamp_Op(const Aidge::TanhClamp_Op& op)
+    : OperatorTensor(op)
+{
+    if (op.mImpl) {
+        SET_IMPL_MACRO(TanhClamp_Op, *this, op.backend());
+    } else {
+        mImpl = nullptr;
+    }
+}
+
+std::shared_ptr<Aidge::Operator> Aidge::TanhClamp_Op::clone() const {
+    return std::make_shared<TanhClamp_Op>(*this);
+}
+
 bool Aidge::TanhClamp_Op::forwardDims(bool /*allowDataDependency*/) {
 
     if (inputsAssociated()) {
@@ -40,5 +54,11 @@ void Aidge::TanhClamp_Op::setBackend(const std::string& name, DeviceIdx_t device
     mOutputs[0]->setBackend(name, device);
 
     // Scale output is always on CPU for now
-    mOutputs[1]->setBackend("cpu"); // XXX why ? 
+    mOutputs[1]->setBackend("cpu"); // XXX why ?
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+std::shared_ptr<Aidge::Node> Aidge::TanhClamp(const std::string& name) {
+    return std::make_shared<Node>(std::make_shared<TanhClamp_Op>(), name);
 }
\ No newline at end of file
-- 
GitLab


From eba01977f3e1fde1bfe162310e981e5b87f6da7f Mon Sep 17 00:00:00 2001
From: NAUD Maxence <maxence.naud@cea.fr>
Date: Wed, 29 Jan 2025 22:26:39 +0000
Subject: [PATCH 30/60] Change Python minimum version 3.7 -> 3.8

---
 pyproject.toml | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index deb91c7..c7cd4c2 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ description="Quantization algorithms to compress aidge networks."
 dependencies = [
     "numpy>=1.21.6",
 ]
-requires-python = ">= 3.7"
+requires-python = ">= 3.8"
 readme = "README.md"
 license = { file = "LICENSE" }
 classifiers = [
@@ -56,6 +56,19 @@ test-command = "pytest {package}/aidge_quantization/unit_tests"
 # "cp39-win_amd64",
 # "cp310-win_amd64",
 # ]
+# PYLINT
+[tool.pylint.main]
+# A comma-separated list of package or module names from where C extensions may
+# be loaded. Extensions are loading into the active Python interpreter and may
+# run arbitrary code.
+extension-pkg-allow-list = ["aidge_core", "aidge_backend_cpu", "aidge_quantization", "onnx"]
+# Files or directories to be skipped. They should be base names, not paths.
+ignore = ["CVS"]
+# List of module names for which member attributes should not be checked (useful
+# for modules/projects where namespaces are manipulated during runtime and thus
+# existing member attributes cannot be deduced by static analysis). It supports
+# qualified module names, as well as Unix pattern matching.
+ignored-modules = ["aidge_core", "aidge_backend_cpu", "aidge_quantization", "onnx"]
 ## AIDGE DEPENDENCIES DECLARATION
 [tool.cibuildwheel.environment]
 AIDGE_DEPENDENCIES = "aidge_core aidge_backend_cpu aidge_onnx" # format => "dep_1 dep_2 ... dep_n"
-- 
GitLab


From 6109a9d6eeb5d025da27cb56c1e2927e3b2add59 Mon Sep 17 00:00:00 2001
From: NAUD Maxence <maxence.naud@cea.fr>
Date: Wed, 29 Jan 2025 22:27:21 +0000
Subject: [PATCH 31/60] UPD: 'setup.py' to access compilation options from
 environment variables set by 'setup.sh'

---
 setup.py | 55 ++++++++++++++++++++++++++++++++++++-------------------
 1 file changed, 36 insertions(+), 19 deletions(-)

diff --git a/setup.py b/setup.py
index 8774d01..1bfc0ac 100644
--- a/setup.py
+++ b/setup.py
@@ -37,6 +37,7 @@ class AidgePkgBuild(build_ext):
         # This lists the number of processors available on the machine
         # The compilation will use half of them
         max_jobs = str(ceil(multiprocessing.cpu_count() / 2))
+        max_jobs = os.environ.get("AIDGE_NB_PROC", max_jobs)
 
         cwd = pathlib.Path().absolute()
 
@@ -51,14 +52,20 @@ class AidgePkgBuild(build_ext):
         package_prefix = build_lib if not self.editable_mode else SETUP_DIR
         pybind_install_prefix = (package_prefix / PROJECT_NAME).absolute()
 
-        os.chdir(str(build_temp))
-
-        compile_type = os.environ.get("AIDGE_PYTHON_BUILD_TYPE", "Release")
         install_path = (
             os.path.join(sys.prefix, "lib", "libAidge")
             if "AIDGE_INSTALL" not in os.environ
             else os.environ["AIDGE_INSTALL"]
         )
+
+        # Read environment variables for CMake options
+        c_compiler = os.environ.get("AIDGE_C_COMPILER", "gcc")
+        cxx_compiler = os.environ.get("AIDGE_CXX_COMPILER", "g++")
+        build_type = os.environ.get("AIDGE_BUILD_TYPE", "Release")
+        asan = os.environ.get("AIDGE_ASAN", "OFF")
+        with_cuda = os.environ.get("AIDGE_WITH_CUDA", "OFF")
+        cmake_arch = os.environ.get("AIDGE_CMAKE_ARCH", "")
+
         build_gen = os.environ.get("AIDGE_BUILD_GEN", "")
         build_gen_opts = (
             ["-G", build_gen]
@@ -67,26 +74,36 @@ class AidgePkgBuild(build_ext):
         )
         test_onoff = os.environ.get("AIDGE_BUILD_TEST", "OFF")
 
-        self.spawn(
-            [
-                "cmake",
-                *build_gen_opts,
-                str(cwd),
-                f"-DTEST={test_onoff}",
-                f"-DCMAKE_INSTALL_PREFIX:PATH={install_path}",
-                f"-DCMAKE_BUILD_TYPE={compile_type}",
-                "-DPYBIND=ON",
-                f"-DPYBIND_INSTALL_PREFIX:PATH={pybind_install_prefix}",
-                "-DCMAKE_EXPORT_COMPILE_COMMANDS=ON",
-                "-DCOVERAGE=OFF",
-            ]
-        )
+        os.chdir(str(build_temp))
+
+        cmake_cmd = [
+            "cmake",
+            *build_gen_opts,
+            str(cwd),
+            f"-DTEST={test_onoff}",
+            f"-DCMAKE_INSTALL_PREFIX:PATH={install_path}",
+            f"-DCMAKE_BUILD_TYPE={build_type}",
+            f"-DCMAKE_C_COMPILER={c_compiler}",
+            f"-DCMAKE_CXX_COMPILER={cxx_compiler}",
+            f"-DENABLE_ASAN={asan}",
+            f"-DCUDA={with_cuda}",
+            "-DPYBIND=ON",
+            f"-DPYBIND_INSTALL_PREFIX:PATH={pybind_install_prefix}",
+            "-DCMAKE_EXPORT_COMPILE_COMMANDS=1",
+            "-DCOVERAGE=OFF",
+        ]
+
+        # Append architecture-specific arguments if provided
+        if cmake_arch:
+            cmake_cmd.append(cmake_arch)
+
+        self.spawn(cmake_cmd)
 
         if not self.dry_run:
             self.spawn(
-                ["cmake", "--build", ".", "--config", compile_type, "-j", max_jobs]
+                ["cmake", "--build", ".", "--config", build_type, "-j", max_jobs]
             )
-            self.spawn(["cmake", "--install", ".", "--config", compile_type])
+            self.spawn(["cmake", "--install", ".", "--config", build_type])
         os.chdir(str(cwd))
 
 
-- 
GitLab


From e5f28102bedaf39193b95a172891a782d5977330 Mon Sep 17 00:00:00 2001
From: NAUD Maxence <maxence.naud@cea.fr>
Date: Wed, 29 Jan 2025 23:48:19 +0000
Subject: [PATCH 32/60] FEAT: unit-tests/CMakeLists.txt add minimum version for
 Catch2

---
 unit_tests/CMakeLists.txt | 25 ++++++++++++++++++-------
 1 file changed, 18 insertions(+), 7 deletions(-)

diff --git a/unit_tests/CMakeLists.txt b/unit_tests/CMakeLists.txt
index 9d9f815..cfdbf0a 100644
--- a/unit_tests/CMakeLists.txt
+++ b/unit_tests/CMakeLists.txt
@@ -1,12 +1,23 @@
-Include(FetchContent)
+# Catch2 configuration
+set(CATCH2_MIN_VERSION 3.3.0)
 
-FetchContent_Declare(
-  Catch2
-  GIT_REPOSITORY https://github.com/catchorg/Catch2.git
-  GIT_TAG        v3.0.1 # or a later release
-)
+# Try to find system installed Catch2
+find_package(Catch2 ${CATCH2_MIN_VERSION} QUIET)
 
-FetchContent_MakeAvailable(Catch2)
+if(NOT Catch2_FOUND)
+    message(STATUS "Catch2 not found in system, retrieving from git")
+    Include(FetchContent)
+
+    FetchContent_Declare(
+      Catch2
+      GIT_REPOSITORY https://github.com/catchorg/Catch2.git
+      GIT_TAG        devel # development branch; any release >= CATCH2_MIN_VERSION also works
+    )
+    FetchContent_MakeAvailable(Catch2)
+    message(STATUS "Fetched Catch2 version ${Catch2_VERSION}")
+else()
+    message(STATUS "Using system Catch2 version ${Catch2_VERSION}")
+endif()
 
 file(GLOB_RECURSE src_files "*.cpp")
 
-- 
GitLab


From ec304b0c321ad42c3856f9097ec407dc7e6d8877 Mon Sep 17 00:00:00 2001
From: NAUD Maxence <maxence.naud@cea.fr>
Date: Wed, 29 Jan 2025 23:50:48 +0000
Subject: [PATCH 33/60] UPD: CMakeLists.txt enforce C++14 and try to reorder
 sections

---
 CMakeLists.txt | 172 +++++++++++++++++++++++++------------------------
 1 file changed, 89 insertions(+), 83 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 80c5ae7..b3c6d45 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,15 +1,22 @@
 # CMake >= 3.18 is required for good support of FindCUDAToolkit
-cmake_minimum_required(VERSION 3.18) # XXX 3.18
-set(CXX_STANDARD 14)
+cmake_minimum_required(VERSION 3.18)
 
-file(STRINGS "${CMAKE_SOURCE_DIR}/version.txt" version)
+set(CMAKE_CXX_STANDARD 14)
+set(CMAKE_CXX_STANDARD_REQUIRED ON)
+set(CMAKE_CXX_EXTENSIONS  OFF)
+
+# Read project metadata
 file(STRINGS "${CMAKE_SOURCE_DIR}/project_name.txt" project)
+message(STATUS "Project name: ${project}")
 
+file(STRINGS "${CMAKE_SOURCE_DIR}/version.txt" version)
 # Parse version.txt to retrieve Major, Minor and Path
 string(REGEX MATCH "([0-9]+)\\.([0-9]+)\\.([0-9]+)" _ MATCHES ${version})
 set(PROJECT_VERSION_MAJOR ${CMAKE_MATCH_1})
 set(PROJECT_VERSION_MINOR ${CMAKE_MATCH_2})
 set(PROJECT_VERSION_PATCH ${CMAKE_MATCH_3})
+message(STATUS "Project version: ${version}")
+
 
 # Retrieve latest git commit
 execute_process(
@@ -19,17 +26,25 @@ execute_process(
     OUTPUT_STRIP_TRAILING_WHITESPACE
     ERROR_QUIET
 )
-
-message(STATUS "Project name: ${project}")
-message(STATUS "Project version: ${version}")
 message(STATUS "Latest git commit: ${GIT_COMMIT_HASH}")
 
-message(STATUS "Creating ${CMAKE_CURRENT_SOURCE_DIR}/include/aidge/quantization_version.h")
 
 project(${project}
         VERSION ${version}
         DESCRIPTION "Quantization methods for the Aidge framework."
         LANGUAGES CXX)
+
+if(NOT $ENV{AIDGE_INSTALL} STREQUAL "")
+    set(CMAKE_INSTALL_PREFIX $ENV{AIDGE_INSTALL})
+    list(APPEND CMAKE_PREFIX_PATH $ENV{AIDGE_INSTALL})
+    message(WARNING "Env var AIDGE_INSTALL detected : $ENV{AIDGE_INSTALL}. Set CMAKE_INSTALL_PREFIX to AIDGE_INSTALL & added to CMAKE_PREFIX_PATH"
+                    "\n\tCMAKE_INSTALL_PREFIX = ${CMAKE_INSTALL_PREFIX}"
+                    "\n\tCMAKE_PREFIX_PATH = ${CMAKE_PREFIX_PATH}")
+endif()
+
+message(STATUS "Creating ${CMAKE_CURRENT_SOURCE_DIR}/include/aidge/quantization_version.h")
+
+
 # Note: Using configure_file later in the code make so that version variables are lost...
 # I tried to set in internal cache but it failed.
 # Current code is working, but there might be a scope issue.
@@ -39,21 +54,12 @@ configure_file(
     "${CMAKE_CURRENT_SOURCE_DIR}/include/aidge/quantization_version.h"
 )
 
-# Note : project name is {project} and python module name is also {project}
-set(module_name _${project}) # target name
-set(pybind_module_name ${CMAKE_PROJECT_NAME}) # name of submodule for python bindings
-
-set(CXX_STANDARD 14)
-
-##############################################
-# Import utils CMakeLists
-set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake")
 
 ##############################################
 # Define options
-option(PYBIND "python binding" ON)
+option(PYBIND "python binding" OFF)
 option(WERROR "Warning as error" OFF)
-option(TEST "Enable tests" ON)
+option(TEST "Enable tests" OFF)
 option(COVERAGE "Enable coverage" OFF)
 option(CUDA "Enable CUDA backend" OFF) # XXX OFF
 option(ENABLE_ASAN "Enable ASan (AddressSanitizer) for runtime analysis of memory use (over/underflow, memory leak, ...)" OFF)
@@ -61,74 +67,55 @@ option(ENABLE_ASAN "Enable ASan (AddressSanitizer) for runtime analysis of memor
 ##############################################
 # Import utils CMakeLists
 set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake")
-include(PybindModuleCreation)
 
 if(CMAKE_COMPILER_IS_GNUCXX AND COVERAGE)
     Include(CodeCoverage)
 endif()
 
+# Set variables
 if(CUDA)
     enable_language(CUDA)
-
     message(STATUS "Cuda compiler version = ${CMAKE_CUDA_COMPILER_VERSION}")
     # Define a preprocessor macro with the Cuda compiler version
     add_definitions(-DCUDA_COMPILER_VERSION="${CMAKE_CUDA_COMPILER_VERSION}")
 endif()
 
-if(NOT $ENV{AIDGE_INSTALL} STREQUAL "")
-    set(CMAKE_INSTALL_PREFIX $ENV{AIDGE_INSTALL})
-    list(APPEND CMAKE_PREFIX_PATH $ENV{AIDGE_INSTALL})
-    message(WARNING "Env var AIDGE_INSTALL detected : $ENV{AIDGE_INSTALL}. Set CMAKE_INSTALL_PREFIX to AIDGE_INSTALL & added to CMAKE_PREFIX_PATH"
-                    "\n\tCMAKE_INSTALL_PREFIX = ${CMAKE_INSTALL_PREFIX}"
-                    "\n\tCMAKE_PREFIX_PATH = ${CMAKE_PREFIX_PATH}")
+# Source files
+if(CUDA)
+    file(GLOB_RECURSE src_files "src/*.cpp" "src/*.cu")
+else()
+    file(GLOB_RECURSE src_files "src/*.cpp")
 endif()
 
-# ##############################################
-# Find system dependencies
+# Header files
+file(GLOB_RECURSE inc_files "include/*.hpp")
 
-if(CUDA)
-    find_package(CUDAToolkit REQUIRED)
-endif()
+# Note: cxx project name is {CMAKE_PROJECT_NAME} and python module name is also {CMAKE_PROJECT_NAME}
+set(module_name _${CMAKE_PROJECT_NAME}) # target name
+add_library(${module_name} ${src_files} ${inc_files})
+set(pybind_module_name ${CMAKE_PROJECT_NAME}) # name of submodule for python bindings
 
 
-##############################################
-# Find system dependencies
+# Dependencies and linking
 find_package(aidge_core REQUIRED)
 find_package(aidge_backend_cpu REQUIRED)
+target_link_libraries(${module_name}
+    PUBLIC
+        _aidge_core
+        _aidge_backend_cpu
+)
 if(CUDA)
+    find_package(CUDAToolkit REQUIRED)
     find_package(aidge_backend_cuda REQUIRED)
-endif()
-
-##############################################
-# Create target and set properties
-
-if(CUDA)
-    file(GLOB_RECURSE src_files "src/*.cpp" "src/*.cu")
-    file(GLOB_RECURSE inc_files "include/*.hpp")
-
-    add_library(${module_name} ${src_files} ${inc_files})
     target_link_libraries(${module_name}
         PUBLIC
-            _aidge_core # _ is added because we link the target not the project
-            _aidge_backend_cpu
-            # _aidge_backend_cuda # XXX
             CUDA::cudart
             CUDA::cublas
             cudnn
     )
-else()
-    file(GLOB_RECURSE src_files "src/*.cpp")
-    file(GLOB_RECURSE inc_files "include/*.hpp")
-
-    add_library(${module_name} ${src_files} ${inc_files})
-    target_link_libraries(${module_name}
-        PUBLIC
-            _aidge_core # _ is added because we link the target not the project
-            _aidge_backend_cpu
-    )
 endif()
 
-#Set target properties
+# Include directories
 target_include_directories(${module_name}
     PUBLIC
         $<INSTALL_INTERFACE:include>
@@ -137,6 +124,7 @@ target_include_directories(${module_name}
         ${CMAKE_CURRENT_SOURCE_DIR}/src
 )
 
+# Compilation settings
 if(CUDA)
     if(NOT DEFINED CMAKE_CUDA_STANDARD)
         set(CMAKE_CUDA_STANDARD 14)
@@ -157,23 +145,44 @@ if (PYBIND)
     generate_python_binding(${pybind_module_name} ${module_name})
 endif()
 
-# XXX HERE !!!
-target_link_libraries(${module_name} PRIVATE fmt::fmt)
 target_compile_features(${module_name} PRIVATE cxx_std_14)
+target_link_libraries(${module_name} PRIVATE fmt::fmt)
 
+####################################
+# Compilation options and warnings
 target_compile_options(${module_name} PRIVATE
+    # Options for Clang, AppleClang, and GCC compilers
     $<$<COMPILE_LANGUAGE:CPP>:$<$<OR:$<CXX_COMPILER_ID:Clang>,$<CXX_COMPILER_ID:AppleClang>,$<CXX_COMPILER_ID:GNU>>:
-    -Wall -Wextra -Wold-style-cast -Winline -pedantic -Werror=narrowing -Wshadow $<$<BOOL:${WERROR}>:-Werror>>>)
+        -Wall               # Enable all warnings
+        -Wextra             # Enable extra warnings
+        -Wold-style-cast    # Warn about C-style casts
+        -Winline            # Warn if inline expansion fails
+        -pedantic           # Enforce strict ISO C++ standards
+        -Werror=narrowing   # Treat narrowing conversions as errors
+        -Wshadow            # Warn about variable shadowing
+        $<$<BOOL:${WERROR}>:-Werror>  # Optionally treat warnings as errors
+    >>
+)
+
+# Additional MSVC-specific warning level
+target_compile_options(${module_name} PRIVATE
+    $<$<CXX_COMPILER_ID:MSVC>:
+        /W4  # Warning level 4 (highest for MSVC)
+    >
+)
+
+# CUDA-specific compile options
 if(CUDA)
     target_compile_options(${module_name} PRIVATE
         $<$<COMPILE_LANGUAGE:CUDA>:
-        -Wall>)
+            -Wall  # Enable all warnings for CUDA
+        >
+    )
 endif()
-target_compile_options(${module_name} PRIVATE
-    $<$<CXX_COMPILER_ID:MSVC>:
-    /W4>)
 
+# Coverage flags for GCC
 if(CMAKE_COMPILER_IS_GNUCXX AND COVERAGE)
+    include(CodeCoverage)
     append_coverage_compiler_flags()
 endif()
 
@@ -183,29 +192,31 @@ endif()
 include(GNUInstallDirs)
 set(INSTALL_CONFIGDIR ${CMAKE_INSTALL_LIBDIR}/cmake/${project})
 
+# Install the library target
 install(TARGETS ${module_name} EXPORT ${project}-targets
-  LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
-  ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
-  RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
+    LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
+    ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
+    RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
 )
 
+# Install header files
 install(DIRECTORY include/ DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
 
-#Export the targets to a script
-
+# Export targets for other projects to use
 install(EXPORT ${project}-targets
- FILE "${project}-targets.cmake"
- DESTINATION ${INSTALL_CONFIGDIR}
- COMPONENT ${module_name}
+    FILE "${project}-targets.cmake"
+    DESTINATION ${INSTALL_CONFIGDIR}
+    COMPONENT ${module_name}
 )
 
-if (PYBIND)
+# Python binding installation
+if(PYBIND)
     install(TARGETS ${pybind_module_name}
         DESTINATION ${PYBIND_INSTALL_PREFIX}
     )
 endif()
 
-#Create a ConfigVersion.cmake file
+# Create and install CMake configuration files
 include(CMakePackageConfigHelpers)
 write_basic_package_version_file(
     "${CMAKE_CURRENT_BINARY_DIR}/${project}-config-version.cmake"
@@ -218,15 +229,14 @@ configure_package_config_file("${project}-config.cmake.in"
     INSTALL_DESTINATION ${INSTALL_CONFIGDIR}
 )
 
-#Install the config, configversion and custom find modules
+# Install CMake configuration files
 install(FILES
     "${CMAKE_CURRENT_BINARY_DIR}/${project}-config.cmake"
     "${CMAKE_CURRENT_BINARY_DIR}/${project}-config-version.cmake"
     DESTINATION ${INSTALL_CONFIGDIR}
 )
 
-##############################################
-## Exporting from the build tree
+# Export from build tree
 export(EXPORT ${project}-targets
     FILE "${CMAKE_CURRENT_BINARY_DIR}/${project}-targets.cmake")
 
@@ -234,10 +244,6 @@ export(EXPORT ${project}-targets
 ##############################################
 ## Add test
 if(TEST)
-    if (AIDGE_REQUIRES_PYTHON AND NOT AIDGE_PYTHON_HAS_EMBED)
-        message(WARNING "Skipping compilation of tests: missing Python embedded interpreter")
-    else()
-        enable_testing()
-        add_subdirectory(unit_tests)
-    endif()
+    enable_testing()
+    add_subdirectory(unit_tests)
 endif()
-- 
GitLab


From b4d50dfb37aab037ea18f1f86fc5b820dba39408 Mon Sep 17 00:00:00 2001
From: NAUD Maxence <maxence.naud@cea.fr>
Date: Wed, 29 Jan 2025 23:52:56 +0000
Subject: [PATCH 34/60] ADD: basic test

---
 unit_tests/Test_QuantPTQ.cpp | 50 +++++++++++++++++-------------------
 1 file changed, 24 insertions(+), 26 deletions(-)

diff --git a/unit_tests/Test_QuantPTQ.cpp b/unit_tests/Test_QuantPTQ.cpp
index 36377e8..e7211ce 100644
--- a/unit_tests/Test_QuantPTQ.cpp
+++ b/unit_tests/Test_QuantPTQ.cpp
@@ -1,21 +1,19 @@
-// #include <catch2/catch_test_macros.hpp>
-
-// #include "aidge/data/Tensor.hpp"
-// #include "aidge/backend/TensorImpl.hpp"
-// #include "aidge/backend/cpu.hpp"
-// #include "aidge/operator/Conv.hpp"
-// #include "aidge/operator/Scaling.hpp"
-// #include "aidge/operator/GenericOperator.hpp"
-// #include "aidge/graph/GraphView.hpp"
-// #include "aidge/QuantPTQ.hpp"
-// #include "aidge/scheduler/Scheduler.hpp"
-// #include "aidge/hook/OutputRange.hpp"
-// #include "aidge/operator/Producer.hpp"
-
-// #include <unordered_map>
-
-// using namespace Aidge;
-// //using namespace Aidge_HELPER;
+/********************************************************************************
+ * Copyright (c) 2023 CEA-List
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License 2.0 which is available at
+ * http://www.eclipse.org/legal/epl-2.0.
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ ********************************************************************************/
+
+#include <catch2/catch_test_macros.hpp>
+
+TEST_CASE("[tmp] basic test") {
+    REQUIRE(true == true);
+}
 
 // TEST_CASE("[aidge_module_template/ref_cpp/quantization] PTQ : Quantize Graph") {
 
@@ -79,7 +77,7 @@
 
 //     std::shared_ptr<Tensor> myInput =
 //             std::make_shared<Tensor>(
-//                 Array4D<float,2,3,5,5> { 
+//                 Array4D<float,2,3,5,5> {
 //                     {
 //                         {
 //                             {{  0.,   1.,   2.,   3.,   4.},
@@ -124,7 +122,7 @@
 //             );
 
 //     auto dataProvider = Producer(myInput, "dataProvider");
-//     Tensor myOutput = Array4D<float,2,4,3,3> { 
+//     Tensor myOutput = Array4D<float,2,4,3,3> {
 //         {
 //             {
 //                 {{ 15226.,  15577.,  15928.},
@@ -188,9 +186,9 @@
 //                        "%f"
 //                        "\n",
 //                        max_output_conv);
-        
+
 //     }
-    
+
 //     float max_output_relu = std::static_pointer_cast<OutputRange>(myReLU1->getOperator()->getHook("output_range"))->getOutput(0);
 //     if(verbose) {
 //                 printf("[hook] OutputRange(forward) :: ReLU output max: "
@@ -222,10 +220,10 @@
 //                        "\n",
 //                        (nodePtr->type()).c_str(), (nodePtr->name()).c_str());
 //         }
-//     }    
-    
+//     }
+
 //     SequentialScheduler scheduler_v2(g1);
-    
+
 //     scheduler_v2.forward();
 //     scheduler_v2.generateScheduling(false);
 //     std::vector<std::shared_ptr<Node>> ordered_graph_view_v2 = scheduler_v2.getStaticScheduling();
@@ -242,7 +240,7 @@
 //                        "\n",
 //                        (nodePtr->type()).c_str(), (nodePtr->name()).c_str());
 //         }
-//     } 
+//     }
 
 
 // }
\ No newline at end of file
-- 
GitLab


From e01454400c872b2eb402ad5a9741f81506bac3e3 Mon Sep 17 00:00:00 2001
From: bhalimi <benjamin.halimi@cea.fr>
Date: Fri, 31 Jan 2025 15:25:10 +0000
Subject: [PATCH 35/60] enable the cuda backend

---
 CMakeLists.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index b3c6d45..17dd74a 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -61,7 +61,7 @@ option(PYBIND "python binding" OFF)
 option(WERROR "Warning as error" OFF)
 option(TEST "Enable tests" OFF)
 option(COVERAGE "Enable coverage" OFF)
-option(CUDA "Enable CUDA backend" OFF) # XXX OFF
+option(CUDA "Enable CUDA backend" ON) # XXX temporarily ON, default could be OFF
 option(ENABLE_ASAN "Enable ASan (AddressSanitizer) for runtime analysis of memory use (over/underflow, memory leak, ...)" OFF)
 
 ##############################################
-- 
GitLab


From 134827717cebaa2fb6e952c356117b764b6eb06b Mon Sep 17 00:00:00 2001
From: bhalimi <benjamin.halimi@cea.fr>
Date: Fri, 31 Jan 2025 15:26:07 +0000
Subject: [PATCH 36/60] remove unused log

---
 src/PTQ/CLE.cpp | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/src/PTQ/CLE.cpp b/src/PTQ/CLE.cpp
index cbfb91f..40b9e42 100644
--- a/src/PTQ/CLE.cpp
+++ b/src/PTQ/CLE.cpp
@@ -124,7 +124,6 @@ void crossLayerEqualization(std::shared_ptr<GraphView> graphView, double targetD
 
     do
     {
-        ++iteration;
         maxRangeDelta = 0.0;
         
         for (size_t i = 0; i < (affineNodeVector.size() - 1); i++)
@@ -149,9 +148,6 @@ void crossLayerEqualization(std::shared_ptr<GraphView> graphView, double targetD
         }
     }
     while (maxRangeDelta > targetDelta);
-
-    Log::notice("CLE completed after {} iterations. Final max range delta: {:.6f}",
-                iteration, maxRangeDelta);
 }
 
 }
\ No newline at end of file
-- 
GitLab


From bccee6f45385093c984110635f83bc798a183cf1 Mon Sep 17 00:00:00 2001
From: bhalimi <benjamin.halimi@cea.fr>
Date: Tue, 11 Feb 2025 15:50:16 +0000
Subject: [PATCH 37/60] handle PaddedConv2Ds in the QAT and BatchNorm insertion
 code

---
 setup.py                     |  2 +-
 src/QAT/QAT_LSQ.cpp          |  8 ++++++--
 src/recipes/QuantRecipes.cpp | 24 ++++++++----------------
 3 files changed, 15 insertions(+), 19 deletions(-)

diff --git a/setup.py b/setup.py
index 1bfc0ac..cde7c1e 100644
--- a/setup.py
+++ b/setup.py
@@ -63,7 +63,7 @@ class AidgePkgBuild(build_ext):
         cxx_compiler = os.environ.get("AIDGE_CXX_COMPILER", "g++")
         build_type = os.environ.get("AIDGE_BUILD_TYPE", "Release")
         asan = os.environ.get("AIDGE_ASAN", "OFF")
-        with_cuda = os.environ.get("AIDGE_WITH_CUDA", "OFF")
+        with_cuda = os.environ.get("AIDGE_WITH_CUDA", "ON") # default could be "OFF"
         cmake_arch = os.environ.get("AIDGE_CMAKE_ARCH", "")
 
         build_gen = os.environ.get("AIDGE_BUILD_GEN", "")
diff --git a/src/QAT/QAT_LSQ.cpp b/src/QAT/QAT_LSQ.cpp
index 0508fc7..ff1c44a 100644
--- a/src/QAT/QAT_LSQ.cpp
+++ b/src/QAT/QAT_LSQ.cpp
@@ -82,12 +82,14 @@ static bool initStepSize(std::shared_ptr<Node> quantizer)
 
 static void setupInputQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits)
 {
-    const auto matches = SinglePassGraphMatching(graphView).match("(Conv2D#|FC#)");
+    const auto matches = SinglePassGraphMatching(graphView).match("(Conv2D#|PaddedConv2D#|FC#)");
 
     for (const auto& match : matches) 
     {
         auto linearNode = match.graph->rootNode(); 
 
+        // Log::notice(" SET INPUT QUANTIZER : {} ", linearNode->type());
+
         std::pair<int, int> signedRange = {-std::pow(2, nbBits - 1), std::pow(2, nbBits - 1) - 1};
         std::pair<int, int> unsignedRange = {0, std::pow(2, nbBits) - 1};
 
@@ -135,7 +137,7 @@ static void setupInputQuantizers(std::shared_ptr<GraphView> graphView, size_t nb
 
 static void setupParamQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits)
 {
-    const auto matches = SinglePassGraphMatching(graphView).match("(Conv2D#|FC#)");
+    const auto matches = SinglePassGraphMatching(graphView).match("(Conv2D#|PaddedConv2D#|FC#)");
 
     std::pair<int, int> signedRange = {-std::pow(2, nbBits - 1), std::pow(2, nbBits - 1) - 1};
 
@@ -143,6 +145,8 @@ static void setupParamQuantizers(std::shared_ptr<GraphView> graphView, size_t nb
     {       
         auto linearNode = match.graph->rootNode(); 
 
+        // Log::notice(" SET PARAM QUANTIZER : {} ", linearNode->type());
+
         // TODO : double check this, and use createUniqueName()
         auto quantizerName = makeUniqueName(linearNode->name() + "_lsq_p", graphView);  
         auto quantizerNode = LSQ(signedRange, quantizerName); 
diff --git a/src/recipes/QuantRecipes.cpp b/src/recipes/QuantRecipes.cpp
index 7f01b24..f03eb46 100644
--- a/src/recipes/QuantRecipes.cpp
+++ b/src/recipes/QuantRecipes.cpp
@@ -9,24 +9,13 @@
  *
  ********************************************************************************/
 
-/*
-#include "aidge/data/Tensor.hpp"
-#include "aidge/graph/GraphView.hpp"
-#include "aidge/graph/Node.hpp"
-#include "aidge/scheduler/SequentialScheduler.hpp"
-#include "aidge/scheduler/Scheduler.hpp"
-#include "aidge/utils/Log.hpp"
-
-#include "aidge/operator/Producer.hpp"
-#include "aidge/operator/Mul.hpp"
-#include "aidge/operator/ReLU.hpp"
-#include "aidge/operator/Scaling.hpp"
-*/
 
 #include "aidge/operator/Conv.hpp"
 #include "aidge/operator/BatchNorm.hpp"
 //#include "aidge/quantization/PTQ/PTQ.hpp"
 #include "aidge/recipes/QuantRecipes.hpp"
+#include "aidge/graph/Node.hpp"
+
 
 namespace Aidge 
 {
@@ -55,11 +44,13 @@ void insertBatchNormNodes(std::shared_ptr<GraphView> graphView)
 {
     for (std::shared_ptr<Node> parentNode : graphView->getNodes())
     {
-        if (parentNode->type() == "Conv2D")
+        // TODO : use graph matching
+
+        if (parentNode->type() == "Conv2D" || parentNode->type() == "PaddedConv2D")
         {
-            std::shared_ptr<Conv_Op<2>> convOperator = std::static_pointer_cast<Conv_Op<2>> (parentNode->getOperator());
+            std::shared_ptr<OperatorTensor> convOperator = std::static_pointer_cast<OperatorTensor> (parentNode->getOperator());
             int nbChannels = convOperator->getInput(1)->dims()[0];
-            Log::info(" NB CHANNELS = {} ", nbChannels);
+            Log::notice(" NB CHANNELS = {} ", nbChannels);
 
             std::string batchnormNodeName = makeUniqueName(parentNode->name() + "_BN", graphView);
             std::shared_ptr<Node> batchnormNode = BatchNorm<2>(nbChannels, 1e-5, 0.1, false, batchnormNodeName);
@@ -118,6 +109,7 @@ std::string makeUniqueName(std::string baseName, std::shared_ptr<GraphView> grap
     return newName;
 }
 
+
 void sanitizeNodeNames(std::shared_ptr<GraphView> graphView)
 {
     for (std::shared_ptr<Node> node : graphView->getNodes())
-- 
GitLab
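
A side note on the nbChannels lookup in the QuantRecipes.cpp hunk above: it reads getInput(1)->dims()[0], which works because a 2-D convolution weight tensor puts the output-channel dimension first (the usual [outChannels, inChannels, kH, kW] ordering; stated here as an assumption about the Aidge layout). A hypothetical illustration:

    #include <cstddef>
    #include <vector>

    // Weight dims of a 64-filter 3x3 convolution over an RGB input:
    const std::vector<std::size_t> weightDims = {64, 3, 3, 3};
    // dims()[0] is the number of output feature maps, i.e. the size
    // of the BatchNorm node the recipe inserts:
    const int nbChannels = static_cast<int>(weightDims[0]); // 64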


From 16c8b22ba3319d1c0300fd590f91077c173e653a Mon Sep 17 00:00:00 2001
From: bhalimi <benjamin.halimi@cea.fr>
Date: Fri, 14 Feb 2025 13:28:57 +0000
Subject: [PATCH 38/60] minor changes

---
 include/aidge/quantization/PTQ/PTQ.hpp | 8 ++++----
 python_binding/pybind_PTQ.cpp          | 4 ++--
 src/QAT/QAT_LSQ.cpp                    | 2 +-
 3 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/include/aidge/quantization/PTQ/PTQ.hpp b/include/aidge/quantization/PTQ/PTQ.hpp
index 4fc38bc..bfe671e 100644
--- a/include/aidge/quantization/PTQ/PTQ.hpp
+++ b/include/aidge/quantization/PTQ/PTQ.hpp
@@ -124,11 +124,11 @@ namespace Aidge {
      * @brief Quantize an already normalized (in term of parameters and activations) network.
      * @param graphView The GraphView to be quantized.
      * @param nbBits The desired number of bits of the quantization.
-     * @param applyRounding Whether to apply the rounding operations or not.
+     * @param noQuant Whether to skip the rounding operations (i.e. disable the actual quantization) or not.
      * @param optimizeSigns Whether to take account of the IO signs of the operators or not.
      * @param verbose Whether to print the sign map or not.
      */
-    void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits, bool applyRounding, bool optimizeSigns, bool verbose);
+    void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits, bool noQuant, bool optimizeSigns, bool verbose);
 
     /**
      * @brief Main quantization routine. Performs every step of the quantization pipeline.
@@ -136,12 +136,12 @@ namespace Aidge {
      * @param nbBits The desired number of bits of the quantization.
      * @param inputDataSet The input dataset on which the value ranges are computed.
      * @param clippingMode: Type of the clipping optimization. Can be either 'MAX', 'MSE', 'AA' or 'KL'.
-     * @param applyRounding Whether to apply the rounding operations or not.
+     * @param noQuant Whether to skip the rounding operations (i.e. disable the actual quantization) or not.
      * @param optimizeSigns Whether to take account of the IO signs of the operators or not.
      * @param singleShift Whether to convert the scaling factors into powers of two. If true the approximations are compensated using the previous nodes weights.
      * @param verbose Whether to print internal informations about the quantization process.
      */
-    void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits, std::vector<std::shared_ptr<Tensor>> inputDataSet, Clipping clippingMode, bool applyRounding, bool optimizeSigns, bool singleShift, bool useCuda, bool verbose);
+    void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits, std::vector<std::shared_ptr<Tensor>> inputDataSet, Clipping clippingMode, bool noQuant, bool optimizeSigns, bool singleShift, bool useCuda, bool verbose);
 
     /**
      * @brief Compute the weight ranges of every affine node. Provided for debugging purposes.
diff --git a/python_binding/pybind_PTQ.cpp b/python_binding/pybind_PTQ.cpp
index b5193bd..1de7976 100644
--- a/python_binding/pybind_PTQ.cpp
+++ b/python_binding/pybind_PTQ.cpp
@@ -78,7 +78,7 @@ void init_PTQ(py::module &m) {
     :type value_ranges: list of float.
     )mydelimiter");
 
-    m.def("quantize_normalized_network", &quantizeNormalizedNetwork, py::arg("network"), py::arg("nb_bits"), py::arg("no_quant")=false, py::arg("optimize_signs"), py::arg("verbose") = false,
+    m.def("quantize_normalized_network", &quantizeNormalizedNetwork, py::arg("network"), py::arg("nb_bits"), py::arg("no_quantization")=false, py::arg("optimize_signs"), py::arg("verbose") = false,
     R"mydelimiter(
     Quantize an already normalized (in term of parameters and activations) network.
     :param network: The GraphView to be quantized.
@@ -93,7 +93,7 @@ void init_PTQ(py::module &m) {
     :type verbose: bool
     )mydelimiter");
 
-    m.def("quantize_network", &quantizeNetwork ,py::arg("network"), py::arg("nb_bits"), py::arg("input_dataset"), py::arg("clipping_mode") = Clipping::MAX , py::arg("no_quantization") = true, py::arg("optimize_signs") = false, py::arg("single_shift") = false,  py::arg("use_cuda") = false, py::arg("verbose") = false,
+    m.def("quantize_network", &quantizeNetwork ,py::arg("network"), py::arg("nb_bits"), py::arg("input_dataset"), py::arg("clipping_mode") = Clipping::MAX , py::arg("no_quantization") = false, py::arg("optimize_signs") = false, py::arg("single_shift") = false,  py::arg("use_cuda") = false, py::arg("verbose") = false,
     R"mydelimiter(
     Main quantization routine. Performs every step of the quantization pipeline.
     :param network: The GraphView to be quantized.
diff --git a/src/QAT/QAT_LSQ.cpp b/src/QAT/QAT_LSQ.cpp
index ff1c44a..da09d62 100644
--- a/src/QAT/QAT_LSQ.cpp
+++ b/src/QAT/QAT_LSQ.cpp
@@ -75,7 +75,7 @@ static bool initStepSize(std::shared_ptr<Node> quantizer)
 
     stepSizeProducer->getOperator()->setOutput(0, stepSizeTensor);
 
-    Log::info(" [ INIT STEP SIZE = {} ] ", stepSize);
+    Log::notice(" [ INIT STEP SIZE = {} ] ", stepSize);
 
     return false;
 }
-- 
GitLab
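
With the applyRounding -> noQuant rename, a hypothetical call site of the main entry point looks as follows; the argument values are illustrative, the signature is the one declared in PTQ.hpp above:

    #include <memory>
    #include <vector>

    #include "aidge/quantization/PTQ/PTQ.hpp"

    void runPTQ(std::shared_ptr<Aidge::GraphView> graphView,
                std::vector<std::shared_ptr<Aidge::Tensor>> calibrationSet)
    {
        Aidge::quantizeNetwork(graphView, 8, calibrationSet,
                               Aidge::Clipping::MAX,
                               /*noQuant=*/false,
                               /*optimizeSigns=*/true,
                               /*singleShift=*/false,
                               /*useCuda=*/false,
                               /*verbose=*/true);
    }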


From 22e47ad9fb629f85ed4f1c5fa981c1d195c0201b Mon Sep 17 00:00:00 2001
From: bhalimi <benjamin.halimi@cea.fr>
Date: Fri, 14 Feb 2025 13:43:58 +0000
Subject: [PATCH 39/60] use the scalar tensor constructor

---
 src/PTQ/CLE.cpp     | 2 +-
 src/PTQ/PTQ.cpp     | 4 ++--
 src/QAT/QAT_LSQ.cpp | 3 ++-
 3 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/src/PTQ/CLE.cpp b/src/PTQ/CLE.cpp
index 40b9e42..28858d0 100644
--- a/src/PTQ/CLE.cpp
+++ b/src/PTQ/CLE.cpp
@@ -49,7 +49,7 @@ static void rescaleTensor(std::shared_ptr<Tensor> tensor, double scaling)
     mulOp.setDataType(tensor->dataType());
     mulOp.setBackend(tensor->backend());
 
-    std::shared_ptr<Aidge::Tensor> scalingTensor = std::make_shared<Aidge::Tensor>(Aidge::Array1D<double, 1> {scaling});
+    std::shared_ptr<Aidge::Tensor> scalingTensor = std::make_shared<Aidge::Tensor>(scaling);
     scalingTensor->setDataType(tensor->dataType());
     scalingTensor->setBackend(tensor->backend());
 
diff --git a/src/PTQ/PTQ.cpp b/src/PTQ/PTQ.cpp
index 09b039f..7c29ee0 100644
--- a/src/PTQ/PTQ.cpp
+++ b/src/PTQ/PTQ.cpp
@@ -78,7 +78,7 @@ static void rescaleTensor(std::shared_ptr<Tensor> tensor, double scaling)
     mulOp.setDataType(tensor->dataType());
     mulOp.setBackend(tensor->backend());
 
-    std::shared_ptr<Aidge::Tensor> scalingTensor = std::make_shared<Aidge::Tensor>(Aidge::Array1D<double, 1> {scaling});
+    std::shared_ptr<Aidge::Tensor> scalingTensor = std::make_shared<Aidge::Tensor>(scaling);
     scalingTensor->setDataType(tensor->dataType());
     scalingTensor->setBackend(tensor->backend());
 
@@ -932,7 +932,7 @@ static void insertCompensationNodes(std::shared_ptr<GraphView> graphView, std::u
                 // Add the coeff producer to the multiplier node
 
                 std::shared_ptr<Node> coeffProducer = addProducer(mulNode, 1, {1}, ""); 
-                std::shared_ptr<Tensor> coeffTensor = std::make_shared<Tensor>(Array1D<double, 1> {signedMax});
+                std::shared_ptr<Tensor> coeffTensor = std::make_shared<Tensor>(signedMax);
                 coeffProducer->getOperator()->setOutput(0, coeffTensor);
 
                 coeffProducer->getOperator()->setDataType(DataType::Float64);
diff --git a/src/QAT/QAT_LSQ.cpp b/src/QAT/QAT_LSQ.cpp
index da09d62..6eae077 100644
--- a/src/QAT/QAT_LSQ.cpp
+++ b/src/QAT/QAT_LSQ.cpp
@@ -65,7 +65,8 @@ static bool initStepSize(std::shared_ptr<Node> quantizer)
     float inputStd = getTensorStd(quantizerOp->getInput(0));
     float stepSize = 8.0f * (inputStd / (quantizerOp->range().second));
 
-    auto stepSizeTensor = std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}}));
+    // TODO : use the scalar constructor
+    auto stepSizeTensor = std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}})); 
 
     // XXX Manage backend here ?
     stepSizeTensor->setBackend(quantizerOp->getInput(0)->backend());
-- 
GitLab
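
Note: the scalar Tensor constructor used above replaces the explicit
one-element Array1D. A minimal sketch of the two equivalent forms:

    // before: one-element array initializer
    auto t0 = std::make_shared<Aidge::Tensor>(Aidge::Array1D<double, 1> {3.0});
    // after: scalar constructor, as introduced by this patch
    auto t1 = std::make_shared<Aidge::Tensor>(3.0);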


From 4260a27622bfd2a41dfd420614520b1de288a46c Mon Sep 17 00:00:00 2001
From: bhalimi <benjamin.halimi@cea.fr>
Date: Wed, 8 Jan 2025 10:32:06 +0000
Subject: [PATCH 40/60] improve tensor manipulation routines + enhance
 insertCompensationNodes

---
 src/PTQ/CLE.cpp     |  71 +++++++++++++++++------
 src/PTQ/PTQ.cpp     | 137 ++++++++++++++++++++++++++------------------
 src/QAT/QAT_LSQ.cpp |   9 +--
 3 files changed, 137 insertions(+), 80 deletions(-)

diff --git a/src/PTQ/CLE.cpp b/src/PTQ/CLE.cpp
index 5265d9c..63d3b45 100644
--- a/src/PTQ/CLE.cpp
+++ b/src/PTQ/CLE.cpp
@@ -24,6 +24,12 @@
 #include "aidge/operator/OperatorTensor.hpp"
 #include "aidge/utils/Log.hpp"
 
+#include "aidge/operator/Mul.hpp"
+#include "aidge/operator/ArgMax.hpp"
+#include "aidge/operator/Abs.hpp"
+#include "aidge/operator/Reshape.hpp"
+#include "aidge/operator/Round.hpp"
+
 namespace Aidge
 {
 
@@ -39,27 +45,58 @@ static std::shared_ptr<Tensor> getBiasTensor(std::shared_ptr<Node> node)
 
 static void rescaleTensor(std::shared_ptr<Tensor> tensor, double scaling)
 {
-    // Get the tensor data pointer
-    double * castedTensor = static_cast<double *> (tensor->getImpl()->rawPtr());
-
-    // Rescale the tensor
-    for(std::size_t i = 0; i < tensor->size(); i++)
-        castedTensor[i] *= scaling;
+    auto mulOp = Mul_Op();
+    mulOp.setDataType(tensor->dataType());
+    mulOp.setBackend(tensor->backend());
+
+    std::shared_ptr<Aidge::Tensor> scalingTensor = std::make_shared<Aidge::Tensor>(Aidge::Array1D<float, 1> {static_cast<float>(scaling)});
+    scalingTensor->setDataType(tensor->dataType());
+    scalingTensor->setBackend(tensor->backend());
+
+    mulOp.associateInput(0, tensor);
+    mulOp.associateInput(1, scalingTensor);
+
+    mulOp.forward();
+    
+    auto outTensor = mulOp.getOutput(0);
+    *tensor = *outTensor;
+    //tensor->copyCast(*outTensor);
 }
 
+// TODO : make the retrieval of argmax values backend independent (refCastFrom)
 static double getTensorAbsoluteMax(std::shared_ptr<Tensor> tensor)
 {
-    // Get the tensor data pointer and edit it
-    double * castedTensor = static_cast<double*> (tensor->getImpl()->rawPtr());
-
-    // Get the tensor absolute max value
-    double maxValue = 0.0;
-    for(std::size_t i = 0; i < tensor->size(); ++i) {
-        if(std::fabs(castedTensor[i]) > maxValue) {
-            maxValue = std::fabs(castedTensor[i]);
-        }
-    }
-    return maxValue;
+    // get the abs tensor
+
+    std::shared_ptr<Tensor> absTensor = std::make_shared<Tensor>(tensor->abs());
+
+    // flatten the abs tensor
+
+    std::int64_t nbElement = tensor->size();
+
+    auto reshapeOp = Reshape_Op({nbElement});
+    reshapeOp.setDataType(tensor->dataType());
+    reshapeOp.setBackend(tensor->backend());
+
+    reshapeOp.associateInput(0, absTensor);
+    reshapeOp.forward();
+    std::shared_ptr<Tensor> flatTensor = reshapeOp.getOutput(0);
+
+    // Get the argmax
+
+    auto argmaxOp = ArgMax_Op(0, true, false);
+    argmaxOp.setDataType(tensor->dataType());
+    argmaxOp.setBackend(tensor->backend());
+
+    argmaxOp.associateInput(0, flatTensor);
+    argmaxOp.forward();
+    std::shared_ptr<Tensor> argmaxTensor = argmaxOp.getOutput(0);
+
+    // Return the max
+
+    int maxIndex = std::round(argmaxTensor->get<double>(0));
+
+    return flatTensor->get<double>(maxIndex);
 }
 
 void crossLayerEqualization(std::shared_ptr<GraphView> graphView, double targetDelta)
diff --git a/src/PTQ/PTQ.cpp b/src/PTQ/PTQ.cpp
index 0e26313..6e0b29e 100644
--- a/src/PTQ/PTQ.cpp
+++ b/src/PTQ/PTQ.cpp
@@ -28,6 +28,12 @@
 #include "aidge/operator/BatchNorm.hpp"
 #include "aidge/operator/Conv.hpp"
 
+#include "aidge/operator/ArgMax.hpp"
+#include "aidge/operator/Abs.hpp"
+#include "aidge/operator/Reshape.hpp"
+#include "aidge/operator/Round.hpp"
+
+
 #include "aidge/recipes/Recipes.hpp"
 #include "aidge/recipes/QuantRecipes.hpp"
 
@@ -66,51 +72,75 @@ bool checkArchitecture(std::shared_ptr<GraphView> graphView)
     return true;
 }
 
-static void fillTensor(std::shared_ptr<Tensor> tensor, double value)
+static void rescaleTensor(std::shared_ptr<Tensor> tensor, float scaling)
 {
-    // Get the tensor data pointer
-    double * castedTensor = static_cast <double *> (tensor->getImpl()->rawPtr());
+    auto mulOp = Mul_Op();
+    mulOp.setDataType(tensor->dataType());
+    mulOp.setBackend(tensor->backend());
 
-    // Fill the tensor
-    for(std::size_t i = 0; i < tensor->size(); i++)
-        castedTensor[i] = value;
-}
+    std::shared_ptr<Aidge::Tensor> scalingTensor = std::make_shared<Aidge::Tensor>(Aidge::Array1D<float, 1> {scaling});
+    scalingTensor->setDataType(tensor->dataType());
+    scalingTensor->setBackend(tensor->backend());
 
-static void rescaleTensor(std::shared_ptr<Tensor> tensor, double scaling)
-{
-    // Get the tensor data pointer
-    double * castedTensor = static_cast <double *> (tensor->getImpl()->rawPtr());
+    mulOp.associateInput(0, tensor);
+    mulOp.associateInput(1, scalingTensor);
 
-    // Rescale the tensor
-    for(std::size_t i = 0; i < tensor->size(); i++)
-        castedTensor[i] *= scaling;
+    mulOp.forward();
+    
+    auto outTensor = mulOp.getOutput(0);
+    *tensor = *outTensor;
 }
 
 static void roundTensor(std::shared_ptr<Tensor> tensor)
 {
-    // Get the tensor data pointer
-    double * castedTensor = static_cast <double *> (tensor->getImpl()->rawPtr());
+    auto roundOp = Round_Op();
+    roundOp.setDataType(tensor->dataType());
+    roundOp.setBackend(tensor->backend());
 
-    // Rescale the tensor
-    for(std::size_t i = 0; i < tensor->size(); i++)
-        castedTensor[i] = std::nearbyint(castedTensor[i]);//Round
+    roundOp.associateInput(0, tensor);
+    roundOp.forward();
+    
+    auto outTensor = roundOp.getOutput(0);
+    *tensor = *outTensor;
 }
 
-static double getTensorAbsoluteMax(std::shared_ptr <Tensor> tensor)
+// TODO : make the retrieval of argmax values backend independent (refCastFrom)
+static double getTensorAbsoluteMax(std::shared_ptr<Tensor> tensor)
 {
-    // Get the tensor data pointer and edit it
-    double * castedTensor = static_cast<double*>(tensor->getImpl()->rawPtr());
-
-    // Get the tensor absolute max value
-    double maxValue = 0.0f;
-    for(std::size_t i = 0; i < tensor->size(); ++i) {
-        if(std::fabs(castedTensor[i]) > maxValue) {
-            maxValue = std::fabs(castedTensor[i]);
-        }
-    }
-    return maxValue;
+    // get the abs tensor
+
+    std::shared_ptr<Tensor> absTensor = std::make_shared<Tensor>(tensor->abs());
+
+    // flatten the abs tensor
+
+    std::int64_t nbElement = tensor->size();
+
+    auto reshapeOp = Reshape_Op({nbElement});
+    reshapeOp.setDataType(tensor->dataType());
+    reshapeOp.setBackend(tensor->backend());
+
+    reshapeOp.associateInput(0, absTensor);
+    reshapeOp.forward();
+    std::shared_ptr<Tensor> flatTensor = reshapeOp.getOutput(0);
+
+    // Get the argmax
+
+    auto argmaxOp = ArgMax_Op(0, true, false);
+    argmaxOp.setDataType(tensor->dataType());
+    argmaxOp.setBackend(tensor->backend());
+
+    argmaxOp.associateInput(0, flatTensor);
+    argmaxOp.forward();
+    std::shared_ptr<Tensor> argmaxTensor = argmaxOp.getOutput(0);
+
+    // Return the max
+
+    int maxIndex = std::round(argmaxTensor->get<double>(0));
+
+    return flatTensor->get<double>(maxIndex);
 }
 
+
 // TODO : pass nodeVector by reference ...
 static std::vector<std::shared_ptr<Node>> removeMatchingNodes(std::vector<std::shared_ptr<Node>> nodeVector, std::string nodeType)
 {
@@ -876,50 +906,42 @@ static void insertCompensationNodes(std::shared_ptr<GraphView> graphView, std::u
 
     for (std::shared_ptr<Node> node : nodeVector)
     {
-        // A merging node is always followed by a scaling node at this point ...
+        // A merging node is always followed by a Quantizer node at this point
 
         if (node->type() == "Quantizer")
         {   
+            // check if the Quantizer is a residual one, and insert a compensation node if so ...
+
             bool prevNodeIsForking = ((node->getParent(0))->getChildren().size() > 1);
             bool prevNodeIsAffine = isAffine(node->getParent(0));
             bool insertNode = prevNodeIsForking || !prevNodeIsAffine;
 
             if (insertNode)
             {
-                // create and insert the multplicative node
+                // create and insert the multiplicative node before the Quantizer
 
                 std::string mulNodeName = makeUniqueName(node->name() + "_Mul", graphView);
                 std::shared_ptr<Node> mulNode = Mul(mulNodeName);
-
                 mulNode->getOperator()->setDataType(DataType::Float64); // getDataType(parentNode)
                 mulNode->getOperator()->setBackend("cpu");
 
                 graphView->insertParent(node, mulNode, 0, 0, 0);
 
-                // create and insert the producer node
-
-                std::shared_ptr<Tensor> inputTensor = std::static_pointer_cast<Tensor> (mulNode->getOperator()->getRawInput(0));
-                std::shared_ptr<Tensor> coeffTensor = std::make_shared<Tensor>();
+                // Add the coeff producer to the multiplier node
 
-                coeffTensor->setDataType(DataType::Float64); // getDataType(parentNode)
-                coeffTensor->setBackend("cpu"); 
+                std::shared_ptr<Node> coeffProducer = addProducer(mulNode, 1, {1}, ""); 
+                std::shared_ptr<Tensor> coeffTensor = std::make_shared<Tensor>(Array1D<double, 1> {signedMax});
+                coeffProducer->getOperator()->setOutput(0, coeffTensor);
 
-                coeffTensor->resize(inputTensor->dims());
-                fillTensor(coeffTensor, 1); 
+                coeffProducer->getOperator()->setDataType(DataType::Float64);
+                coeffProducer->getOperator()->setBackend("cpu"); 
 
-                std::shared_ptr<Node> producerNode = Producer(coeffTensor, makeUniqueName("coeff", graphView));
-                producerNode->addChild(mulNode);
-                graphView->add(producerNode);
+                graphView->add(coeffProducer); // needed ?
 
-                // rescale the coeffs and edit scaling factor
+                // Adapt the scaling factor value accordingly
 
-                fillTensor(coeffTensor, signedMax);
-
-                double currScalingFactor = getScalingFactor(node); // XXX bad naming !
+                double currScalingFactor = getScalingFactor(node); 
                 updateScalingFactor(node, currScalingFactor / signedMax);
-
-                // TODO : double check this !!!
-                //std::cout << getTensorAbsoluteMax(coeffTensor) << std::endl;
             }
         }
     }
@@ -931,7 +953,8 @@ void performSingleShiftApproximation(std::shared_ptr<GraphView> graphView, bool
 
     for (std::shared_ptr<Node> node : nodeVector)
     {
-        // Use A meatoperator of type Scaling of MulCompensation instead
+        // TODO : use Compensation nodes instead of Mul nodes
+
         if (isAffine(node) || (node->type() == "Mul"))
         {
             std::shared_ptr<Node> scalingNode = (*node->getChildren().begin());
@@ -940,7 +963,7 @@ void performSingleShiftApproximation(std::shared_ptr<GraphView> graphView, bool
 
             double approx = std::pow(2, std::ceil(std::log2(base)));
 
-            updateScalingFactor(scalingNode,approx);
+            updateScalingFactor(scalingNode, approx);
 
             double ratio = base / approx;
 
@@ -954,7 +977,7 @@ void performSingleShiftApproximation(std::shared_ptr<GraphView> graphView, bool
                 std::shared_ptr<Tensor> biasTensor = getBiasTensor(node);
                 rescaleTensor(biasTensor, ratio);
                 if (!noQuant)
-                roundTensor(biasTensor);
+                    roundTensor(biasTensor);
             }
         }
     }
@@ -1058,8 +1081,8 @@ void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits,
     if (useCuda)
         graphView->setBackend("cuda");
 
-    //std::cout << " === SCALINGS (AFTER CAST) ===" << std::endl;
-    //printScalingFactors(graphView);
+    std::cout << " === SCALINGS (AFTER CAST) ===" << std::endl;
+    printScalingFactors(graphView);
 
     Log::info(" Reseting the scheduler ...");
     SequentialScheduler scheduler(graphView);
diff --git a/src/QAT/QAT_LSQ.cpp b/src/QAT/QAT_LSQ.cpp
index 9b51e84..a09dbb2 100644
--- a/src/QAT/QAT_LSQ.cpp
+++ b/src/QAT/QAT_LSQ.cpp
@@ -89,19 +89,16 @@ void QuantLSQ::insertQuantizers(std::shared_ptr<GraphView> graphView, size_t nbB
 static float getTensorAbsMean(std::shared_ptr<Tensor> tensor)
 {
     auto backend = tensor->backend();
+
     if (backend == "cuda")
         tensor->setBackend("cpu");
 
-    float acc = 0;
-    float* castedTensor = static_cast<float *> (tensor->getImpl()->rawPtr());
-    for(std::size_t i = 0; i < tensor->size(); i++)
-        acc += std::abs(castedTensor[i]);
-    acc /= static_cast<float> (tensor->size());
+    float value = (*tensor).abs().mean().get<float>(0);
 
     if (backend == "cuda")
         tensor->setBackend("cuda");
 
-    return acc;
+    return value;
 }
 
 static std::map<std::string, float> collectInputStats(std::shared_ptr<GraphView> graphView, std::shared_ptr<Tensor> calibrationData, bool useCuda)
-- 
GitLab
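
Note on the TODO above: a backend-independent readback of the argmax result
could follow the refCastFrom pattern used elsewhere in this series — a sketch
only, reusing the tensor names from getTensorAbsoluteMax:

    // copy/cast to a CPU Float64 view before reading the value
    std::shared_ptr<Tensor> fallback;
    const Tensor& localArgmax = argmaxTensor->refCastFrom(fallback, DataType::Float64, "cpu");
    int maxIndex = static_cast<int>(localArgmax.get<double>(0));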


From 8a91f5210a0dc0be26a0491bcd39420bf2d9f1fe Mon Sep 17 00:00:00 2001
From: bhalimi <benjamin.halimi@cea.fr>
Date: Wed, 8 Jan 2025 10:37:27 +0000
Subject: [PATCH 41/60] comment out the verbose scaling-factor dump

---
 src/PTQ/PTQ.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/PTQ/PTQ.cpp b/src/PTQ/PTQ.cpp
index 6e0b29e..7f750f0 100644
--- a/src/PTQ/PTQ.cpp
+++ b/src/PTQ/PTQ.cpp
@@ -1081,8 +1081,8 @@ void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits,
     if (useCuda)
         graphView->setBackend("cuda");
 
-    std::cout << " === SCALINGS (AFTER CAST) ===" << std::endl;
-    printScalingFactors(graphView);
+    //std::cout << " === SCALINGS (AFTER CAST) ===" << std::endl;
+    //printScalingFactors(graphView);
 
     Log::info(" Reseting the scheduler ...");
     SequentialScheduler scheduler(graphView);
-- 
GitLab
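
Note: instead of toggling these lines by commenting them in and out, the
verbose flag already passed to quantizeNetwork could gate the dump — a sketch:

    if (verbose)
    {
        Log::info(" === SCALINGS (AFTER CAST) ===");
        printScalingFactors(graphView);
    }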


From f1323476ae0d66a18efb299aeb04a398d09515c7 Mon Sep 17 00:00:00 2001
From: bhalimi <benjamin.halimi@cea.fr>
Date: Wed, 8 Jan 2025 16:27:21 +0000
Subject: [PATCH 42/60] call sanitizeNodeNames in prepareNetwork

---
 src/PTQ/PTQ.cpp | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/PTQ/PTQ.cpp b/src/PTQ/PTQ.cpp
index 7f750f0..3677ae0 100644
--- a/src/PTQ/PTQ.cpp
+++ b/src/PTQ/PTQ.cpp
@@ -215,6 +215,8 @@ void prepareNetwork(std::shared_ptr<GraphView> graphView)
 {
     removeFlatten(graphView);
 
+    sanitizeNodeNames(graphView);
+
     bool containsBatchNorm = false;
     std::vector<std::shared_ptr<Node>> nodeVector = retrieveNodeVector(graphView);
 
@@ -1078,6 +1080,7 @@ void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits,
     //printScalingFactors(graphView);
 
     setupDataType(graphView, inputDataSet, initialDataType);
+
     if (useCuda)
         graphView->setBackend("cuda");
 
-- 
GitLab


From 878bb4cfda4bacffb963828ebbad2456e3a702cc Mon Sep 17 00:00:00 2001
From: bhalimi <benjamin.halimi@cea.fr>
Date: Mon, 13 Jan 2025 13:01:34 +0000
Subject: [PATCH 43/60] rework the LSQ code

---
 include/aidge/quantization/QAT/QAT_LSQ.hpp |  18 +-
 python_binding/pybind_QAT_LSQ.cpp          |   5 +-
 src/QAT/QAT_LSQ.cpp                        | 204 +++++++--------------
 3 files changed, 77 insertions(+), 150 deletions(-)

diff --git a/include/aidge/quantization/QAT/QAT_LSQ.hpp b/include/aidge/quantization/QAT/QAT_LSQ.hpp
index a44c71b..9827ee2 100644
--- a/include/aidge/quantization/QAT/QAT_LSQ.hpp
+++ b/include/aidge/quantization/QAT/QAT_LSQ.hpp
@@ -22,22 +22,14 @@ namespace Aidge {
 namespace QuantLSQ {
 
 /**
- * @brief Insert the LSQ quantizer nodes in a given GraphView
- * @param graphView The GraphView containing the graph to quantize.
+ * @brief Given a GraphView with parameters properly initialized, insert
+ * the LSQ quantizer nodes, and set up the adjustment of their step-sizes.
+ * @param graphView The GraphView containing the network to quantize.
  * @param nbBits Number of quantization bits.
- * @param span Fixed output span of the quantizers.
  */
-void insertQuantizers(std::shared_ptr<GraphView> graphView, std::size_t nbBits, float step_size);
+void setupQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits);
 
-/**
- * @brief Given a GraphView with parameters properly initialized and some calibration data,
- * insert the LSQ quantizer nodes, and adjust their step-sizes.
- * @param graphView The GraphView containing the graph to quantize.
- * @param nbBits Number of quantization bits.
- * @param calibrationData Calibration data used to adjust the spans.
- * @param scale Multiplicative constant applied to the spans.
- */
-void insertAndInitQuantizers(std::shared_ptr<GraphView> graphView, std::size_t nbBits, std::shared_ptr<Tensor> calibrationData);
+void devLSQ(std::shared_ptr<Tensor> tensor);
 
 }  // namespace QuantLSQ
 }  // namespace Aidge
diff --git a/python_binding/pybind_QAT_LSQ.cpp b/python_binding/pybind_QAT_LSQ.cpp
index 206985e..0b9fcc2 100644
--- a/python_binding/pybind_QAT_LSQ.cpp
+++ b/python_binding/pybind_QAT_LSQ.cpp
@@ -23,8 +23,9 @@ void init_QAT_LSQ(py::module &m) {
 
     auto mQuantLSQ = m.def_submodule("lsq");
 
-    mQuantLSQ.def("insert_quantizers", &QuantLSQ::insertQuantizers, py::arg("network"), py::arg("nb_bits"), py::arg("step_size"));
+    mQuantLSQ.def("setup_quantizers", &QuantLSQ::setupQuantizers, py::arg("network"), py::arg("nb_bits"));
+
+    mQuantLSQ.def("dev_lsq", &QuantLSQ::devLSQ, py::arg("tensor"));
 
-    mQuantLSQ.def("insert_and_init_quantizers", &QuantLSQ::insertAndInitQuantizers, py::arg("network"), py::arg("nb_bits"), py::arg("calibration_data"));
 }
 } // namespace Aidge
diff --git a/src/QAT/QAT_LSQ.cpp b/src/QAT/QAT_LSQ.cpp
index a09dbb2..04f2027 100644
--- a/src/QAT/QAT_LSQ.cpp
+++ b/src/QAT/QAT_LSQ.cpp
@@ -23,7 +23,42 @@
 
 namespace Aidge {
 
-void QuantLSQ::insertQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits, float stepSize)
+static float getTensorAbsMean(std::shared_ptr<Tensor> tensor)
+{
+    auto valueTensor = (*tensor).abs().mean();
+    std::shared_ptr<Tensor> fallback;
+    const Tensor& localTensor = valueTensor.refCastFrom(fallback, DataType::Float32, "cpu");
+    return localTensor.get<float>(0);
+}
+
+// INIT THE STEP SIZE OF A QUANTIZER NODE
+
+static bool initStepSize(std::shared_ptr<Node> quantizer)
+{
+    const auto quantizerOp = std::static_pointer_cast<LSQ_Op>(quantizer->getOperator());
+
+    float inputAbsMean = getTensorAbsMean(quantizerOp->getInput(0));
+
+    float stepSize = 2.0f * (inputAbsMean / std::sqrt(quantizerOp->range().second));
+
+    auto stepSizeTensor = std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}}));
+
+    // XXX Manage backend here ?
+    stepSizeTensor->setBackend(quantizerOp->getInput(0)->backend());
+    stepSizeTensor->setDataType(quantizerOp->getInput(0)->dataType());
+
+    auto stepSizeProducer = quantizer->getParent(1);
+
+    stepSizeProducer->getOperator()->setOutput(0, stepSizeTensor);
+
+    std::cout << " [ INIT STEP SIZE = " << stepSize << " ] " << std::endl;
+
+    return false;
+}
+
+// INPUT QUANTIZERS INSERTION
+
+static void setupInputQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits)
 {
     const auto matches = SinglePassGraphMatching(graphView).match("(Conv2D#|FC#)");
 
@@ -34,177 +69,76 @@ void QuantLSQ::insertQuantizers(std::shared_ptr<GraphView> graphView, size_t nbB
         std::pair<int, int> signedRange = {-std::pow(2, nbBits - 1), std::pow(2, nbBits - 1) - 1};
         std::pair<int, int> unsignedRange = {0, std::pow(2, nbBits) - 1};
 
-        // INPUT QUANTIZERS INSERTION
+        // Create the input quantizer node
 
-        // TODO : double check this, and use createUniqueName()
-        auto inputQuantizerName = makeUniqueName(linearNode->name() + "_lsq_i", graphView);  
-        auto inputQuantizerNode = LSQ(signedRange, inputQuantizerName);
+        auto quantizerName = makeUniqueName(linearNode->name() + "_lsq_i", graphView);  
+        auto quantizerNode = LSQ(signedRange, quantizerName);
 
-        // Set the step size
+        // Init the step-size using the node call stack
 
-        auto inputStepSizeOp = inputQuantizerNode->getParent(1)->getOperator();
-        auto inputStepSizeTensor = std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}}));
-        inputStepSizeOp->setOutput(0, inputStepSizeTensor);
+        quantizerNode->addBeforeForward([quantizerNode](){ return initStepSize(quantizerNode); });
 
         // Absorb the ReLU when possible ...
 
-        // XXX is this safe ???
-        bool nodeHasParent = static_cast<bool> (linearNode->getParents()[0]); 
-        // bool nodeHasParent = (linearNode->getParents().size() != 0);
+        bool nodeHasParent = static_cast<bool> (linearNode->getParents()[0]);  // XXX is this safe ?
 
         if (nodeHasParent) {
             auto parentNode = linearNode->getParents()[0];
             if (parentNode->type() == "ReLU") {
-                auto inputQuantizerOp = std::static_pointer_cast<LSQ_Op> (inputQuantizerNode->getOperator());
-                inputQuantizerOp->range() = unsignedRange;
+                auto quantizerOp = std::static_pointer_cast<LSQ_Op> (quantizerNode->getOperator());
+                quantizerOp->range() = unsignedRange;
                 graphView->replace({parentNode}, {}); 
             }
         }
 
-        // We need to handle the case where the linear node is the first one ...
+        // Insert the quantizer in the graphView ...
+        // (We need to handle the case where the linear node is the first one)
 
         if (nodeHasParent) {
-            graphView->insertParent(linearNode, inputQuantizerNode, 0, 0, 0);
+            graphView->insertParent(linearNode, quantizerNode, 0, 0, 0);
         } else {
-            inputQuantizerNode->addChild(graphView);
-            graphView->add(inputQuantizerNode);
+            quantizerNode->addChild(graphView);
+            graphView->add(quantizerNode);
         }
-
-        // PARAM QUANTIZERS INSERTION
-
-        // TODO : double check this, and use createUniqueName()
-        auto paramQuantizerName = makeUniqueName(linearNode->name() + "_lsq_p", graphView);  
-        auto paramQuantizerNode = LSQ(signedRange, paramQuantizerName); 
-        graphView->insertParent(linearNode, paramQuantizerNode, 1, 0, 0);
-
-        // Set the step size
-
-        auto paramStepSizeOp = paramQuantizerNode->getParent(1)->getOperator();
-        auto paramStepSizeTensor = std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}}));
-        paramStepSizeOp->setOutput(0, paramStepSizeTensor);
     }
-
 }
 
-static float getTensorAbsMean(std::shared_ptr<Tensor> tensor)
-{
-    auto backend = tensor->backend();
-
-    if (backend == "cuda")
-        tensor->setBackend("cpu");
-
-    float value = (*tensor).abs().mean().get<float>(0);
-
-    if (backend == "cuda")
-        tensor->setBackend("cuda");
-
-    return value;
-}
+// PARAM QUANTIZERS INSERTION
 
-static std::map<std::string, float> collectInputStats(std::shared_ptr<GraphView> graphView, std::shared_ptr<Tensor> calibrationData, bool useCuda)
+static void setupParamQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits)
 {
-    // Propagate the calibration tensor
+    const auto matches = SinglePassGraphMatching(graphView).match("(Conv2D#|FC#)");
 
-    SequentialScheduler scheduler(graphView);
-    scheduler.resetScheduling();
-    scheduler.forward(true, {calibrationData});
+    std::pair<int, int> signedRange = {-std::pow(2, nbBits - 1), std::pow(2, nbBits - 1) - 1};
 
-    // Store the input tensor statistics
+    for (const auto& match : matches) 
+    {       
+        auto linearNode = match.graph->rootNode(); 
 
-    if (useCuda)
-        graphView->setBackend("cpu"); 
+        // TODO : double check this, and use createUniqueName()
+        auto quantizerName = makeUniqueName(linearNode->name() + "_lsq_p", graphView);  
+        auto quantizerNode = LSQ(signedRange, quantizerName); 
 
-    std::map<std::string, float> inputStats;
-    for (auto node : graphView->getNodes())
-    {
-        if (node->type() == "FC" || node->type() == "Conv2D") // TODO: use graph matching !!!
-        {
-            const auto op = std::static_pointer_cast<LSQ_Op>(node->getOperator());
-            float inputAbsMean = getTensorAbsMean(op->getInput(0));
-            inputStats.insert(std::make_pair(node->name(), inputAbsMean));
-            fmt::println("{} -> {}", node->name(), inputAbsMean);
-        }
-    }
+        // Init the step-size using the node call stack
 
-    if (useCuda)
-        graphView->setBackend("cuda");
+        quantizerNode->addBeforeForward([quantizerNode](){ return initStepSize(quantizerNode); });
 
-    return inputStats;
-}
+        // Insert the quantizer in the graphView
 
-static std::map<std::string, float> collectParamStats(std::shared_ptr<GraphView> graphView, bool useCuda)
-{
-    if (useCuda)
-        graphView->setBackend("cpu");
-
-    std::map<std::string, float> paramStats;
-    for (auto node : graphView->getNodes())
-    {
-        if (node->type() == "FC" || node->type() == "Conv2D") // TODO: use graph matching !!!
-        {
-            const auto op = std::static_pointer_cast<LSQ_Op>(node->getOperator());
-            float paramAbsMean = getTensorAbsMean(op->getInput(1));
-            paramStats.insert(std::make_pair(node->name(), paramAbsMean));
-            fmt::println("{} -> {}", node->name(), paramAbsMean);
-        }
+        graphView->insertParent(linearNode, quantizerNode, 1, 0, 0);
     }
-    
-    if (useCuda)
-        graphView->setBackend("cuda");
-
-    return paramStats;
 }
 
-static void adjustQuantizersStepSizes(std::shared_ptr<GraphView> graphView, std::map<std::string, float> inputStats, std::map<std::string, float> paramStats)
+void QuantLSQ::setupQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits)
 {
-    const auto matches = SinglePassGraphMatching(graphView).match("(Conv2D#|FC#)");
-
-    for (const auto& match : matches) 
-    {
-        auto linearNode = match.graph->rootNode();
-
-        // INPUT QUANTIZERS STEP-SIZES
-
-        auto inputQuantNode = linearNode->getParent(0);
-        auto inputQuantOp = std::static_pointer_cast<LSQ_Op>(inputQuantNode->getOperator());
-
-        float absMean = inputStats[linearNode->name()];
-        float stepSize = 2.0f * (absMean / std::sqrt(inputQuantOp->range().second));
-
-        auto inputStepSizeOp = inputQuantNode->getParent(1)->getOperator();
-        // XXX inputStepSizeOp->setOutput(0, std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}})));
-        auto inputStepSizeTensor = std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}}));
-        inputStepSizeOp->setOutput(0, inputStepSizeTensor);
-
-        // PARAM QUANTIZERS STEP-SIZES
-
-        auto paramQuantNode = linearNode->getParent(1);
-        auto paramQuantOp = std::static_pointer_cast<LSQ_Op>(paramQuantNode->getOperator());
-
-        absMean = paramStats[linearNode->name()];
-        stepSize = 2.0f * (absMean / std::sqrt(paramQuantOp->range().second));
-
-        auto paramStepSizeOp = paramQuantNode->getParent(1)->getOperator();
-        // XXX paramStepSizeOp->setOutput(0, std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}})));
-        auto paramStepSizeTensor = std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}}));
-        paramStepSizeOp->setOutput(0, paramStepSizeTensor);
-    }
+    setupInputQuantizers(graphView, nbBits);
+    setupParamQuantizers(graphView, nbBits);
 }
 
-void QuantLSQ::insertAndInitQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits, std::shared_ptr<Tensor> calibrationData)
+void QuantLSQ::devLSQ(std::shared_ptr<Tensor> tensor)
 {
-    bool useCuda = (calibrationData->backend() == "cuda");
-
-    // Collect the tensor statisics
-    auto inputStats = collectInputStats(graphView, calibrationData, useCuda);
-
-    auto paramStats = collectParamStats(graphView, useCuda);
-
-    // Insert the quantizers
-    insertQuantizers(graphView, nbBits, 1.0);
-
-    // Adjust the quantizers step-sizes
-    adjustQuantizersStepSizes(graphView, inputStats, paramStats);
+    float mean = (tensor->mean()).get<float> (0);
+    std::cout << " MEAN  = " << mean << std::endl;
 }
 
 }
\ No newline at end of file
-- 
GitLab
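
Usage sketch for the reworked API, assuming a calibrationData tensor prepared
by the caller: after setupQuantizers, the first forward pass fires the
addBeforeForward hooks, which call initStepSize on live data.

    QuantLSQ::setupQuantizers(graphView, 8);
    SequentialScheduler scheduler(graphView);
    scheduler.forward(true, {calibrationData});  // triggers the step-size init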


From 712bdd8a6fe9699e5f8abf4312a3d2cfff081ce2 Mon Sep 17 00:00:00 2001
From: bhalimi <benjamin.halimi@cea.fr>
Date: Tue, 21 Jan 2025 12:28:19 +0000
Subject: [PATCH 44/60] rework the ReLU handling

---
 src/QAT/QAT_LSQ.cpp | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/src/QAT/QAT_LSQ.cpp b/src/QAT/QAT_LSQ.cpp
index 04f2027..f9ce554 100644
--- a/src/QAT/QAT_LSQ.cpp
+++ b/src/QAT/QAT_LSQ.cpp
@@ -82,13 +82,19 @@ static void setupInputQuantizers(std::shared_ptr<GraphView> graphView, size_t nb
 
         bool nodeHasParent = static_cast<bool> (linearNode->getParents()[0]);  // XXX is this safe ?
 
-        if (nodeHasParent) {
-            auto parentNode = linearNode->getParents()[0];
-            if (parentNode->type() == "ReLU") {
+        if (nodeHasParent) 
+        {
+            bool allParentsAreReLU = true;
+            for (auto parentNode : linearNode->getParents())
+                if (parentNode->type() != "ReLU")
+                    allParentsAreReLU = false;
+
+            if (allParentsAreReLU) {
                 auto quantizerOp = std::static_pointer_cast<LSQ_Op> (quantizerNode->getOperator());
                 quantizerOp->range() = unsignedRange;
-                graphView->replace({parentNode}, {}); 
             }
+
+            // TODO : remove the ReLUs when possible
         }
 
         // Insert the quantizer in the graphView ...
-- 
GitLab
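
Note: the hand-rolled loop above is equivalent to std::all_of — a sketch, with
an added null check for empty parent slots (not part of the patch):

    // needs #include <algorithm>
    const auto& parents = linearNode->getParents();
    bool allParentsAreReLU = std::all_of(parents.begin(), parents.end(),
        [](const std::shared_ptr<Node>& p) { return p && p->type() == "ReLU"; });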


From 352026d34e883b39e91b622865cbec36c6df48d5 Mon Sep 17 00:00:00 2001
From: bhalimi <benjamin.halimi@cea.fr>
Date: Tue, 21 Jan 2025 15:14:32 +0000
Subject: [PATCH 45/60] revert changes for debug

---
 include/aidge/quantization/QAT/QAT_LSQ.hpp |   6 +-
 python_binding/pybind_QAT_LSQ.cpp          |   4 +-
 src/QAT/QAT_LSQ.cpp                        | 199 ++++++++++++++++++++-
 3 files changed, 204 insertions(+), 5 deletions(-)

diff --git a/include/aidge/quantization/QAT/QAT_LSQ.hpp b/include/aidge/quantization/QAT/QAT_LSQ.hpp
index 9827ee2..4dc7048 100644
--- a/include/aidge/quantization/QAT/QAT_LSQ.hpp
+++ b/include/aidge/quantization/QAT/QAT_LSQ.hpp
@@ -27,9 +27,11 @@ namespace QuantLSQ {
  * @param graphView The GraphView containing the network to quantize.
  * @param nbBits Number of quantization bits.
  */
-void setupQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits);
 
-void devLSQ(std::shared_ptr<Tensor> tensor);
+//void setupQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits);
+//void devLSQ(std::shared_ptr<Tensor> tensor);
+
+void insertAndInitQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits, std::shared_ptr<Tensor> calibrationData);
 
 }  // namespace QuantLSQ
 }  // namespace Aidge
diff --git a/python_binding/pybind_QAT_LSQ.cpp b/python_binding/pybind_QAT_LSQ.cpp
index 0b9fcc2..cb5b7f0 100644
--- a/python_binding/pybind_QAT_LSQ.cpp
+++ b/python_binding/pybind_QAT_LSQ.cpp
@@ -23,9 +23,11 @@ void init_QAT_LSQ(py::module &m) {
 
     auto mQuantLSQ = m.def_submodule("lsq");
 
+/*
     mQuantLSQ.def("setup_quantizers", &QuantLSQ::setupQuantizers, py::arg("network"), py::arg("nb_bits"));
-
     mQuantLSQ.def("dev_lsq", &QuantLSQ::devLSQ, py::arg("tensor"));
+*/
+    mQuantLSQ.def("insert_and_init_quantizers", &QuantLSQ::insertAndInitQuantizers, py::arg("network"), py::arg("nb_bits"), py::arg("calibration_data"));
 
 }
 } // namespace Aidge
diff --git a/src/QAT/QAT_LSQ.cpp b/src/QAT/QAT_LSQ.cpp
index f9ce554..e52bafb 100644
--- a/src/QAT/QAT_LSQ.cpp
+++ b/src/QAT/QAT_LSQ.cpp
@@ -21,6 +21,201 @@
 #include "aidge/graph/Matching.hpp"
 #include "aidge/recipes/QuantRecipes.hpp"
 
+
+namespace Aidge {
+
+static void insertQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits, float stepSize)
+{
+    const auto matches = SinglePassGraphMatching(graphView).match("(Conv2D#|FC#)");
+
+    for (const auto& match : matches) 
+    {
+        auto linearNode = match.graph->rootNode(); 
+
+        std::pair<int, int> signedRange = {-std::pow(2, nbBits - 1), std::pow(2, nbBits - 1) - 1};
+        std::pair<int, int> unsignedRange = {0, std::pow(2, nbBits) - 1};
+
+        // INPUT QUANTIZERS INSERTION
+
+        // TODO : double check this, and use createUniqueName()
+        auto inputQuantizerName = makeUniqueName(linearNode->name() + "_lsq_i", graphView);  
+        auto inputQuantizerNode = LSQ(signedRange, inputQuantizerName);
+
+        // Set the step size
+
+        auto inputStepSizeOp = inputQuantizerNode->getParent(1)->getOperator();
+        auto inputStepSizeTensor = std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}}));
+        inputStepSizeOp->setOutput(0, inputStepSizeTensor);
+
+        // Absorb the ReLU when possible ...
+
+        // XXX is this safe ???
+        bool nodeHasParent = static_cast<bool> (linearNode->getParents()[0]); 
+        // bool nodeHasParent = (linearNode->getParents().size() != 0);
+
+        if (nodeHasParent) {
+            auto parentNode = linearNode->getParents()[0];
+            if (parentNode->type() == "ReLU") {
+                auto inputQuantizerOp = std::static_pointer_cast<LSQ_Op> (inputQuantizerNode->getOperator());
+                inputQuantizerOp->range() = unsignedRange;
+                graphView->replace({parentNode}, {}); 
+            }
+        }
+
+        // We need to handle the case where the linear node is the first one ...
+
+        if (nodeHasParent) {
+            graphView->insertParent(linearNode, inputQuantizerNode, 0, 0, 0);
+        } else {
+            inputQuantizerNode->addChild(graphView);
+            graphView->add(inputQuantizerNode);
+        }
+
+        // PARAM QUANTIZERS INSERTION
+
+        // TODO : double check this, and use createUniqueName()
+        auto paramQuantizerName = makeUniqueName(linearNode->name() + "_lsq_p", graphView);  
+        auto paramQuantizerNode = LSQ(signedRange, paramQuantizerName); 
+        graphView->insertParent(linearNode, paramQuantizerNode, 1, 0, 0);
+
+        // Set the step size
+
+        auto paramStepSizeOp = paramQuantizerNode->getParent(1)->getOperator();
+        auto paramStepSizeTensor = std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}}));
+        paramStepSizeOp->setOutput(0, paramStepSizeTensor);
+    }
+
+}
+
+static float getTensorAbsMean(std::shared_ptr<Tensor> tensor)
+{
+    auto backend = tensor->backend();
+    if (backend == "cuda")
+        tensor->setBackend("cpu");
+
+    float acc = 0;
+    float* castedTensor = static_cast<float *> (tensor->getImpl()->rawPtr());
+    for(std::size_t i = 0; i < tensor->size(); i++)
+        acc += std::abs(castedTensor[i]);
+    acc /= static_cast<float> (tensor->size());
+
+    if (backend == "cuda")
+        tensor->setBackend("cuda");
+
+    return acc;
+}
+
+static std::map<std::string, float> collectInputStats(std::shared_ptr<GraphView> graphView, std::shared_ptr<Tensor> calibrationData, bool useCuda)
+{
+    // Propagate the calibration tensor
+
+    SequentialScheduler scheduler(graphView);
+    scheduler.resetScheduling();
+    scheduler.forward(true, {calibrationData});
+
+    // Store the input tensor statistics
+
+    if (useCuda)
+        graphView->setBackend("cpu"); 
+
+    std::map<std::string, float> inputStats;
+    for (auto node : graphView->getNodes())
+    {
+        if (node->type() == "FC" || node->type() == "Conv2D") // TODO: use graph matching !!!
+        {
+            const auto op = std::static_pointer_cast<LSQ_Op>(node->getOperator());
+            float inputAbsMean = getTensorAbsMean(op->getInput(0));
+            inputStats.insert(std::make_pair(node->name(), inputAbsMean));
+            std::cout << node->name() << " -> " << inputAbsMean << std::endl;
+        }
+    }
+
+    if (useCuda)
+        graphView->setBackend("cuda");
+
+    return inputStats;
+}
+
+static std::map<std::string, float> collectParamStats(std::shared_ptr<GraphView> graphView, bool useCuda)
+{
+    if (useCuda)
+        graphView->setBackend("cpu");
+
+    std::map<std::string, float> paramStats;
+    for (auto node : graphView->getNodes())
+    {
+        if (node->type() == "FC" || node->type() == "Conv2D") // TODO: use graph matching !!!
+        {
+            const auto op = std::static_pointer_cast<LSQ_Op>(node->getOperator());
+            float paramAbsMean = getTensorAbsMean(op->getInput(1));
+            paramStats.insert(std::make_pair(node->name(), paramAbsMean));
+            std::cout << node->name() << " -> " << paramAbsMean << std::endl;
+        }
+    }
+    
+    if (useCuda)
+        graphView->setBackend("cuda");
+
+    return paramStats;
+}
+
+static void adjustQuantizersStepSizes(std::shared_ptr<GraphView> graphView, std::map<std::string, float> inputStats, std::map<std::string, float> paramStats)
+{
+    const auto matches = SinglePassGraphMatching(graphView).match("(Conv2D#|FC#)");
+
+    for (const auto& match : matches) 
+    {
+        auto linearNode = match.graph->rootNode();
+
+        // INPUT QUANTIZERS STEP-SIZES
+
+        auto inputQuantNode = linearNode->getParent(0);
+        auto inputQuantOp = std::static_pointer_cast<LSQ_Op>(inputQuantNode->getOperator());
+
+        float absMean = inputStats[linearNode->name()];
+        float stepSize = 2.0f * (absMean / std::sqrt(inputQuantOp->range().second));
+
+        auto inputStepSizeOp = inputQuantNode->getParent(1)->getOperator();
+        // XXX inputStepSizeOp->setOutput(0, std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}})));
+        auto inputStepSizeTensor = std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}}));
+        inputStepSizeOp->setOutput(0, inputStepSizeTensor);
+
+        // PARAM QUANTIZERS STEP-SIZES
+
+        auto paramQuantNode = linearNode->getParent(1);
+        auto paramQuantOp = std::static_pointer_cast<LSQ_Op>(paramQuantNode->getOperator());
+
+        absMean = paramStats[linearNode->name()];
+        stepSize = 2.0f * (absMean / std::sqrt(paramQuantOp->range().second));
+
+        auto paramStepSizeOp = paramQuantNode->getParent(1)->getOperator();
+        // XXX paramStepSizeOp->setOutput(0, std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}})));
+        auto paramStepSizeTensor = std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}}));
+        paramStepSizeOp->setOutput(0, paramStepSizeTensor);
+    }
+}
+
+void QuantLSQ::insertAndInitQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits, std::shared_ptr<Tensor> calibrationData)
+{
+    bool useCuda = (calibrationData->backend() == "cuda");
+
+    // Collect the tensor statistics
+    auto inputStats = collectInputStats(graphView, calibrationData, useCuda);
+
+    auto paramStats = collectParamStats(graphView, useCuda);
+
+    // Insert the quantizers
+    insertQuantizers(graphView, nbBits, 1.0);
+
+    // Adjust the quantizers step-sizes
+    adjustQuantizersStepSizes(graphView, inputStats, paramStats);
+}
+}
+
+
+/*
+    XXX XXX XXX
+
 namespace Aidge {
 
 static float getTensorAbsMean(std::shared_ptr<Tensor> tensor)
@@ -146,5 +341,5 @@ void QuantLSQ::devLSQ(std::shared_ptr<Tensor> tensor)
     float mean = (tensor->mean()).get<float> (0);
     std::cout << " MEAN  = " << mean << std::endl;
 }
-
-}
\ No newline at end of file
+}
+*/
\ No newline at end of file
-- 
GitLab


From e6d14185f40985e5f79ecfcbd06bd81cc0c8255e Mon Sep 17 00:00:00 2001
From: bhalimi <benjamin.halimi@cea.fr>
Date: Wed, 22 Jan 2025 12:47:59 +0000
Subject: [PATCH 46/60] re-apply the LSQ changes

---
 include/aidge/quantization/QAT/QAT_LSQ.hpp |   6 +-
 python_binding/pybind_QAT_LSQ.cpp          |   6 +-
 src/QAT/QAT_LSQ.cpp                        | 258 ++++++++++-----------
 3 files changed, 133 insertions(+), 137 deletions(-)

diff --git a/include/aidge/quantization/QAT/QAT_LSQ.hpp b/include/aidge/quantization/QAT/QAT_LSQ.hpp
index 4dc7048..68ce8e7 100644
--- a/include/aidge/quantization/QAT/QAT_LSQ.hpp
+++ b/include/aidge/quantization/QAT/QAT_LSQ.hpp
@@ -28,10 +28,8 @@ namespace QuantLSQ {
  * @param nbBits Number of quantization bits.
  */
 
-//void setupQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits);
-//void devLSQ(std::shared_ptr<Tensor> tensor);
-
-void insertAndInitQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits, std::shared_ptr<Tensor> calibrationData);
+void setupQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits);
+void devLSQ(std::shared_ptr<Tensor> tensor);
 
 }  // namespace QuantLSQ
 }  // namespace Aidge
diff --git a/python_binding/pybind_QAT_LSQ.cpp b/python_binding/pybind_QAT_LSQ.cpp
index cb5b7f0..0dd4267 100644
--- a/python_binding/pybind_QAT_LSQ.cpp
+++ b/python_binding/pybind_QAT_LSQ.cpp
@@ -23,11 +23,11 @@ void init_QAT_LSQ(py::module &m) {
 
     auto mQuantLSQ = m.def_submodule("lsq");
 
-/*
+
     mQuantLSQ.def("setup_quantizers", &QuantLSQ::setupQuantizers, py::arg("network"), py::arg("nb_bits"));
     mQuantLSQ.def("dev_lsq", &QuantLSQ::devLSQ, py::arg("tensor"));
-*/
-    mQuantLSQ.def("insert_and_init_quantizers", &QuantLSQ::insertAndInitQuantizers, py::arg("network"), py::arg("nb_bits"), py::arg("calibration_data"));
+
+    //mQuantLSQ.def("insert_and_init_quantizers", &QuantLSQ::insertAndInitQuantizers, py::arg("network"), py::arg("nb_bits"), py::arg("calibration_data"));
 
 }
 } // namespace Aidge
diff --git a/src/QAT/QAT_LSQ.cpp b/src/QAT/QAT_LSQ.cpp
index e52bafb..66e8ec7 100644
--- a/src/QAT/QAT_LSQ.cpp
+++ b/src/QAT/QAT_LSQ.cpp
@@ -21,6 +21,134 @@
 #include "aidge/graph/Matching.hpp"
 #include "aidge/recipes/QuantRecipes.hpp"
 
+namespace Aidge {
+
+static float getTensorAbsMean(std::shared_ptr<Tensor> tensor)
+{
+    auto valueTensor = (*tensor).abs().mean();
+    std::shared_ptr<Tensor> fallback;
+    const Tensor& localTensor = valueTensor.refCastFrom(fallback, DataType::Float32, "cpu");
+    return localTensor.get<float>(0);
+}
+
+// INIT THE STEP SIZE OF A QUANTIZER NODE
+
+static bool initStepSize(std::shared_ptr<Node> quantizer)
+{
+    const auto quantizerOp = std::static_pointer_cast<LSQ_Op>(quantizer->getOperator());
+
+    float inputAbsMean = getTensorAbsMean(quantizerOp->getInput(0));
+
+    float stepSize = 2.0f * (inputAbsMean / std::sqrt(quantizerOp->range().second));
+
+    auto stepSizeTensor = std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}}));
+
+    // XXX Manage backend here ?
+    stepSizeTensor->setBackend(quantizerOp->getInput(0)->backend());
+    stepSizeTensor->setDataType(quantizerOp->getInput(0)->dataType());
+
+    auto stepSizeProducer = quantizer->getParent(1);
+
+    stepSizeProducer->getOperator()->setOutput(0, stepSizeTensor);
+
+    std::cout << " [ INIT STEP SIZE = " << stepSize << " ] " << std::endl;
+
+    return false;
+}
+
+// INPUT QUANTIZERS INSERTION
+
+static void setupInputQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits)
+{
+    const auto matches = SinglePassGraphMatching(graphView).match("(Conv2D#|FC#)");
+
+    for (const auto& match : matches) 
+    {
+        auto linearNode = match.graph->rootNode(); 
+
+        std::pair<int, int> signedRange = {-std::pow(2, nbBits - 1), std::pow(2, nbBits - 1) - 1};
+        std::pair<int, int> unsignedRange = {0, std::pow(2, nbBits) - 1};
+
+        // Create the input quantizer node
+
+        auto quantizerName = makeUniqueName(linearNode->name() + "_lsq_i", graphView);  
+        auto quantizerNode = LSQ(signedRange, quantizerName);
+
+        // Init the step-size using the node call stack
+
+        quantizerNode->addBeforeForward([quantizerNode](){ return initStepSize(quantizerNode); });
+
+        // Absorb the ReLU when possible ...
+
+        bool nodeHasParent = static_cast<bool> (linearNode->getParents()[0]);  // XXX is this safe ?
+
+        if (nodeHasParent) 
+        {
+            bool allParentsAreReLU = true;
+            for (auto parentNode : linearNode->getParents())
+                if (parentNode->type() != "ReLU")
+                    allParentsAreReLU = false;
+
+            if (allParentsAreReLU) {
+                auto quantizerOp = std::static_pointer_cast<LSQ_Op> (quantizerNode->getOperator());
+                quantizerOp->range() = unsignedRange;
+            }
+
+            // TODO : remove the ReLUs when possible
+        }
+
+        // Insert the quantizer in the graphView ...
+        // (We need to handle the case where the linear node is the first one)
+
+        if (nodeHasParent) {
+            graphView->insertParent(linearNode, quantizerNode, 0, 0, 0);
+        } else {
+            quantizerNode->addChild(graphView);
+            graphView->add(quantizerNode);
+        }
+    }
+}
+
+// PARAM QUANTIZERS INSERTION
+
+static void setupParamQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits)
+{
+    const auto matches = SinglePassGraphMatching(graphView).match("(Conv2D#|FC#)");
+
+    std::pair<int, int> signedRange = {-std::pow(2, nbBits - 1), std::pow(2, nbBits - 1) - 1};
+
+    for (const auto& match : matches) 
+    {       
+        auto linearNode = match.graph->rootNode(); 
+
+        // TODO : double check this, and use createUniqueName()
+        auto quantizerName = makeUniqueName(linearNode->name() + "_lsq_p", graphView);  
+        auto quantizerNode = LSQ(signedRange, quantizerName); 
+
+        // Init the step-size using the node call stack
+
+        quantizerNode->addBeforeForward([quantizerNode](){ return initStepSize(quantizerNode); });
+
+        // Insert the quantizer in the graphView
+
+        graphView->insertParent(linearNode, quantizerNode, 1, 0, 0);
+    }
+}
+
+void QuantLSQ::setupQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits)
+{
+    setupInputQuantizers(graphView, nbBits);
+    setupParamQuantizers(graphView, nbBits);
+}
+
+void QuantLSQ::devLSQ(std::shared_ptr<Tensor> tensor)
+{
+    float mean = (tensor->mean()).get<float> (0);
+    std::cout << " MEAN  = " << mean << std::endl;
+}
+}
+
+/*
 
 namespace Aidge {
 
@@ -212,134 +340,4 @@ void QuantLSQ::insertAndInitQuantizers(std::shared_ptr<GraphView> graphView, siz
 }
 }
 
-
-/*
-    XXX XXX XXX
-
-namespace Aidge {
-
-static float getTensorAbsMean(std::shared_ptr<Tensor> tensor)
-{
-    auto valueTensor = (*tensor).abs().mean();
-    std::shared_ptr<Tensor> fallback;
-    const Tensor& localTensor = valueTensor.refCastFrom(fallback, DataType::Float32, "cpu");
-    return localTensor.get<float>(0);
-}
-
-// INIT THE STEP SIZE OF A QUANTIZER NODE
-
-static bool initStepSize(std::shared_ptr<Node> quantizer)
-{
-    const auto quantizerOp = std::static_pointer_cast<LSQ_Op>(quantizer->getOperator());
-
-    float inputAbsMean = getTensorAbsMean(quantizerOp->getInput(0));
-
-    float stepSize = 2.0f * (inputAbsMean / std::sqrt(quantizerOp->range().second));
-
-    auto stepSizeTensor = std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}}));
-
-    // XXX Manage backend here ?
-    stepSizeTensor->setBackend(quantizerOp->getInput(0)->backend());
-    stepSizeTensor->setDataType(quantizerOp->getInput(0)->dataType());
-
-    auto stepSizeProducer = quantizer->getParent(1);
-
-    stepSizeProducer->getOperator()->setOutput(0, stepSizeTensor);
-
-    std::cout << " [ INIT STEP SIZE = " << stepSize << " ] " << std::endl;
-
-    return false;
-}
-
-// INPUT QUANTIZERS INSERTION
-
-static void setupInputQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits)
-{
-    const auto matches = SinglePassGraphMatching(graphView).match("(Conv2D#|FC#)");
-
-    for (const auto& match : matches) 
-    {
-        auto linearNode = match.graph->rootNode(); 
-
-        std::pair<int, int> signedRange = {-std::pow(2, nbBits - 1), std::pow(2, nbBits - 1) - 1};
-        std::pair<int, int> unsignedRange = {0, std::pow(2, nbBits) - 1};
-
-        // Create the input quantizer node
-
-        auto quantizerName = makeUniqueName(linearNode->name() + "_lsq_i", graphView);  
-        auto quantizerNode = LSQ(signedRange, quantizerName);
-
-        // Init the step-size using the node call stack
-
-        quantizerNode->addBeforeForward([quantizerNode](){ return initStepSize(quantizerNode); });
-
-        // Absorb the ReLU when possible ...
-
-        bool nodeHasParent = static_cast<bool> (linearNode->getParents()[0]);  // XXX is this safe ?
-
-        if (nodeHasParent) 
-        {
-            bool allParentsAreReLU = true;
-            for (auto parentNode : linearNode->getParents())
-                if (parentNode->type() != "ReLU")
-                    allParentsAreReLU = false;
-
-            if (allParentsAreReLU) {
-                auto quantizerOp = std::static_pointer_cast<LSQ_Op> (quantizerNode->getOperator());
-                quantizerOp->range() = unsignedRange;
-            }
-
-            // TODO : remove the ReLUs when possible
-        }
-
-        // Insert the quantizer in the graphView ...
-        // (We need to handle the case where the linear node is the first one)
-
-        if (nodeHasParent) {
-            graphView->insertParent(linearNode, quantizerNode, 0, 0, 0);
-        } else {
-            quantizerNode->addChild(graphView);
-            graphView->add(quantizerNode);
-        }
-    }
-}
-
-// PARAM QUANTIZERS INSERTION
-
-static void setupParamQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits)
-{
-    const auto matches = SinglePassGraphMatching(graphView).match("(Conv2D#|FC#)");
-
-    std::pair<int, int> signedRange = {-std::pow(2, nbBits - 1), std::pow(2, nbBits - 1) - 1};
-
-    for (const auto& match : matches) 
-    {       
-        auto linearNode = match.graph->rootNode(); 
-
-        // TODO : double check this, and use createUniqueName()
-        auto quantizerName = makeUniqueName(linearNode->name() + "_lsq_p", graphView);  
-        auto quantizerNode = LSQ(signedRange, quantizerName); 
-
-        // Init the step-size using the node call stack
-
-        quantizerNode->addBeforeForward([quantizerNode](){ return initStepSize(quantizerNode); });
-
-        // Insert the quantizer in the graphView
-
-        graphView->insertParent(linearNode, quantizerNode, 1, 0, 0);
-    }
-}
-
-void QuantLSQ::setupQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits)
-{
-    setupInputQuantizers(graphView, nbBits);
-    setupParamQuantizers(graphView, nbBits);
-}
-
-void QuantLSQ::devLSQ(std::shared_ptr<Tensor> tensor)
-{
-    float mean = (tensor->mean()).get<float> (0);
-    std::cout << " MEAN  = " << mean << std::endl;
-}
-}
 */
\ No newline at end of file
-- 
GitLab
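
Note on initStepSize above: the formula matches the initialization proposed in
the LSQ paper (Esser et al.), s0 = 2 * E[|x|] / sqrt(Q_P), where Q_P =
range().second is the positive quantization bound. A worked example with
assumed values: for nbBits = 8 (signed), Q_P = 2^7 - 1 = 127, so an input with
E[|x|] ~= 0.5 gives s0 ~= 2 * 0.5 / sqrt(127) ~= 0.089.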


From 7307439f9a8e33c919ddf9372538c70aa3d1ff0e Mon Sep 17 00:00:00 2001
From: bhalimi <benjamin.halimi@cea.fr>
Date: Mon, 27 Jan 2025 13:04:58 +0000
Subject: [PATCH 47/60] refactor the LSQ code

---
 include/aidge/quantization/QAT/QAT_LSQ.hpp |   1 -
 python_binding/pybind_QAT_LSQ.cpp          |   5 -
 src/QAT/QAT_LSQ.cpp                        | 235 +++------------------
 3 files changed, 30 insertions(+), 211 deletions(-)

diff --git a/include/aidge/quantization/QAT/QAT_LSQ.hpp b/include/aidge/quantization/QAT/QAT_LSQ.hpp
index 68ce8e7..922187a 100644
--- a/include/aidge/quantization/QAT/QAT_LSQ.hpp
+++ b/include/aidge/quantization/QAT/QAT_LSQ.hpp
@@ -29,7 +29,6 @@ namespace QuantLSQ {
  */
 
 void setupQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits);
-void devLSQ(std::shared_ptr<Tensor> tensor);
 
 }  // namespace QuantLSQ
 }  // namespace Aidge
diff --git a/python_binding/pybind_QAT_LSQ.cpp b/python_binding/pybind_QAT_LSQ.cpp
index 0dd4267..4bba3b6 100644
--- a/python_binding/pybind_QAT_LSQ.cpp
+++ b/python_binding/pybind_QAT_LSQ.cpp
@@ -23,11 +23,6 @@ void init_QAT_LSQ(py::module &m) {
 
     auto mQuantLSQ = m.def_submodule("lsq");
 
-
     mQuantLSQ.def("setup_quantizers", &QuantLSQ::setupQuantizers, py::arg("network"), py::arg("nb_bits"));
-    mQuantLSQ.def("dev_lsq", &QuantLSQ::devLSQ, py::arg("tensor"));
-
-    //mQuantLSQ.def("insert_and_init_quantizers", &QuantLSQ::insertAndInitQuantizers, py::arg("network"), py::arg("nb_bits"), py::arg("calibration_data"));
-
 }
 } // namespace Aidge
diff --git a/src/QAT/QAT_LSQ.cpp b/src/QAT/QAT_LSQ.cpp
index 66e8ec7..80e8a05 100644
--- a/src/QAT/QAT_LSQ.cpp
+++ b/src/QAT/QAT_LSQ.cpp
@@ -21,25 +21,50 @@
 #include "aidge/graph/Matching.hpp"
 #include "aidge/recipes/QuantRecipes.hpp"
 
-namespace Aidge {
+
+namespace Aidge 
+{
 
 static float getTensorAbsMean(std::shared_ptr<Tensor> tensor)
 {
+    //std::cout << " GET TENSOR ABS MEAN " << std::endl;
     auto valueTensor = (*tensor).abs().mean();
     std::shared_ptr<Tensor> fallback;
     const Tensor& localTensor = valueTensor.refCastFrom(fallback, DataType::Float32, "cpu");
     return localTensor.get<float>(0);
 }
 
+static float getTensorStd(std::shared_ptr<Tensor> tensor)
+{
+    auto valueTensor = (*tensor);
+    
+    auto skewedTensor = valueTensor - valueTensor.mean();
+    auto squaredTensor = skewedTensor * skewedTensor;
+    auto varianceTensor = squaredTensor.mean();
+
+    std::shared_ptr<Tensor> fallback;
+    auto localTensor = varianceTensor.refCastFrom(fallback, DataType::Float32, "cpu");
+    
+    float variance = localTensor.get<float>(0);
+    return std::sqrt(variance);
+}
+
+
 // INIT THE STEP SIZE OF A QUANTIZER NODE
 
 static bool initStepSize(std::shared_ptr<Node> quantizer)
 {
     const auto quantizerOp = std::static_pointer_cast<LSQ_Op>(quantizer->getOperator());
 
-    float inputAbsMean = getTensorAbsMean(quantizerOp->getInput(0));
+    // This formula is the one proposed in the paper ...
+
+    // float inputAbsMean = getTensorAbsMean(quantizerOp->getInput(0));
+    // float stepSize = 2.0f * (inputAbsMean / std::sqrt(quantizerOp->range().second));
 
-    float stepSize = 2.0f * (inputAbsMean / std::sqrt(quantizerOp->range().second));
+    // .. but this formula seems to work better !!!
+
+    float inputStd = getTensorStd(quantizerOp->getInput(0));
+    float stepSize = 8.0f * (inputStd / (quantizerOp->range().second));
 
     auto stepSizeTensor = std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}}));
 
@@ -56,8 +81,6 @@ static bool initStepSize(std::shared_ptr<Node> quantizer)
     return false;
 }
 
-// INPUT QUANTIZERS INSERTION
-
 static void setupInputQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits)
 {
     const auto matches = SinglePassGraphMatching(graphView).match("(Conv2D#|FC#)");
@@ -137,207 +160,9 @@ static void setupParamQuantizers(std::shared_ptr<GraphView> graphView, size_t nb
 
 void QuantLSQ::setupQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits)
 {
+    sanitizeNodeNames(graphView);
     setupInputQuantizers(graphView, nbBits);
     setupParamQuantizers(graphView, nbBits);
 }
 
-void QuantLSQ::devLSQ(std::shared_ptr<Tensor> tensor)
-{
-    float mean = (tensor->mean()).get<float> (0);
-    std::cout << " MEAN  = " << mean << std::endl;
-}
-}
-
-/*
-
-namespace Aidge {
-
-static void insertQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits, float stepSize)
-{
-    const auto matches = SinglePassGraphMatching(graphView).match("(Conv2D#|FC#)");
-
-    for (const auto& match : matches) 
-    {
-        auto linearNode = match.graph->rootNode(); 
-
-        std::pair<int, int> signedRange = {-std::pow(2, nbBits - 1), std::pow(2, nbBits - 1) - 1};
-        std::pair<int, int> unsignedRange = {0, std::pow(2, nbBits) - 1};
-
-        // INPUT QUANTIZERS INSERTION
-
-        // TODO : double check this, and use createUniqueName()
-        auto inputQuantizerName = makeUniqueName(linearNode->name() + "_lsq_i", graphView);  
-        auto inputQuantizerNode = LSQ(signedRange, inputQuantizerName);
-
-        // Set the step size
-
-        auto inputStepSizeOp = inputQuantizerNode->getParent(1)->getOperator();
-        auto inputStepSizeTensor = std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}}));
-        inputStepSizeOp->setOutput(0, inputStepSizeTensor);
-
-        // Absorb the ReLU when possible ...
-
-        // XXX is this safe ???
-        bool nodeHasParent = static_cast<bool> (linearNode->getParents()[0]); 
-        // bool nodeHasParent = (linearNode->getParents().size() != 0);
-
-        if (nodeHasParent) {
-            auto parentNode = linearNode->getParents()[0];
-            if (parentNode->type() == "ReLU") {
-                auto inputQuantizerOp = std::static_pointer_cast<LSQ_Op> (inputQuantizerNode->getOperator());
-                inputQuantizerOp->range() = unsignedRange;
-                graphView->replace({parentNode}, {}); 
-            }
-        }
-
-        // We need to handle the case where the linear node is the first one ...
-
-        if (nodeHasParent) {
-            graphView->insertParent(linearNode, inputQuantizerNode, 0, 0, 0);
-        } else {
-            inputQuantizerNode->addChild(graphView);
-            graphView->add(inputQuantizerNode);
-        }
-
-        // PARAM QUANTIZERS INSERTION
-
-        // TODO : double check this, and use createUniqueName()
-        auto paramQuantizerName = makeUniqueName(linearNode->name() + "_lsq_p", graphView);  
-        auto paramQuantizerNode = LSQ(signedRange, paramQuantizerName); 
-        graphView->insertParent(linearNode, paramQuantizerNode, 1, 0, 0);
-
-        // Set the step size
-
-        auto paramStepSizeOp = paramQuantizerNode->getParent(1)->getOperator();
-        auto paramStepSizeTensor = std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}}));
-        paramStepSizeOp->setOutput(0, paramStepSizeTensor);
-    }
-
-}
-
-static float getTensorAbsMean(std::shared_ptr<Tensor> tensor)
-{
-    auto backend = tensor->backend();
-    if (backend == "cuda")
-        tensor->setBackend("cpu");
-
-    float acc = 0;
-    float* castedTensor = static_cast<float *> (tensor->getImpl()->rawPtr());
-    for(std::size_t i = 0; i < tensor->size(); i++)
-        acc += std::abs(castedTensor[i]);
-    acc /= static_cast<float> (tensor->size());
-
-    if (backend == "cuda")
-        tensor->setBackend("cuda");
-
-    return acc;
-}
-
-static std::map<std::string, float> collectInputStats(std::shared_ptr<GraphView> graphView, std::shared_ptr<Tensor> calibrationData, bool useCuda)
-{
-    // Propagate the calibration tensor
-
-    SequentialScheduler scheduler(graphView);
-    scheduler.resetScheduling();
-    scheduler.forward(true, {calibrationData});
-
-    // Store the input tensor statistics
-
-    if (useCuda)
-        graphView->setBackend("cpu"); 
-
-    std::map<std::string, float> inputStats;
-    for (auto node : graphView->getNodes())
-    {
-        if (node->type() == "FC" || node->type() == "Conv2D") // TODO: use graph matching !!!
-        {
-            const auto op = std::static_pointer_cast<LSQ_Op>(node->getOperator());
-            float inputAbsMean = getTensorAbsMean(op->getInput(0));
-            inputStats.insert(std::make_pair(node->name(), inputAbsMean));
-            std::cout << node->name() << " -> " << inputAbsMean << std::endl;
-        }
-    }
-
-    if (useCuda)
-        graphView->setBackend("cuda");
-
-    return inputStats;
-}
-
-static std::map<std::string, float> collectParamStats(std::shared_ptr<GraphView> graphView, bool useCuda)
-{
-    if (useCuda)
-        graphView->setBackend("cpu");
-
-    std::map<std::string, float> paramStats;
-    for (auto node : graphView->getNodes())
-    {
-        if (node->type() == "FC" || node->type() == "Conv2D") // TODO: use graph matching !!!
-        {
-            const auto op = std::static_pointer_cast<LSQ_Op>(node->getOperator());
-            float paramAbsMean = getTensorAbsMean(op->getInput(1));
-            paramStats.insert(std::make_pair(node->name(), paramAbsMean));
-            std::cout << node->name() << " -> " << paramAbsMean << std::endl;
-        }
-    }
-    
-    if (useCuda)
-        graphView->setBackend("cuda");
-
-    return paramStats;
-}
-
-static void adjustQuantizersStepSizes(std::shared_ptr<GraphView> graphView, std::map<std::string, float> inputStats, std::map<std::string, float> paramStats)
-{
-    const auto matches = SinglePassGraphMatching(graphView).match("(Conv2D#|FC#)");
-
-    for (const auto& match : matches) 
-    {
-        auto linearNode = match.graph->rootNode();
-
-        // INPUT QUANTIZERS STEP-SIZES
-
-        auto inputQuantNode = linearNode->getParent(0);
-        auto inputQuantOp = std::static_pointer_cast<LSQ_Op>(inputQuantNode->getOperator());
-
-        float absMean = inputStats[linearNode->name()];
-        float stepSize = 2.0f * (absMean / std::sqrt(inputQuantOp->range().second));
-
-        auto inputStepSizeOp = inputQuantNode->getParent(1)->getOperator();
-        // XXX inputStepSizeOp->setOutput(0, std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}})));
-        auto inputStepSizeTensor = std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}}));
-        inputStepSizeOp->setOutput(0, inputStepSizeTensor);
-
-        // PARAM QUANTIZERS STEP-SIZES
-
-        auto paramQuantNode = linearNode->getParent(1);
-        auto paramQuantOp = std::static_pointer_cast<LSQ_Op>(paramQuantNode->getOperator());
-
-        absMean = paramStats[linearNode->name()];
-        stepSize = 2.0f * (absMean / std::sqrt(paramQuantOp->range().second));
-
-        auto paramStepSizeOp = paramQuantNode->getParent(1)->getOperator();
-        // XXX paramStepSizeOp->setOutput(0, std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}})));
-        auto paramStepSizeTensor = std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}}));
-        paramStepSizeOp->setOutput(0, paramStepSizeTensor);
-    }
-}
-
-void QuantLSQ::insertAndInitQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits, std::shared_ptr<Tensor> calibrationData)
-{
-    bool useCuda = (calibrationData->backend() == "cuda");
-
-    // Collect the tensor statisics
-    auto inputStats = collectInputStats(graphView, calibrationData, useCuda);
-
-    auto paramStats = collectParamStats(graphView, useCuda);
-
-    // Insert the quantizers
-    insertQuantizers(graphView, nbBits, 1.0);
-
-    // Adjust the quantizers step-sizes
-    adjustQuantizersStepSizes(graphView, inputStats, paramStats);
-}
-}
-
-*/
\ No newline at end of file
+}
\ No newline at end of file
-- 
GitLab
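
The comments in the hunk above contrast two initializations: the LSQ-paper
formula s = 2 * E[|x|] / sqrt(Qmax) and the std-based heuristic
s = 8 * std(x) / Qmax that the patch adopts. A minimal standalone sketch of
both, assuming qMax stands for quantizerOp->range().second; the helper names
are illustrative, not part of the Aidge API:

    #include <cmath>
    #include <vector>

    // Mean of absolute values, E[|x|].
    static float absMean(const std::vector<float>& v)
    {
        float acc = 0.0f;
        for (float x : v)
            acc += std::abs(x);
        return acc / static_cast<float>(v.size());
    }

    // Standard deviation, sqrt(E[(x - E[x])^2]), as in getTensorStd().
    static float standardDeviation(const std::vector<float>& v)
    {
        float mean = 0.0f;
        for (float x : v)
            mean += x;
        mean /= static_cast<float>(v.size());

        float var = 0.0f;
        for (float x : v)
            var += (x - mean) * (x - mean);
        return std::sqrt(var / static_cast<float>(v.size()));
    }

    // LSQ paper initialization: s = 2 * E[|x|] / sqrt(Qmax).
    float paperStepSize(const std::vector<float>& values, int qMax)
    {
        return 2.0f * absMean(values) / std::sqrt(static_cast<float>(qMax));
    }

    // Heuristic adopted by the patch: s = 8 * std(x) / Qmax.
    float stdBasedStepSize(const std::vector<float>& values, int qMax)
    {
        return 8.0f * standardDeviation(values) / static_cast<float>(qMax);
    }

For an 8-bit signed quantizer (Qmax = 127) and roughly unit-variance inputs,
the heuristic yields a step size near 8/127, i.e. about 0.063.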


From 064fef3b9b7392bb700b631d117348a27136965c Mon Sep 17 00:00:00 2001
From: bhalimi <benjamin.halimi@cea.fr>
Date: Mon, 27 Jan 2025 13:07:12 +0000
Subject: [PATCH 48/60] remove commented code

---
 src/backend/cuda/operator/LSQImpl.cpp | 20 +-------------------
 1 file changed, 1 insertion(+), 19 deletions(-)

diff --git a/src/backend/cuda/operator/LSQImpl.cpp b/src/backend/cuda/operator/LSQImpl.cpp
index c66bd8a..fa45f21 100644
--- a/src/backend/cuda/operator/LSQImpl.cpp
+++ b/src/backend/cuda/operator/LSQImpl.cpp
@@ -52,19 +52,6 @@ void Aidge::LSQImpl_cuda::backward() {
     std::shared_ptr<Tensor> gra_int1 = op_.getInput(1)->grad();
     std::shared_ptr<Tensor> gra_out0 = op_.getOutput(0)->grad();    
 
-    // XXX
-/*
-    size_t tmp;
-
-    cudaDeviceSetLimit(cudaLimitStackSize, 2048);
-    cudaDeviceGetLimit(&tmp, cudaLimitStackSize );
-    printf(" stack limit = %ld \n", tmp);
-
-    cudaDeviceSetLimit(cudaLimitMallocHeapSize, 100000000);
-    cudaDeviceGetLimit(&tmp, cudaLimitMallocHeapSize);
-    printf(" heap limit = %ld \n", tmp);
-*/
-
     if (gra_int0->size() > mWorkspaceSize) {
         // std::cout << " reallocation " << sizeof(gra_int0) << " " << gra_int0->size() << std::endl;
         if (mWorkspace != nullptr) {
@@ -87,12 +74,7 @@ void Aidge::LSQImpl_cuda::backward() {
         gra_int0->getImpl()->rawPtr(),
         gra_int1->getImpl()->rawPtr(),
         mWorkspace);
-/*
-    gra_int1->setBackend("cpu");
-    float *castedTensor = static_cast<float *> (gra_int1->getImpl()->rawPtr());
-    std::cout << castedTensor[0] << std::endl;
-    gra_int1->setBackend("cuda");
-*/
+
 }
 
 Aidge::LSQImpl_cuda::~LSQImpl_cuda() {
-- 
GitLab
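
The unchanged lines surrounding this hunk lazily grow a device-side workspace
before launching the backward kernel. A minimal sketch of that grow-only
buffer pattern, under assumed semantics and with error handling omitted
(GrowOnlyWorkspace is illustrative, not the actual LSQImpl_cuda member
layout):

    #include <cuda_runtime.h>
    #include <cstddef>

    // Keep one scratch buffer on the device and reallocate it only when a
    // larger request arrives, so repeated backward passes reuse the memory.
    struct GrowOnlyWorkspace
    {
        void*       ptr  = nullptr;
        std::size_t size = 0;

        void* ensure(std::size_t required)
        {
            if (required > size) {
                if (ptr != nullptr)
                    cudaFree(ptr);
                cudaMalloc(&ptr, required);
                size = required;
            }
            return ptr;
        }

        ~GrowOnlyWorkspace()
        {
            if (ptr != nullptr)
                cudaFree(ptr);
        }
    };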


From 95ba99c966b6dd3002bbef24d20fb43cd9d437ed Mon Sep 17 00:00:00 2001
From: bhalimi <benjamin.halimi@cea.fr>
Date: Mon, 27 Jan 2025 14:14:40 +0000
Subject: [PATCH 49/60] complete the PTQ float to double migration

---
 src/PTQ/CLE.cpp | 7 +------
 src/PTQ/PTQ.cpp | 4 ++--
 2 files changed, 3 insertions(+), 8 deletions(-)

diff --git a/src/PTQ/CLE.cpp b/src/PTQ/CLE.cpp
index 63d3b45..c47c619 100644
--- a/src/PTQ/CLE.cpp
+++ b/src/PTQ/CLE.cpp
@@ -49,7 +49,7 @@ static void rescaleTensor(std::shared_ptr<Tensor> tensor, double scaling)
     mulOp.setDataType(tensor->dataType());
     mulOp.setBackend(tensor->backend());
 
-    std::shared_ptr<Aidge::Tensor> scalingTensor = std::make_shared<Aidge::Tensor>(Aidge::Array1D<float, 1> {scaling});
+    std::shared_ptr<Aidge::Tensor> scalingTensor = std::make_shared<Aidge::Tensor>(Aidge::Array1D<double, 1> {scaling});
     scalingTensor->setDataType(tensor->dataType());
     scalingTensor->setBackend(tensor->backend());
 
@@ -120,12 +120,7 @@ void crossLayerEqualization(std::shared_ptr<GraphView> graphView, double targetD
         if (isAffine(node))
             affineNodeVector.push_back(node);
 
-    if (affineNodeVector.empty()) {
-        Log::notice("No affine nodes found in the network. CLE cannot be applied.");
-        return;
-    }
     double maxRangeDelta;
-    int iteration = 0;
 
     do
     {
diff --git a/src/PTQ/PTQ.cpp b/src/PTQ/PTQ.cpp
index 3677ae0..e510880 100644
--- a/src/PTQ/PTQ.cpp
+++ b/src/PTQ/PTQ.cpp
@@ -72,13 +72,13 @@ bool checkArchitecture(std::shared_ptr<GraphView> graphView)
     return true;
 }
 
-static void rescaleTensor(std::shared_ptr<Tensor> tensor, float scaling)
+static void rescaleTensor(std::shared_ptr<Tensor> tensor, double scaling)
 {
     auto mulOp = Mul_Op();
     mulOp.setDataType(tensor->dataType());
     mulOp.setBackend(tensor->backend());
 
-    std::shared_ptr<Aidge::Tensor> scalingTensor = std::make_shared<Aidge::Tensor>(Aidge::Array1D<float, 1> {scaling});
+    std::shared_ptr<Aidge::Tensor> scalingTensor = std::make_shared<Aidge::Tensor>(Aidge::Array1D<double, 1> {scaling});
     scalingTensor->setDataType(tensor->dataType());
     scalingTensor->setBackend(tensor->backend());
 
-- 
GitLab


From a30e2e52999b5874008971bde3005840295382c0 Mon Sep 17 00:00:00 2001
From: bhalimi <benjamin.halimi@cea.fr>
Date: Mon, 27 Jan 2025 14:35:45 +0000
Subject: [PATCH 50/60] replace the couts with logs

---
 src/PTQ/CLE.cpp        |  7 ++-----
 src/PTQ/Clipping.cpp   |  2 +-
 src/PTQ/PTQ.cpp        | 13 ++++++-------
 src/QAT/QAT_FixedQ.cpp |  6 +++---
 src/QAT/QAT_LSQ.cpp    |  3 +--
 5 files changed, 13 insertions(+), 18 deletions(-)

diff --git a/src/PTQ/CLE.cpp b/src/PTQ/CLE.cpp
index c47c619..cbfb91f 100644
--- a/src/PTQ/CLE.cpp
+++ b/src/PTQ/CLE.cpp
@@ -126,11 +126,8 @@ void crossLayerEqualization(std::shared_ptr<GraphView> graphView, double targetD
     {
         ++iteration;
         maxRangeDelta = 0.0;
-        //std::cout << " ----- " << std::endl;
-        //for (std::shared_ptr<Node> node : affineNodeVector)
-        //    std::cout << getTensorAbsoluteMax(getWeightTensor(node)) << std::endl;
-
-        for (std::size_t i = 0; i < (affineNodeVector.size() - 1); i++)
+        
+        for (size_t i = 0; i < (affineNodeVector.size() - 1); i++)
         {
             std::shared_ptr<Node> n1 = affineNodeVector[i];
             std::shared_ptr<Node> n2 = affineNodeVector[i+1];
diff --git a/src/PTQ/Clipping.cpp b/src/PTQ/Clipping.cpp
index 57ad7a8..66b0ab3 100644
--- a/src/PTQ/Clipping.cpp
+++ b/src/PTQ/Clipping.cpp
@@ -26,7 +26,7 @@ std::map<std::string, std::vector<int>> computeHistograms(std::map<std::string,
 
     std::shared_ptr<Node> firstNode = retrieveNodeVector(graphView)[0];
 
-    //std::cout << " COMPUTING HISTOGRAMS ... " << std::endl;
+    // Log::debug(" COMPUTING HISTOGRAMS ... ");
 
     std::map<std::string, std::vector<int>> histograms;
 
diff --git a/src/PTQ/PTQ.cpp b/src/PTQ/PTQ.cpp
index e510880..073e5e0 100644
--- a/src/PTQ/PTQ.cpp
+++ b/src/PTQ/PTQ.cpp
@@ -987,7 +987,6 @@ void performSingleShiftApproximation(std::shared_ptr<GraphView> graphView, bool
 
 static void printScalingFactors(std::shared_ptr<GraphView> graphView)
 {
-    Log::info(" === SCALING FACTORS === ");
     for (auto node : retrieveNodeVector(graphView))
         if (node->type() == "Scaling" || node->type() == "Quantizer")
         {
@@ -1020,7 +1019,7 @@ static void printRanges(std::shared_ptr<GraphView> graphView, std::map<std::stri
     auto scheduling = scheduler.getStaticScheduling();
     for (auto node : scheduling)
         if (node->type() == "Scaling")
-            fmt::println("{} range = {}", node->name(), valueRanges[node->name()]);
+            Log::info(" {} range = {} ", node->name(), valueRanges[node->name()]);
 }
 
 void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits, std::vector<std::shared_ptr<Tensor>> inputDataSet, Clipping clippingMode, bool noQuant, bool optimizeSigns, bool singleShift, bool useCuda, bool verbose)
@@ -1049,13 +1048,13 @@ void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits,
     Log::info(" Computing the value ranges ...");
     std::map<std::string, double> valueRanges = computeRanges(graphView, inputDataSet, true, useCuda);
 
-    //std::cout << " === RANGES (BEFORE ADJUST) ===" << std::endl;
+    //Log::info(" === RANGES (BEFORE ADJUST) ===");
     //printRanges(graphView, valueRanges);
 
     Log::info(" Optimizing the clipping values ...");
     valueRanges = adjustRanges(clippingMode, valueRanges, nbBits, graphView, inputDataSet, useCuda, verbose);
 
-    //std::cout << " === RANGES (AFTER ADJUST) ===" << std::endl;
+    //Log::info(" === RANGES (AFTER ADJUST) ===");
     //printRanges(graphView, valueRanges);
 
     Log::info(" Normalizing the activations ...");
@@ -1076,7 +1075,7 @@ void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits,
     if (verbose)
         printScalingFactors(graphView);
 
-    //std::cout << " === SCALINGS (BEFORE CAST) ===" << std::endl;
+    //Log::info(" === SCALINGS (BEFORE CAST) ===");
     //printScalingFactors(graphView);
 
     setupDataType(graphView, inputDataSet, initialDataType);
@@ -1084,7 +1083,7 @@ void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits,
     if (useCuda)
         graphView->setBackend("cuda");
 
-    //std::cout << " === SCALINGS (AFTER CAST) ===" << std::endl;
+    //Log::info(" === SCALINGS (AFTER CAST) ===");
     //printScalingFactors(graphView);
 
     Log::info(" Reseting the scheduler ...");
@@ -1124,7 +1123,7 @@ void clearBiases(std::shared_ptr<GraphView> graphView)
 void devPTQ(std::shared_ptr<GraphView> graphView) 
 {
     for (std::shared_ptr<Node> node : graphView->getNodes())
-        fmt::println(" UUU : {}", node->name());
+        Log::info(" UUU : {}", node->name());   
 }
 
 }
diff --git a/src/QAT/QAT_FixedQ.cpp b/src/QAT/QAT_FixedQ.cpp
index 9160b4a..6ada532 100644
--- a/src/QAT/QAT_FixedQ.cpp
+++ b/src/QAT/QAT_FixedQ.cpp
@@ -91,7 +91,7 @@ static std::map<std::string, float> collectInputStats(std::shared_ptr<GraphView>
             const auto op = std::static_pointer_cast<FixedQ_Op>(node->getOperator());
             float inputStd = getTensorStd(op->getInput(0));
             inputStats.insert(std::make_pair(node->name(), inputStd));
-            fmt::println("{} -> {}", node->name(), inputStd);
+            Log::info(" {} -> {} ", node->name(), inputStd);
         }
     }
 
@@ -108,7 +108,7 @@ static std::map<std::string, float> collectParamStats(std::shared_ptr<GraphView>
             const auto op = std::static_pointer_cast<FixedQ_Op>(node->getOperator());
             float paramStd = getTensorStd(op->getInput(1));
             paramStats.insert(std::make_pair(node->name(), paramStd));
-            fmt::println("{} -> {}", node->name(), paramStd);
+            Log::info(" {} -> {} ", node->name(), paramStd);
         }
     }
     
@@ -156,7 +156,7 @@ void QuantFixedQ::devQAT(std::shared_ptr<GraphView> graphView)
     scheduler.generateScheduling();
     auto s = scheduler.getStaticScheduling();
     for (std::shared_ptr<Node> node : s)
-        fmt::println(" name : {}", node->name());
+        Log::info(" name : {} ", node->name());
 }
 
 }
\ No newline at end of file
diff --git a/src/QAT/QAT_LSQ.cpp b/src/QAT/QAT_LSQ.cpp
index 80e8a05..0508fc7 100644
--- a/src/QAT/QAT_LSQ.cpp
+++ b/src/QAT/QAT_LSQ.cpp
@@ -27,7 +27,6 @@ namespace Aidge
 
 static float getTensorAbsMean(std::shared_ptr<Tensor> tensor)
 {
-    //std::cout << " GET TENSOR ABS MEAN " << std::endl;
     auto valueTensor = (*tensor).abs().mean();
     std::shared_ptr<Tensor> fallback;
     const Tensor& localTensor = valueTensor.refCastFrom(fallback, DataType::Float32, "cpu");
@@ -76,7 +75,7 @@ static bool initStepSize(std::shared_ptr<Node> quantizer)
 
     stepSizeProducer->getOperator()->setOutput(0, stepSizeTensor);
 
-    std::cout << " [ INIT STEP SIZE = " << stepSize << " ] " << std::endl;
+    Log::info(" [ INIT STEP SIZE = {} ] ", stepSize);
 
     return false;
 }
-- 
GitLab


From 6a52ae3d95cf9108e27ea1c80095cb8ac75ef943 Mon Sep 17 00:00:00 2001
From: bhalimi <benjamin.halimi@cea.fr>
Date: Mon, 27 Jan 2025 14:53:30 +0000
Subject: [PATCH 51/60] rename nb_channels to nbChannels and log it via
 Log::info

---
 src/recipes/QuantRecipes.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/recipes/QuantRecipes.cpp b/src/recipes/QuantRecipes.cpp
index 6e1dcdb..7f01b24 100644
--- a/src/recipes/QuantRecipes.cpp
+++ b/src/recipes/QuantRecipes.cpp
@@ -58,11 +58,11 @@ void insertBatchNormNodes(std::shared_ptr<GraphView> graphView)
         if (parentNode->type() == "Conv2D")
         {
             std::shared_ptr<Conv_Op<2>> convOperator = std::static_pointer_cast<Conv_Op<2>> (parentNode->getOperator());
-            int nb_channels = convOperator->getInput(1)->dims()[0];
-            fmt::println(" NB CHANNELS = {}", nb_channels); // TODO : remove this ...
+            int nbChannels = convOperator->getInput(1)->dims()[0];
+            Log::info(" NB CHANNELS = {} ", nbChannels);
 
             std::string batchnormNodeName = makeUniqueName(parentNode->name() + "_BN", graphView);
-            std::shared_ptr<Node> batchnormNode = BatchNorm<2>(nb_channels, 1e-5, 0.1, false, batchnormNodeName);
+            std::shared_ptr<Node> batchnormNode = BatchNorm<2>(nbChannels, 1e-5, 0.1, false, batchnormNodeName);
             batchnormNode->getOperator()->setDataType(DataType::Float32);
             batchnormNode->getOperator()->setBackend("cpu");
 
-- 
GitLab


From 2353215b2678de012bd0f256449a436c47cc4dac Mon Sep 17 00:00:00 2001
From: bhalimi <benjamin.halimi@cea.fr>
Date: Mon, 27 Jan 2025 15:03:44 +0000
Subject: [PATCH 52/60] move the PTQMetaOps files

---
 include/aidge/{quantization/PTQ => operator}/PTQMetaOps.hpp | 0
 src/PTQ/PTQ.cpp                                             | 2 +-
 src/{PTQ => operator}/PTQMetaOps.cpp                        | 0
 3 files changed, 1 insertion(+), 1 deletion(-)
 rename include/aidge/{quantization/PTQ => operator}/PTQMetaOps.hpp (100%)
 rename src/{PTQ => operator}/PTQMetaOps.cpp (100%)

diff --git a/include/aidge/quantization/PTQ/PTQMetaOps.hpp b/include/aidge/operator/PTQMetaOps.hpp
similarity index 100%
rename from include/aidge/quantization/PTQ/PTQMetaOps.hpp
rename to include/aidge/operator/PTQMetaOps.hpp
diff --git a/src/PTQ/PTQ.cpp b/src/PTQ/PTQ.cpp
index 073e5e0..09b039f 100644
--- a/src/PTQ/PTQ.cpp
+++ b/src/PTQ/PTQ.cpp
@@ -12,7 +12,7 @@
 #include "aidge/quantization/PTQ/CLE.hpp"
 #include "aidge/quantization/PTQ/Clipping.hpp"
 #include "aidge/quantization/PTQ/PTQ.hpp"
-#include "aidge/quantization/PTQ/PTQMetaOps.hpp"
+#include "aidge/operator/PTQMetaOps.hpp"
 
 
 #include "aidge/data/Tensor.hpp"
diff --git a/src/PTQ/PTQMetaOps.cpp b/src/operator/PTQMetaOps.cpp
similarity index 100%
rename from src/PTQ/PTQMetaOps.cpp
rename to src/operator/PTQMetaOps.cpp
-- 
GitLab


From a0245411c756cd56e31e1b2addf9837520d9ea1f Mon Sep 17 00:00:00 2001
From: bhalimi <benjamin.halimi@cea.fr>
Date: Tue, 28 Jan 2025 10:06:53 +0000
Subject: [PATCH 53/60] fix an include

---
 src/operator/PTQMetaOps.cpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/operator/PTQMetaOps.cpp b/src/operator/PTQMetaOps.cpp
index 77018c2..56245da 100644
--- a/src/operator/PTQMetaOps.cpp
+++ b/src/operator/PTQMetaOps.cpp
@@ -9,13 +9,12 @@
  *
  ********************************************************************************/
 
-#include "aidge/quantization/PTQ/PTQMetaOps.hpp"
+#include "aidge/operator/PTQMetaOps.hpp"
 
 #include <memory>
 #include <string>
 #include <utility>
 
-//Operator
 #include "aidge/operator/Clip.hpp"
 #include "aidge/operator/Mul.hpp"
 #include "aidge/operator/Round.hpp"
-- 
GitLab


From 31385b7462bf438d0049eda771161ee5e9e55141 Mon Sep 17 00:00:00 2001
From: bhalimi <benjamin.halimi@cea.fr>
Date: Fri, 31 Jan 2025 15:25:10 +0000
Subject: [PATCH 54/60] enable the cuda backend

---
 CMakeLists.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index b3c6d45..17dd74a 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -61,7 +61,7 @@ option(PYBIND "python binding" OFF)
 option(WERROR "Warning as error" OFF)
 option(TEST "Enable tests" OFF)
 option(COVERAGE "Enable coverage" OFF)
-option(CUDA "Enable CUDA backend" OFF) # XXX OFF
+option(CUDA "Enable CUDA backend" ON) # XXX should default to OFF
 option(ENABLE_ASAN "Enable ASan (AddressSanitizer) for runtime analysis of memory use (over/underflow, memory leak, ...)" OFF)
 
 ##############################################
-- 
GitLab


From e4332f40767585a27e494445887088a52f2d711b Mon Sep 17 00:00:00 2001
From: bhalimi <benjamin.halimi@cea.fr>
Date: Fri, 31 Jan 2025 15:26:07 +0000
Subject: [PATCH 55/60] remove the CLE iteration counter and its completion log

---
 src/PTQ/CLE.cpp | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/src/PTQ/CLE.cpp b/src/PTQ/CLE.cpp
index cbfb91f..40b9e42 100644
--- a/src/PTQ/CLE.cpp
+++ b/src/PTQ/CLE.cpp
@@ -124,7 +124,6 @@ void crossLayerEqualization(std::shared_ptr<GraphView> graphView, double targetD
 
     do
     {
-        ++iteration;
         maxRangeDelta = 0.0;
         
         for (size_t i = 0; i < (affineNodeVector.size() - 1); i++)
@@ -149,9 +148,6 @@ void crossLayerEqualization(std::shared_ptr<GraphView> graphView, double targetD
         }
     }
     while (maxRangeDelta > targetDelta);
-
-    Log::notice("CLE completed after {} iterations. Final max range delta: {:.6f}",
-                iteration, maxRangeDelta);
 }
 
 }
\ No newline at end of file
-- 
GitLab
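
The do/while loop around this hunk sweeps all consecutive pairs of affine
nodes until the largest range change falls below targetDelta. The actual
rescaling code lies outside the visible hunks; what follows is a hedged
sketch of one equalization step in the usual cross-layer-equalization
formulation (weights only, biases ignored):

    #include <algorithm>
    #include <cmath>
    #include <vector>

    struct AffineLayer { std::vector<float> weights; };

    static float absMax(const std::vector<float>& v)
    {
        float m = 0.0f;
        for (float x : v)
            m = std::max(m, std::abs(x));
        return m;
    }

    // Equalize two consecutive layers: after rescaling, both weight tensors
    // share the same absolute-max range sqrt(r1 * r2). The returned gap
    // plays the role of the loop's range delta.
    static float equalizePair(AffineLayer& n1, AffineLayer& n2)
    {
        float r1 = absMax(n1.weights);
        float r2 = absMax(n2.weights);
        float s  = std::sqrt(r2 / r1);

        for (float& w : n1.weights)
            w *= s;   // scale n1 ...
        for (float& w : n2.weights)
            w /= s;   // ... and compensate in n2

        return std::abs(r2 - r1);
    }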


From d7df89e8eadfcc5889dda15d3350fefeb69d7823 Mon Sep 17 00:00:00 2001
From: bhalimi <benjamin.halimi@cea.fr>
Date: Tue, 11 Feb 2025 15:50:16 +0000
Subject: [PATCH 56/60] handle PaddedConv2Ds in the QAT and BatchNorm insertion
 code

---
 setup.py                     |  2 +-
 src/QAT/QAT_LSQ.cpp          |  8 ++++++--
 src/recipes/QuantRecipes.cpp | 24 ++++++++----------------
 3 files changed, 15 insertions(+), 19 deletions(-)

diff --git a/setup.py b/setup.py
index 1bfc0ac..cde7c1e 100644
--- a/setup.py
+++ b/setup.py
@@ -63,7 +63,7 @@ class AidgePkgBuild(build_ext):
         cxx_compiler = os.environ.get("AIDGE_CXX_COMPILER", "g++")
         build_type = os.environ.get("AIDGE_BUILD_TYPE", "Release")
         asan = os.environ.get("AIDGE_ASAN", "OFF")
-        with_cuda = os.environ.get("AIDGE_WITH_CUDA", "OFF")
+        with_cuda = os.environ.get("AIDGE_WITH_CUDA", "ON") # default could be "OFF"
         cmake_arch = os.environ.get("AIDGE_CMAKE_ARCH", "")
 
         build_gen = os.environ.get("AIDGE_BUILD_GEN", "")
diff --git a/src/QAT/QAT_LSQ.cpp b/src/QAT/QAT_LSQ.cpp
index 0508fc7..ff1c44a 100644
--- a/src/QAT/QAT_LSQ.cpp
+++ b/src/QAT/QAT_LSQ.cpp
@@ -82,12 +82,14 @@ static bool initStepSize(std::shared_ptr<Node> quantizer)
 
 static void setupInputQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits)
 {
-    const auto matches = SinglePassGraphMatching(graphView).match("(Conv2D#|FC#)");
+    const auto matches = SinglePassGraphMatching(graphView).match("(Conv2D#|PaddedConv2D#|FC#)");
 
     for (const auto& match : matches) 
     {
         auto linearNode = match.graph->rootNode(); 
 
+        // Log::notice(" SET INPUT QUANTIZER : {} ", linearNode->type());
+
         std::pair<int, int> signedRange = {-std::pow(2, nbBits - 1), std::pow(2, nbBits - 1) - 1};
         std::pair<int, int> unsignedRange = {0, std::pow(2, nbBits) - 1};
 
@@ -135,7 +137,7 @@ static void setupInputQuantizers(std::shared_ptr<GraphView> graphView, size_t nb
 
 static void setupParamQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits)
 {
-    const auto matches = SinglePassGraphMatching(graphView).match("(Conv2D#|FC#)");
+    const auto matches = SinglePassGraphMatching(graphView).match("(Conv2D#|PaddedConv2D#|FC#)");
 
     std::pair<int, int> signedRange = {-std::pow(2, nbBits - 1), std::pow(2, nbBits - 1) - 1};
 
@@ -143,6 +145,8 @@ static void setupParamQuantizers(std::shared_ptr<GraphView> graphView, size_t nb
     {       
         auto linearNode = match.graph->rootNode(); 
 
+        // Log::notice(" SET PARAM QUANTIZER : {} ", linearNode->type());
+
         // TODO : double check this, and use createUniqueName()
         auto quantizerName = makeUniqueName(linearNode->name() + "_lsq_p", graphView);  
         auto quantizerNode = LSQ(signedRange, quantizerName); 
diff --git a/src/recipes/QuantRecipes.cpp b/src/recipes/QuantRecipes.cpp
index 7f01b24..f03eb46 100644
--- a/src/recipes/QuantRecipes.cpp
+++ b/src/recipes/QuantRecipes.cpp
@@ -9,24 +9,13 @@
  *
  ********************************************************************************/
 
-/*
-#include "aidge/data/Tensor.hpp"
-#include "aidge/graph/GraphView.hpp"
-#include "aidge/graph/Node.hpp"
-#include "aidge/scheduler/SequentialScheduler.hpp"
-#include "aidge/scheduler/Scheduler.hpp"
-#include "aidge/utils/Log.hpp"
-
-#include "aidge/operator/Producer.hpp"
-#include "aidge/operator/Mul.hpp"
-#include "aidge/operator/ReLU.hpp"
-#include "aidge/operator/Scaling.hpp"
-*/
 
 #include "aidge/operator/Conv.hpp"
 #include "aidge/operator/BatchNorm.hpp"
 //#include "aidge/quantization/PTQ/PTQ.hpp"
 #include "aidge/recipes/QuantRecipes.hpp"
+#include "aidge/graph/Node.hpp"
+
 
 namespace Aidge 
 {
@@ -55,11 +44,13 @@ void insertBatchNormNodes(std::shared_ptr<GraphView> graphView)
 {
     for (std::shared_ptr<Node> parentNode : graphView->getNodes())
     {
-        if (parentNode->type() == "Conv2D")
+        // TODO : use graph matching
+
+        if (parentNode->type() == "Conv2D" || parentNode->type() == "PaddedConv2D")
         {
-            std::shared_ptr<Conv_Op<2>> convOperator = std::static_pointer_cast<Conv_Op<2>> (parentNode->getOperator());
+            std::shared_ptr<OperatorTensor> convOperator = std::static_pointer_cast<OperatorTensor> (parentNode->getOperator());
             int nbChannels = convOperator->getInput(1)->dims()[0];
-            Log::info(" NB CHANNELS = {} ", nbChannels);
+            Log::notice(" NB CHANNELS = {} ", nbChannels);
 
             std::string batchnormNodeName = makeUniqueName(parentNode->name() + "_BN", graphView);
             std::shared_ptr<Node> batchnormNode = BatchNorm<2>(nbChannels, 1e-5, 0.1, false, batchnormNodeName);
@@ -118,6 +109,7 @@ std::string makeUniqueName(std::string baseName, std::shared_ptr<GraphView> grap
     return newName;
 }
 
+
 void sanitizeNodeNames(std::shared_ptr<GraphView> graphView)
 {
     for (std::shared_ptr<Node> node : graphView->getNodes())
-- 
GitLab
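
For the ranges visible as context in the setupInputQuantizers hunk above:
with nbBits = 8 the signed range is {-128, 127}, and the unsigned one (used
when a preceding ReLU is absorbed into the input quantizer) is {0, 255}. A
small self-contained check, using bit shifts in place of std::pow:

    #include <cstdio>
    #include <utility>

    // Same values as the std::pow expressions in setupInputQuantizers().
    static std::pair<int, int> quantRange(unsigned nbBits, bool isUnsigned)
    {
        if (isUnsigned)
            return { 0, (1 << nbBits) - 1 };
        return { -(1 << (nbBits - 1)), (1 << (nbBits - 1)) - 1 };
    }

    int main()
    {
        const auto s = quantRange(8, false);
        const auto u = quantRange(8, true);
        std::printf("signed = {%d, %d}, unsigned = {%d, %d}\n",
                    s.first, s.second, u.first, u.second);
        // prints: signed = {-128, 127}, unsigned = {0, 255}
        return 0;
    }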


From 3237e7a73e31ef1b66c554d7f8af5c7fbb8fbe66 Mon Sep 17 00:00:00 2001
From: bhalimi <benjamin.halimi@cea.fr>
Date: Fri, 14 Feb 2025 13:28:57 +0000
Subject: [PATCH 57/60] rename the noQuant parameters and fix the
 no_quantization default

---
 include/aidge/quantization/PTQ/PTQ.hpp | 8 ++++----
 python_binding/pybind_PTQ.cpp          | 4 ++--
 src/QAT/QAT_LSQ.cpp                    | 2 +-
 3 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/include/aidge/quantization/PTQ/PTQ.hpp b/include/aidge/quantization/PTQ/PTQ.hpp
index 4fc38bc..bfe671e 100644
--- a/include/aidge/quantization/PTQ/PTQ.hpp
+++ b/include/aidge/quantization/PTQ/PTQ.hpp
@@ -124,11 +124,11 @@ namespace Aidge {
      * @brief Quantize an already normalized (in term of parameters and activations) network.
      * @param graphView The GraphView to be quantized.
      * @param nbBits The desired number of bits of the quantization.
-     * @param applyRounding Whether to apply the rounding operations or not.
+     * @param noQuant Whether to skip the rounding operations or not.
      * @param optimizeSigns Whether to take account of the IO signs of the operators or not.
      * @param verbose Whether to print the sign map or not.
      */
-    void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits, bool applyRounding, bool optimizeSigns, bool verbose);
+    void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits, bool noQuant, bool optimizeSigns, bool verbose);
 
     /**
      * @brief Main quantization routine. Performs every step of the quantization pipeline.
@@ -136,12 +136,12 @@ namespace Aidge {
      * @param nbBits The desired number of bits of the quantization.
      * @param inputDataSet The input dataset on which the value ranges are computed.
      * @param clippingMode: Type of the clipping optimization. Can be either 'MAX', 'MSE', 'AA' or 'KL'.
-     * @param applyRounding Whether to apply the rounding operations or not.
+     * @param noQuant Whether to skip the rounding operations or not.
      * @param optimizeSigns Whether to take account of the IO signs of the operators or not.
      * @param singleShift Whether to convert the scaling factors into powers of two. If true the approximations are compensated using the previous nodes weights.
      * @param verbose Whether to print internal informations about the quantization process.
      */
-    void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits, std::vector<std::shared_ptr<Tensor>> inputDataSet, Clipping clippingMode, bool applyRounding, bool optimizeSigns, bool singleShift, bool useCuda, bool verbose);
+    void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits, std::vector<std::shared_ptr<Tensor>> inputDataSet, Clipping clippingMode, bool noQuant, bool optimizeSigns, bool singleShift, bool useCuda, bool verbose);
 
     /**
      * @brief Compute the weight ranges of every affine node. Provided for debugging purposes.
diff --git a/python_binding/pybind_PTQ.cpp b/python_binding/pybind_PTQ.cpp
index b5193bd..1de7976 100644
--- a/python_binding/pybind_PTQ.cpp
+++ b/python_binding/pybind_PTQ.cpp
@@ -78,7 +78,7 @@ void init_PTQ(py::module &m) {
     :type value_ranges: list of float.
     )mydelimiter");
 
-    m.def("quantize_normalized_network", &quantizeNormalizedNetwork, py::arg("network"), py::arg("nb_bits"), py::arg("no_quant")=false, py::arg("optimize_signs"), py::arg("verbose") = false,
+    m.def("quantize_normalized_network", &quantizeNormalizedNetwork, py::arg("network"), py::arg("nb_bits"), py::arg("no_quantization")=false, py::arg("optimize_signs"), py::arg("verbose") = false,
     R"mydelimiter(
     Quantize an already normalized (in term of parameters and activations) network.
     :param network: The GraphView to be quantized.
@@ -93,7 +93,7 @@ void init_PTQ(py::module &m) {
     :type verbose: bool
     )mydelimiter");
 
-    m.def("quantize_network", &quantizeNetwork ,py::arg("network"), py::arg("nb_bits"), py::arg("input_dataset"), py::arg("clipping_mode") = Clipping::MAX , py::arg("no_quantization") = true, py::arg("optimize_signs") = false, py::arg("single_shift") = false,  py::arg("use_cuda") = false, py::arg("verbose") = false,
+    m.def("quantize_network", &quantizeNetwork ,py::arg("network"), py::arg("nb_bits"), py::arg("input_dataset"), py::arg("clipping_mode") = Clipping::MAX , py::arg("no_quantization") = false, py::arg("optimize_signs") = false, py::arg("single_shift") = false,  py::arg("use_cuda") = false, py::arg("verbose") = false,
     R"mydelimiter(
     Main quantization routine. Performs every step of the quantization pipeline.
     :param network: The GraphView to be quantized.
diff --git a/src/QAT/QAT_LSQ.cpp b/src/QAT/QAT_LSQ.cpp
index ff1c44a..da09d62 100644
--- a/src/QAT/QAT_LSQ.cpp
+++ b/src/QAT/QAT_LSQ.cpp
@@ -75,7 +75,7 @@ static bool initStepSize(std::shared_ptr<Node> quantizer)
 
     stepSizeProducer->getOperator()->setOutput(0, stepSizeTensor);
 
-    Log::info(" [ INIT STEP SIZE = {} ] ", stepSize);
+    Log::notice(" [ INIT STEP SIZE = {} ] ", stepSize);
 
     return false;
 }
-- 
GitLab


From c43e17242f1dc5974a43aa748b953eced73031f3 Mon Sep 17 00:00:00 2001
From: bhalimi <benjamin.halimi@cea.fr>
Date: Fri, 14 Feb 2025 13:43:58 +0000
Subject: [PATCH 58/60] use the scalar tensor constructor

---
 src/PTQ/CLE.cpp     | 2 +-
 src/PTQ/PTQ.cpp     | 4 ++--
 src/QAT/QAT_LSQ.cpp | 3 ++-
 3 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/src/PTQ/CLE.cpp b/src/PTQ/CLE.cpp
index 40b9e42..28858d0 100644
--- a/src/PTQ/CLE.cpp
+++ b/src/PTQ/CLE.cpp
@@ -49,7 +49,7 @@ static void rescaleTensor(std::shared_ptr<Tensor> tensor, double scaling)
     mulOp.setDataType(tensor->dataType());
     mulOp.setBackend(tensor->backend());
 
-    std::shared_ptr<Aidge::Tensor> scalingTensor = std::make_shared<Aidge::Tensor>(Aidge::Array1D<double, 1> {scaling});
+    std::shared_ptr<Aidge::Tensor> scalingTensor = std::make_shared<Aidge::Tensor>(scaling);
     scalingTensor->setDataType(tensor->dataType());
     scalingTensor->setBackend(tensor->backend());
 
diff --git a/src/PTQ/PTQ.cpp b/src/PTQ/PTQ.cpp
index 09b039f..7c29ee0 100644
--- a/src/PTQ/PTQ.cpp
+++ b/src/PTQ/PTQ.cpp
@@ -78,7 +78,7 @@ static void rescaleTensor(std::shared_ptr<Tensor> tensor, double scaling)
     mulOp.setDataType(tensor->dataType());
     mulOp.setBackend(tensor->backend());
 
-    std::shared_ptr<Aidge::Tensor> scalingTensor = std::make_shared<Aidge::Tensor>(Aidge::Array1D<double, 1> {scaling});
+    std::shared_ptr<Aidge::Tensor> scalingTensor = std::make_shared<Aidge::Tensor>(scaling);
     scalingTensor->setDataType(tensor->dataType());
     scalingTensor->setBackend(tensor->backend());
 
@@ -932,7 +932,7 @@ static void insertCompensationNodes(std::shared_ptr<GraphView> graphView, std::u
                 // Add the coeff producer to the multiplier node
 
                 std::shared_ptr<Node> coeffProducer = addProducer(mulNode, 1, {1}, ""); 
-                std::shared_ptr<Tensor> coeffTensor = std::make_shared<Tensor>(Array1D<double, 1> {signedMax});
+                std::shared_ptr<Tensor> coeffTensor = std::make_shared<Tensor>(signedMax);
                 coeffProducer->getOperator()->setOutput(0, coeffTensor);
 
                 coeffProducer->getOperator()->setDataType(DataType::Float64);
diff --git a/src/QAT/QAT_LSQ.cpp b/src/QAT/QAT_LSQ.cpp
index da09d62..6eae077 100644
--- a/src/QAT/QAT_LSQ.cpp
+++ b/src/QAT/QAT_LSQ.cpp
@@ -65,7 +65,8 @@ static bool initStepSize(std::shared_ptr<Node> quantizer)
     float inputStd = getTensorStd(quantizerOp->getInput(0));
     float stepSize = 8.0f * (inputStd / (quantizerOp->range().second));
 
-    auto stepSizeTensor = std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}}));
+    // TODO : use the scalar constructor
+    auto stepSizeTensor = std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}})); 
 
     // XXX Manage backend here ?
     stepSizeTensor->setBackend(quantizerOp->getInput(0)->backend());
-- 
GitLab


From ba6d6d8b62e3c4ccd98d0e6aa7108d4fceffbd23 Mon Sep 17 00:00:00 2001
From: Olivier BICHLER <olivier.bichler@cea.fr>
Date: Mon, 17 Feb 2025 16:09:29 +0100
Subject: [PATCH 59/60] Fix coverage-related issue and the inputLenght typo

---
 CMakeLists.txt                                            | 1 -
 include/aidge/backend/cpu/operator/FixedQImpl_kernels.hpp | 8 ++++----
 2 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 17dd74a..afb882a 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -182,7 +182,6 @@ endif()
 
 # Coverage flags for GCC
 if(CMAKE_COMPILER_IS_GNUCXX AND COVERAGE)
-    include(CodeCoverage)
     append_coverage_compiler_flags()
 endif()
 
diff --git a/include/aidge/backend/cpu/operator/FixedQImpl_kernels.hpp b/include/aidge/backend/cpu/operator/FixedQImpl_kernels.hpp
index 9d7a106..935d8f0 100644
--- a/include/aidge/backend/cpu/operator/FixedQImpl_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/FixedQImpl_kernels.hpp
@@ -23,7 +23,7 @@ void FixedQImpl_cpu_forward_kernel(
     std::size_t nbBits,
     float span_,
     bool isOutputUnsigned,
-    std::size_t inputLenght,
+    std::size_t inputLength,
     const void* input_,
     void* output_) 
 {
@@ -40,7 +40,7 @@ void FixedQImpl_cpu_forward_kernel(
     const I* input = static_cast<const I*>(input_);
     O* output = static_cast<O*>(output_);
 
-    for (std::size_t i = 0; i < inputLenght; ++i) {
+    for (std::size_t i = 0; i < inputLength; ++i) {
         I clipped = std::max(lower, std::min(input[i], upper));
         output[i] = std::round(clipped / stepSize) * stepSize;
     }
@@ -49,14 +49,14 @@ void FixedQImpl_cpu_forward_kernel(
 
 template <class GI, class GO>
 void FixedQImpl_cpu_backward_kernel(
-    const std::size_t inputLenght,
+    const std::size_t inputLength,
     const void* grad_output_,
 	void* grad_input_) 
 {
     const GO* grad_output = static_cast<const GO*>(grad_output_);
     GI* grad_input = static_cast<GI*>(grad_input_);
 
-    for (std::size_t i = 0; i < inputLenght; ++i) {
+    for (std::size_t i = 0; i < inputLength; ++i) {
         // Straight Through Estimator
         grad_input[i] = grad_output[i];
     }
-- 
GitLab
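
The FixedQ kernels above implement fake quantization with a straight-through
estimator. A minimal scalar sketch of the same pattern; the derivation of
stepSize, lower and upper from span_ and nbBits sits outside the visible
hunks, so they appear here as plain parameters:

    #include <algorithm>
    #include <cmath>

    // Forward: clip to [lower, upper], then snap to the stepSize grid.
    float fakeQuantize(float x, float stepSize, float lower, float upper)
    {
        const float clipped = std::max(lower, std::min(x, upper));
        return std::round(clipped / stepSize) * stepSize;
    }

    // Backward (straight-through estimator): the rounding is treated as the
    // identity, so the output gradient passes through unchanged.
    float fakeQuantizeBackward(float gradOutput)
    {
        return gradOutput;
    }

For instance, with stepSize = 0.5 and range [-2, 2], an input of 1.3 is kept
by the clip and rounded to 1.5, while 3.0 is first clipped to 2.0.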


From 68e85246993cb6bf2ef64ed14b4f791581c3ca6f Mon Sep 17 00:00:00 2001
From: Olivier BICHLER <olivier.bichler@cea.fr>
Date: Mon, 17 Feb 2025 16:14:00 +0100
Subject: [PATCH 60/60] Fixed bad merge

---
 pyproject.toml | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index bf4155e..088200e 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -9,7 +9,6 @@ readme = "README.md"
 license = { file = "LICENSE" }
 classifiers = [
     "Development Status :: 2 - Pre-Alpha",
-<<<<<<< HEAD
     "Intended Audience :: Developers",
     "Intended Audience :: Education",
     "Intended Audience :: Science/Research",
@@ -28,10 +27,6 @@ classifiers = [
     "Topic :: Scientific/Engineering :: Artificial Intelligence",
     "Topic :: Software Development"
 ]
-=======
-    "Programming Language :: Python :: 3"
-    ]
->>>>>>> 22e47ad9fb629f85ed4f1c5fa981c1d195c0201b
 dynamic = ["version"] # defined by pbr
 
 [build-system]
-- 
GitLab