From 616bc9b95666ba1d4303c75813436c2eaf8575c4 Mon Sep 17 00:00:00 2001 From: thibault allenet <thibault.allenet@cea.fr> Date: Mon, 9 Dec 2024 16:40:56 +0000 Subject: [PATCH 01/60] Fix Optimized Signs --- src/PTQ/PTQ.cpp | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/PTQ/PTQ.cpp b/src/PTQ/PTQ.cpp index 76fe8f2..4f88aed 100644 --- a/src/PTQ/PTQ.cpp +++ b/src/PTQ/PTQ.cpp @@ -699,8 +699,12 @@ std::map<std::string, std::pair<bool, bool>> computeSignMap(std::shared_ptr<Grap { // Thoses nodes always have a single parent std::shared_ptr<Node> parent = node->getParent(0); - signMap[node->name()].first = signMap[parent->name()].second; - signMap[node->name()].second = signMap[node->name()].first; + if (parent) + { + signMap[node->name()].first = signMap[parent->name()].second; + signMap[node->name()].second = signMap[node->name()].first; + } + } } -- GitLab From e780f3a40c4bb35663282499885e36f0fb65c1e2 Mon Sep 17 00:00:00 2001 From: bhalimi <benjamin.halimi@cea.fr> Date: Tue, 17 Dec 2024 15:40:19 +0000 Subject: [PATCH 02/60] fix the PTQ for float64 support and multi-outputs handling --- include/aidge/quantization/PTQ/Clipping.hpp | 8 +- include/aidge/quantization/PTQ/PTQ.hpp | 8 +- include/aidge/quantization/PTQ/PTQMetaOps.hpp | 10 +- src/PTQ/Clipping.cpp | 69 +++--- src/PTQ/PTQ.cpp | 202 +++++++++++------- src/PTQ/PTQMetaOps.cpp | 37 ++-- 6 files changed, 198 insertions(+), 136 deletions(-) diff --git a/include/aidge/quantization/PTQ/Clipping.hpp b/include/aidge/quantization/PTQ/Clipping.hpp index 08a0b0a..d0622f4 100644 --- a/include/aidge/quantization/PTQ/Clipping.hpp +++ b/include/aidge/quantization/PTQ/Clipping.hpp @@ -36,7 +36,7 @@ namespace Aidge * @param inputDataSet The input dataset, consisting of a vector of input samples. * @return A map associating each node name to it's corresponding activation histogram. */ - std::map<std::string, std::vector<int>> computeHistograms(std::map<std::string, float> valueRanges, int nbBins, std::shared_ptr<GraphView> graphView, std::vector<std::shared_ptr<Tensor>> inputDataSet, bool useCuda); + std::map<std::string, std::vector<int>> computeHistograms(std::map<std::string, double> valueRanges, int nbBins, std::shared_ptr<GraphView> graphView, std::vector<std::shared_ptr<Tensor>> inputDataSet, bool useCuda); /** * @brief Given an input activation histogram, compute the optimal clipping value in the sense of the Lp norm. @@ -45,7 +45,7 @@ namespace Aidge * @param exponent: The exponent of the Lp norm (e.g. 2 for the MSE). * @return The optimal clipping value. */ - float computeMEClipping(std::vector<int> histogram, std::uint8_t nbBits, float exponent); + double computeMEClipping(std::vector<int> histogram, std::uint8_t nbBits, double exponent); /** * @brief Given an input activation histogram, compute the optimal clipping value in the sense of the KL divergence. @@ -53,7 +53,7 @@ namespace Aidge * @param nbBits: The quantization number of bits. * @return The optimal clipping value. */ - float computeKLClipping(std::vector<int> histogram, std::uint8_t nbBits); + double computeKLClipping(std::vector<int> histogram, std::uint8_t nbBits); /** * @brief Return a corrected map of the provided activation ranges. @@ -67,7 +67,7 @@ namespace Aidge * @param verbose Whether to print the clipping values or not. * @return The corrected map associating each provided node to its clipped range. 
*/ - std::map<std::string, float> adjustRanges(Clipping clippingMode, std::map<std::string, float> valueRanges, std::uint8_t nbBits, std::shared_ptr<GraphView> graphView, std::vector<std::shared_ptr<Tensor>> inputDataSet, bool useCuda, bool verbose); + std::map<std::string, double> adjustRanges(Clipping clippingMode, std::map<std::string, double> valueRanges, std::uint8_t nbBits, std::shared_ptr<GraphView> graphView, std::vector<std::shared_ptr<Tensor>> inputDataSet, bool useCuda, bool verbose); } diff --git a/include/aidge/quantization/PTQ/PTQ.hpp b/include/aidge/quantization/PTQ/PTQ.hpp index 52d83d6..d2b8b7f 100644 --- a/include/aidge/quantization/PTQ/PTQ.hpp +++ b/include/aidge/quantization/PTQ/PTQ.hpp @@ -31,7 +31,7 @@ namespace Aidge { /** * @brief Set of the types of the nodes which does not affect the PTQ process */ - static const std::set<std::string> seamlessNodeTypes({"LeakyReLU", "Pad2D", "MaxPooling2D", "AvgPooling2D", "PaddedMaxPooling2D", "PaddedAvgPooling2D", "GlobalAveragePooling", "Reshape", "Transpose", "Gather"}); + static const std::set<std::string> seamlessNodeTypes({"LeakyReLU", "Pad2D", "MaxPooling2D", "AvgPooling2D", "PaddedMaxPooling2D", "PaddedAvgPooling2D", "GlobalAveragePooling", "Reshape", "Transpose", "Gather", "Resize"}); /** * @brief Set of the types of the nodes that merge multiple branches into one @@ -98,7 +98,7 @@ namespace Aidge { * @param scalingNodesOnly Whether to restrain the retreival of the ranges to scaling nodes only or not. * @return A map associating each affine node name to it's corresponding output range. */ - std::map<std::string, float> computeRanges(std::shared_ptr<GraphView> graphView, std::vector<std::shared_ptr<Tensor>> inputDataSet, bool scalingNodesOnly, bool useCuda); + std::map<std::string, double> computeRanges(std::shared_ptr<GraphView> graphView, std::vector<std::shared_ptr<Tensor>> inputDataSet, bool scalingNodesOnly, bool useCuda); /** * @brief Normalize the activations of each affine node so that they fit in the [-1:1] range. @@ -106,7 +106,7 @@ namespace Aidge { * @param graphView The GraphView containing the affine nodes. * @param valueRanges The node output value ranges computed over the calibration dataset. */ - void normalizeActivations(std::shared_ptr<GraphView> graphView, std::map<std::string, float> valueRanges); + void normalizeActivations(std::shared_ptr<GraphView> graphView, std::map<std::string, double> valueRanges); /** * @brief For each node, compute the sign of its input and output values. @@ -145,7 +145,7 @@ namespace Aidge { * @param graphView The GraphView containing the affine nodes. * @return A map associating each affine node name to it's corresponding weight range. */ - std::map<std::string, float> getWeightRanges(std::shared_ptr<GraphView> graphView); + std::map<std::string, double> getWeightRanges(std::shared_ptr<GraphView> graphView); /** * @brief Clear the affine nodes biases. Provided form debugging purposes. diff --git a/include/aidge/quantization/PTQ/PTQMetaOps.hpp b/include/aidge/quantization/PTQ/PTQMetaOps.hpp index c4f2ac7..29bb7f2 100644 --- a/include/aidge/quantization/PTQ/PTQMetaOps.hpp +++ b/include/aidge/quantization/PTQ/PTQMetaOps.hpp @@ -33,7 +33,7 @@ /// @param clip_max The maximum value for the clip operation. /// @param name The name of the meta-operator node created. /// @return A shared pointer to an instance of the meta-operator node. 
-std::shared_ptr<Aidge::Node> Quantizer(float scalingFactor, float clip_min,float clip_max,const std::string& name); +std::shared_ptr<Aidge::Node> Quantizer(double scalingFactor, double clip_min,double clip_max,const std::string& name); /// @brief The purpose of Scaling is to encapsulate the Mul operator and tag it as a PTQ node rather than a regular Mul operator. /// Therefore, this meta-operator consists solely of a [Mul] operation. @@ -41,7 +41,7 @@ std::shared_ptr<Aidge::Node> Quantizer(float scalingFactor, float clip_min,float /// @param scalingFactor The scaling factor to apply to the input (a scalar to multiply the input with). /// @param name The name of the meta-operator node created. /// @return A shared pointer to an instance of the scaling node. -std::shared_ptr<Aidge::Node> Scaling(float scalingFactor,const std::string& name = ""); +std::shared_ptr<Aidge::Node> Scaling(double scalingFactor,const std::string& name = ""); /// @brief Updates the scaling factor of a PTQ meta-operator node, allowing for dynamic adjustment of the scaling parameter. /// This function sets a new scaling factor for a specified meta-operator node, modifying the scalar applied in the [Mul] operation. @@ -50,7 +50,7 @@ std::shared_ptr<Aidge::Node> Scaling(float scalingFactor,const std::string& name /// @param MetaOpNode A shared pointer to the PTQ meta-operator node whose scaling factor will be updated. /// @param newScalingFactor The new scaling factor to apply to the meta-operator node. /// @return True if the scaling factor was successfully updated, false if the operation failed (e.g., if MetaOpNode is null or incompatible). -bool updateScalingFactor(std::shared_ptr<Aidge::Node> MetaOpNode, float newScalingFactor); +bool updateScalingFactor(std::shared_ptr<Aidge::Node> MetaOpNode, double newScalingFactor); /// @brief Retrieves the current scaling factor of a PTQ meta-operator node. /// This function returns the scaling factor associated with the specified PTQ meta-operator node, @@ -58,7 +58,7 @@ bool updateScalingFactor(std::shared_ptr<Aidge::Node> MetaOpNode, float newScali /// /// @param MetaOpNode A shared pointer to the PTQ meta-operator node whose scaling factor is being queried. /// @return The scaling factor currently applied to the meta-operator node, or -1 if the operation fails (e.g., if MetaOpNode is null or incompatible). -float getScalingFactor(std::shared_ptr<Aidge::Node> MetaOpNode); +double getScalingFactor(std::shared_ptr<Aidge::Node> MetaOpNode); /// @brief Sets the clip range for an existing Quantizer node by specifying minimum and maximum clipping values. /// This function modifies the clip range of a Quantizer node, allowing adjustment of the range within which values are clipped @@ -69,6 +69,6 @@ float getScalingFactor(std::shared_ptr<Aidge::Node> MetaOpNode); /// @param min The minimum value for the clip range. Values below this will be clipped to this minimum. /// @param max The maximum value for the clip range. Values above this will be clipped to this maximum. /// @return True if the clip range was successfully set, false if the operation failed (e.g., if QuantizerNode is null). 
-bool setClipRange(std::shared_ptr<Aidge::Node> QuantizerNode, float min, float max); +bool setClipRange(std::shared_ptr<Aidge::Node> QuantizerNode, double min, double max); #endif /* AIDGE_QUANTIZATION_PTQ_PTQMETAOPS_H_ */ diff --git a/src/PTQ/Clipping.cpp b/src/PTQ/Clipping.cpp index e001408..f8765f3 100644 --- a/src/PTQ/Clipping.cpp +++ b/src/PTQ/Clipping.cpp @@ -19,7 +19,7 @@ namespace Aidge { -std::map<std::string, std::vector<int>> computeHistograms(std::map<std::string, float> valueRanges, int nbBins, std::shared_ptr<GraphView> graphView, std::vector<std::shared_ptr<Tensor>> inputDataSet, bool useCuda) +std::map<std::string, std::vector<int>> computeHistograms(std::map<std::string, double> valueRanges, int nbBins, std::shared_ptr<GraphView> graphView, std::vector<std::shared_ptr<Tensor>> inputDataSet, bool useCuda) { if (useCuda) graphView->setBackend("cuda"); @@ -72,7 +72,7 @@ std::map<std::string, std::vector<int>> computeHistograms(std::map<std::string, bool isInsideRanges = (valueRanges.find(node->name()) != valueRanges.end()); if (isInsideRanges) { - float valueRange = valueRanges[node->name()]; + double valueRange = valueRanges[node->name()]; std::shared_ptr<Operator> nodeOperator = node->getOperator(); std::shared_ptr<Tensor> valueTensor = std::static_pointer_cast<Tensor> (nodeOperator->getRawOutput(0)); @@ -80,15 +80,17 @@ std::map<std::string, std::vector<int>> computeHistograms(std::map<std::string, if (useCuda) valueTensor->setBackend("cpu"); - float * castedTensor = static_cast<float *> (valueTensor->getImpl()->rawPtr()); + double * castedTensor = static_cast<double *> (valueTensor->getImpl()->rawPtr()); std::vector<int> nodeHistogram = histograms[node->name()]; for(std::size_t i = 0; i < valueTensor->size(); i++) { - int bin = std::round(std::abs(castedTensor[i] / valueRange * nbBins)); + std::size_t bin = std::round(std::abs(castedTensor[i] / valueRange * nbBins)); + bin = std::min(bin, nodeHistogram.size() - 1); nodeHistogram[bin]++; } - histograms[node->name()] = nodeHistogram; + + histograms[node->name()] = nodeHistogram; if (useCuda) valueTensor->setBackend("cuda"); @@ -105,52 +107,52 @@ std::map<std::string, std::vector<int>> computeHistograms(std::map<std::string, return histograms; } -float computeMEClipping(std::vector<int> histogram, std::uint8_t nbBits, float exponent) +double computeMEClipping(std::vector<int> histogram, std::uint8_t nbBits, double exponent) { int nbBins = histogram.size(); int nbIter = 100; int signedMax = (1 << (nbBits - 1)) - 1; - std::vector<float> clippingErrors; + std::vector<double> clippingErrors; for (int it = 1; it < nbIter; it++) { // Compute the rounding cost of this particular clipping ... 
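         // For a candidate clipping c in (0, 1], each histogram bin stands for
         // values near v = (bin + 0.5) / nbBins, and quantizing with the scale
         // s = signedMax / c maps v to round(v * s) / s, clipped at c. The loop
         // below therefore accumulates sum_bins |clipped - v|^exponent weighted
         // by the bin population (exponent = 2 recovers the MSE criterion).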
- float accumulatedError = 0.0; - float clipping = it / static_cast<float> (nbIter); + double accumulatedError = 0.0; + double clipping = it / static_cast<double> (nbIter); for (int bin = 0; bin < nbBins; bin++) { - float value = (bin + 0.5) / nbBins; - float scaling = signedMax / clipping; - float rounded = std::round(value * scaling) / scaling; - float clipped = std::min(clipping, rounded); + double value = (bin + 0.5) / nbBins; + double scaling = signedMax / clipping; + double rounded = std::round(value * scaling) / scaling; + double clipped = std::min(clipping, rounded); - float approxError = std::abs(clipped - value); + double approxError = std::abs(clipped - value); accumulatedError += std::pow(approxError, exponent) * histogram[bin]; } clippingErrors.push_back(accumulatedError); } - std::vector<float>::iterator it = std::min_element(clippingErrors.begin(), clippingErrors.end()); - float bestClipping = static_cast<float> (std::distance(clippingErrors.begin(), it)) / static_cast<float> (nbIter); + std::vector<double>::iterator it = std::min_element(clippingErrors.begin(), clippingErrors.end()); + double bestClipping = static_cast<double> (std::distance(clippingErrors.begin(), it)) / static_cast<double> (nbIter); return bestClipping; } -float computeKLClipping(std::vector<int> refHistogram, std::uint8_t nbBits) +double computeKLClipping(std::vector<int> refHistogram, std::uint8_t nbBits) { // KL Clipping int nbIter = 100; int signedMax = (1 << (nbBits - 1)) - 1; - float refNorm = 0; + double refNorm = 0; for (int n : refHistogram) - refNorm += static_cast<float> (n); + refNorm += static_cast<double> (n); - std::vector<float> clippingErrors; + std::vector<double> clippingErrors; for (int it = 1; it < nbIter; it++) { - float clipping = it / static_cast<float> (nbIter); + double clipping = it / static_cast<double> (nbIter); // Create the histogram for this particular clipping ... 
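         // Each reference bin's center v = (refBin + 0.5) / refHistogram.size()
         // lands in quantBin = floor(v / clipping * signedMax); every value
         // beyond the clip collapses into the last quantized bin, which is the
         // distortion that the KL divergence computed below penalizes.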
@@ -160,7 +162,7 @@ float computeKLClipping(std::vector<int> refHistogram, std::uint8_t nbBits) for (std::size_t refBin = 0; refBin < refHistogram.size(); refBin++) { - float value = (static_cast<float> (refBin) + 0.5f) / static_cast<float> (refHistogram.size()); + double value = (static_cast<double> (refBin) + 0.5f) / static_cast<double> (refHistogram.size()); int quantBin = std::floor(value / clipping * signedMax); quantBin = std::min(quantBin, signedMax-1); quantHistogram[quantBin] += refHistogram[refBin]; @@ -168,10 +170,10 @@ float computeKLClipping(std::vector<int> refHistogram, std::uint8_t nbBits) // Compute the mass of the histogram - float quantNorm = 0; + double quantNorm = 0; for (std::size_t refBin = 0; refBin < refHistogram.size(); refBin++) { - float value = (static_cast<float> (refBin) + 0.5f) / static_cast<float> (refHistogram.size()); + double value = (static_cast<double> (refBin) + 0.5f) / static_cast<double> (refHistogram.size()); int quantBin = std::floor(value / clipping * signedMax); if (quantBin < static_cast<int> (quantHistogram.size())) quantNorm += quantHistogram[quantBin]; @@ -179,15 +181,15 @@ float computeKLClipping(std::vector<int> refHistogram, std::uint8_t nbBits) // Compute the KL divergence - float accumulatedError = 0.0; + double accumulatedError = 0.0; for (std::size_t refBin = 0; refBin < refHistogram.size(); refBin++) { - float value = (static_cast<float> (refBin) + 0.5f) / static_cast<float> (refHistogram.size()); + double value = (static_cast<double> (refBin) + 0.5f) / static_cast<double> (refHistogram.size()); int quantBin = std::floor(value / clipping * signedMax); - float p = static_cast<float> (refHistogram[refBin]) / refNorm; - float q = (quantBin < static_cast<int> (quantHistogram.size())) ? - static_cast<float> (quantHistogram[quantBin]) / quantNorm : 0; + double p = static_cast<double> (refHistogram[refBin]) / refNorm; + double q = (quantBin < static_cast<int> (quantHistogram.size())) ? + static_cast<double> (quantHistogram[quantBin]) / quantNorm : 0; if (p != 0 && q != 0) accumulatedError += q * std::log(q / p); @@ -196,16 +198,16 @@ float computeKLClipping(std::vector<int> refHistogram, std::uint8_t nbBits) clippingErrors.push_back(accumulatedError); } - std::vector<float>::iterator it = std::min_element(clippingErrors.begin() + 1, clippingErrors.end()); - float bestClipping = static_cast<float> (std::distance(clippingErrors.begin(), it)) / static_cast<float> (nbIter); + std::vector<double>::iterator it = std::min_element(clippingErrors.begin() + 1, clippingErrors.end()); + double bestClipping = static_cast<double> (std::distance(clippingErrors.begin(), it)) / static_cast<double> (nbIter); return bestClipping; } -std::map<std::string, float> adjustRanges(Clipping clippingMode, std::map<std::string, float> valueRanges, std::uint8_t nbBits, std::shared_ptr<GraphView> graphView, std::vector<std::shared_ptr<Tensor>> inputDataSet, bool useCuda, bool verbose) +std::map<std::string, double> adjustRanges(Clipping clippingMode, std::map<std::string, double> valueRanges, std::uint8_t nbBits, std::shared_ptr<GraphView> graphView, std::vector<std::shared_ptr<Tensor>> inputDataSet, bool useCuda, bool verbose) { - float clipping = 1.0f; + double clipping = 1.0f; int nbBins = (1 << (nbBits + 4)) ; // XXX Enhance this !!! 
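     // With nbBits = 8, for instance, this allocates 1 << 12 = 4096 bins,
     // i.e. 16 histogram bins per level of the 256-level quantized range.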
@@ -213,6 +215,7 @@ std::map<std::string, float> adjustRanges(Clipping clippingMode, std::map<std::s { if (verbose) Log::info(" === CLIPPING VALUES === "); + std::map<std::string, std::vector<int>> histograms = computeHistograms(valueRanges, nbBins, graphView, inputDataSet, useCuda); for (std::shared_ptr<Node> node : graphView->getNodes()) diff --git a/src/PTQ/PTQ.cpp b/src/PTQ/PTQ.cpp index 76fe8f2..bfc5e3f 100644 --- a/src/PTQ/PTQ.cpp +++ b/src/PTQ/PTQ.cpp @@ -66,20 +66,20 @@ bool checkArchitecture(std::shared_ptr<GraphView> graphView) return true; } -static void fillTensor(std::shared_ptr<Tensor> tensor, float value) +static void fillTensor(std::shared_ptr<Tensor> tensor, double value) { // Get the tensor data pointer - float * castedTensor = static_cast <float *> (tensor->getImpl()->rawPtr()); + double * castedTensor = static_cast <double *> (tensor->getImpl()->rawPtr()); // Fill the tensor for(std::size_t i = 0; i < tensor->size(); i++) castedTensor[i] = value; } -static void rescaleTensor(std::shared_ptr<Tensor> tensor, float scaling) +static void rescaleTensor(std::shared_ptr<Tensor> tensor, double scaling) { // Get the tensor data pointer - float * castedTensor = static_cast <float *> (tensor->getImpl()->rawPtr()); + double * castedTensor = static_cast <double *> (tensor->getImpl()->rawPtr()); // Rescale the tensor for(std::size_t i = 0; i < tensor->size(); i++) @@ -89,20 +89,20 @@ static void rescaleTensor(std::shared_ptr<Tensor> tensor, float scaling) static void roundTensor(std::shared_ptr<Tensor> tensor) { // Get the tensor data pointer - float * castedTensor = static_cast <float *> (tensor->getImpl()->rawPtr()); + double * castedTensor = static_cast <double *> (tensor->getImpl()->rawPtr()); // Rescale the tensor for(std::size_t i = 0; i < tensor->size(); i++) castedTensor[i] = std::nearbyint(castedTensor[i]);//Round } -static float getTensorAbsoluteMax(std::shared_ptr <Tensor> tensor) +static double getTensorAbsoluteMax(std::shared_ptr <Tensor> tensor) { // Get the tensor data pointer and edit it - float * castedTensor = static_cast<float*>(tensor->getImpl()->rawPtr()); + double * castedTensor = static_cast<double*>(tensor->getImpl()->rawPtr()); // Get the tensor absolute max value - float maxValue = 0.0f; + double maxValue = 0.0f; for(std::size_t i = 0; i < tensor->size(); ++i) { if(std::fabs(castedTensor[i]) > maxValue) { maxValue = std::fabs(castedTensor[i]); @@ -187,6 +187,7 @@ void prepareNetwork(std::shared_ptr<GraphView> graphView) bool containsBatchNorm = false; std::vector<std::shared_ptr<Node>> nodeVector = retrieveNodeVector(graphView); + for (std::shared_ptr<Node> node : nodeVector) if (node->type() == "BatchNorm") { @@ -200,6 +201,12 @@ void prepareNetwork(std::shared_ptr<GraphView> graphView) popSoftMax(graphView); } +// TODO : enhance this by modifying OperatorImpl in "core" ... +static DataType getDataType(std::shared_ptr<Node> node) +{ + auto op = std::static_pointer_cast<OperatorTensor>(node->getOperator()); + return op->getOutput(0)->dataType(); +} // XXX HERE : Branches containing only Seamless nodes should be considered as residual too !!! void insertResidualNodes(std::shared_ptr<GraphView> graphView) @@ -217,6 +224,7 @@ void insertResidualNodes(std::shared_ptr<GraphView> graphView) { std::shared_ptr<Node> parentNode = node->getParent(i); bool parentIsForking = (parentNode->getChildren().size() > 1); + if (parentIsForking) { // temporary verbose ... 
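                 // A unit Scaling node is inserted on each extra branch leaving
                 // a forking parent (next hunk), so that every residual path
                 // carries its own multiplier for the normalization passes to
                 // adjust independently.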
@@ -224,8 +232,9 @@ void insertResidualNodes(std::shared_ptr<GraphView> graphView) Log::info(" ### inserting multiplicative node ..."); std::string residualNodeName = makeUniqueName(parentNode->name() + "_Res", graphView); - std::shared_ptr<Node> residualNode = Scaling(1.0,residualNodeName); - residualNode->getOperator()->setDataType(DataType::Float32); + std::shared_ptr<Node> residualNode = Scaling(1.0, residualNodeName); + + residualNode->getOperator()->setDataType(DataType::Float64); //getDataType(parentNode) residualNode->getOperator()->setBackend("cpu"); graphView->insertParent(node, residualNode, i, 0, 0); @@ -255,7 +264,8 @@ void insertScalingNodes(std::shared_ptr<GraphView> graphView) { std::string scalingNodeName = makeUniqueName(parentNode->name() + "_Scaling", graphView); std::shared_ptr<Node> scalingNode = Scaling(1.0, scalingNodeName); - scalingNode->getOperator()->setDataType(DataType::Float32); + + scalingNode->getOperator()->setDataType(DataType::Float64); // getDataType(parentNode) scalingNode->getOperator()->setBackend("cpu"); if (parentNode->getChildren().size() > 0) @@ -283,7 +293,8 @@ void insertScalingNodes(std::shared_ptr<GraphView> graphView) else { // Log::info(" last node reached ! "); - graphView->addChild(scalingNode); + parentNode->addChild(scalingNode, 0, 0); + graphView->add(scalingNode); } } } @@ -322,7 +333,7 @@ void normalizeParameters(std::shared_ptr<GraphView> graphView) std::vector<std::shared_ptr<Node>> nodeVector = retrieveNodeVector(graphView); - std::map<std::string, float> accumulatedRatios; + std::map<std::string, double> accumulatedRatios; for (std::shared_ptr<Node> node : nodeVector) { accumulatedRatios.insert(std::make_pair(node->name(), 1.0)); @@ -349,8 +360,8 @@ void normalizeParameters(std::shared_ptr<GraphView> graphView) { // Rescale the weight tensor std::shared_ptr<Tensor> weightTensor = getWeightTensor(node); - float scaling = getTensorAbsoluteMax(weightTensor); - float ratio = 1.0 / scaling; + double scaling = getTensorAbsoluteMax(weightTensor); + double ratio = 1.0 / scaling; rescaleTensor(weightTensor, ratio); // Accumulate the ratio @@ -378,10 +389,10 @@ void normalizeParameters(std::shared_ptr<GraphView> graphView) std::vector<std::shared_ptr<Node>> mergingNodes = node->getParents(); // Compute the max ratio ... - float maxRatio = 0; + double maxRatio = 0; for (std::shared_ptr<Node> mergingNode : mergingNodes) { - float merginNodeRatio = accumulatedRatios[mergingNode->name()]; + double merginNodeRatio = accumulatedRatios[mergingNode->name()]; if (merginNodeRatio > maxRatio) maxRatio = merginNodeRatio; } @@ -391,12 +402,12 @@ void normalizeParameters(std::shared_ptr<GraphView> graphView) // Rescale the previous scaling Nodes for (std::shared_ptr<Node> mergingNode : mergingNodes) { - float mergingNodeRatio = accumulatedRatios[mergingNode->name()]; - float rescaling = mergingNodeRatio / maxRatio; + double mergingNodeRatio = accumulatedRatios[mergingNode->name()]; + double rescaling = mergingNodeRatio / maxRatio; std::shared_ptr<Node> scalingNode = getPreviousScalingNode(mergingNode); - float scaling_factor = getScalingFactor(scalingNode); + double scaling_factor = getScalingFactor(scalingNode); updateScalingFactor(scalingNode,scaling_factor / rescaling); accumulatedRatios[mergingNode->name()] /= rescaling; // optional ... } @@ -405,9 +416,9 @@ void normalizeParameters(std::shared_ptr<GraphView> graphView) } // XXX TODO : take care of the CUDA backend for this too !!! 
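 // (This single-tensor overload of computeRanges, patched below, schedules one
 // input and records the absolute output max of every scaling node; the dataset
 // overload further down accumulates these maxima over all calibration samples.)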
-std::map<std::string, float> computeRanges(std::shared_ptr<GraphView> graphView, std::shared_ptr<Tensor> inputTensor, bool scalingNodesOnly) +std::map<std::string, double> computeRanges(std::shared_ptr<GraphView> graphView, std::shared_ptr<Tensor> inputTensor, bool scalingNodesOnly) { - std::map<std::string, float> valueRanges; + std::map<std::string, double> valueRanges; SequentialScheduler scheduler(graphView); scheduler.resetScheduling(); @@ -425,7 +436,7 @@ std::map<std::string, float> computeRanges(std::shared_ptr<GraphView> graphView, { std::shared_ptr<Operator> nodeOperator = node->getOperator(); std::shared_ptr<Tensor> valueTensor = std::static_pointer_cast<Tensor> (nodeOperator->getRawOutput(0)); - float range = getTensorAbsoluteMax(valueTensor); + double range = getTensorAbsoluteMax(valueTensor); // Associate the value to the scaling node ... valueRanges.insert(std::make_pair(node->name(), range)); @@ -435,9 +446,9 @@ std::map<std::string, float> computeRanges(std::shared_ptr<GraphView> graphView, return valueRanges; } -std::map<std::string, float> computeRanges(std::shared_ptr<GraphView> graphView, std::vector<std::shared_ptr<Tensor>> inputDataSet, bool scalingNodesOnly, bool useCuda) +std::map<std::string, double> computeRanges(std::shared_ptr<GraphView> graphView, std::vector<std::shared_ptr<Tensor>> inputDataSet, bool scalingNodesOnly, bool useCuda) { - std::map<std::string, float> valueRanges; + std::map<std::string, double> valueRanges; std::set<std::shared_ptr<Node>> nodeSet = graphView->getNodes(); // std::shared_ptr<Node> inputNode = getFirstNode(graphView); @@ -467,7 +478,7 @@ std::map<std::string, float> computeRanges(std::shared_ptr<GraphView> graphView, // Gather the sample ranges ... - std::map<std::string, float> sampleRanges; + std::map<std::string, double> sampleRanges; for (std::shared_ptr<Node> node : nodeSet) { if ((scalingNodesOnly && (node->type() == "Scaling")) || (!scalingNodesOnly && (node->type() != "Producer"))) @@ -478,7 +489,7 @@ std::map<std::string, float> computeRanges(std::shared_ptr<GraphView> graphView, if (useCuda) valueTensor->setBackend("cpu"); - float range = getTensorAbsoluteMax(valueTensor); + double range = getTensorAbsoluteMax(valueTensor); // Associate the value to the scaling node ... sampleRanges.insert(std::make_pair(node->name(), range)); @@ -510,7 +521,7 @@ std::map<std::string, float> computeRanges(std::shared_ptr<GraphView> graphView, return valueRanges; } -void normalizeActivations(std::shared_ptr<GraphView> graphView, std::map<std::string, float> valueRanges) +void normalizeActivations(std::shared_ptr<GraphView> graphView, std::map<std::string, double> valueRanges) { std::shared_ptr<Node> firstNode = getFirstNode(graphView); @@ -518,7 +529,7 @@ void normalizeActivations(std::shared_ptr<GraphView> graphView, std::map<std::st std::vector<std::shared_ptr<Node>> nodeVector = retrieveNodeVector(graphView); - std::map<std::string, float> scalingFactors; + std::map<std::string, double> scalingFactors; for (std::shared_ptr<Node> node : nodeVector) scalingFactors.insert(std::make_pair(node->name(), 1.0)); @@ -549,12 +560,12 @@ void normalizeActivations(std::shared_ptr<GraphView> graphView, std::map<std::st { // retrieve the previous scaling factor ... std::shared_ptr<Node> prevNode = node->getParent(0); - float prevScalingFactor = scalingFactors[prevNode->name()]; + double prevScalingFactor = scalingFactors[prevNode->name()]; // ValueRanges must contains all the scaling nodes !!! 
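             // The node's multiplier is divided by (range / parentRange): once
             // the parent's output is normalized to [-1, 1], this rescaling
             // brings the current node's output into [-1, 1] as well.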
- float scalingFactor = valueRanges[node->name()]; + double scalingFactor = valueRanges[node->name()]; - float scaling_factor = getScalingFactor(node); + double scaling_factor = getScalingFactor(node); updateScalingFactor(node, (scaling_factor) / (scalingFactor / prevScalingFactor)); scalingFactors[node->name()] = scalingFactor; @@ -579,10 +590,10 @@ void normalizeActivations(std::shared_ptr<GraphView> graphView, std::map<std::st std::vector<std::shared_ptr<Node>> mergingNodes = node->getParents(); // Compute the max scaling ... - float maxScaling = 0; + double maxScaling = 0; for (std::size_t i = 0; i < mergingNodes.size(); i++) { - float merginNodeScaling = scalingFactors[mergingNodes[i]->name()]; + double merginNodeScaling = scalingFactors[mergingNodes[i]->name()]; if (merginNodeScaling > maxScaling) { maxScaling = merginNodeScaling; } @@ -592,12 +603,12 @@ void normalizeActivations(std::shared_ptr<GraphView> graphView, std::map<std::st for (std::shared_ptr<Node> mergingNode : mergingNodes) { - float mergingNodeScaling = scalingFactors[mergingNode->name()]; - float rescaling = mergingNodeScaling / maxScaling; + double mergingNodeScaling = scalingFactors[mergingNode->name()]; + double rescaling = mergingNodeScaling / maxScaling; std::shared_ptr<Node> scalingNode = getPreviousScalingNode(mergingNode); //Log::info(" SCALING NODE : {} {}", scalingNode->type(), scalingNode->name()); - float scaling_factor = getScalingFactor(scalingNode); + double scaling_factor = getScalingFactor(scalingNode); updateScalingFactor(scalingNode, scaling_factor * rescaling); } } @@ -735,8 +746,8 @@ void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_ AIDGE_THROW_OR_ABORT(std::runtime_error,"Signs optimization can not be applied if network is not fully quantized ..."); } - float signedMax = (1 << (nbBits - 1)) - 1; - float unsignedMax = (1 << nbBits) - 1; + double signedMax = (1 << (nbBits - 1)) - 1; + double unsignedMax = (1 << nbBits) - 1; std::map<std::string, std::pair<bool, bool>> signMap; @@ -771,7 +782,7 @@ void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_ if (nodeHasBias(node)) { bool inputIsUnsigned = signMap[node->name()].first; - float rescaling = inputIsUnsigned ? unsignedMax * signedMax : signedMax * signedMax; + double rescaling = inputIsUnsigned ? unsignedMax * signedMax : signedMax * signedMax; std::shared_ptr<Tensor> biasTensor = getBiasTensor(node); @@ -783,7 +794,7 @@ void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_ // Compensate the rescaling using the next Scaling node - float rescaling = 1.0 / signedMax; + double rescaling = 1.0 / signedMax; bool inputIsUnsigned = signMap[node->name()].first; bool outputIsUnsigned = signMap[node->name()].second; @@ -792,13 +803,13 @@ void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_ rescaling *= outputIsUnsigned ? unsignedMax : signedMax; std::shared_ptr<Node> scalingNode = *(node->getChildren().begin()); // Assert if scalingNode is a Scaling ... 
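             // Re-expressing the same real value in another integer range is a
             // pure change of scale: divide by the input's max level, multiply
             // by the output's, and fold the result into the next Scaling node.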
- float scaling_factor = getScalingFactor(scalingNode); + double scaling_factor = getScalingFactor(scalingNode); updateScalingFactor(scalingNode, scaling_factor * rescaling); } if (isMerging(node)) { - float rescaling = 1.0; + double rescaling = 1.0; bool inputIsUnsigned = signMap[node->name()].first; bool outputIsUnsigned = signMap[node->name()].second; @@ -808,9 +819,8 @@ void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_ std::shared_ptr<Node> scalingNode = *(node->getChildren().begin()); // Assert if scalingNode is a Scaling ... - - float scaling_factor = getScalingFactor(scalingNode); - updateScalingFactor(scalingNode,scaling_factor * rescaling); + double scaling_factor = getScalingFactor(scalingNode); // XXX bad naming + updateScalingFactor(scalingNode, scaling_factor * rescaling); } // Handle the Scaling Nodes ... @@ -819,18 +829,19 @@ void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_ { if (!noQuant) { - //[!!] replacement of Scaling Node by Quantizer - float currentSF = getScalingFactor(node); + // Replacement of Scaling Node by Quantizer + double currentSF = getScalingFactor(node); // XXX bad naming ! + + std::shared_ptr<Node> quantizerNode = Quantizer(currentSF, -(signedMax + 1), signedMax, node->name()); - std::shared_ptr<Node> quantizerNode = Quantizer(currentSF, - (signedMax + 1), signedMax, node->name()); - quantizerNode->getOperator()->setDataType(DataType::Float32); + quantizerNode->getOperator()->setDataType(DataType::Float64); // getDataType(parentNode) quantizerNode->getOperator()->setBackend("cpu"); - graphView->replace({node}, {quantizerNode}); + graphView->replace({node}, {quantizerNode}); if (optimizeSigns) { - float rescaling = 1.0; + double rescaling = 1.0; bool inputIsUnsigned = signMap[node->name()].first; bool outputIsUnsigned = signMap[node->name()].second; @@ -838,7 +849,7 @@ void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_ rescaling /= inputIsUnsigned ? unsignedMax : signedMax; rescaling *= outputIsUnsigned ? unsignedMax : signedMax; - float scalingFactor = getScalingFactor(quantizerNode); + double scalingFactor = getScalingFactor(quantizerNode); updateScalingFactor(quantizerNode,scalingFactor * rescaling); if(outputIsUnsigned) @@ -854,7 +865,7 @@ void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_ static void insertCompensationNodes(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits) { // XXX Use the signMap to increase the resolution when possible ... 
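     // The compensation trick keeps the graph output unchanged: a Mul node
     // whose coefficient is signedMax is inserted upstream of the scaling
     // node, whose scaling factor is divided by signedMax in return.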
- float signedMax = (1 << (nbBits - 1)) - 1; + double signedMax = (1 << (nbBits - 1)) - 1; std::vector<std::shared_ptr<Node>> nodeVector = retrieveNodeVector(graphView); @@ -874,7 +885,8 @@ static void insertCompensationNodes(std::shared_ptr<GraphView> graphView, std::u std::string mulNodeName = makeUniqueName(node->name() + "_Mul", graphView); std::shared_ptr<Node> mulNode = Mul(mulNodeName); - mulNode->getOperator()->setDataType(DataType::Float32); + + mulNode->getOperator()->setDataType(DataType::Float64); // getDataType(parentNode) mulNode->getOperator()->setBackend("cpu"); graphView->insertParent(node, mulNode, 0, 0, 0); @@ -882,10 +894,11 @@ static void insertCompensationNodes(std::shared_ptr<GraphView> graphView, std::u // create and insert the producer node std::shared_ptr<Tensor> inputTensor = std::static_pointer_cast<Tensor> (mulNode->getOperator()->getRawInput(0)); - std::shared_ptr<Tensor> coeffTensor = std::make_shared<Tensor>(); - coeffTensor->setDataType(DataType::Float32); - coeffTensor->setBackend("cpu"); + + coeffTensor->setDataType(DataType::Float64); // getDataType(parentNode) + coeffTensor->setBackend("cpu"); + coeffTensor->resize(inputTensor->dims()); fillTensor(coeffTensor, 1); @@ -896,8 +909,8 @@ static void insertCompensationNodes(std::shared_ptr<GraphView> graphView, std::u // rescale the coeffs and edit scaling factor fillTensor(coeffTensor, signedMax); - float sf = getScalingFactor(node); - updateScalingFactor(node,sf/signedMax); + double sf = getScalingFactor(node); // XXX bad naming ! + updateScalingFactor(node, sf/signedMax); // TODO : double check this !!! //std::cout << getTensorAbsoluteMax(coeffTensor) << std::endl; @@ -906,9 +919,7 @@ static void insertCompensationNodes(std::shared_ptr<GraphView> graphView, std::u } } -void - -performSingleShiftApproximation(std::shared_ptr<GraphView> graphView, bool noQuant) +void performSingleShiftApproximation(std::shared_ptr<GraphView> graphView, bool noQuant) { std::vector<std::shared_ptr<Node>> nodeVector = retrieveNodeVector(graphView); @@ -919,13 +930,13 @@ performSingleShiftApproximation(std::shared_ptr<GraphView> graphView, bool noQua { std::shared_ptr<Node> scalingNode = (*node->getChildren().begin()); - float base = getScalingFactor(scalingNode); + double base = getScalingFactor(scalingNode); - float approx = std::pow(2, std::ceil(std::log2(base))); + double approx = std::pow(2, std::ceil(std::log2(base))); updateScalingFactor(scalingNode,approx); - float ratio = base / approx; + double ratio = base / approx; std::shared_ptr<Tensor> weightTensor = getWeightTensor(node); rescaleTensor(weightTensor, ratio); @@ -949,17 +960,46 @@ static void printScalingFactors(std::shared_ptr<GraphView> graphView) for (auto node : retrieveNodeVector(graphView)) if (node->type() == "Scaling") { - float factor = getScalingFactor(node); + double factor = getScalingFactor(node); Log::info(" {:.6f} ({})", factor, node->name()); } } +/* +std::string deduceBackend(std::shared_ptr<GraphView> graphView) +{ + std::string rootNodeBackend = graphView->getRootNode()->backend(); + for (auto node : graphView->getNodes()) + if (node->backend() != rootNodeBackend) + log::warn(" Multiple backend detected, setting all nodes to {}") +} +*/ + +static void setupDataType(std::shared_ptr<GraphView> graphView, std::vector<std::shared_ptr<Tensor>> inputDataSet, DataType dataType) +{ + graphView->setDataType(dataType); + + for (auto inputNode : graphView->inputNodes()) { + auto op = std::static_pointer_cast<OperatorTensor>(inputNode->getOperator()); + 
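         // The input tensor may be absent when the graph input is not bound
         // yet, hence the null check below.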
auto inputTensor = op->getInput(0); + if (inputTensor) + inputTensor->setDataType(dataType); + } + + for (auto tensor : inputDataSet) + tensor->setDataType(dataType); +} + + void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits, std::vector<std::shared_ptr<Tensor>> inputDataSet, Clipping clippingMode, bool noQuant, bool optimizeSigns, bool singleShift, bool useCuda, bool verbose) { Log::info(" === QUANT PTQ 0.2.21 === "); graphView->setBackend("cpu"); + DataType initialDataType = (inputDataSet[0])->dataType(); + setupDataType(graphView, inputDataSet, DataType::Float64); + if (!checkArchitecture(graphView)) return; @@ -975,8 +1015,22 @@ void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits, normalizeParameters(graphView); Log::info(" Computing the value ranges ..."); - std::map<std::string, float> valueRanges = computeRanges(graphView, inputDataSet, true, useCuda); + std::map<std::string, double> valueRanges = computeRanges(graphView, inputDataSet, true, useCuda); + + // XXX +/* + SequentialScheduler scheduler(graphView); + scheduler.resetScheduling(); + scheduler.generateScheduling(); + + auto scheduling = scheduler.getStaticScheduling(); + for (auto node : scheduling) + if (node->type() == "Scaling") + std::cout << node->name() << " range = " << valueRanges[node->name()] << std::endl; + std::cout << " RETURN " << std::endl; + return; +*/ Log::info(" Optimizing the clipping values ..."); valueRanges = adjustRanges(clippingMode, valueRanges, nbBits, graphView, inputDataSet, useCuda, verbose); @@ -992,32 +1046,34 @@ void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits, insertCompensationNodes(graphView, nbBits); Log::info(" Performing the Single-Shift approximation ..."); - performSingleShiftApproximation(graphView,noQuant); + performSingleShiftApproximation(graphView, noQuant); } if (verbose) printScalingFactors(graphView); - Log::info(" Resetting the scheduler ..."); + Log::info(" Reseting the scheduler ..."); SequentialScheduler scheduler(graphView); scheduler.resetScheduling(); + setupDataType(graphView, inputDataSet, initialDataType); if (useCuda) graphView->setBackend("cuda"); Log::info(" Network is quantized !"); + } -std::map<std::string, float> getWeightRanges(std::shared_ptr<GraphView> graphView) +std::map<std::string, double> getWeightRanges(std::shared_ptr<GraphView> graphView) { - std::map<std::string, float> weightRanges; + std::map<std::string, double> weightRanges; for (std::shared_ptr<Node> node : graphView->getNodes()) { if (isAffine(node)) { std::shared_ptr<Tensor> weightTensor = getWeightTensor(node); - float range = getTensorAbsoluteMax(weightTensor); + double range = getTensorAbsoluteMax(weightTensor); weightRanges.insert(std::make_pair(node->name(), range)); } } diff --git a/src/PTQ/PTQMetaOps.cpp b/src/PTQ/PTQMetaOps.cpp index 89590cb..d2423d0 100644 --- a/src/PTQ/PTQMetaOps.cpp +++ b/src/PTQ/PTQMetaOps.cpp @@ -28,9 +28,9 @@ #include "aidge/utils/Types.h" #include "aidge/operator/Identity.hpp" #include "aidge/data/Tensor.hpp" -std::shared_ptr<Aidge::Node> Quantizer(float scalingFactor, float clip_min,float clip_max,const std::string& name) +std::shared_ptr<Aidge::Node> Quantizer(double scalingFactor, double clip_min,double clip_max,const std::string& name) { - std::shared_ptr<Aidge::Tensor> ScalingFactorTensorAttached = std::make_shared<Aidge::Tensor>(Aidge::Array1D<float, 1>{scalingFactor}); + std::shared_ptr<Aidge::Tensor> ScalingFactorTensorAttached = 
std::make_shared<Aidge::Tensor>(Aidge::Array1D<double, 1>{scalingFactor}); std::shared_ptr<Aidge::Node> mul_node = Aidge::Mul((!name.empty()) ? name + "_MulQuant" : ""); std::shared_ptr<Aidge::Node> producer_scaling_factor = addProducer(mul_node,1,{1},"ScalingFactor"); @@ -48,27 +48,30 @@ std::shared_ptr<Aidge::Node> Quantizer(float scalingFactor, float clip_min,float return metaopNode; } -std::shared_ptr<Aidge::Node> Scaling(float scalingFactor,const std::string& name) +std::shared_ptr<Aidge::Node> Scaling(double scalingFactor,const std::string& name) { - std::shared_ptr<Aidge::Tensor> ScalingFactorTensorAttached = std::make_shared<Aidge::Tensor>(Aidge::Array1D<float, 1>{scalingFactor}); - - std::shared_ptr<Aidge::Node> mul_node = Aidge::Mul((!name.empty()) ? name + "_Scaling" : ""); - - std::shared_ptr<Aidge::Node> producer_scaling_factor = addProducer(mul_node,1,{1},"ScalingFactor"); - producer_scaling_factor->getOperator()->setOutput(0, ScalingFactorTensorAttached); - std::shared_ptr<Aidge::GraphView> graph = Aidge::Sequential({mul_node}); - std::shared_ptr<Aidge::GraphView> connectedGV = getConnectedGraphView(mul_node); - Aidge::NodePtr metaopNode = MetaOperator("Scaling",connectedGV,{},name); + std::shared_ptr<Aidge::Tensor> scalingFactorTensor = std::make_shared<Aidge::Tensor>(Aidge::Array1D<double, 1> {scalingFactor}); + + std::shared_ptr<Aidge::Node> mulNode = Aidge::Mul((!name.empty()) ? name + "_Scaling" : ""); + + std::shared_ptr<Aidge::Node> scalingFactorProducer = addProducer(mulNode, 1, {1}, "ScalingFactor"); + scalingFactorProducer->getOperator()->setOutput(0, scalingFactorTensor); + + std::shared_ptr<Aidge::GraphView> graphView = Aidge::Sequential({mulNode}); + std::shared_ptr<Aidge::GraphView> connectedGraphView = getConnectedGraphView(mulNode); + + Aidge::NodePtr metaopNode = MetaOperator("Scaling", connectedGraphView, {}, name); + return metaopNode; } -bool updateScalingFactor(std::shared_ptr<Aidge::Node> MetaOpNode, float newScalingFactor) +bool updateScalingFactor(std::shared_ptr<Aidge::Node> MetaOpNode, double newScalingFactor) { if(MetaOpNode->type() != "Scaling" && MetaOpNode->type() != "Quantizer") { AIDGE_ASSERT("Cannot use updatePTQMetaOpsScalingFactor on Node of type {}", MetaOpNode->type()); } - std::shared_ptr<Aidge::Tensor> newScalingFactorTensorAttached = std::make_shared<Aidge::Tensor>(Aidge::Array1D<float, 1>{newScalingFactor}); + std::shared_ptr<Aidge::Tensor> newScalingFactorTensorAttached = std::make_shared<Aidge::Tensor>(Aidge::Array1D<double, 1>{newScalingFactor}); std::shared_ptr<Aidge::MetaOperator_Op> MetaOp = std::static_pointer_cast<Aidge::MetaOperator_Op>(MetaOpNode->getOperator()); std::set<Aidge::NodePtr> Meta_Op_Node_List = MetaOp->getMicroGraph()->getNodes(); //List of Nodes inside PTQ Metaop Node for(std::shared_ptr<Aidge::Node> node : Meta_Op_Node_List) @@ -82,7 +85,7 @@ bool updateScalingFactor(std::shared_ptr<Aidge::Node> MetaOpNode, float newScali AIDGE_ASSERT("Invalid PTQ MetaOperator, no Mul node found inside node of type {}",MetaOpNode->type()); return false; } -float getScalingFactor(std::shared_ptr<Aidge::Node> MetaOpNode) +double getScalingFactor(std::shared_ptr<Aidge::Node> MetaOpNode) { if(MetaOpNode->type() != "Scaling" && MetaOpNode->type() != "Quantizer") { @@ -97,13 +100,13 @@ float getScalingFactor(std::shared_ptr<Aidge::Node> MetaOpNode) { std::shared_ptr<Aidge::Data> MulInput1Data = node->input(1).first->getOperator()->getRawOutput(0); void* RawInputScalingFactor = 
std::static_pointer_cast<Aidge::Tensor>(MulInput1Data)->getImpl()->rawPtr(); - return (*(static_cast<float*>(RawInputScalingFactor))); + return (*(static_cast<double*>(RawInputScalingFactor))); } } AIDGE_ASSERT("Invalid PTQ MetaOperator, no Mul node found inside node of type {}",MetaOpNode->type()); return -1; } -bool setClipRange(std::shared_ptr<Aidge::Node> QuantizerNode,float min, float max) +bool setClipRange(std::shared_ptr<Aidge::Node> QuantizerNode,double min, double max) { if(QuantizerNode->type() != "Quantizer") { -- GitLab From 57239cf31424a7c5a8f0a5f5b6db2197d75655f4 Mon Sep 17 00:00:00 2001 From: Olivier BICHLER <olivier.bichler@cea.fr> Date: Wed, 18 Dec 2024 12:10:45 +0100 Subject: [PATCH 03/60] Switch back to float32 except for Producer --- src/PTQ/PTQ.cpp | 70 ++++++++++++++++++------------------------------- 1 file changed, 25 insertions(+), 45 deletions(-) diff --git a/src/PTQ/PTQ.cpp b/src/PTQ/PTQ.cpp index bfc5e3f..2641dde 100644 --- a/src/PTQ/PTQ.cpp +++ b/src/PTQ/PTQ.cpp @@ -52,7 +52,7 @@ bool isMerging(std::shared_ptr<Node> node) bool checkArchitecture(std::shared_ptr<GraphView> graphView) { - std::set<std::string> otherNodeTypes({"Flatten", "Softmax", "BatchNorm2D", "ReLU", "Producer"}); + const std::set<std::string> otherNodeTypes({"Flatten", "Softmax", "BatchNorm2D", "ReLU", "Producer"}); for (std::shared_ptr<Node> node : graphView->getNodes()) { @@ -76,10 +76,10 @@ static void fillTensor(std::shared_ptr<Tensor> tensor, double value) castedTensor[i] = value; } -static void rescaleTensor(std::shared_ptr<Tensor> tensor, double scaling) +static void rescaleTensor(std::shared_ptr<Tensor> tensor, float scaling) { // Get the tensor data pointer - double * castedTensor = static_cast <double *> (tensor->getImpl()->rawPtr()); + float * castedTensor = static_cast <float *> (tensor->getImpl()->rawPtr()); // Rescale the tensor for(std::size_t i = 0; i < tensor->size(); i++) @@ -89,20 +89,20 @@ static void rescaleTensor(std::shared_ptr<Tensor> tensor, double scaling) static void roundTensor(std::shared_ptr<Tensor> tensor) { // Get the tensor data pointer - double * castedTensor = static_cast <double *> (tensor->getImpl()->rawPtr()); + float * castedTensor = static_cast <float *> (tensor->getImpl()->rawPtr()); // Rescale the tensor for(std::size_t i = 0; i < tensor->size(); i++) castedTensor[i] = std::nearbyint(castedTensor[i]);//Round } -static double getTensorAbsoluteMax(std::shared_ptr <Tensor> tensor) +static float getTensorAbsoluteMax(std::shared_ptr <Tensor> tensor) { // Get the tensor data pointer and edit it - double * castedTensor = static_cast<double*>(tensor->getImpl()->rawPtr()); + float * castedTensor = static_cast<float*>(tensor->getImpl()->rawPtr()); // Get the tensor absolute max value - double maxValue = 0.0f; + float maxValue = 0.0f; for(std::size_t i = 0; i < tensor->size(); ++i) { if(std::fabs(castedTensor[i]) > maxValue) { maxValue = std::fabs(castedTensor[i]); @@ -186,7 +186,7 @@ void prepareNetwork(std::shared_ptr<GraphView> graphView) removeFlatten(graphView); bool containsBatchNorm = false; - std::vector<std::shared_ptr<Node>> nodeVector = retrieveNodeVector(graphView); + const std::vector<std::shared_ptr<Node>> nodeVector = retrieveNodeVector(graphView); for (std::shared_ptr<Node> node : nodeVector) if (node->type() == "BatchNorm") @@ -213,7 +213,7 @@ void insertResidualNodes(std::shared_ptr<GraphView> graphView) { // TODO: double check this ... 
- std::vector<std::shared_ptr<Node>> nodeVector = retrieveNodeVector(graphView); + const std::vector<std::shared_ptr<Node>> nodeVector = retrieveNodeVector(graphView); for (std::shared_ptr<Node> node : nodeVector) { @@ -234,7 +234,7 @@ void insertResidualNodes(std::shared_ptr<GraphView> graphView) std::string residualNodeName = makeUniqueName(parentNode->name() + "_Res", graphView); std::shared_ptr<Node> residualNode = Scaling(1.0, residualNodeName); - residualNode->getOperator()->setDataType(DataType::Float64); //getDataType(parentNode) + residualNode->getOperator()->setDataType(DataType::Float32); //getDataType(parentNode) residualNode->getOperator()->setBackend("cpu"); graphView->insertParent(node, residualNode, i, 0, 0); @@ -256,16 +256,16 @@ void insertScalingNodes(std::shared_ptr<GraphView> graphView) { insertResidualNodes(graphView); - std::set<std::shared_ptr<Node>> nodeSet = graphView->getNodes(); + const std::set<std::shared_ptr<Node>> nodeSet = graphView->getNodes(); for (std::shared_ptr<Node> parentNode : nodeSet) { if (isAffine(parentNode) || isMerging(parentNode)) { - std::string scalingNodeName = makeUniqueName(parentNode->name() + "_Scaling", graphView); + const std::string scalingNodeName = makeUniqueName(parentNode->name() + "_Scaling", graphView); std::shared_ptr<Node> scalingNode = Scaling(1.0, scalingNodeName); - scalingNode->getOperator()->setDataType(DataType::Float64); // getDataType(parentNode) + scalingNode->getOperator()->setDataType(DataType::Float32); // getDataType(parentNode) scalingNode->getOperator()->setBackend("cpu"); if (parentNode->getChildren().size() > 0) @@ -273,7 +273,7 @@ void insertScalingNodes(std::shared_ptr<GraphView> graphView) // SCALING NODE INSERTION // We always have one output from Affine and Add nodes, but possibly multiple childs - std::vector<std::shared_ptr<Node>> nextNodes = parentNode->getChildren(0); + const std::vector<std::shared_ptr<Node>> nextNodes = parentNode->getChildren(0); // For each node in nextNodes store the connexion index std::vector<int> inputIndices(nextNodes.size()); @@ -331,7 +331,7 @@ void normalizeParameters(std::shared_ptr<GraphView> graphView) { // CREATE THE ACCUMULATED RATIO MAP /////////////////////////////////////// - std::vector<std::shared_ptr<Node>> nodeVector = retrieveNodeVector(graphView); + const std::vector<std::shared_ptr<Node>> nodeVector = retrieveNodeVector(graphView); std::map<std::string, double> accumulatedRatios; for (std::shared_ptr<Node> node : nodeVector) @@ -429,7 +429,7 @@ std::map<std::string, double> computeRanges(std::shared_ptr<GraphView> graphView // Gather ranges ... 
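     // "Scaling nodes only" restricts the scan to the inserted Scaling nodes;
     // otherwise every node except the Producers is measured.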
- std::set<std::shared_ptr<Node>> nodeSet = graphView->getNodes(); + const std::set<std::shared_ptr<Node>> nodeSet = graphView->getNodes(); for (std::shared_ptr<Node> node : nodeSet) { if ((scalingNodesOnly && (node->type() == "Scaling")) || (!scalingNodesOnly && (node->type() != "Producer"))) @@ -449,7 +449,7 @@ std::map<std::string, double> computeRanges(std::shared_ptr<GraphView> graphView std::map<std::string, double> computeRanges(std::shared_ptr<GraphView> graphView, std::vector<std::shared_ptr<Tensor>> inputDataSet, bool scalingNodesOnly, bool useCuda) { std::map<std::string, double> valueRanges; - std::set<std::shared_ptr<Node>> nodeSet = graphView->getNodes(); + const std::set<std::shared_ptr<Node>> nodeSet = graphView->getNodes(); // std::shared_ptr<Node> inputNode = getFirstNode(graphView); @@ -527,7 +527,7 @@ void normalizeActivations(std::shared_ptr<GraphView> graphView, std::map<std::st // CREATE THE SCALING FACTOR MAP ////////////////////////////////////////// - std::vector<std::shared_ptr<Node>> nodeVector = retrieveNodeVector(graphView); + const std::vector<std::shared_ptr<Node>> nodeVector = retrieveNodeVector(graphView); std::map<std::string, double> scalingFactors; @@ -628,7 +628,7 @@ std::map<std::string, std::pair<bool, bool>> computeSignMap(std::shared_ptr<Grap // ITERATE OVER THE GRAPH - std::vector<std::shared_ptr<Node>> nodeVector = retrieveNodeVector(graphView); + const std::vector<std::shared_ptr<Node>> nodeVector = retrieveNodeVector(graphView); for (std::shared_ptr<Node> node : nodeVector) { @@ -672,7 +672,7 @@ std::map<std::string, std::pair<bool, bool>> computeSignMap(std::shared_ptr<Grap if (isMerging(node)) { - std::vector<std::shared_ptr<Node>> parentNodes = node->getParents(); + const std::vector<std::shared_ptr<Node>> parentNodes = node->getParents(); bool allParentAreSigned = true; bool allParentAreUnsigned = true; @@ -763,7 +763,7 @@ void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_ // ITERATE OVER THE GRAPH ///////////////////////////////////////////////// - std::vector<std::shared_ptr<Node>> nodeVector = retrieveNodeVector(graphView); + const std::vector<std::shared_ptr<Node>> nodeVector = retrieveNodeVector(graphView); for (std::shared_ptr<Node> node : nodeVector) { @@ -834,7 +834,7 @@ void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_ std::shared_ptr<Node> quantizerNode = Quantizer(currentSF, -(signedMax + 1), signedMax, node->name()); - quantizerNode->getOperator()->setDataType(DataType::Float64); // getDataType(parentNode) + quantizerNode->getOperator()->setDataType(DataType::Float32); // getDataType(parentNode) quantizerNode->getOperator()->setBackend("cpu"); graphView->replace({node}, {quantizerNode}); @@ -867,7 +867,7 @@ static void insertCompensationNodes(std::shared_ptr<GraphView> graphView, std::u // XXX Use the signMap to increase the resolution when possible ... 
double signedMax = (1 << (nbBits - 1)) - 1; - std::vector<std::shared_ptr<Node>> nodeVector = retrieveNodeVector(graphView); + const std::vector<std::shared_ptr<Node>> nodeVector = retrieveNodeVector(graphView); for (std::shared_ptr<Node> node : nodeVector) { @@ -886,7 +886,7 @@ static void insertCompensationNodes(std::shared_ptr<GraphView> graphView, std::u std::string mulNodeName = makeUniqueName(node->name() + "_Mul", graphView); std::shared_ptr<Node> mulNode = Mul(mulNodeName); - mulNode->getOperator()->setDataType(DataType::Float64); // getDataType(parentNode) + mulNode->getOperator()->setDataType(DataType::Float32); // getDataType(parentNode) mulNode->getOperator()->setBackend("cpu"); graphView->insertParent(node, mulNode, 0, 0, 0); @@ -921,7 +921,7 @@ static void insertCompensationNodes(std::shared_ptr<GraphView> graphView, std::u void performSingleShiftApproximation(std::shared_ptr<GraphView> graphView, bool noQuant) { - std::vector<std::shared_ptr<Node>> nodeVector = retrieveNodeVector(graphView); + const std::vector<std::shared_ptr<Node>> nodeVector = retrieveNodeVector(graphView); for (std::shared_ptr<Node> node : nodeVector) { @@ -975,31 +975,12 @@ std::string deduceBackend(std::shared_ptr<GraphView> graphView) } */ -static void setupDataType(std::shared_ptr<GraphView> graphView, std::vector<std::shared_ptr<Tensor>> inputDataSet, DataType dataType) -{ - graphView->setDataType(dataType); - - for (auto inputNode : graphView->inputNodes()) { - auto op = std::static_pointer_cast<OperatorTensor>(inputNode->getOperator()); - auto inputTensor = op->getInput(0); - if (inputTensor) - inputTensor->setDataType(dataType); - } - - for (auto tensor : inputDataSet) - tensor->setDataType(dataType); -} - - void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits, std::vector<std::shared_ptr<Tensor>> inputDataSet, Clipping clippingMode, bool noQuant, bool optimizeSigns, bool singleShift, bool useCuda, bool verbose) { Log::info(" === QUANT PTQ 0.2.21 === "); graphView->setBackend("cpu"); - DataType initialDataType = (inputDataSet[0])->dataType(); - setupDataType(graphView, inputDataSet, DataType::Float64); - if (!checkArchitecture(graphView)) return; @@ -1056,7 +1037,6 @@ void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits, SequentialScheduler scheduler(graphView); scheduler.resetScheduling(); - setupDataType(graphView, inputDataSet, initialDataType); if (useCuda) graphView->setBackend("cuda"); -- GitLab From fcb167c4c23733ddc200f8e4eccfb52e7188cc65 Mon Sep 17 00:00:00 2001 From: Olivier BICHLER <olivier.bichler@cea.fr> Date: Wed, 18 Dec 2024 14:12:21 +0000 Subject: [PATCH 04/60] Revert "Switch back to float32 except for Producer" This reverts commit 57239cf31424a7c5a8f0a5f5b6db2197d75655f4 --- src/PTQ/PTQ.cpp | 70 +++++++++++++++++++++++++++++++------------------ 1 file changed, 45 insertions(+), 25 deletions(-) diff --git a/src/PTQ/PTQ.cpp b/src/PTQ/PTQ.cpp index 2641dde..bfc5e3f 100644 --- a/src/PTQ/PTQ.cpp +++ b/src/PTQ/PTQ.cpp @@ -52,7 +52,7 @@ bool isMerging(std::shared_ptr<Node> node) bool checkArchitecture(std::shared_ptr<GraphView> graphView) { - const std::set<std::string> otherNodeTypes({"Flatten", "Softmax", "BatchNorm2D", "ReLU", "Producer"}); + std::set<std::string> otherNodeTypes({"Flatten", "Softmax", "BatchNorm2D", "ReLU", "Producer"}); for (std::shared_ptr<Node> node : graphView->getNodes()) { @@ -76,10 +76,10 @@ static void fillTensor(std::shared_ptr<Tensor> tensor, double value) castedTensor[i] = value; } -static void 
rescaleTensor(std::shared_ptr<Tensor> tensor, float scaling) +static void rescaleTensor(std::shared_ptr<Tensor> tensor, double scaling) { // Get the tensor data pointer - float * castedTensor = static_cast <float *> (tensor->getImpl()->rawPtr()); + double * castedTensor = static_cast <double *> (tensor->getImpl()->rawPtr()); // Rescale the tensor for(std::size_t i = 0; i < tensor->size(); i++) @@ -89,20 +89,20 @@ static void rescaleTensor(std::shared_ptr<Tensor> tensor, float scaling) static void roundTensor(std::shared_ptr<Tensor> tensor) { // Get the tensor data pointer - float * castedTensor = static_cast <float *> (tensor->getImpl()->rawPtr()); + double * castedTensor = static_cast <double *> (tensor->getImpl()->rawPtr()); // Rescale the tensor for(std::size_t i = 0; i < tensor->size(); i++) castedTensor[i] = std::nearbyint(castedTensor[i]);//Round } -static float getTensorAbsoluteMax(std::shared_ptr <Tensor> tensor) +static double getTensorAbsoluteMax(std::shared_ptr <Tensor> tensor) { // Get the tensor data pointer and edit it - float * castedTensor = static_cast<float*>(tensor->getImpl()->rawPtr()); + double * castedTensor = static_cast<double*>(tensor->getImpl()->rawPtr()); // Get the tensor absolute max value - float maxValue = 0.0f; + double maxValue = 0.0f; for(std::size_t i = 0; i < tensor->size(); ++i) { if(std::fabs(castedTensor[i]) > maxValue) { maxValue = std::fabs(castedTensor[i]); @@ -186,7 +186,7 @@ void prepareNetwork(std::shared_ptr<GraphView> graphView) removeFlatten(graphView); bool containsBatchNorm = false; - const std::vector<std::shared_ptr<Node>> nodeVector = retrieveNodeVector(graphView); + std::vector<std::shared_ptr<Node>> nodeVector = retrieveNodeVector(graphView); for (std::shared_ptr<Node> node : nodeVector) if (node->type() == "BatchNorm") @@ -213,7 +213,7 @@ void insertResidualNodes(std::shared_ptr<GraphView> graphView) { // TODO: double check this ... 
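The float-to-double migration above is mechanical; stripped of the Aidge tensor API, the three helpers it touches reduce to the following sketch, where raw data/size buffers stand in for tensor->getImpl()->rawPtr() and tensor->size():

// Minimal sketch of the raw-buffer helpers these hunks migrate to
// double precision; buffer arguments replace the Aidge tensor handle.
#include <algorithm>
#include <cmath>
#include <cstddef>

static void rescaleBuffer(double* data, std::size_t size, double scaling) {
    for (std::size_t i = 0; i < size; ++i)
        data[i] *= scaling;                    // in-place rescale
}

static void roundBuffer(double* data, std::size_t size) {
    for (std::size_t i = 0; i < size; ++i)
        data[i] = std::nearbyint(data[i]);     // round to nearest integer
}

static double absoluteMax(const double* data, std::size_t size) {
    double maxValue = 0.0;
    for (std::size_t i = 0; i < size; ++i)
        maxValue = std::max(maxValue, std::fabs(data[i]));
    return maxValue;
}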
- const std::vector<std::shared_ptr<Node>> nodeVector = retrieveNodeVector(graphView); + std::vector<std::shared_ptr<Node>> nodeVector = retrieveNodeVector(graphView); for (std::shared_ptr<Node> node : nodeVector) { @@ -234,7 +234,7 @@ void insertResidualNodes(std::shared_ptr<GraphView> graphView) std::string residualNodeName = makeUniqueName(parentNode->name() + "_Res", graphView); std::shared_ptr<Node> residualNode = Scaling(1.0, residualNodeName); - residualNode->getOperator()->setDataType(DataType::Float32); //getDataType(parentNode) + residualNode->getOperator()->setDataType(DataType::Float64); //getDataType(parentNode) residualNode->getOperator()->setBackend("cpu"); graphView->insertParent(node, residualNode, i, 0, 0); @@ -256,16 +256,16 @@ void insertScalingNodes(std::shared_ptr<GraphView> graphView) { insertResidualNodes(graphView); - const std::set<std::shared_ptr<Node>> nodeSet = graphView->getNodes(); + std::set<std::shared_ptr<Node>> nodeSet = graphView->getNodes(); for (std::shared_ptr<Node> parentNode : nodeSet) { if (isAffine(parentNode) || isMerging(parentNode)) { - const std::string scalingNodeName = makeUniqueName(parentNode->name() + "_Scaling", graphView); + std::string scalingNodeName = makeUniqueName(parentNode->name() + "_Scaling", graphView); std::shared_ptr<Node> scalingNode = Scaling(1.0, scalingNodeName); - scalingNode->getOperator()->setDataType(DataType::Float32); // getDataType(parentNode) + scalingNode->getOperator()->setDataType(DataType::Float64); // getDataType(parentNode) scalingNode->getOperator()->setBackend("cpu"); if (parentNode->getChildren().size() > 0) @@ -273,7 +273,7 @@ void insertScalingNodes(std::shared_ptr<GraphView> graphView) // SCALING NODE INSERTION // We always have one output from Affine and Add nodes, but possibly multiple childs - const std::vector<std::shared_ptr<Node>> nextNodes = parentNode->getChildren(0); + std::vector<std::shared_ptr<Node>> nextNodes = parentNode->getChildren(0); // For each node in nextNodes store the connexion index std::vector<int> inputIndices(nextNodes.size()); @@ -331,7 +331,7 @@ void normalizeParameters(std::shared_ptr<GraphView> graphView) { // CREATE THE ACCUMULATED RATIO MAP /////////////////////////////////////// - const std::vector<std::shared_ptr<Node>> nodeVector = retrieveNodeVector(graphView); + std::vector<std::shared_ptr<Node>> nodeVector = retrieveNodeVector(graphView); std::map<std::string, double> accumulatedRatios; for (std::shared_ptr<Node> node : nodeVector) @@ -429,7 +429,7 @@ std::map<std::string, double> computeRanges(std::shared_ptr<GraphView> graphView // Gather ranges ... 
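Before the range-gathering hunks that follow, a conceptual sketch of what computeRanges accumulates; the container names here are hypothetical, only the max-of-absolute-maxima logic is taken from the source:

// Hypothetical illustration: per-node calibration ranges are the largest
// absolute activations observed across the whole calibration set.
#include <algorithm>
#include <map>
#include <string>
#include <vector>

std::map<std::string, double> mergeSampleMaxima(
    const std::vector<std::map<std::string, double>>& perSampleMaxima) {
    std::map<std::string, double> ranges;
    for (const auto& sample : perSampleMaxima)
        for (const auto& kv : sample)
            ranges[kv.first] = std::max(ranges[kv.first], kv.second);
    return ranges;
}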
- const std::set<std::shared_ptr<Node>> nodeSet = graphView->getNodes(); + std::set<std::shared_ptr<Node>> nodeSet = graphView->getNodes(); for (std::shared_ptr<Node> node : nodeSet) { if ((scalingNodesOnly && (node->type() == "Scaling")) || (!scalingNodesOnly && (node->type() != "Producer"))) @@ -449,7 +449,7 @@ std::map<std::string, double> computeRanges(std::shared_ptr<GraphView> graphView std::map<std::string, double> computeRanges(std::shared_ptr<GraphView> graphView, std::vector<std::shared_ptr<Tensor>> inputDataSet, bool scalingNodesOnly, bool useCuda) { std::map<std::string, double> valueRanges; - const std::set<std::shared_ptr<Node>> nodeSet = graphView->getNodes(); + std::set<std::shared_ptr<Node>> nodeSet = graphView->getNodes(); // std::shared_ptr<Node> inputNode = getFirstNode(graphView); @@ -527,7 +527,7 @@ void normalizeActivations(std::shared_ptr<GraphView> graphView, std::map<std::st // CREATE THE SCALING FACTOR MAP ////////////////////////////////////////// - const std::vector<std::shared_ptr<Node>> nodeVector = retrieveNodeVector(graphView); + std::vector<std::shared_ptr<Node>> nodeVector = retrieveNodeVector(graphView); std::map<std::string, double> scalingFactors; @@ -628,7 +628,7 @@ std::map<std::string, std::pair<bool, bool>> computeSignMap(std::shared_ptr<Grap // ITERATE OVER THE GRAPH - const std::vector<std::shared_ptr<Node>> nodeVector = retrieveNodeVector(graphView); + std::vector<std::shared_ptr<Node>> nodeVector = retrieveNodeVector(graphView); for (std::shared_ptr<Node> node : nodeVector) { @@ -672,7 +672,7 @@ std::map<std::string, std::pair<bool, bool>> computeSignMap(std::shared_ptr<Grap if (isMerging(node)) { - const std::vector<std::shared_ptr<Node>> parentNodes = node->getParents(); + std::vector<std::shared_ptr<Node>> parentNodes = node->getParents(); bool allParentAreSigned = true; bool allParentAreUnsigned = true; @@ -763,7 +763,7 @@ void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_ // ITERATE OVER THE GRAPH ///////////////////////////////////////////////// - const std::vector<std::shared_ptr<Node>> nodeVector = retrieveNodeVector(graphView); + std::vector<std::shared_ptr<Node>> nodeVector = retrieveNodeVector(graphView); for (std::shared_ptr<Node> node : nodeVector) { @@ -834,7 +834,7 @@ void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_ std::shared_ptr<Node> quantizerNode = Quantizer(currentSF, -(signedMax + 1), signedMax, node->name()); - quantizerNode->getOperator()->setDataType(DataType::Float32); // getDataType(parentNode) + quantizerNode->getOperator()->setDataType(DataType::Float64); // getDataType(parentNode) quantizerNode->getOperator()->setBackend("cpu"); graphView->replace({node}, {quantizerNode}); @@ -867,7 +867,7 @@ static void insertCompensationNodes(std::shared_ptr<GraphView> graphView, std::u // XXX Use the signMap to increase the resolution when possible ... 
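The merging-node handling above boils down to a simple rule; a minimal sketch, with only the all-parents-unsigned test taken from the hunk:

// Sketch of the sign rule computeSignMap applies to merging nodes: the
// merged input may be treated as unsigned only if every parent branch is.
#include <vector>

bool mergedBranchIsUnsigned(const std::vector<bool>& parentIsUnsigned) {
    for (bool unsignedParent : parentIsUnsigned)
        if (!unsignedParent)
            return false;   // one signed parent forces a signed merge
    return true;
}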
double signedMax = (1 << (nbBits - 1)) - 1; - const std::vector<std::shared_ptr<Node>> nodeVector = retrieveNodeVector(graphView); + std::vector<std::shared_ptr<Node>> nodeVector = retrieveNodeVector(graphView); for (std::shared_ptr<Node> node : nodeVector) { @@ -886,7 +886,7 @@ static void insertCompensationNodes(std::shared_ptr<GraphView> graphView, std::u std::string mulNodeName = makeUniqueName(node->name() + "_Mul", graphView); std::shared_ptr<Node> mulNode = Mul(mulNodeName); - mulNode->getOperator()->setDataType(DataType::Float32); // getDataType(parentNode) + mulNode->getOperator()->setDataType(DataType::Float64); // getDataType(parentNode) mulNode->getOperator()->setBackend("cpu"); graphView->insertParent(node, mulNode, 0, 0, 0); @@ -921,7 +921,7 @@ static void insertCompensationNodes(std::shared_ptr<GraphView> graphView, std::u void performSingleShiftApproximation(std::shared_ptr<GraphView> graphView, bool noQuant) { - const std::vector<std::shared_ptr<Node>> nodeVector = retrieveNodeVector(graphView); + std::vector<std::shared_ptr<Node>> nodeVector = retrieveNodeVector(graphView); for (std::shared_ptr<Node> node : nodeVector) { @@ -975,12 +975,31 @@ std::string deduceBackend(std::shared_ptr<GraphView> graphView) } */ +static void setupDataType(std::shared_ptr<GraphView> graphView, std::vector<std::shared_ptr<Tensor>> inputDataSet, DataType dataType) +{ + graphView->setDataType(dataType); + + for (auto inputNode : graphView->inputNodes()) { + auto op = std::static_pointer_cast<OperatorTensor>(inputNode->getOperator()); + auto inputTensor = op->getInput(0); + if (inputTensor) + inputTensor->setDataType(dataType); + } + + for (auto tensor : inputDataSet) + tensor->setDataType(dataType); +} + + void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits, std::vector<std::shared_ptr<Tensor>> inputDataSet, Clipping clippingMode, bool noQuant, bool optimizeSigns, bool singleShift, bool useCuda, bool verbose) { Log::info(" === QUANT PTQ 0.2.21 === "); graphView->setBackend("cpu"); + DataType initialDataType = (inputDataSet[0])->dataType(); + setupDataType(graphView, inputDataSet, DataType::Float64); + if (!checkArchitecture(graphView)) return; @@ -1037,6 +1056,7 @@ void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits, SequentialScheduler scheduler(graphView); scheduler.resetScheduling(); + setupDataType(graphView, inputDataSet, initialDataType); if (useCuda) graphView->setBackend("cuda"); -- GitLab From f8be53be11be7efcfae5bfef3caa8533098d0bbf Mon Sep 17 00:00:00 2001 From: bhalimi <benjamin.halimi@cea.fr> Date: Fri, 20 Dec 2024 12:33:50 +0000 Subject: [PATCH 05/60] fix the scaling factor getter --- src/PTQ/PTQMetaOps.cpp | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/src/PTQ/PTQMetaOps.cpp b/src/PTQ/PTQMetaOps.cpp index d2423d0..69b5dd4 100644 --- a/src/PTQ/PTQMetaOps.cpp +++ b/src/PTQ/PTQMetaOps.cpp @@ -28,6 +28,9 @@ #include "aidge/utils/Types.h" #include "aidge/operator/Identity.hpp" #include "aidge/data/Tensor.hpp" +#include "aidge/operator/OperatorTensor.hpp" + + std::shared_ptr<Aidge::Node> Quantizer(double scalingFactor, double clip_min,double clip_max,const std::string& name) { std::shared_ptr<Aidge::Tensor> ScalingFactorTensorAttached = std::make_shared<Aidge::Tensor>(Aidge::Array1D<double, 1>{scalingFactor}); @@ -96,12 +99,17 @@ double getScalingFactor(std::shared_ptr<Aidge::Node> MetaOpNode) std::set<Aidge::NodePtr> Meta_Op_Node_List = MetaOp->getMicroGraph()->getNodes(); //List of Nodes 
inside PTQ Metaop Node for(std::shared_ptr<Aidge::Node> node : Meta_Op_Node_List) { - if(node->type() == "Mul") - { - std::shared_ptr<Aidge::Data> MulInput1Data = node->input(1).first->getOperator()->getRawOutput(0); - void* RawInputScalingFactor = std::static_pointer_cast<Aidge::Tensor>(MulInput1Data)->getImpl()->rawPtr(); - return (*(static_cast<double*>(RawInputScalingFactor))); - } + if(node->type() == "Mul") + { + //auto tensor = std::static_pointer_cast<Aidge::OperatorTensor>(node->getOperator())->getInput(1); + //bool useFloat = tensor->dataType() == Aidge::DataType::Float32; + //return useFloat ? tensor->get<float>(0) : tensor->get<double>(0); + + auto tensor = std::static_pointer_cast<Aidge::OperatorTensor>(node->getOperator())->getInput(1); + std::shared_ptr<Aidge::Tensor> fallback; + const Aidge::Tensor& scalingFactorTensor = tensor->refCastFrom(fallback, Aidge::DataType::Float64, "cpu"); + return scalingFactorTensor.get<double>(0); + } } AIDGE_ASSERT("Invalid PTQ MetaOperator, no Mul node found inside node of type {}",MetaOpNode->type()); return -1; -- GitLab From e854b9768c6078c2b4e2dac7b74b9d9c267027cf Mon Sep 17 00:00:00 2001 From: bhalimi <benjamin.halimi@cea.fr> Date: Fri, 20 Dec 2024 12:35:22 +0000 Subject: [PATCH 06/60] fix the histogram bin computation --- src/PTQ/Clipping.cpp | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/PTQ/Clipping.cpp b/src/PTQ/Clipping.cpp index f8765f3..57ad7a8 100644 --- a/src/PTQ/Clipping.cpp +++ b/src/PTQ/Clipping.cpp @@ -132,9 +132,10 @@ double computeMEClipping(std::vector<int> histogram, std::uint8_t nbBits, double clippingErrors.push_back(accumulatedError); } - std::vector<double>::iterator it = std::min_element(clippingErrors.begin(), clippingErrors.end()); - double bestClipping = static_cast<double> (std::distance(clippingErrors.begin(), it)) / static_cast<double> (nbIter); - + std::vector<double>::iterator it = std::min_element(clippingErrors.begin() + 1, clippingErrors.end()); + int bestBin = static_cast<int> (std::distance(clippingErrors.begin(), it)) + 1; + double bestClipping = static_cast<double> (bestBin) / static_cast<double> (nbIter); + return bestClipping; } @@ -199,7 +200,8 @@ double computeKLClipping(std::vector<int> refHistogram, std::uint8_t nbBits) } std::vector<double>::iterator it = std::min_element(clippingErrors.begin() + 1, clippingErrors.end()); - double bestClipping = static_cast<double> (std::distance(clippingErrors.begin(), it)) / static_cast<double> (nbIter); + int bestBin = static_cast<int> (std::distance(clippingErrors.begin(), it)) + 1; + double bestClipping = (static_cast<double> (bestBin)) / static_cast<double> (nbIter); return bestClipping; } -- GitLab From f82754691c9848c28c9cf1ccf57803b1bdf2ca84 Mon Sep 17 00:00:00 2001 From: bhalimi <benjamin.halimi@cea.fr> Date: Fri, 20 Dec 2024 12:39:48 +0000 Subject: [PATCH 07/60] remove commented code --- src/PTQ/PTQ.cpp | 58 ++++++++++++++++++++---------------------- src/PTQ/PTQMetaOps.cpp | 4 --- 2 files changed, 28 insertions(+), 34 deletions(-) diff --git a/src/PTQ/PTQ.cpp b/src/PTQ/PTQ.cpp index bfc5e3f..ffd5044 100644 --- a/src/PTQ/PTQ.cpp +++ b/src/PTQ/PTQ.cpp @@ -925,7 +925,7 @@ void performSingleShiftApproximation(std::shared_ptr<GraphView> graphView, bool for (std::shared_ptr<Node> node : nodeVector) { - //Use A meatoperator of type Scaling of MulCompensation instead + // Use A meatoperator of type Scaling of MulCompensation instead if (isAffine(node) || (node->type() == "Mul")) { std::shared_ptr<Node> scalingNode = 
(*node->getChildren().begin()); @@ -958,23 +958,13 @@ static void printScalingFactors(std::shared_ptr<GraphView> graphView) { Log::info(" === SCALING FACTORS === "); for (auto node : retrieveNodeVector(graphView)) - if (node->type() == "Scaling") + if (node->type() == "Scaling" || node->type() == "Quantizer") { double factor = getScalingFactor(node); Log::info(" {:.6f} ({})", factor, node->name()); } } -/* -std::string deduceBackend(std::shared_ptr<GraphView> graphView) -{ - std::string rootNodeBackend = graphView->getRootNode()->backend(); - for (auto node : graphView->getNodes()) - if (node->backend() != rootNodeBackend) - log::warn(" Multiple backend detected, setting all nodes to {}") -} -*/ - static void setupDataType(std::shared_ptr<GraphView> graphView, std::vector<std::shared_ptr<Tensor>> inputDataSet, DataType dataType) { graphView->setDataType(dataType); @@ -990,6 +980,17 @@ static void setupDataType(std::shared_ptr<GraphView> graphView, std::vector<std: tensor->setDataType(dataType); } +static void printRanges(std::shared_ptr<GraphView> graphView, std::map<std::string, double> valueRanges) +{ + SequentialScheduler scheduler(graphView); + scheduler.resetScheduling(); + scheduler.generateScheduling(); + + auto scheduling = scheduler.getStaticScheduling(); + for (auto node : scheduling) + if (node->type() == "Scaling") + std::cout << node->name() << " range = " << valueRanges[node->name()] << std::endl; +} void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits, std::vector<std::shared_ptr<Tensor>> inputDataSet, Clipping clippingMode, bool noQuant, bool optimizeSigns, bool singleShift, bool useCuda, bool verbose) { @@ -1017,23 +1018,15 @@ void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits, Log::info(" Computing the value ranges ..."); std::map<std::string, double> valueRanges = computeRanges(graphView, inputDataSet, true, useCuda); - // XXX -/* - SequentialScheduler scheduler(graphView); - scheduler.resetScheduling(); - scheduler.generateScheduling(); - - auto scheduling = scheduler.getStaticScheduling(); - for (auto node : scheduling) - if (node->type() == "Scaling") - std::cout << node->name() << " range = " << valueRanges[node->name()] << std::endl; + //std::cout << " === RANGES (BEFORE ADJUST) ===" << std::endl; + //printRanges(graphView, valueRanges); - std::cout << " RETURN " << std::endl; - return; -*/ Log::info(" Optimizing the clipping values ..."); valueRanges = adjustRanges(clippingMode, valueRanges, nbBits, graphView, inputDataSet, useCuda, verbose); + //std::cout << " === RANGES (AFTER ADJUST) ===" << std::endl; + //printRanges(graphView, valueRanges); + Log::info(" Normalizing the activations ..."); normalizeActivations(graphView, valueRanges); @@ -1048,20 +1041,25 @@ void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits, Log::info(" Performing the Single-Shift approximation ..."); performSingleShiftApproximation(graphView, noQuant); } - + if (verbose) printScalingFactors(graphView); - Log::info(" Reseting the scheduler ..."); - SequentialScheduler scheduler(graphView); - scheduler.resetScheduling(); + //std::cout << " === SCALINGS (BEFORE CAST) ===" << std::endl; + //printScalingFactors(graphView); setupDataType(graphView, inputDataSet, initialDataType); if (useCuda) graphView->setBackend("cuda"); - Log::info(" Network is quantized !"); + //std::cout << " === SCALINGS (AFTER CAST) ===" << std::endl; + //printScalingFactors(graphView); + + Log::info(" Reseting the scheduler ..."); + 
SequentialScheduler scheduler(graphView); + scheduler.resetScheduling(); + Log::info(" Network is quantized !"); } std::map<std::string, double> getWeightRanges(std::shared_ptr<GraphView> graphView) diff --git a/src/PTQ/PTQMetaOps.cpp b/src/PTQ/PTQMetaOps.cpp index 69b5dd4..d2bc184 100644 --- a/src/PTQ/PTQMetaOps.cpp +++ b/src/PTQ/PTQMetaOps.cpp @@ -101,10 +101,6 @@ double getScalingFactor(std::shared_ptr<Aidge::Node> MetaOpNode) { if(node->type() == "Mul") { - //auto tensor = std::static_pointer_cast<Aidge::OperatorTensor>(node->getOperator())->getInput(1); - //bool useFloat = tensor->dataType() == Aidge::DataType::Float32; - //return useFloat ? tensor->get<float>(0) : tensor->get<double>(0); - auto tensor = std::static_pointer_cast<Aidge::OperatorTensor>(node->getOperator())->getInput(1); std::shared_ptr<Aidge::Tensor> fallback; const Aidge::Tensor& scalingFactorTensor = tensor->refCastFrom(fallback, Aidge::DataType::Float64, "cpu"); -- GitLab From 115852970bb7386305cc10b866dc75c6ff4b3e4b Mon Sep 17 00:00:00 2001 From: bhalimi <benjamin.halimi@cea.fr> Date: Fri, 20 Dec 2024 16:03:48 +0000 Subject: [PATCH 08/60] improve code quality --- include/aidge/quantization/PTQ/PTQMetaOps.hpp | 12 +- python_binding/pybind_PTQ.cpp | 2 +- src/PTQ/PTQ.cpp | 42 ++--- src/PTQ/PTQMetaOps.cpp | 166 ++++++++++-------- 4 files changed, 123 insertions(+), 99 deletions(-) diff --git a/include/aidge/quantization/PTQ/PTQMetaOps.hpp b/include/aidge/quantization/PTQ/PTQMetaOps.hpp index 29bb7f2..62fac87 100644 --- a/include/aidge/quantization/PTQ/PTQMetaOps.hpp +++ b/include/aidge/quantization/PTQ/PTQMetaOps.hpp @@ -25,6 +25,8 @@ #include "aidge/graph/OpArgs.hpp" // Sequential #include "aidge/operator/MetaOperator.hpp" +namespace Aidge { + /// @brief Quantizer acts as a meta-operator to handle scaling operations in the PTQ, replacing the Scaling Operator. /// This operator is composed of a sequence of [Mul] -> [Clip] -> [Round] operations. /// @@ -33,7 +35,7 @@ /// @param clip_max The maximum value for the clip operation. /// @param name The name of the meta-operator node created. /// @return A shared pointer to an instance of the meta-operator node. -std::shared_ptr<Aidge::Node> Quantizer(double scalingFactor, double clip_min,double clip_max,const std::string& name); +std::shared_ptr<Aidge::Node> Quantizer(double scalingFactor, double clipMin, double clipMax, const std::string& name); /// @brief The purpose of Scaling is to encapsulate the Mul operator and tag it as a PTQ node rather than a regular Mul operator. /// Therefore, this meta-operator consists solely of a [Mul] operation. @@ -41,7 +43,7 @@ std::shared_ptr<Aidge::Node> Quantizer(double scalingFactor, double clip_min,dou /// @param scalingFactor The scaling factor to apply to the input (a scalar to multiply the input with). /// @param name The name of the meta-operator node created. /// @return A shared pointer to an instance of the scaling node. -std::shared_ptr<Aidge::Node> Scaling(double scalingFactor,const std::string& name = ""); +std::shared_ptr<Aidge::Node> Scaling(double scalingFactor, const std::string& name = ""); /// @brief Updates the scaling factor of a PTQ meta-operator node, allowing for dynamic adjustment of the scaling parameter. /// This function sets a new scaling factor for a specified meta-operator node, modifying the scalar applied in the [Mul] operation. 
@@ -50,7 +52,7 @@ std::shared_ptr<Aidge::Node> Scaling(double scalingFactor,const std::string& nam /// @param MetaOpNode A shared pointer to the PTQ meta-operator node whose scaling factor will be updated. /// @param newScalingFactor The new scaling factor to apply to the meta-operator node. /// @return True if the scaling factor was successfully updated, false if the operation failed (e.g., if MetaOpNode is null or incompatible). -bool updateScalingFactor(std::shared_ptr<Aidge::Node> MetaOpNode, double newScalingFactor); +void updateScalingFactor(std::shared_ptr<Aidge::Node> MetaOpNode, double newScalingFactor); /// @brief Retrieves the current scaling factor of a PTQ meta-operator node. /// This function returns the scaling factor associated with the specified PTQ meta-operator node, @@ -69,6 +71,8 @@ double getScalingFactor(std::shared_ptr<Aidge::Node> MetaOpNode); /// @param min The minimum value for the clip range. Values below this will be clipped to this minimum. /// @param max The maximum value for the clip range. Values above this will be clipped to this maximum. /// @return True if the clip range was successfully set, false if the operation failed (e.g., if QuantizerNode is null). -bool setClipRange(std::shared_ptr<Aidge::Node> QuantizerNode, double min, double max); +void setClipRange(std::shared_ptr<Aidge::Node> QuantizerNode, double min, double max); + +} #endif /* AIDGE_QUANTIZATION_PTQ_PTQMETAOPS_H_ */ diff --git a/python_binding/pybind_PTQ.cpp b/python_binding/pybind_PTQ.cpp index 73b217d..195c0bf 100644 --- a/python_binding/pybind_PTQ.cpp +++ b/python_binding/pybind_PTQ.cpp @@ -220,7 +220,7 @@ void init_PTQ(py::module &m) { :type network: :py:class:`aidge_core.GraphView` )mydelimiter"); - m.def("prepare_network", &prepareNetwork, py::arg("network"), "prepare the network fo the PTQ"); + m.def("prepare_network", &prepareNetwork, py::arg("network"), "prepare the network for the PTQ"); } diff --git a/src/PTQ/PTQ.cpp b/src/PTQ/PTQ.cpp index ffd5044..4c5d1d1 100644 --- a/src/PTQ/PTQ.cpp +++ b/src/PTQ/PTQ.cpp @@ -407,8 +407,9 @@ void normalizeParameters(std::shared_ptr<GraphView> graphView) std::shared_ptr<Node> scalingNode = getPreviousScalingNode(mergingNode); - double scaling_factor = getScalingFactor(scalingNode); - updateScalingFactor(scalingNode,scaling_factor / rescaling); + double currScalingFactor = getScalingFactor(scalingNode); + updateScalingFactor(scalingNode, currScalingFactor / rescaling); + accumulatedRatios[mergingNode->name()] /= rescaling; // optional ... } } @@ -565,8 +566,8 @@ void normalizeActivations(std::shared_ptr<GraphView> graphView, std::map<std::st // ValueRanges must contains all the scaling nodes !!! 
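The update rule applied in the hunk resuming below can be written as a one-liner; a sketch, where nodeRange and prevRange play the roles of the (confusingly named) scalingFactor and prevScalingFactor variables in the source:

// Sketch of the activation normalization rule: divide the node's factor
// by the ratio of its calibrated range to the range already absorbed
// upstream, so activations land in [-1, 1].
double normalizedFactor(double currScalingFactor,
                        double nodeRange, double prevRange) {
    return currScalingFactor / (nodeRange / prevRange);
}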
double scalingFactor = valueRanges[node->name()]; - double scaling_factor = getScalingFactor(node); - updateScalingFactor(node, (scaling_factor) / (scalingFactor / prevScalingFactor)); + double currScalingFactor = getScalingFactor(node); + updateScalingFactor(node, currScalingFactor / (scalingFactor / prevScalingFactor)); scalingFactors[node->name()] = scalingFactor; @@ -608,8 +609,9 @@ void normalizeActivations(std::shared_ptr<GraphView> graphView, std::map<std::st std::shared_ptr<Node> scalingNode = getPreviousScalingNode(mergingNode); //Log::info(" SCALING NODE : {} {}", scalingNode->type(), scalingNode->name()); - double scaling_factor = getScalingFactor(scalingNode); - updateScalingFactor(scalingNode, scaling_factor * rescaling); + + double currScalingFactor = getScalingFactor(scalingNode); + updateScalingFactor(scalingNode, currScalingFactor * rescaling); } } } @@ -803,8 +805,9 @@ void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_ rescaling *= outputIsUnsigned ? unsignedMax : signedMax; std::shared_ptr<Node> scalingNode = *(node->getChildren().begin()); // Assert if scalingNode is a Scaling ... - double scaling_factor = getScalingFactor(scalingNode); - updateScalingFactor(scalingNode, scaling_factor * rescaling); + + double currScalingFactor = getScalingFactor(scalingNode); + updateScalingFactor(scalingNode, currScalingFactor * rescaling); } if (isMerging(node)) @@ -819,8 +822,8 @@ void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_ std::shared_ptr<Node> scalingNode = *(node->getChildren().begin()); // Assert if scalingNode is a Scaling ... - double scaling_factor = getScalingFactor(scalingNode); // XXX bad naming - updateScalingFactor(scalingNode, scaling_factor * rescaling); + double currScalingFactor = getScalingFactor(scalingNode); // XXX bad naming + updateScalingFactor(scalingNode, currScalingFactor * rescaling); } // Handle the Scaling Nodes ... @@ -829,11 +832,9 @@ void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_ { if (!noQuant) { - // Replacement of Scaling Node by Quantizer - double currentSF = getScalingFactor(node); // XXX bad naming ! - - std::shared_ptr<Node> quantizerNode = Quantizer(currentSF, -(signedMax + 1), signedMax, node->name()); + // Replace the Scaling Node by Quantizer + std::shared_ptr<Node> quantizerNode = Quantizer(getScalingFactor(node), -(signedMax + 1), signedMax, node->name()); quantizerNode->getOperator()->setDataType(DataType::Float64); // getDataType(parentNode) quantizerNode->getOperator()->setBackend("cpu"); @@ -849,8 +850,8 @@ void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_ rescaling /= inputIsUnsigned ? unsignedMax : signedMax; rescaling *= outputIsUnsigned ? unsignedMax : signedMax; - double scalingFactor = getScalingFactor(quantizerNode); - updateScalingFactor(quantizerNode,scalingFactor * rescaling); + double currScalingFactor = getScalingFactor(quantizerNode); + updateScalingFactor(quantizerNode, currScalingFactor * rescaling); if(outputIsUnsigned) { @@ -909,8 +910,9 @@ static void insertCompensationNodes(std::shared_ptr<GraphView> graphView, std::u // rescale the coeffs and edit scaling factor fillTensor(coeffTensor, signedMax); - double sf = getScalingFactor(node); // XXX bad naming ! - updateScalingFactor(node, sf/signedMax); + + double currScalingFactor = getScalingFactor(node); // XXX bad naming ! + updateScalingFactor(node, currScalingFactor / signedMax); // TODO : double check this !!! 
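As a reference for the Quantizer nodes inserted above, their forward semantics — the Mul, Round and Clip chain that this patch set assembles — can be sketched as:

// Reference semantics of the Quantizer metaoperator (illustrative only).
#include <algorithm>
#include <cmath>

double quantizerForward(double x, double scalingFactor,
                        double clipMin, double clipMax) {
    const double scaled  = x * scalingFactor;              // Mul
    const double rounded = std::nearbyint(scaled);         // Round
    return std::min(std::max(rounded, clipMin), clipMax);  // Clip
}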
//std::cout << getTensorAbsoluteMax(coeffTensor) << std::endl; @@ -960,8 +962,8 @@ static void printScalingFactors(std::shared_ptr<GraphView> graphView) for (auto node : retrieveNodeVector(graphView)) if (node->type() == "Scaling" || node->type() == "Quantizer") { - double factor = getScalingFactor(node); - Log::info(" {:.6f} ({})", factor, node->name()); + double scalingFactor = getScalingFactor(node); + Log::info(" {:.6f} ({})", scalingFactor, node->name()); } } diff --git a/src/PTQ/PTQMetaOps.cpp b/src/PTQ/PTQMetaOps.cpp index d2bc184..152a3b0 100644 --- a/src/PTQ/PTQMetaOps.cpp +++ b/src/PTQ/PTQMetaOps.cpp @@ -29,106 +29,124 @@ #include "aidge/operator/Identity.hpp" #include "aidge/data/Tensor.hpp" #include "aidge/operator/OperatorTensor.hpp" +#include "aidge/utils/Log.hpp" -std::shared_ptr<Aidge::Node> Quantizer(double scalingFactor, double clip_min,double clip_max,const std::string& name) +namespace Aidge { - std::shared_ptr<Aidge::Tensor> ScalingFactorTensorAttached = std::make_shared<Aidge::Tensor>(Aidge::Array1D<double, 1>{scalingFactor}); - std::shared_ptr<Aidge::Node> mul_node = Aidge::Mul((!name.empty()) ? name + "_MulQuant" : ""); - std::shared_ptr<Aidge::Node> producer_scaling_factor = addProducer(mul_node,1,{1},"ScalingFactor"); - producer_scaling_factor ->getOperator()->setOutput(0,ScalingFactorTensorAttached); - - std::shared_ptr<Aidge::Node> clip_node = Aidge::Clip((!name.empty()) ? name + "_ClipQuant" : "",clip_min,clip_max); +std::shared_ptr<Node> Quantizer(double scalingFactor, double clipMin, double clipMax, const std::string& name) +{ + // create the nodes + + std::shared_ptr<Node> mulNode = Mul((!name.empty()) ? name + "_MulQuant" : ""); + std::shared_ptr<Node> roundNode = Round((!name.empty()) ? name + "_RoundQuant" : ""); + std::shared_ptr<Node> clipNode = Clip((!name.empty()) ? name + "_ClipQuant" : "", clipMin, clipMax); + + // connect the scaling factor producer + + std::shared_ptr<Tensor> scalingFactorTensor = std::make_shared<Tensor>(Array1D<double, 1> {scalingFactor}); + std::shared_ptr<Node> scalingFactorProducer = addProducer(mulNode, 1, {1}, "ScalingFactor"); + scalingFactorProducer->getOperator()->setOutput(0, scalingFactorTensor); - std::shared_ptr<Aidge::GraphView> graph = Aidge::Sequential({ - mul_node, - Aidge::Round((!name.empty()) ? name + "_RoundQuant" : ""), - clip_node}); + // create the metaop graph + + std::shared_ptr<GraphView> graphView = Sequential({mulNode, roundNode, clipNode}); + std::shared_ptr<GraphView> connectedGraphView = getConnectedGraphView(mulNode); // XXX why not use the graphView ??? + + // return the metaop + + std::shared_ptr<Node> metaopNode = MetaOperator("Quantizer", connectedGraphView, {}, name); // XXX alternative prototype - std::shared_ptr<Aidge::GraphView> connectedGV = getConnectedGraphView(mul_node); - std::shared_ptr<Aidge::Node> metaopNode = MetaOperator("Quantizer",connectedGV,{},name); return metaopNode; } -std::shared_ptr<Aidge::Node> Scaling(double scalingFactor,const std::string& name) +std::shared_ptr<Node> Scaling(double scalingFactor, const std::string& name) { - std::shared_ptr<Aidge::Tensor> scalingFactorTensor = std::make_shared<Aidge::Tensor>(Aidge::Array1D<double, 1> {scalingFactor}); + std::shared_ptr<Tensor> scalingFactorTensor = std::make_shared<Tensor>(Array1D<double, 1> {scalingFactor}); - std::shared_ptr<Aidge::Node> mulNode = Aidge::Mul((!name.empty()) ? name + "_Scaling" : ""); + std::shared_ptr<Node> mulNode = Mul((!name.empty()) ? 
name + "_Scaling" : ""); - std::shared_ptr<Aidge::Node> scalingFactorProducer = addProducer(mulNode, 1, {1}, "ScalingFactor"); + std::shared_ptr<Node> scalingFactorProducer = addProducer(mulNode, 1, {1}, "ScalingFactor"); scalingFactorProducer->getOperator()->setOutput(0, scalingFactorTensor); - std::shared_ptr<Aidge::GraphView> graphView = Aidge::Sequential({mulNode}); - std::shared_ptr<Aidge::GraphView> connectedGraphView = getConnectedGraphView(mulNode); + std::shared_ptr<GraphView> graphView = Sequential({mulNode}); + std::shared_ptr<GraphView> connectedGraphView = getConnectedGraphView(mulNode); - Aidge::NodePtr metaopNode = MetaOperator("Scaling", connectedGraphView, {}, name); + NodePtr metaopNode = MetaOperator("Scaling", connectedGraphView, {}, name); return metaopNode; } -bool updateScalingFactor(std::shared_ptr<Aidge::Node> MetaOpNode, double newScalingFactor) +static std::shared_ptr<Node> getSubNode(std::shared_ptr<GraphView> graphView, std::string nodeType) { - if(MetaOpNode->type() != "Scaling" && MetaOpNode->type() != "Quantizer") - { - AIDGE_ASSERT("Cannot use updatePTQMetaOpsScalingFactor on Node of type {}", MetaOpNode->type()); - } - std::shared_ptr<Aidge::Tensor> newScalingFactorTensorAttached = std::make_shared<Aidge::Tensor>(Aidge::Array1D<double, 1>{newScalingFactor}); - std::shared_ptr<Aidge::MetaOperator_Op> MetaOp = std::static_pointer_cast<Aidge::MetaOperator_Op>(MetaOpNode->getOperator()); - std::set<Aidge::NodePtr> Meta_Op_Node_List = MetaOp->getMicroGraph()->getNodes(); //List of Nodes inside PTQ Metaop Node - for(std::shared_ptr<Aidge::Node> node : Meta_Op_Node_List) - { - if(node->type() == "Mul") - { - node->input(1).first->getOperator()->setOutput(0, newScalingFactorTensorAttached); - return true; - } - } - AIDGE_ASSERT("Invalid PTQ MetaOperator, no Mul node found inside node of type {}",MetaOpNode->type()); - return false; + std::shared_ptr<Node> mulNode = nullptr; + for(std::shared_ptr<Node> node : graphView->getNodes()) + if (node->type() == nodeType) + mulNode = node; + + return mulNode; } -double getScalingFactor(std::shared_ptr<Aidge::Node> MetaOpNode) + +void updateScalingFactor(std::shared_ptr<Node> metaOpNode, double scalingFactor) { - if(MetaOpNode->type() != "Scaling" && MetaOpNode->type() != "Quantizer") - { - AIDGE_ASSERT("Cannot use getPTQMetaOpsScalingFactor on Node of type {}",MetaOpNode->type()); - return -1; + if(metaOpNode->type() != "Scaling" && metaOpNode->type() != "Quantizer") + Log::warn(" Cannot update the scaling factor on Node of type {}", metaOpNode->type()); + + std::shared_ptr<Tensor> scalingFactorTensor = std::make_shared<Tensor>(Array1D<double, 1> {scalingFactor}); + + std::shared_ptr<MetaOperator_Op> metaOp = std::static_pointer_cast<MetaOperator_Op>(metaOpNode->getOperator()); + + std::shared_ptr<Node> mulNode = getSubNode(metaOp->getMicroGraph(), "Mul"); + + if (!mulNode) + Log::warn(" Invalid PTQ MetaOperator, no Mul node found inside ! 
"); + + mulNode->input(1).first->getOperator()->setOutput(0, scalingFactorTensor); +} + +double getScalingFactor(std::shared_ptr<Node> MetaOpNode) +{ + if (MetaOpNode->type() != "Scaling" && MetaOpNode->type() != "Quantizer") { + Log::warn(" Cannot get the scaling factor on Node of type {}", MetaOpNode->type()); + return 0; } - std::shared_ptr<Aidge::MetaOperator_Op> MetaOp = std::static_pointer_cast<Aidge::MetaOperator_Op>(MetaOpNode->getOperator()); - std::set<Aidge::NodePtr> Meta_Op_Node_List = MetaOp->getMicroGraph()->getNodes(); //List of Nodes inside PTQ Metaop Node - for(std::shared_ptr<Aidge::Node> node : Meta_Op_Node_List) - { - if(node->type() == "Mul") - { - auto tensor = std::static_pointer_cast<Aidge::OperatorTensor>(node->getOperator())->getInput(1); - std::shared_ptr<Aidge::Tensor> fallback; - const Aidge::Tensor& scalingFactorTensor = tensor->refCastFrom(fallback, Aidge::DataType::Float64, "cpu"); - return scalingFactorTensor.get<double>(0); - } + + std::shared_ptr<MetaOperator_Op> metaOp = std::static_pointer_cast<MetaOperator_Op>(MetaOpNode->getOperator()); + + std::shared_ptr<Node> mulNode = getSubNode(metaOp->getMicroGraph(), "Mul"); + + if (!mulNode) { + Log::warn(" Invalid PTQ MetaOperator, no Mul found inside node of type {}", MetaOpNode->type()); + return 0; } - AIDGE_ASSERT("Invalid PTQ MetaOperator, no Mul node found inside node of type {}",MetaOpNode->type()); - return -1; + + auto scalingFactorTensor = std::static_pointer_cast<OperatorTensor>(mulNode->getOperator())->getInput(1); + std::shared_ptr<Tensor> fallback; + const Tensor& localTensor = scalingFactorTensor->refCastFrom(fallback, DataType::Float64, "cpu"); + + return localTensor.get<double>(0); } -bool setClipRange(std::shared_ptr<Aidge::Node> QuantizerNode,double min, double max) + + +void setClipRange(std::shared_ptr<Node> quantizerNode, double min, double max) { - if(QuantizerNode->type() != "Quantizer") - { - AIDGE_ASSERT("Cannot use setQuantizerClipRange on Node of type {}",QuantizerNode->type()); - return false; + if (quantizerNode->type() != "Quantizer") { + Log::warn(" Cannot set the clipping range on Node of type {}", quantizerNode->type()); + return; } - std::shared_ptr<Aidge::MetaOperator_Op> MetaOp = std::static_pointer_cast<Aidge::MetaOperator_Op>(QuantizerNode->getOperator()); - std::set<Aidge::NodePtr> Meta_Op_Node_List = MetaOp->getMicroGraph()->getNodes(); //List of Node inside - for(std::shared_ptr<Aidge::Node> node : Meta_Op_Node_List) - { - if(node->type() == "Clip") - { - std::shared_ptr<Aidge::Clip_Op> Clip_Node_Op = std::static_pointer_cast<Aidge::Clip_Op>(node->getOperator()); - Clip_Node_Op->max() = max; - Clip_Node_Op->min() = min; - return true; - } + + std::shared_ptr<MetaOperator_Op> metaOp = std::static_pointer_cast<MetaOperator_Op> (quantizerNode->getOperator()); + + std::shared_ptr<Node> clipNode = getSubNode(metaOp->getMicroGraph(), "Clip"); + + if (!clipNode) { + Log::warn(" Invalid PTQ MetaOperator, no Clip found inside node of type {}", quantizerNode->type()); + return; } - AIDGE_ASSERT("Invalid MetaOperator Quantizer, no clip node found inside Node of type {}",QuantizerNode->type()); - return false; + + std::shared_ptr<Clip_Op> clipOp = std::static_pointer_cast<Clip_Op>(clipNode->getOperator()); + clipOp->max() = max; + clipOp->min() = min; +} } \ No newline at end of file -- GitLab From 579d9320830d8353bc216d56c93b5f6385f082e2 Mon Sep 17 00:00:00 2001 From: cmoineau <cyril.moineau@cea.fr> Date: Wed, 11 Dec 2024 10:08:53 +0000 Subject: [PATCH 09/60] Update quantization 
with https://gitlab.eclipse.org/eclipse/aidge/aidge_core/-/merge_requests/277 --- .gitignore | 3 +- CMakeLists.txt | 26 +++++++++++++ include/aidge/quantization_version.h | 11 ++++++ .../sys_info/QuantizationVersionInfo.hpp | 38 +++++++++++++++++++ include/aidge/version.h.in | 11 ++++++ pyproject.toml | 18 +++++---- python_binding/pybind_Quantization.cpp | 6 ++- .../pybind_QuantizationVersionInfo.cpp | 11 ++++++ setup.cfg | 3 ++ 9 files changed, 115 insertions(+), 12 deletions(-) create mode 100644 include/aidge/quantization_version.h create mode 100644 include/aidge/utils/sys_info/QuantizationVersionInfo.hpp create mode 100644 include/aidge/version.h.in create mode 100644 python_binding/utils/sys_info/pybind_QuantizationVersionInfo.cpp create mode 100644 setup.cfg diff --git a/.gitignore b/.gitignore index 18f1583..ba5c593 100644 --- a/.gitignore +++ b/.gitignore @@ -4,17 +4,16 @@ # C++ Build build*/ install*/ +include/aidge/backend/quantization_version.h # VSCode .vscode # Python -aidge_quantization/_version.py *.so __pycache__ *.pyc *.egg-info -aidge_quantization/_version.py wheelhouse/* # Mermaid diff --git a/CMakeLists.txt b/CMakeLists.txt index 905a2a2..7a2b168 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -5,13 +5,39 @@ set(CXX_STANDARD 14) file(STRINGS "${CMAKE_SOURCE_DIR}/version.txt" version) file(STRINGS "${CMAKE_SOURCE_DIR}/project_name.txt" project) +# Parse version.txt to retrieve Major, Minor and Path +string(REGEX MATCH "([0-9]+)\\.([0-9]+)\\.([0-9]+)" _ MATCHES ${version}) +set(PROJECT_VERSION_MAJOR ${CMAKE_MATCH_1}) +set(PROJECT_VERSION_MINOR ${CMAKE_MATCH_2}) +set(PROJECT_VERSION_PATCH ${CMAKE_MATCH_3}) + +# Retrieve latest git commit +execute_process( + COMMAND git rev-parse --short HEAD + WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} + OUTPUT_VARIABLE GIT_COMMIT_HASH + OUTPUT_STRIP_TRAILING_WHITESPACE + ERROR_QUIET +) + message(STATUS "Project name: ${project}") message(STATUS "Project version: ${version}") +message(STATUS "Latest git commit: ${GIT_COMMIT_HASH}") + +message(STATUS "Creating ${CMAKE_CURRENT_SOURCE_DIR}/include/aidge/quantization_version.h") project(${project} VERSION ${version} DESCRIPTION "Quantization methods for the Aidge framework." LANGUAGES CXX) +# Note: Using configure_file later in the code make so that version variables are lost... +# I tried to set in internal cache but it failed. +# Current code is working, but there might be a scope issue. 
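The version header generated by the configure_file call below can then be consumed from C++; a hypothetical example (the constants match those defined in include/aidge/quantization_version.h):

// Hypothetical consumer of the generated version header: log the version
// triple and commit hash at startup.
#include <cstdio>
#include "aidge/quantization_version.h"

int main() {
    std::printf("aidge_quantization %s (commit %s)\n",
                Aidge::PROJECT_VERSION, Aidge::PROJECT_GIT_HASH);
    return 0;
}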
+# Generate version.h file from config file version.h.in +configure_file( + "${CMAKE_CURRENT_SOURCE_DIR}/include/aidge/version.h.in" + "${CMAKE_CURRENT_SOURCE_DIR}/include/aidge/quantization_version.h" +) # Note : project name is {project} and python module name is also {project} set(module_name _${project}) # target name diff --git a/include/aidge/quantization_version.h b/include/aidge/quantization_version.h new file mode 100644 index 0000000..546263a --- /dev/null +++ b/include/aidge/quantization_version.h @@ -0,0 +1,11 @@ +#ifndef VERSION_H +#define VERSION_H + +namespace Aidge { +static constexpr const int PROJECT_VERSION_MAJOR = 0; +static constexpr const int PROJECT_VERSION_MINOR = 2; +static constexpr const int PROJECT_VERSION_PATCH = 0; +static constexpr const char * PROJECT_VERSION = "0.2.0"; +static constexpr const char * PROJECT_GIT_HASH = "f50c860"; +} +#endif // VERSION_H diff --git a/include/aidge/utils/sys_info/QuantizationVersionInfo.hpp b/include/aidge/utils/sys_info/QuantizationVersionInfo.hpp new file mode 100644 index 0000000..6b4deb8 --- /dev/null +++ b/include/aidge/utils/sys_info/QuantizationVersionInfo.hpp @@ -0,0 +1,38 @@ +#ifndef AIDGE_UTILS_SYS_INFO_OPENCV_VERSION_INFO_H +#define AIDGE_UTILS_SYS_INFO_OPENCV_VERSION_INFO_H + +#include "aidge/utils/Log.hpp" +#include "aidge/quantization_version.h" + +namespace Aidge { + +constexpr inline const char * getQuantizationProjectVersion(){ + return PROJECT_VERSION; +} + +constexpr inline const char * getQuantizationGitHash(){ + return PROJECT_GIT_HASH; +} + +void showQuantizationVersion() { + Log::info("Aidge quantization: {} ({}), {} {}", getQuantizationProjectVersion(), getQuantizationGitHash(), __DATE__, __TIME__); + // Compiler version + #if defined(__clang__) + /* Clang/LLVM. ---------------------------------------------- */ + Log::info("Clang/LLVM compiler version: {}.{}.{}\n", __clang_major__ , __clang_minor__, __clang_patchlevel__); + #elif defined(__ICC) || defined(__INTEL_COMPILER) + /* Intel ICC/ICPC. ------------------------------------------ */ + Log::info("Intel ICC/ICPC compiler version: {}\n", __INTEL_COMPILER); + #elif defined(__GNUC__) || defined(__GNUG__) + /* GNU GCC/G++. --------------------------------------------- */ + Log::info("GNU GCC/G++ compiler version: {}.{}.{}", __GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__); + #elif defined(_MSC_VER) + /* Microsoft Visual Studio. 
--------------------------------- */ + Log::info("Microsoft Visual Studio compiler version: {}\n", _MSC_VER); + #else + Log::info("Unknown compiler\n"); + #endif + +} +} // namespace Aidge +#endif // AIDGE_UTILS_SYS_INFO_OPENCV_VERSION_INFO_H diff --git a/include/aidge/version.h.in b/include/aidge/version.h.in new file mode 100644 index 0000000..4b876f6 --- /dev/null +++ b/include/aidge/version.h.in @@ -0,0 +1,11 @@ +#ifndef VERSION_H +#define VERSION_H + +namespace Aidge { +static constexpr const int PROJECT_VERSION_MAJOR = @PROJECT_VERSION_MAJOR@; +static constexpr const int PROJECT_VERSION_MINOR = @PROJECT_VERSION_MINOR@; +static constexpr const int PROJECT_VERSION_PATCH = @PROJECT_VERSION_PATCH@; +static constexpr const char * PROJECT_VERSION = "@PROJECT_VERSION_MAJOR@.@PROJECT_VERSION_MINOR@.@PROJECT_VERSION_PATCH@"; +static constexpr const char * PROJECT_GIT_HASH = "@GIT_COMMIT_HASH@"; +} +#endif // VERSION_H diff --git a/pyproject.toml b/pyproject.toml index fc745eb..deb91c7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -11,18 +11,24 @@ classifiers = [ "Development Status :: 2 - Pre-Alpha", "Programming Language :: Python :: 3" ] -dynamic = ["version"] #Â defined in tool.setuptools_scm -# version="1" +dynamic = ["version"] #Â defined by pbr [build-system] requires = [ "setuptools>=64", - "setuptools_scm[toml]==7.1.0", "cmake>=3.15.3.post1", - "toml" + "toml", + "pbr" ] build-backend = "setuptools.build_meta" +[project.urls] +Homepage = "https://www.deepgreen.ai/en/platform" +Documentation = "https://eclipse-aidge.readthedocs.io/en/latest/" +Repository = "https://gitlab.eclipse.org/eclipse/aidge/aidge_quantization" +Issues = "https://gitlab.eclipse.org/eclipse/aidge/aidge_quantization/-/issues/" +Changelog = "https://gitlab.eclipse.org/eclipse/aidge/aidge_quantization/-/releases" + ##################################################### # SETUPTOOLS [tool.setuptools] @@ -35,10 +41,6 @@ exclude = [ "aidge_quantization.unit_tests.assets" ] # exclude packages matching these glob patterns (empty by default) -# SETUPTOOLS_SCM -[tool.setuptools_scm] -write_to = "aidge_quantization/_version.py" - ##################################################### # CIBUILDWHEEL [tool.cibuildwheel] diff --git a/python_binding/pybind_Quantization.cpp b/python_binding/pybind_Quantization.cpp index cd18cf8..7ac344d 100644 --- a/python_binding/pybind_Quantization.cpp +++ b/python_binding/pybind_Quantization.cpp @@ -20,7 +20,7 @@ namespace py = pybind11; -namespace Aidge +namespace Aidge { // operators @@ -35,8 +35,9 @@ void init_QAT_FixedQ(py::module &m); void init_QAT_LSQ(py::module &m); void init_QuantRecipes(py::module &m); +void init_QuantizationVersionInfo(py::module &m); -PYBIND11_MODULE(aidge_quantization, m) +PYBIND11_MODULE(aidge_quantization, m) { init_FixedQ(m); init_LSQ(m); @@ -47,6 +48,7 @@ PYBIND11_MODULE(aidge_quantization, m) init_QAT_FixedQ(m); init_QAT_LSQ(m); init_QuantRecipes(m); + init_QuantizationVersionInfo(m); } } // namespace Aidge diff --git a/python_binding/utils/sys_info/pybind_QuantizationVersionInfo.cpp b/python_binding/utils/sys_info/pybind_QuantizationVersionInfo.cpp new file mode 100644 index 0000000..abed12b --- /dev/null +++ b/python_binding/utils/sys_info/pybind_QuantizationVersionInfo.cpp @@ -0,0 +1,11 @@ +#include <pybind11/pybind11.h> +#include "aidge/utils/sys_info/QuantizationVersionInfo.hpp" + +namespace py = pybind11; +namespace Aidge { +void init_QuantizationVersionInfo(py::module& m){ + m.def("show_version", &showQuantizationVersion); + 
m.def("get_project_version", &getQuantizationProjectVersion); + m.def("get_git_hash", &getQuantizationGitHash); +} +} diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 0000000..aa0f227 --- /dev/null +++ b/setup.cfg @@ -0,0 +1,3 @@ +# pbr file +[metadata] +version = file: version.txt -- GitLab From d3798ad61a45abdbf67238fe1d749b58d98e6464 Mon Sep 17 00:00:00 2001 From: bhalimi <benjamin.halimi@cea.fr> Date: Fri, 3 Jan 2025 16:10:53 +0000 Subject: [PATCH 10/60] set the LSQ op backward kernels to gradient accumulation mode --- .../aidge/backend/cpu/operator/LSQImpl_kernels.hpp | 12 ++++++------ src/backend/cuda/operator/LSQImpl_CUDA_kernels.cu | 13 ++++++++----- 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/include/aidge/backend/cpu/operator/LSQImpl_kernels.hpp b/include/aidge/backend/cpu/operator/LSQImpl_kernels.hpp index ddb8209..1ed05e2 100644 --- a/include/aidge/backend/cpu/operator/LSQImpl_kernels.hpp +++ b/include/aidge/backend/cpu/operator/LSQImpl_kernels.hpp @@ -67,16 +67,16 @@ void LSQImpl_cpu_backward_kernel(const std::size_t inputLength, const GI fullPrecScale_4 = input[4*i+3] / stepSize[0]; /*****************Features Gradient Computation********************/ // STE method is simply applied - grad_input[4*i] = grad_output[4*i]*((fullPrecScale_1 <= static_cast<GI>(range.first)) ? GI(0.0) : + grad_input[4*i] += grad_output[4*i]*((fullPrecScale_1 <= static_cast<GI>(range.first)) ? GI(0.0) : (fullPrecScale_1 >= static_cast<GI>(range.second)) ? GI(0.0) : GI(1.0)); - grad_input[4*i+1] = grad_output[4*i+1]*((fullPrecScale_2 <= static_cast<GI>(range.first)) ? GI(0.0) : + grad_input[4*i+1] += grad_output[4*i+1]*((fullPrecScale_2 <= static_cast<GI>(range.first)) ? GI(0.0) : (fullPrecScale_2 >= static_cast<GI>(range.second)) ? GI(0.0) : GI(1.0)); - grad_input[4*i+2] = grad_output[4*i+2]*((fullPrecScale_3 <= static_cast<GI>(range.first)) ? GI(0.0) : + grad_input[4*i+2] += grad_output[4*i+2]*((fullPrecScale_3 <= static_cast<GI>(range.first)) ? GI(0.0) : (fullPrecScale_3 >= static_cast<GI>(range.second)) ? GI(0.0) : GI(1.0)); - grad_input[4*i+3] = grad_output[4*i+3]*((fullPrecScale_4 <= static_cast<GI>(range.first)) ? GI(0.0) : + grad_input[4*i+3] += grad_output[4*i+3]*((fullPrecScale_4 <= static_cast<GI>(range.first)) ? GI(0.0) : (fullPrecScale_4 >= static_cast<GI>(range.second)) ? GI(0.0) : GI(1.0)); @@ -105,7 +105,7 @@ void LSQImpl_cpu_backward_kernel(const std::size_t inputLength, // Process remaining for(unsigned int i=inputLength-inputLength%4; i<inputLength; ++i) { const GI fullPrecScale = input[i] / stepSize[0]; - grad_input[i] = grad_output[i]*((fullPrecScale <= static_cast<GI>(range.first)) ? GI(0.0) : + grad_input[i] += grad_output[i]*((fullPrecScale <= static_cast<GI>(range.first)) ? GI(0.0) : (fullPrecScale >= static_cast<GI>(range.second)) ? 
GI(0.0) : GI(1.0)); GI qData = fullPrecScale; @@ -117,7 +117,7 @@ void LSQImpl_cpu_backward_kernel(const std::size_t inputLength, const GI gradScaleFactor = static_cast<GI>(1.0f / std::sqrt(inputLength * range.second)); // 3rd: Multiply Step Size gradient with scale factor - grad_stepSize[0] = diffStepSize * gradScaleFactor; + grad_stepSize[0] += diffStepSize * gradScaleFactor; } diff --git a/src/backend/cuda/operator/LSQImpl_CUDA_kernels.cu b/src/backend/cuda/operator/LSQImpl_CUDA_kernels.cu index 0d54909..96065e4 100644 --- a/src/backend/cuda/operator/LSQImpl_CUDA_kernels.cu +++ b/src/backend/cuda/operator/LSQImpl_CUDA_kernels.cu @@ -84,10 +84,11 @@ __global__ void LSQImpl_cuda_backward_kernel_(const std::size_t inputLength, const GI fullPrecScale = input[i] / stepSize[0]; /*****************************Data/Weights Gradient Computation************************/ - // STE method is simply apply: - grad_input[i] = grad_output[i]*( (fullPrecScale <= static_cast<GI>(range.first)) ? GI(0.0) : - (fullPrecScale >= static_cast<GI>(range.second)) ? GI(0.0) : - GI(1.0)); + // STE method is simply applied : + // (we accumulate the gradient instead of replacing it) + grad_input[i] += grad_output[i] * ((fullPrecScale <= static_cast<GI>(range.first)) ? GI(0.0) : + (fullPrecScale >= static_cast<GI>(range.second)) ? GI(0.0) : + GI(1.0)); /*****************************Step Size Gradient Computation*************************/ GI qData = fullPrecScale; @@ -142,7 +143,9 @@ void Aidge::LSQImpl_cuda_backward_kernel(const std::size_t inputLength, // for simplicity and foolproof-ness thrust::device_ptr<GI> grad_workspacePtr(grad_workspace); thrust::device_ptr<GI> grad_stepSizePtr(grad_stepSize); - grad_stepSizePtr[0] = thrust::reduce(grad_workspacePtr, grad_workspacePtr + inputLength, GI(0.0)); + + // We accumulate the stepSize gradient instead of replacing it + grad_stepSizePtr[0] += thrust::reduce(grad_workspacePtr, grad_workspacePtr + inputLength, GI(0.0)); //printf(" step grad = %f \n", (float) grad_stepSizePtr[0]); -- GitLab From 73e899eb733850400ec8df7896171663e225d0ca Mon Sep 17 00:00:00 2001 From: bhalimi <benjamin.halimi@cea.fr> Date: Wed, 8 Jan 2025 10:32:06 +0000 Subject: [PATCH 11/60] improve tensor manipulation routines + enhance insertCompensationNodes --- src/PTQ/CLE.cpp | 73 +++++++++++++++++------ src/PTQ/PTQ.cpp | 137 ++++++++++++++++++++++++++------------------ src/QAT/QAT_LSQ.cpp | 9 +-- 3 files changed, 138 insertions(+), 81 deletions(-) diff --git a/src/PTQ/CLE.cpp b/src/PTQ/CLE.cpp index 1d5ccc7..2c6e374 100644 --- a/src/PTQ/CLE.cpp +++ b/src/PTQ/CLE.cpp @@ -19,6 +19,12 @@ #include "aidge/utils/Log.hpp" #include "aidge/operator/OperatorTensor.hpp" +#include "aidge/operator/Mul.hpp" +#include "aidge/operator/ArgMax.hpp" +#include "aidge/operator/Abs.hpp" +#include "aidge/operator/Reshape.hpp" +#include "aidge/operator/Round.hpp" + namespace Aidge { @@ -34,27 +40,58 @@ static std::shared_ptr<Tensor> getBiasTensor(std::shared_ptr<Node> node) static void rescaleTensor(std::shared_ptr<Tensor> tensor, float scaling) { - // Get the tensor data pointer - float * castedTensor = static_cast <float *> (tensor->getImpl()->rawPtr()); - - // Rescale the tensor - for(std::size_t i = 0; i < tensor->size(); i++) - castedTensor[i] *= scaling; + auto mulOp = Mul_Op(); + mulOp.setDataType(tensor->dataType()); + mulOp.setBackend(tensor->backend()); + + std::shared_ptr<Aidge::Tensor> scalingTensor = std::make_shared<Aidge::Tensor>(Aidge::Array1D<float, 1> {scaling}); + 
scalingTensor->setDataType(tensor->dataType()); + scalingTensor->setBackend(tensor->backend()); + + mulOp.associateInput(0, tensor); + mulOp.associateInput(1, scalingTensor); + + mulOp.forward(); + + auto outTensor = mulOp.getOutput(0); + *tensor = *outTensor; + //tensor->copyCast(*outTensor); } -static float getTensorAbsoluteMax(std::shared_ptr <Tensor> tensor) +// TODO : make the retreival of argmax values backend independant (refCastFrom) +static double getTensorAbsoluteMax(std::shared_ptr<Tensor> tensor) { - // Get the tensor data pointer and edit it - float * castedTensor = static_cast<float*>(tensor->getImpl()->rawPtr()); - - // Get the tensor absolute max value - float maxValue = 0.0f; - for(std::size_t i = 0; i < tensor->size(); ++i) { - if(std::fabs(castedTensor[i]) > maxValue) { - maxValue = std::fabs(castedTensor[i]); - } - } - return maxValue; + // get the abs tensor + + std::shared_ptr<Tensor> absTensor = std::make_shared<Tensor>(tensor->abs()); + + // flatten the abs tensor + + std::int64_t nbElement = tensor->size(); + + auto reshapeOp = Reshape_Op({nbElement}); + reshapeOp.setDataType(tensor->dataType()); + reshapeOp.setBackend(tensor->backend()); + + reshapeOp.associateInput(0, absTensor); + reshapeOp.forward(); + std::shared_ptr<Tensor> flatTensor = reshapeOp.getOutput(0); + + // Get the argmax + + auto argmaxOp = ArgMax_Op(0, true, false); + argmaxOp.setDataType(tensor->dataType()); + argmaxOp.setBackend(tensor->backend()); + + argmaxOp.associateInput(0, flatTensor); + argmaxOp.forward(); + std::shared_ptr<Tensor> argmaxTensor = argmaxOp.getOutput(0); + + // Return the max + + int maxIndex = std::round(argmaxTensor->get<double>(0)); + + return flatTensor->get<double>(maxIndex); } void crossLayerEqualization(std::shared_ptr<GraphView> graphView, float targetDelta) diff --git a/src/PTQ/PTQ.cpp b/src/PTQ/PTQ.cpp index 54b95cb..54d645e 100644 --- a/src/PTQ/PTQ.cpp +++ b/src/PTQ/PTQ.cpp @@ -28,6 +28,12 @@ #include "aidge/operator/BatchNorm.hpp" #include "aidge/operator/Conv.hpp" +#include "aidge/operator/ArgMax.hpp" +#include "aidge/operator/Abs.hpp" +#include "aidge/operator/Reshape.hpp" +#include "aidge/operator/Round.hpp" + + #include "aidge/recipes/Recipes.hpp" #include "aidge/recipes/QuantRecipes.hpp" @@ -66,51 +72,75 @@ bool checkArchitecture(std::shared_ptr<GraphView> graphView) return true; } -static void fillTensor(std::shared_ptr<Tensor> tensor, double value) +static void rescaleTensor(std::shared_ptr<Tensor> tensor, float scaling) { - // Get the tensor data pointer - double * castedTensor = static_cast <double *> (tensor->getImpl()->rawPtr()); + auto mulOp = Mul_Op(); + mulOp.setDataType(tensor->dataType()); + mulOp.setBackend(tensor->backend()); - // Fill the tensor - for(std::size_t i = 0; i < tensor->size(); i++) - castedTensor[i] = value; -} + std::shared_ptr<Aidge::Tensor> scalingTensor = std::make_shared<Aidge::Tensor>(Aidge::Array1D<float, 1> {scaling}); + scalingTensor->setDataType(tensor->dataType()); + scalingTensor->setBackend(tensor->backend()); -static void rescaleTensor(std::shared_ptr<Tensor> tensor, double scaling) -{ - // Get the tensor data pointer - double * castedTensor = static_cast <double *> (tensor->getImpl()->rawPtr()); + mulOp.associateInput(0, tensor); + mulOp.associateInput(1, scalingTensor); - // Rescale the tensor - for(std::size_t i = 0; i < tensor->size(); i++) - castedTensor[i] *= scaling; + mulOp.forward(); + + auto outTensor = mulOp.getOutput(0); + *tensor = *outTensor; } static void roundTensor(std::shared_ptr<Tensor> tensor) { - 
// Get the tensor data pointer - double * castedTensor = static_cast <double *> (tensor->getImpl()->rawPtr()); + auto roundOp = Round_Op(); + roundOp.setDataType(tensor->dataType()); + roundOp.setBackend(tensor->backend()); - // Rescale the tensor - for(std::size_t i = 0; i < tensor->size(); i++) - castedTensor[i] = std::nearbyint(castedTensor[i]);//Round + roundOp.associateInput(0, tensor); + roundOp.forward(); + + auto outTensor = roundOp.getOutput(0); + *tensor = *outTensor; } -static double getTensorAbsoluteMax(std::shared_ptr <Tensor> tensor) +// TODO : make the retreival of argmax values backend independant (refCastFrom) +static double getTensorAbsoluteMax(std::shared_ptr<Tensor> tensor) { - // Get the tensor data pointer and edit it - double * castedTensor = static_cast<double*>(tensor->getImpl()->rawPtr()); - - // Get the tensor absolute max value - double maxValue = 0.0f; - for(std::size_t i = 0; i < tensor->size(); ++i) { - if(std::fabs(castedTensor[i]) > maxValue) { - maxValue = std::fabs(castedTensor[i]); - } - } - return maxValue; + // get the abs tensor + + std::shared_ptr<Tensor> absTensor = std::make_shared<Tensor>(tensor->abs()); + + // flatten the abs tensor + + std::int64_t nbElement = tensor->size(); + + auto reshapeOp = Reshape_Op({nbElement}); + reshapeOp.setDataType(tensor->dataType()); + reshapeOp.setBackend(tensor->backend()); + + reshapeOp.associateInput(0, absTensor); + reshapeOp.forward(); + std::shared_ptr<Tensor> flatTensor = reshapeOp.getOutput(0); + + // Get the argmax + + auto argmaxOp = ArgMax_Op(0, true, false); + argmaxOp.setDataType(tensor->dataType()); + argmaxOp.setBackend(tensor->backend()); + + argmaxOp.associateInput(0, flatTensor); + argmaxOp.forward(); + std::shared_ptr<Tensor> argmaxTensor = argmaxOp.getOutput(0); + + // Return the max + + int maxIndex = std::round(argmaxTensor->get<double>(0)); + + return flatTensor->get<double>(maxIndex); } + // TODO : pass nodeVector by reference ... static std::vector<std::shared_ptr<Node>> removeMatchingNodes(std::vector<std::shared_ptr<Node>> nodeVector, std::string nodeType) { @@ -876,50 +906,42 @@ static void insertCompensationNodes(std::shared_ptr<GraphView> graphView, std::u for (std::shared_ptr<Node> node : nodeVector) { - // A merging node is always followed by a scaling node at this point ... + // A merging node is always followed by a Quantizer node at this point if (node->type() == "Quantizer") { + // check if the Quantizer is a residual one, and insert a compensation node if so ... 
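The compensation trick implemented in the hunk resuming below rests on a simple identity; a sketch, with signedMax as above:

// Identity behind the compensation node: multiplying the activation by
// signedMax while dividing the quantizer's factor by signedMax leaves the
// end-to-end function unchanged (up to rounding).
double compensatedPath(double x, double scalingFactor, double signedMax) {
    const double y = x * signedMax;          // compensation Mul node
    return y * (scalingFactor / signedMax);  // adjusted quantizer factor
}

compensatedPath(x, s, m) equals x * s for any non-zero m, so the network's function is preserved while the Mul node absorbs the integer headroom.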
+
+            bool prevNodeIsForking = ((node->getParent(0))->getChildren().size() > 1);
             bool prevNodeIsAffine = isAffine(node->getParent(0));
             bool insertNode = prevNodeIsForking || !prevNodeIsAffine;
 
             if (insertNode)
             {
-                // create and insert the multplicative node
+                // create and insert the multiplicative node before the Quantizer
 
                 std::string mulNodeName = makeUniqueName(node->name() + "_Mul", graphView);
                 std::shared_ptr<Node> mulNode = Mul(mulNodeName);
 
-                mulNode->getOperator()->setDataType(DataType::Float64); // getDataType(parentNode)
                 mulNode->getOperator()->setBackend("cpu");
 
                 graphView->insertParent(node, mulNode, 0, 0, 0);
 
-                // create and insert the producer node
-
-                std::shared_ptr<Tensor> inputTensor = std::static_pointer_cast<Tensor> (mulNode->getOperator()->getRawInput(0));
-                std::shared_ptr<Tensor> coeffTensor = std::make_shared<Tensor>();
+                // Add the coeff producer to the multiplier node
 
-                coeffTensor->setDataType(DataType::Float64); // getDataType(parentNode)
-                coeffTensor->setBackend("cpu");
+                std::shared_ptr<Node> coeffProducer = addProducer(mulNode, 1, {1}, "");
+                std::shared_ptr<Tensor> coeffTensor = std::make_shared<Tensor>(Array1D<double, 1> {signedMax});
+                coeffProducer->getOperator()->setOutput(0, coeffTensor);
 
-                coeffTensor->resize(inputTensor->dims());
-                fillTensor(coeffTensor, 1);
+                coeffProducer->getOperator()->setDataType(DataType::Float64);
+                coeffProducer->getOperator()->setBackend("cpu");
 
-                std::shared_ptr<Node> producerNode = Producer(coeffTensor, makeUniqueName("coeff", graphView));
-                producerNode->addChild(mulNode);
-                graphView->add(producerNode);
+                graphView->add(coeffProducer); // needed ?
 
-                // rescale the coeffs and edit scaling factor
+                // Adapt the scaling factor value accordingly
 
-                fillTensor(coeffTensor, signedMax);
-
-                double currScalingFactor = getScalingFactor(node); // XXX bad naming !
+                double currScalingFactor = getScalingFactor(node);
                 updateScalingFactor(node, currScalingFactor / signedMax);
-
-                // TODO : double check this !!!
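For the record, the arithmetic behind the compensation node inserted above, taking signedMax to be 2^(nbBits - 1) - 1 as the name suggests (127 for 8 bits): the residual branch is multiplied by signedMax while the Quantizer's scaling factor is divided by the same constant, so the composed function is unchanged and only the intermediate integer dynamics are widened to match the affine branches. A small numeric check with illustrative values:

// nbBits = 8  =>  signedMax = 2^7 - 1 = 127
double s = 0.04;                            // current Quantizer scaling factor
double x = 0.5;                             // some residual activation
double before = x * s;                      // 0.0200
double after  = (x * 127.0) * (s / 127.0);  // 0.0200 as well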
- //std::cout << getTensorAbsoluteMax(coeffTensor) << std::endl; } } } @@ -931,7 +953,8 @@ void performSingleShiftApproximation(std::shared_ptr<GraphView> graphView, bool for (std::shared_ptr<Node> node : nodeVector) { - // Use A meatoperator of type Scaling of MulCompensation instead + // TODO : use Compensation nodes instead of Mul nodes + if (isAffine(node) || (node->type() == "Mul")) { std::shared_ptr<Node> scalingNode = (*node->getChildren().begin()); @@ -940,7 +963,7 @@ void performSingleShiftApproximation(std::shared_ptr<GraphView> graphView, bool double approx = std::pow(2, std::ceil(std::log2(base))); - updateScalingFactor(scalingNode,approx); + updateScalingFactor(scalingNode, approx); double ratio = base / approx; @@ -954,7 +977,7 @@ void performSingleShiftApproximation(std::shared_ptr<GraphView> graphView, bool std::shared_ptr<Tensor> biasTensor = getBiasTensor(node); rescaleTensor(biasTensor, ratio); if (!noQuant) - roundTensor(biasTensor); + roundTensor(biasTensor); } } } @@ -1058,8 +1081,8 @@ void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits, if (useCuda) graphView->setBackend("cuda"); - //std::cout << " === SCALINGS (AFTER CAST) ===" << std::endl; - //printScalingFactors(graphView); + std::cout << " === SCALINGS (AFTER CAST) ===" << std::endl; + printScalingFactors(graphView); Log::info(" Reseting the scheduler ..."); SequentialScheduler scheduler(graphView); diff --git a/src/QAT/QAT_LSQ.cpp b/src/QAT/QAT_LSQ.cpp index 38c8182..4b23eba 100644 --- a/src/QAT/QAT_LSQ.cpp +++ b/src/QAT/QAT_LSQ.cpp @@ -89,19 +89,16 @@ void QuantLSQ::insertQuantizers(std::shared_ptr<GraphView> graphView, size_t nbB static float getTensorAbsMean(std::shared_ptr<Tensor> tensor) { auto backend = tensor->backend(); + if (backend == "cuda") tensor->setBackend("cpu"); - float acc = 0; - float* castedTensor = static_cast<float *> (tensor->getImpl()->rawPtr()); - for(std::size_t i = 0; i < tensor->size(); i++) - acc += std::abs(castedTensor[i]); - acc /= static_cast<float> (tensor->size()); + float value = (*tensor).abs().mean().get<float>(0); if (backend == "cuda") tensor->setBackend("cuda"); - return acc; + return value; } static std::map<std::string, float> collectInputStats(std::shared_ptr<GraphView> graphView, std::shared_ptr<Tensor> calibrationData, bool useCuda) -- GitLab From eae59717221edebb8db5555be182af957af87e3e Mon Sep 17 00:00:00 2001 From: bhalimi <benjamin.halimi@cea.fr> Date: Wed, 8 Jan 2025 10:37:27 +0000 Subject: [PATCH 12/60] comment verbose --- src/PTQ/PTQ.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/PTQ/PTQ.cpp b/src/PTQ/PTQ.cpp index 54d645e..2b50f37 100644 --- a/src/PTQ/PTQ.cpp +++ b/src/PTQ/PTQ.cpp @@ -1081,8 +1081,8 @@ void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits, if (useCuda) graphView->setBackend("cuda"); - std::cout << " === SCALINGS (AFTER CAST) ===" << std::endl; - printScalingFactors(graphView); + //std::cout << " === SCALINGS (AFTER CAST) ===" << std::endl; + //printScalingFactors(graphView); Log::info(" Reseting the scheduler ..."); SequentialScheduler scheduler(graphView); -- GitLab From 85791eabedd373ee5c1b57a39d95beb48bc0bc32 Mon Sep 17 00:00:00 2001 From: bhalimi <benjamin.halimi@cea.fr> Date: Wed, 8 Jan 2025 16:27:21 +0000 Subject: [PATCH 13/60] minor change --- src/PTQ/PTQ.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/PTQ/PTQ.cpp b/src/PTQ/PTQ.cpp index 2b50f37..88e7ac8 100644 --- a/src/PTQ/PTQ.cpp +++ b/src/PTQ/PTQ.cpp @@ -215,6 +215,8 @@ void 
prepareNetwork(std::shared_ptr<GraphView> graphView)
 {
     removeFlatten(graphView);
 
+    sanitizeNodeNames(graphView);
+
     bool containsBatchNorm = false;
 
     std::vector<std::shared_ptr<Node>> nodeVector = retrieveNodeVector(graphView);
@@ -1078,6 +1080,7 @@ void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits,
     //printScalingFactors(graphView);
 
     setupDataType(graphView, inputDataSet, initialDataType);
+
     if (useCuda)
         graphView->setBackend("cuda");
 
-- 
GitLab


From 45e8db898ea6e35ccdd9f549bde983414f457495 Mon Sep 17 00:00:00 2001
From: bhalimi <benjamin.halimi@cea.fr>
Date: Mon, 13 Jan 2025 13:01:34 +0000
Subject: [PATCH 14/60] rework the LSQ code

---
 include/aidge/quantization/QAT/QAT_LSQ.hpp |  18 +-
 python_binding/pybind_QAT_LSQ.cpp          |   5 +-
 src/QAT/QAT_LSQ.cpp                        | 204 +++++++--------------
 3 files changed, 77 insertions(+), 150 deletions(-)

diff --git a/include/aidge/quantization/QAT/QAT_LSQ.hpp b/include/aidge/quantization/QAT/QAT_LSQ.hpp
index 4970be0..d7d03ca 100644
--- a/include/aidge/quantization/QAT/QAT_LSQ.hpp
+++ b/include/aidge/quantization/QAT/QAT_LSQ.hpp
@@ -20,22 +20,14 @@ namespace Aidge {
 namespace QuantLSQ {
 
 /**
- * @brief Insert the LSQ quantizer nodes in a given GraphView
- * @param graphView The GraphView containing the graph to quantize.
+ * @brief Given a GraphView with parameters properly initialized, insert
+ * the LSQ quantizer nodes, and set up the adjustment of their step-sizes.
+ * @param graphView The GraphView containing the network to quantize.
  * @param nbBits Number of quantization bits.
- * @param span Fixed output span of the quantizers.
  */
-void insertQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits, float step_size);
+void setupQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits);
 
-/**
- * @brief Given a GraphView with parameters properly initialized and some calibration data,
- * insert the LSQ quantizer nodes, and adjust their step-sizes.
- * @param graphView The GraphView containing the graph to quantize.
- * @param nbBits Number of quantization bits.
- * @param calibrationData Calibration data used to adjust the spans.
- * @param scale Multiplicative constant applied to the spans.
- */ -void insertAndInitQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits, std::shared_ptr<Tensor> calibrationData); +void devLSQ(std::shared_ptr<Tensor> tensor); } } diff --git a/python_binding/pybind_QAT_LSQ.cpp b/python_binding/pybind_QAT_LSQ.cpp index 206985e..0b9fcc2 100644 --- a/python_binding/pybind_QAT_LSQ.cpp +++ b/python_binding/pybind_QAT_LSQ.cpp @@ -23,8 +23,9 @@ void init_QAT_LSQ(py::module &m) { auto mQuantLSQ = m.def_submodule("lsq"); - mQuantLSQ.def("insert_quantizers", &QuantLSQ::insertQuantizers, py::arg("network"), py::arg("nb_bits"), py::arg("step_size")); + mQuantLSQ.def("setup_quantizers", &QuantLSQ::setupQuantizers, py::arg("network"), py::arg("nb_bits")); + + mQuantLSQ.def("dev_lsq", &QuantLSQ::devLSQ, py::arg("tensor")); - mQuantLSQ.def("insert_and_init_quantizers", &QuantLSQ::insertAndInitQuantizers, py::arg("network"), py::arg("nb_bits"), py::arg("calibration_data")); } } // namespace Aidge diff --git a/src/QAT/QAT_LSQ.cpp b/src/QAT/QAT_LSQ.cpp index 4b23eba..04f2027 100644 --- a/src/QAT/QAT_LSQ.cpp +++ b/src/QAT/QAT_LSQ.cpp @@ -23,7 +23,42 @@ namespace Aidge { -void QuantLSQ::insertQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits, float stepSize) +static float getTensorAbsMean(std::shared_ptr<Tensor> tensor) +{ + auto valueTensor = (*tensor).abs().mean(); + std::shared_ptr<Tensor> fallback; + const Tensor& localTensor = valueTensor.refCastFrom(fallback, DataType::Float32, "cpu"); + return localTensor.get<float>(0); +} + +// INIT THE STEP SIZE OF A QUANTIZER NODE + +static bool initStepSize(std::shared_ptr<Node> quantizer) +{ + const auto quantizerOp = std::static_pointer_cast<LSQ_Op>(quantizer->getOperator()); + + float inputAbsMean = getTensorAbsMean(quantizerOp->getInput(0)); + + float stepSize = 2.0f * (inputAbsMean / std::sqrt(quantizerOp->range().second)); + + auto stepSizeTensor = std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}})); + + // XXX Manage backend here ? 
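A note on the step-size formula in initStepSize() just above (a reading note, not part of the patch): it is the initialization proposed in the LSQ paper (Esser et al., 2020), s0 = 2 * E[|x|] / sqrt(Qp), where Qp = range().second equals 2^(nbBits - 1) - 1 for signed data. With illustrative numbers:

// nbBits = 8    =>  Qp = 127, sqrt(Qp) ~= 11.27
// E[|x|] = 0.5  =>  stepSize = 2 * 0.5 / 11.27 ~= 0.089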
+ stepSizeTensor->setBackend(quantizerOp->getInput(0)->backend()); + stepSizeTensor->setDataType(quantizerOp->getInput(0)->dataType()); + + auto stepSizeProducer = quantizer->getParent(1); + + stepSizeProducer->getOperator()->setOutput(0, stepSizeTensor); + + std::cout << " [ INIT STEP SIZE = " << stepSize << " ] " << std::endl; + + return false; +} + +// INPUT QUANTIZERS INSERTION + +static void setupInputQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits) { const auto matches = SinglePassGraphMatching(graphView).match("(Conv2D#|FC#)"); @@ -34,177 +69,76 @@ void QuantLSQ::insertQuantizers(std::shared_ptr<GraphView> graphView, size_t nbB std::pair<int, int> signedRange = {-std::pow(2, nbBits - 1), std::pow(2, nbBits - 1) - 1}; std::pair<int, int> unsignedRange = {0, std::pow(2, nbBits) - 1}; - // INPUT QUANTIZERS INSERTION + // Create the input quantizer node - // TODO : double check this, and use createUniqueName() - auto inputQuantizerName = makeUniqueName(linearNode->name() + "_lsq_i", graphView); - auto inputQuantizerNode = LSQ(signedRange, inputQuantizerName); + auto quantizerName = makeUniqueName(linearNode->name() + "_lsq_i", graphView); + auto quantizerNode = LSQ(signedRange, quantizerName); - // Set the step size + // Init the step-size using the node call stack - auto inputStepSizeOp = inputQuantizerNode->getParent(1)->getOperator(); - auto inputStepSizeTensor = std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}})); - inputStepSizeOp->setOutput(0, inputStepSizeTensor); + quantizerNode->addBeforeForward([quantizerNode](){ return initStepSize(quantizerNode); }); // Absorb the ReLU when possible ... - // XXX is this safe ??? - bool nodeHasParent = static_cast<bool> (linearNode->getParents()[0]); - // bool nodeHasParent = (linearNode->getParents().size() != 0); + bool nodeHasParent = static_cast<bool> (linearNode->getParents()[0]); // XXX is this safe ? if (nodeHasParent) { auto parentNode = linearNode->getParents()[0]; if (parentNode->type() == "ReLU") { - auto inputQuantizerOp = std::static_pointer_cast<LSQ_Op> (inputQuantizerNode->getOperator()); - inputQuantizerOp->range() = unsignedRange; + auto quantizerOp = std::static_pointer_cast<LSQ_Op> (quantizerNode->getOperator()); + quantizerOp->range() = unsignedRange; graphView->replace({parentNode}, {}); } } - // We need to handle the case where the linear node is the first one ... + // Insert the quantizer in the graphView ... 
+ // (We need to handle the case where the linear node is the first one) if (nodeHasParent) { - graphView->insertParent(linearNode, inputQuantizerNode, 0, 0, 0); + graphView->insertParent(linearNode, quantizerNode, 0, 0, 0); } else { - inputQuantizerNode->addChild(graphView); - graphView->add(inputQuantizerNode); + quantizerNode->addChild(graphView); + graphView->add(quantizerNode); } - - // PARAM QUANTIZERS INSERTION - - // TODO : double check this, and use createUniqueName() - auto paramQuantizerName = makeUniqueName(linearNode->name() + "_lsq_p", graphView); - auto paramQuantizerNode = LSQ(signedRange, paramQuantizerName); - graphView->insertParent(linearNode, paramQuantizerNode, 1, 0, 0); - - // Set the step size - - auto paramStepSizeOp = paramQuantizerNode->getParent(1)->getOperator(); - auto paramStepSizeTensor = std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}})); - paramStepSizeOp->setOutput(0, paramStepSizeTensor); } - } -static float getTensorAbsMean(std::shared_ptr<Tensor> tensor) -{ - auto backend = tensor->backend(); - - if (backend == "cuda") - tensor->setBackend("cpu"); - - float value = (*tensor).abs().mean().get<float>(0); - - if (backend == "cuda") - tensor->setBackend("cuda"); - - return value; -} +// PARAM QUANTIZERS INSERTION -static std::map<std::string, float> collectInputStats(std::shared_ptr<GraphView> graphView, std::shared_ptr<Tensor> calibrationData, bool useCuda) +static void setupParamQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits) { - // Propagate the calibration tensor + const auto matches = SinglePassGraphMatching(graphView).match("(Conv2D#|FC#)"); - SequentialScheduler scheduler(graphView); - scheduler.resetScheduling(); - scheduler.forward(true, {calibrationData}); + std::pair<int, int> signedRange = {-std::pow(2, nbBits - 1), std::pow(2, nbBits - 1) - 1}; - // Store the input tensor statistics + for (const auto& match : matches) + { + auto linearNode = match.graph->rootNode(); - if (useCuda) - graphView->setBackend("cpu"); + // TODO : double check this, and use createUniqueName() + auto quantizerName = makeUniqueName(linearNode->name() + "_lsq_p", graphView); + auto quantizerNode = LSQ(signedRange, quantizerName); - std::map<std::string, float> inputStats; - for (auto node : graphView->getNodes()) - { - if (node->type() == "FC" || node->type() == "Conv2D") // TODO: use graph matching !!! - { - const auto op = std::static_pointer_cast<LSQ_Op>(node->getOperator()); - float inputAbsMean = getTensorAbsMean(op->getInput(0)); - inputStats.insert(std::make_pair(node->name(), inputAbsMean)); - std::cout << node->name() << " -> " << inputAbsMean << std::endl; - } - } + // Init the step-size using the node call stack - if (useCuda) - graphView->setBackend("cuda"); + quantizerNode->addBeforeForward([quantizerNode](){ return initStepSize(quantizerNode); }); - return inputStats; -} + // Insert the quantizer in the graphView -static std::map<std::string, float> collectParamStats(std::shared_ptr<GraphView> graphView, bool useCuda) -{ - if (useCuda) - graphView->setBackend("cpu"); - - std::map<std::string, float> paramStats; - for (auto node : graphView->getNodes()) - { - if (node->type() == "FC" || node->type() == "Conv2D") // TODO: use graph matching !!! 
- { - const auto op = std::static_pointer_cast<LSQ_Op>(node->getOperator()); - float paramAbsMean = getTensorAbsMean(op->getInput(1)); - paramStats.insert(std::make_pair(node->name(), paramAbsMean)); - std::cout << node->name() << " -> " << paramAbsMean << std::endl; - } + graphView->insertParent(linearNode, quantizerNode, 1, 0, 0); } - - if (useCuda) - graphView->setBackend("cuda"); - - return paramStats; } -static void adjustQuantizersStepSizes(std::shared_ptr<GraphView> graphView, std::map<std::string, float> inputStats, std::map<std::string, float> paramStats) +void QuantLSQ::setupQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits) { - const auto matches = SinglePassGraphMatching(graphView).match("(Conv2D#|FC#)"); - - for (const auto& match : matches) - { - auto linearNode = match.graph->rootNode(); - - // INPUT QUANTIZERS STEP-SIZES - - auto inputQuantNode = linearNode->getParent(0); - auto inputQuantOp = std::static_pointer_cast<LSQ_Op>(inputQuantNode->getOperator()); - - float absMean = inputStats[linearNode->name()]; - float stepSize = 2.0f * (absMean / std::sqrt(inputQuantOp->range().second)); - - auto inputStepSizeOp = inputQuantNode->getParent(1)->getOperator(); - // XXX inputStepSizeOp->setOutput(0, std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}}))); - auto inputStepSizeTensor = std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}})); - inputStepSizeOp->setOutput(0, inputStepSizeTensor); - - // PARAM QUANTIZERS STEP-SIZES - - auto paramQuantNode = linearNode->getParent(1); - auto paramQuantOp = std::static_pointer_cast<LSQ_Op>(paramQuantNode->getOperator()); - - absMean = paramStats[linearNode->name()]; - stepSize = 2.0f * (absMean / std::sqrt(paramQuantOp->range().second)); - - auto paramStepSizeOp = paramQuantNode->getParent(1)->getOperator(); - // XXX paramStepSizeOp->setOutput(0, std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}}))); - auto paramStepSizeTensor = std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}})); - paramStepSizeOp->setOutput(0, paramStepSizeTensor); - } + setupInputQuantizers(graphView, nbBits); + setupParamQuantizers(graphView, nbBits); } -void QuantLSQ::insertAndInitQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits, std::shared_ptr<Tensor> calibrationData) +void QuantLSQ::devLSQ(std::shared_ptr<Tensor> tensor) { - bool useCuda = (calibrationData->backend() == "cuda"); - - // Collect the tensor statisics - auto inputStats = collectInputStats(graphView, calibrationData, useCuda); - - auto paramStats = collectParamStats(graphView, useCuda); - - // Insert the quantizers - insertQuantizers(graphView, nbBits, 1.0); - - // Adjust the quantizers step-sizes - adjustQuantizersStepSizes(graphView, inputStats, paramStats); + float mean = (tensor->mean()).get<float> (0); + std::cout << " MEAN = " << mean << std::endl; } } \ No newline at end of file -- GitLab From ccea932f276aad2ed919951693f7d7628cb02472 Mon Sep 17 00:00:00 2001 From: bhalimi <benjamin.halimi@cea.fr> Date: Wed, 15 Jan 2025 13:18:27 +0000 Subject: [PATCH 15/60] set the CLE data types to double --- include/aidge/quantization/PTQ/CLE.hpp | 2 +- src/PTQ/CLE.cpp | 24 ++++++++++++------------ 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/include/aidge/quantization/PTQ/CLE.hpp b/include/aidge/quantization/PTQ/CLE.hpp index d94b6e9..77eaf7f 100644 --- a/include/aidge/quantization/PTQ/CLE.hpp +++ b/include/aidge/quantization/PTQ/CLE.hpp @@ -30,7 +30,7 @@ namespace Aidge * @param graphView The GraphView to process. 
* @param targetDelta the stopping criterion (typical value : 0.01) */ - void crossLayerEqualization(std::shared_ptr<GraphView> graphView, float targetDelta = 0.01); + void crossLayerEqualization(std::shared_ptr<GraphView> graphView, double targetDelta = 0.01); } diff --git a/src/PTQ/CLE.cpp b/src/PTQ/CLE.cpp index 1d5ccc7..2c81815 100644 --- a/src/PTQ/CLE.cpp +++ b/src/PTQ/CLE.cpp @@ -32,23 +32,23 @@ static std::shared_ptr<Tensor> getBiasTensor(std::shared_ptr<Node> node) return std::static_pointer_cast<OperatorTensor>(node->getOperator())->getInput(2); } -static void rescaleTensor(std::shared_ptr<Tensor> tensor, float scaling) +static void rescaleTensor(std::shared_ptr<Tensor> tensor, double scaling) { // Get the tensor data pointer - float * castedTensor = static_cast <float *> (tensor->getImpl()->rawPtr()); + double * castedTensor = static_cast<double *> (tensor->getImpl()->rawPtr()); // Rescale the tensor for(std::size_t i = 0; i < tensor->size(); i++) castedTensor[i] *= scaling; } -static float getTensorAbsoluteMax(std::shared_ptr <Tensor> tensor) +static double getTensorAbsoluteMax(std::shared_ptr <Tensor> tensor) { // Get the tensor data pointer and edit it - float * castedTensor = static_cast<float*>(tensor->getImpl()->rawPtr()); + double * castedTensor = static_cast<double*> (tensor->getImpl()->rawPtr()); // Get the tensor absolute max value - float maxValue = 0.0f; + double maxValue = 0.0f; for(std::size_t i = 0; i < tensor->size(); ++i) { if(std::fabs(castedTensor[i]) > maxValue) { maxValue = std::fabs(castedTensor[i]); @@ -57,7 +57,7 @@ static float getTensorAbsoluteMax(std::shared_ptr <Tensor> tensor) return maxValue; } -void crossLayerEqualization(std::shared_ptr<GraphView> graphView, float targetDelta) +void crossLayerEqualization(std::shared_ptr<GraphView> graphView, double targetDelta) { std::vector<std::shared_ptr<Node>> nodeVector = retrieveNodeVector(graphView); @@ -79,7 +79,7 @@ void crossLayerEqualization(std::shared_ptr<GraphView> graphView, float targetDe if (isAffine(node)) affineNodeVector.push_back(node); - float maxRangeDelta; + double maxRangeDelta; do { @@ -94,18 +94,18 @@ void crossLayerEqualization(std::shared_ptr<GraphView> graphView, float targetDe std::shared_ptr<Node> n1 = affineNodeVector[i]; std::shared_ptr<Node> n2 = affineNodeVector[i+1]; - float r1 = getTensorAbsoluteMax(getWeightTensor(n1)); - float r2 = getTensorAbsoluteMax(getWeightTensor(n2)); + double r1 = getTensorAbsoluteMax(getWeightTensor(n1)); + double r2 = getTensorAbsoluteMax(getWeightTensor(n2)); - float s1 = std::sqrt(r1 * r2) / r1; - float s2 = std::sqrt(r1 * r2) / r2; + double s1 = std::sqrt(r1 * r2) / r1; + double s2 = std::sqrt(r1 * r2) / r2; rescaleTensor(getWeightTensor(n1), s1); rescaleTensor(getWeightTensor(n2), s2); rescaleTensor(getBiasTensor(n1), s1); - float rangeDelta = std::abs(r1 - r2); + double rangeDelta = std::abs(r1 - r2); if (rangeDelta > maxRangeDelta) maxRangeDelta = rangeDelta; } -- GitLab From ad2675740c411b36d73cb8f6fab3689eef739412 Mon Sep 17 00:00:00 2001 From: Olivier BICHLER <olivier.bichler@cea.fr> Date: Fri, 17 Jan 2025 10:54:17 +0100 Subject: [PATCH 16/60] Hotfix --- include/aidge/operator/LSQ.hpp | 2 +- src/PTQ/PTQMetaOps.cpp | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/include/aidge/operator/LSQ.hpp b/include/aidge/operator/LSQ.hpp index 10ceb81..eb266bc 100644 --- a/include/aidge/operator/LSQ.hpp +++ b/include/aidge/operator/LSQ.hpp @@ -95,7 +95,7 @@ public: */ inline std::shared_ptr<Node> LSQ(const std::pair<int, int>& range 
= {0, 255}, const std::string& name = "") { auto lsq = std::make_shared<Node>(std::make_shared<LSQ_Op>(range), name); - addProducer(lsq, 1, {1}, "ss"); + addProducer<1>(lsq, 1, {1}, "ss"); return lsq; } } diff --git a/src/PTQ/PTQMetaOps.cpp b/src/PTQ/PTQMetaOps.cpp index 152a3b0..527d853 100644 --- a/src/PTQ/PTQMetaOps.cpp +++ b/src/PTQ/PTQMetaOps.cpp @@ -46,7 +46,7 @@ std::shared_ptr<Node> Quantizer(double scalingFactor, double clipMin, double cli // connect the scaling factor producer std::shared_ptr<Tensor> scalingFactorTensor = std::make_shared<Tensor>(Array1D<double, 1> {scalingFactor}); - std::shared_ptr<Node> scalingFactorProducer = addProducer(mulNode, 1, {1}, "ScalingFactor"); + std::shared_ptr<Node> scalingFactorProducer = addProducer<1>(mulNode, 1, {1}, "ScalingFactor"); scalingFactorProducer->getOperator()->setOutput(0, scalingFactorTensor); // create the metaop graph @@ -67,7 +67,7 @@ std::shared_ptr<Node> Scaling(double scalingFactor, const std::string& name) std::shared_ptr<Node> mulNode = Mul((!name.empty()) ? name + "_Scaling" : ""); - std::shared_ptr<Node> scalingFactorProducer = addProducer(mulNode, 1, {1}, "ScalingFactor"); + std::shared_ptr<Node> scalingFactorProducer = addProducer<1>(mulNode, 1, {1}, "ScalingFactor"); scalingFactorProducer->getOperator()->setOutput(0, scalingFactorTensor); std::shared_ptr<GraphView> graphView = Sequential({mulNode}); -- GitLab From 40863ab3ca0c489683b823fc0b163c582761eb89 Mon Sep 17 00:00:00 2001 From: Olivier BICHLER <olivier.bichler@cea.fr> Date: Sun, 19 Jan 2025 14:07:53 +0100 Subject: [PATCH 17/60] Hotfix: removed std::cout --- src/PTQ/PTQ.cpp | 4 ++-- src/QAT/QAT_FixedQ.cpp | 6 +++--- src/QAT/QAT_LSQ.cpp | 4 ++-- src/recipes/QuantRecipes.cpp | 2 +- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/PTQ/PTQ.cpp b/src/PTQ/PTQ.cpp index 54b95cb..0e26313 100644 --- a/src/PTQ/PTQ.cpp +++ b/src/PTQ/PTQ.cpp @@ -995,7 +995,7 @@ static void printRanges(std::shared_ptr<GraphView> graphView, std::map<std::stri auto scheduling = scheduler.getStaticScheduling(); for (auto node : scheduling) if (node->type() == "Scaling") - std::cout << node->name() << " range = " << valueRanges[node->name()] << std::endl; + fmt::println("{} range = {}", node->name(), valueRanges[node->name()]); } void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits, std::vector<std::shared_ptr<Tensor>> inputDataSet, Clipping clippingMode, bool noQuant, bool optimizeSigns, bool singleShift, bool useCuda, bool verbose) @@ -1098,7 +1098,7 @@ void clearBiases(std::shared_ptr<GraphView> graphView) void devPTQ(std::shared_ptr<GraphView> graphView) { for (std::shared_ptr<Node> node : graphView->getNodes()) - std::cout << " UUU : " << node->name() << std::endl; + fmt::println(" UUU : {}", node->name()); } } diff --git a/src/QAT/QAT_FixedQ.cpp b/src/QAT/QAT_FixedQ.cpp index d22074f..9160b4a 100644 --- a/src/QAT/QAT_FixedQ.cpp +++ b/src/QAT/QAT_FixedQ.cpp @@ -91,7 +91,7 @@ static std::map<std::string, float> collectInputStats(std::shared_ptr<GraphView> const auto op = std::static_pointer_cast<FixedQ_Op>(node->getOperator()); float inputStd = getTensorStd(op->getInput(0)); inputStats.insert(std::make_pair(node->name(), inputStd)); - std::cout << node->name() << " -> " << inputStd << std::endl; + fmt::println("{} -> {}", node->name(), inputStd); } } @@ -108,7 +108,7 @@ static std::map<std::string, float> collectParamStats(std::shared_ptr<GraphView> const auto op = std::static_pointer_cast<FixedQ_Op>(node->getOperator()); float paramStd = 
getTensorStd(op->getInput(1)); paramStats.insert(std::make_pair(node->name(), paramStd)); - std::cout << node->name() << " -> " << paramStd << std::endl; + fmt::println("{} -> {}", node->name(), paramStd); } } @@ -156,7 +156,7 @@ void QuantFixedQ::devQAT(std::shared_ptr<GraphView> graphView) scheduler.generateScheduling(); auto s = scheduler.getStaticScheduling(); for (std::shared_ptr<Node> node : s) - std::cout << " name : " << node->name() << std::endl; + fmt::println(" name : {}", node->name()); } } \ No newline at end of file diff --git a/src/QAT/QAT_LSQ.cpp b/src/QAT/QAT_LSQ.cpp index 38c8182..9b51e84 100644 --- a/src/QAT/QAT_LSQ.cpp +++ b/src/QAT/QAT_LSQ.cpp @@ -125,7 +125,7 @@ static std::map<std::string, float> collectInputStats(std::shared_ptr<GraphView> const auto op = std::static_pointer_cast<LSQ_Op>(node->getOperator()); float inputAbsMean = getTensorAbsMean(op->getInput(0)); inputStats.insert(std::make_pair(node->name(), inputAbsMean)); - std::cout << node->name() << " -> " << inputAbsMean << std::endl; + fmt::println("{} -> {}", node->name(), inputAbsMean); } } @@ -148,7 +148,7 @@ static std::map<std::string, float> collectParamStats(std::shared_ptr<GraphView> const auto op = std::static_pointer_cast<LSQ_Op>(node->getOperator()); float paramAbsMean = getTensorAbsMean(op->getInput(1)); paramStats.insert(std::make_pair(node->name(), paramAbsMean)); - std::cout << node->name() << " -> " << paramAbsMean << std::endl; + fmt::println("{} -> {}", node->name(), paramAbsMean); } } diff --git a/src/recipes/QuantRecipes.cpp b/src/recipes/QuantRecipes.cpp index 562948c..6e1dcdb 100644 --- a/src/recipes/QuantRecipes.cpp +++ b/src/recipes/QuantRecipes.cpp @@ -59,7 +59,7 @@ void insertBatchNormNodes(std::shared_ptr<GraphView> graphView) { std::shared_ptr<Conv_Op<2>> convOperator = std::static_pointer_cast<Conv_Op<2>> (parentNode->getOperator()); int nb_channels = convOperator->getInput(1)->dims()[0]; - std::cout << " NB CHANNELS = " << nb_channels << std::endl; // TODO : remove this ... + fmt::println(" NB CHANNELS = {}", nb_channels); // TODO : remove this ... std::string batchnormNodeName = makeUniqueName(parentNode->name() + "_BN", graphView); std::shared_ptr<Node> batchnormNode = BatchNorm<2>(nb_channels, 1e-5, 0.1, false, batchnormNodeName); -- GitLab From 7ad6bbf206ead01aa52a1279f8a807fa6f734f22 Mon Sep 17 00:00:00 2001 From: NAUD Maxence <maxence.naud@cea.fr> Date: Tue, 21 Jan 2025 11:04:56 +0000 Subject: [PATCH 18/60] ADD: fmt as private library --- CMakeLists.txt | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 7a2b168..80c5ae7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -85,17 +85,6 @@ endif() # ############################################## # Find system dependencies -Include(FetchContent) - -FetchContent_Declare( - fmt - GIT_REPOSITORY https://github.com/fmtlib/fmt.git - GIT_TAG 10.2.1 # or a later release -) - -set(FMT_SYSTEM_HEADERS ON) -FetchContent_MakeAvailable(fmt) -set_property(TARGET fmt PROPERTY POSITION_INDEPENDENT_CODE ON) if(CUDA) find_package(CUDAToolkit REQUIRED) @@ -169,7 +158,7 @@ if (PYBIND) endif() # XXX HERE !!! 
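Context for the CMake change just below: the fmt::println() calls introduced by the previous patch live only in .cpp files, so fmt can presumably be linked PRIVATE instead of PUBLIC without affecting downstream consumers of the library. As a reminder, fmt::println() requires fmt >= 9 (the FetchContent block removed above pinned 10.2.1), and is used like this (illustrative values):

#include <fmt/core.h>

// prints: conv1 range = 0.125
fmt::println("{} range = {}", std::string("conv1"), 0.125);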
-target_link_libraries(${module_name} PUBLIC fmt::fmt) +target_link_libraries(${module_name} PRIVATE fmt::fmt) target_compile_features(${module_name} PRIVATE cxx_std_14) target_compile_options(${module_name} PRIVATE -- GitLab From 98fe14506f1d5268e412c2a36e3a4c51878cff8b Mon Sep 17 00:00:00 2001 From: bhalimi <benjamin.halimi@cea.fr> Date: Tue, 21 Jan 2025 12:28:19 +0000 Subject: [PATCH 19/60] rework the ReLU handling --- src/QAT/QAT_LSQ.cpp | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/src/QAT/QAT_LSQ.cpp b/src/QAT/QAT_LSQ.cpp index 04f2027..f9ce554 100644 --- a/src/QAT/QAT_LSQ.cpp +++ b/src/QAT/QAT_LSQ.cpp @@ -82,13 +82,19 @@ static void setupInputQuantizers(std::shared_ptr<GraphView> graphView, size_t nb bool nodeHasParent = static_cast<bool> (linearNode->getParents()[0]); // XXX is this safe ? - if (nodeHasParent) { - auto parentNode = linearNode->getParents()[0]; - if (parentNode->type() == "ReLU") { + if (nodeHasParent) + { + bool allParentsAreReLU = true; + for (auto parentNode : linearNode->getParents()) + if (parentNode->type() != "ReLU") + allParentsAreReLU = false; + + if (allParentsAreReLU) { auto quantizerOp = std::static_pointer_cast<LSQ_Op> (quantizerNode->getOperator()); quantizerOp->range() = unsignedRange; - graphView->replace({parentNode}, {}); } + + // TODO : remove the ReLUs when possible } // Insert the quantizer in the graphView ... -- GitLab From b76a4a55aa321f407c7f7813b02890c8afbc23ad Mon Sep 17 00:00:00 2001 From: bhalimi <benjamin.halimi@cea.fr> Date: Tue, 21 Jan 2025 15:14:32 +0000 Subject: [PATCH 20/60] revert changes for debug --- include/aidge/quantization/QAT/QAT_LSQ.hpp | 6 +- python_binding/pybind_QAT_LSQ.cpp | 4 +- src/QAT/QAT_LSQ.cpp | 199 ++++++++++++++++++++- 3 files changed, 204 insertions(+), 5 deletions(-) diff --git a/include/aidge/quantization/QAT/QAT_LSQ.hpp b/include/aidge/quantization/QAT/QAT_LSQ.hpp index d7d03ca..979e823 100644 --- a/include/aidge/quantization/QAT/QAT_LSQ.hpp +++ b/include/aidge/quantization/QAT/QAT_LSQ.hpp @@ -25,9 +25,11 @@ namespace QuantLSQ { * @param graphView The GraphView containing the network to quantize. * @param nbBits Number of quantization bits. 
*/ -void setupQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits); -void devLSQ(std::shared_ptr<Tensor> tensor); +//void setupQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits); +//void devLSQ(std::shared_ptr<Tensor> tensor); + +void insertAndInitQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits, std::shared_ptr<Tensor> calibrationData); } } diff --git a/python_binding/pybind_QAT_LSQ.cpp b/python_binding/pybind_QAT_LSQ.cpp index 0b9fcc2..cb5b7f0 100644 --- a/python_binding/pybind_QAT_LSQ.cpp +++ b/python_binding/pybind_QAT_LSQ.cpp @@ -23,9 +23,11 @@ void init_QAT_LSQ(py::module &m) { auto mQuantLSQ = m.def_submodule("lsq"); +/* mQuantLSQ.def("setup_quantizers", &QuantLSQ::setupQuantizers, py::arg("network"), py::arg("nb_bits")); - mQuantLSQ.def("dev_lsq", &QuantLSQ::devLSQ, py::arg("tensor")); +*/ + mQuantLSQ.def("insert_and_init_quantizers", &QuantLSQ::insertAndInitQuantizers, py::arg("network"), py::arg("nb_bits"), py::arg("calibration_data")); } } // namespace Aidge diff --git a/src/QAT/QAT_LSQ.cpp b/src/QAT/QAT_LSQ.cpp index f9ce554..e52bafb 100644 --- a/src/QAT/QAT_LSQ.cpp +++ b/src/QAT/QAT_LSQ.cpp @@ -21,6 +21,201 @@ #include "aidge/graph/Matching.hpp" #include "aidge/recipes/QuantRecipes.hpp" + +namespace Aidge { + +static void insertQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits, float stepSize) +{ + const auto matches = SinglePassGraphMatching(graphView).match("(Conv2D#|FC#)"); + + for (const auto& match : matches) + { + auto linearNode = match.graph->rootNode(); + + std::pair<int, int> signedRange = {-std::pow(2, nbBits - 1), std::pow(2, nbBits - 1) - 1}; + std::pair<int, int> unsignedRange = {0, std::pow(2, nbBits) - 1}; + + // INPUT QUANTIZERS INSERTION + + // TODO : double check this, and use createUniqueName() + auto inputQuantizerName = makeUniqueName(linearNode->name() + "_lsq_i", graphView); + auto inputQuantizerNode = LSQ(signedRange, inputQuantizerName); + + // Set the step size + + auto inputStepSizeOp = inputQuantizerNode->getParent(1)->getOperator(); + auto inputStepSizeTensor = std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}})); + inputStepSizeOp->setOutput(0, inputStepSizeTensor); + + // Absorb the ReLU when possible ... + + // XXX is this safe ??? + bool nodeHasParent = static_cast<bool> (linearNode->getParents()[0]); + // bool nodeHasParent = (linearNode->getParents().size() != 0); + + if (nodeHasParent) { + auto parentNode = linearNode->getParents()[0]; + if (parentNode->type() == "ReLU") { + auto inputQuantizerOp = std::static_pointer_cast<LSQ_Op> (inputQuantizerNode->getOperator()); + inputQuantizerOp->range() = unsignedRange; + graphView->replace({parentNode}, {}); + } + } + + // We need to handle the case where the linear node is the first one ... 
+ + if (nodeHasParent) { + graphView->insertParent(linearNode, inputQuantizerNode, 0, 0, 0); + } else { + inputQuantizerNode->addChild(graphView); + graphView->add(inputQuantizerNode); + } + + // PARAM QUANTIZERS INSERTION + + // TODO : double check this, and use createUniqueName() + auto paramQuantizerName = makeUniqueName(linearNode->name() + "_lsq_p", graphView); + auto paramQuantizerNode = LSQ(signedRange, paramQuantizerName); + graphView->insertParent(linearNode, paramQuantizerNode, 1, 0, 0); + + // Set the step size + + auto paramStepSizeOp = paramQuantizerNode->getParent(1)->getOperator(); + auto paramStepSizeTensor = std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}})); + paramStepSizeOp->setOutput(0, paramStepSizeTensor); + } + +} + +static float getTensorAbsMean(std::shared_ptr<Tensor> tensor) +{ + auto backend = tensor->backend(); + if (backend == "cuda") + tensor->setBackend("cpu"); + + float acc = 0; + float* castedTensor = static_cast<float *> (tensor->getImpl()->rawPtr()); + for(std::size_t i = 0; i < tensor->size(); i++) + acc += std::abs(castedTensor[i]); + acc /= static_cast<float> (tensor->size()); + + if (backend == "cuda") + tensor->setBackend("cuda"); + + return acc; +} + +static std::map<std::string, float> collectInputStats(std::shared_ptr<GraphView> graphView, std::shared_ptr<Tensor> calibrationData, bool useCuda) +{ + // Propagate the calibration tensor + + SequentialScheduler scheduler(graphView); + scheduler.resetScheduling(); + scheduler.forward(true, {calibrationData}); + + // Store the input tensor statistics + + if (useCuda) + graphView->setBackend("cpu"); + + std::map<std::string, float> inputStats; + for (auto node : graphView->getNodes()) + { + if (node->type() == "FC" || node->type() == "Conv2D") // TODO: use graph matching !!! + { + const auto op = std::static_pointer_cast<LSQ_Op>(node->getOperator()); + float inputAbsMean = getTensorAbsMean(op->getInput(0)); + inputStats.insert(std::make_pair(node->name(), inputAbsMean)); + std::cout << node->name() << " -> " << inputAbsMean << std::endl; + } + } + + if (useCuda) + graphView->setBackend("cuda"); + + return inputStats; +} + +static std::map<std::string, float> collectParamStats(std::shared_ptr<GraphView> graphView, bool useCuda) +{ + if (useCuda) + graphView->setBackend("cpu"); + + std::map<std::string, float> paramStats; + for (auto node : graphView->getNodes()) + { + if (node->type() == "FC" || node->type() == "Conv2D") // TODO: use graph matching !!! 
+ { + const auto op = std::static_pointer_cast<LSQ_Op>(node->getOperator()); + float paramAbsMean = getTensorAbsMean(op->getInput(1)); + paramStats.insert(std::make_pair(node->name(), paramAbsMean)); + std::cout << node->name() << " -> " << paramAbsMean << std::endl; + } + } + + if (useCuda) + graphView->setBackend("cuda"); + + return paramStats; +} + +static void adjustQuantizersStepSizes(std::shared_ptr<GraphView> graphView, std::map<std::string, float> inputStats, std::map<std::string, float> paramStats) +{ + const auto matches = SinglePassGraphMatching(graphView).match("(Conv2D#|FC#)"); + + for (const auto& match : matches) + { + auto linearNode = match.graph->rootNode(); + + // INPUT QUANTIZERS STEP-SIZES + + auto inputQuantNode = linearNode->getParent(0); + auto inputQuantOp = std::static_pointer_cast<LSQ_Op>(inputQuantNode->getOperator()); + + float absMean = inputStats[linearNode->name()]; + float stepSize = 2.0f * (absMean / std::sqrt(inputQuantOp->range().second)); + + auto inputStepSizeOp = inputQuantNode->getParent(1)->getOperator(); + // XXX inputStepSizeOp->setOutput(0, std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}}))); + auto inputStepSizeTensor = std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}})); + inputStepSizeOp->setOutput(0, inputStepSizeTensor); + + // PARAM QUANTIZERS STEP-SIZES + + auto paramQuantNode = linearNode->getParent(1); + auto paramQuantOp = std::static_pointer_cast<LSQ_Op>(paramQuantNode->getOperator()); + + absMean = paramStats[linearNode->name()]; + stepSize = 2.0f * (absMean / std::sqrt(paramQuantOp->range().second)); + + auto paramStepSizeOp = paramQuantNode->getParent(1)->getOperator(); + // XXX paramStepSizeOp->setOutput(0, std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}}))); + auto paramStepSizeTensor = std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}})); + paramStepSizeOp->setOutput(0, paramStepSizeTensor); + } +} + +void QuantLSQ::insertAndInitQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits, std::shared_ptr<Tensor> calibrationData) +{ + bool useCuda = (calibrationData->backend() == "cuda"); + + // Collect the tensor statisics + auto inputStats = collectInputStats(graphView, calibrationData, useCuda); + + auto paramStats = collectParamStats(graphView, useCuda); + + // Insert the quantizers + insertQuantizers(graphView, nbBits, 1.0); + + // Adjust the quantizers step-sizes + adjustQuantizersStepSizes(graphView, inputStats, paramStats); +} +} + + +/* + XXX XXX XXX + namespace Aidge { static float getTensorAbsMean(std::shared_ptr<Tensor> tensor) @@ -146,5 +341,5 @@ void QuantLSQ::devLSQ(std::shared_ptr<Tensor> tensor) float mean = (tensor->mean()).get<float> (0); std::cout << " MEAN = " << mean << std::endl; } - -} \ No newline at end of file +} +*/ \ No newline at end of file -- GitLab From d656e1eebfa339c992b7ae9adcd29f8e102bb016 Mon Sep 17 00:00:00 2001 From: bhalimi <benjamin.halimi@cea.fr> Date: Wed, 22 Jan 2025 12:47:59 +0000 Subject: [PATCH 21/60] re-apply the LSQ changes --- include/aidge/quantization/QAT/QAT_LSQ.hpp | 6 +- python_binding/pybind_QAT_LSQ.cpp | 6 +- src/QAT/QAT_LSQ.cpp | 258 ++++++++++----------- 3 files changed, 133 insertions(+), 137 deletions(-) diff --git a/include/aidge/quantization/QAT/QAT_LSQ.hpp b/include/aidge/quantization/QAT/QAT_LSQ.hpp index 979e823..f33a7c6 100644 --- a/include/aidge/quantization/QAT/QAT_LSQ.hpp +++ b/include/aidge/quantization/QAT/QAT_LSQ.hpp @@ -26,10 +26,8 @@ namespace QuantLSQ { * @param nbBits Number of quantization bits. 
*/ -//void setupQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits); -//void devLSQ(std::shared_ptr<Tensor> tensor); - -void insertAndInitQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits, std::shared_ptr<Tensor> calibrationData); +void setupQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits); +void devLSQ(std::shared_ptr<Tensor> tensor); } } diff --git a/python_binding/pybind_QAT_LSQ.cpp b/python_binding/pybind_QAT_LSQ.cpp index cb5b7f0..0dd4267 100644 --- a/python_binding/pybind_QAT_LSQ.cpp +++ b/python_binding/pybind_QAT_LSQ.cpp @@ -23,11 +23,11 @@ void init_QAT_LSQ(py::module &m) { auto mQuantLSQ = m.def_submodule("lsq"); -/* + mQuantLSQ.def("setup_quantizers", &QuantLSQ::setupQuantizers, py::arg("network"), py::arg("nb_bits")); mQuantLSQ.def("dev_lsq", &QuantLSQ::devLSQ, py::arg("tensor")); -*/ - mQuantLSQ.def("insert_and_init_quantizers", &QuantLSQ::insertAndInitQuantizers, py::arg("network"), py::arg("nb_bits"), py::arg("calibration_data")); + + //mQuantLSQ.def("insert_and_init_quantizers", &QuantLSQ::insertAndInitQuantizers, py::arg("network"), py::arg("nb_bits"), py::arg("calibration_data")); } } // namespace Aidge diff --git a/src/QAT/QAT_LSQ.cpp b/src/QAT/QAT_LSQ.cpp index e52bafb..66e8ec7 100644 --- a/src/QAT/QAT_LSQ.cpp +++ b/src/QAT/QAT_LSQ.cpp @@ -21,6 +21,134 @@ #include "aidge/graph/Matching.hpp" #include "aidge/recipes/QuantRecipes.hpp" +namespace Aidge { + +static float getTensorAbsMean(std::shared_ptr<Tensor> tensor) +{ + auto valueTensor = (*tensor).abs().mean(); + std::shared_ptr<Tensor> fallback; + const Tensor& localTensor = valueTensor.refCastFrom(fallback, DataType::Float32, "cpu"); + return localTensor.get<float>(0); +} + +// INIT THE STEP SIZE OF A QUANTIZER NODE + +static bool initStepSize(std::shared_ptr<Node> quantizer) +{ + const auto quantizerOp = std::static_pointer_cast<LSQ_Op>(quantizer->getOperator()); + + float inputAbsMean = getTensorAbsMean(quantizerOp->getInput(0)); + + float stepSize = 2.0f * (inputAbsMean / std::sqrt(quantizerOp->range().second)); + + auto stepSizeTensor = std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}})); + + // XXX Manage backend here ? + stepSizeTensor->setBackend(quantizerOp->getInput(0)->backend()); + stepSizeTensor->setDataType(quantizerOp->getInput(0)->dataType()); + + auto stepSizeProducer = quantizer->getParent(1); + + stepSizeProducer->getOperator()->setOutput(0, stepSizeTensor); + + std::cout << " [ INIT STEP SIZE = " << stepSize << " ] " << std::endl; + + return false; +} + +// INPUT QUANTIZERS INSERTION + +static void setupInputQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits) +{ + const auto matches = SinglePassGraphMatching(graphView).match("(Conv2D#|FC#)"); + + for (const auto& match : matches) + { + auto linearNode = match.graph->rootNode(); + + std::pair<int, int> signedRange = {-std::pow(2, nbBits - 1), std::pow(2, nbBits - 1) - 1}; + std::pair<int, int> unsignedRange = {0, std::pow(2, nbBits) - 1}; + + // Create the input quantizer node + + auto quantizerName = makeUniqueName(linearNode->name() + "_lsq_i", graphView); + auto quantizerNode = LSQ(signedRange, quantizerName); + + // Init the step-size using the node call stack + + quantizerNode->addBeforeForward([quantizerNode](){ return initStepSize(quantizerNode); }); + + // Absorb the ReLU when possible ... + + bool nodeHasParent = static_cast<bool> (linearNode->getParents()[0]); // XXX is this safe ? 
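On the recurring "is this safe?" question above: getParents()[0] assumes input slot 0 exists (which holds for Conv2D and FC nodes), and the static_cast<bool> only guards against a null std::shared_ptr in that slot. If getParents() mirrors the input slots, with null entries for unconnected inputs, a slightly more defensive variant would be (a sketch, not part of the patch):

bool nodeHasParent = !linearNode->getParents().empty()
                  && (linearNode->getParents()[0] != nullptr);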
+ + if (nodeHasParent) + { + bool allParentsAreReLU = true; + for (auto parentNode : linearNode->getParents()) + if (parentNode->type() != "ReLU") + allParentsAreReLU = false; + + if (allParentsAreReLU) { + auto quantizerOp = std::static_pointer_cast<LSQ_Op> (quantizerNode->getOperator()); + quantizerOp->range() = unsignedRange; + } + + // TODO : remove the ReLUs when possible + } + + // Insert the quantizer in the graphView ... + // (We need to handle the case where the linear node is the first one) + + if (nodeHasParent) { + graphView->insertParent(linearNode, quantizerNode, 0, 0, 0); + } else { + quantizerNode->addChild(graphView); + graphView->add(quantizerNode); + } + } +} + +// PARAM QUANTIZERS INSERTION + +static void setupParamQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits) +{ + const auto matches = SinglePassGraphMatching(graphView).match("(Conv2D#|FC#)"); + + std::pair<int, int> signedRange = {-std::pow(2, nbBits - 1), std::pow(2, nbBits - 1) - 1}; + + for (const auto& match : matches) + { + auto linearNode = match.graph->rootNode(); + + // TODO : double check this, and use createUniqueName() + auto quantizerName = makeUniqueName(linearNode->name() + "_lsq_p", graphView); + auto quantizerNode = LSQ(signedRange, quantizerName); + + // Init the step-size using the node call stack + + quantizerNode->addBeforeForward([quantizerNode](){ return initStepSize(quantizerNode); }); + + // Insert the quantizer in the graphView + + graphView->insertParent(linearNode, quantizerNode, 1, 0, 0); + } +} + +void QuantLSQ::setupQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits) +{ + setupInputQuantizers(graphView, nbBits); + setupParamQuantizers(graphView, nbBits); +} + +void QuantLSQ::devLSQ(std::shared_ptr<Tensor> tensor) +{ + float mean = (tensor->mean()).get<float> (0); + std::cout << " MEAN = " << mean << std::endl; +} +} + +/* namespace Aidge { @@ -212,134 +340,4 @@ void QuantLSQ::insertAndInitQuantizers(std::shared_ptr<GraphView> graphView, siz } } - -/* - XXX XXX XXX - -namespace Aidge { - -static float getTensorAbsMean(std::shared_ptr<Tensor> tensor) -{ - auto valueTensor = (*tensor).abs().mean(); - std::shared_ptr<Tensor> fallback; - const Tensor& localTensor = valueTensor.refCastFrom(fallback, DataType::Float32, "cpu"); - return localTensor.get<float>(0); -} - -// INIT THE STEP SIZE OF A QUANTIZER NODE - -static bool initStepSize(std::shared_ptr<Node> quantizer) -{ - const auto quantizerOp = std::static_pointer_cast<LSQ_Op>(quantizer->getOperator()); - - float inputAbsMean = getTensorAbsMean(quantizerOp->getInput(0)); - - float stepSize = 2.0f * (inputAbsMean / std::sqrt(quantizerOp->range().second)); - - auto stepSizeTensor = std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}})); - - // XXX Manage backend here ? 
- stepSizeTensor->setBackend(quantizerOp->getInput(0)->backend()); - stepSizeTensor->setDataType(quantizerOp->getInput(0)->dataType()); - - auto stepSizeProducer = quantizer->getParent(1); - - stepSizeProducer->getOperator()->setOutput(0, stepSizeTensor); - - std::cout << " [ INIT STEP SIZE = " << stepSize << " ] " << std::endl; - - return false; -} - -// INPUT QUANTIZERS INSERTION - -static void setupInputQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits) -{ - const auto matches = SinglePassGraphMatching(graphView).match("(Conv2D#|FC#)"); - - for (const auto& match : matches) - { - auto linearNode = match.graph->rootNode(); - - std::pair<int, int> signedRange = {-std::pow(2, nbBits - 1), std::pow(2, nbBits - 1) - 1}; - std::pair<int, int> unsignedRange = {0, std::pow(2, nbBits) - 1}; - - // Create the input quantizer node - - auto quantizerName = makeUniqueName(linearNode->name() + "_lsq_i", graphView); - auto quantizerNode = LSQ(signedRange, quantizerName); - - // Init the step-size using the node call stack - - quantizerNode->addBeforeForward([quantizerNode](){ return initStepSize(quantizerNode); }); - - // Absorb the ReLU when possible ... - - bool nodeHasParent = static_cast<bool> (linearNode->getParents()[0]); // XXX is this safe ? - - if (nodeHasParent) - { - bool allParentsAreReLU = true; - for (auto parentNode : linearNode->getParents()) - if (parentNode->type() != "ReLU") - allParentsAreReLU = false; - - if (allParentsAreReLU) { - auto quantizerOp = std::static_pointer_cast<LSQ_Op> (quantizerNode->getOperator()); - quantizerOp->range() = unsignedRange; - } - - // TODO : remove the ReLUs when possible - } - - // Insert the quantizer in the graphView ... - // (We need to handle the case where the linear node is the first one) - - if (nodeHasParent) { - graphView->insertParent(linearNode, quantizerNode, 0, 0, 0); - } else { - quantizerNode->addChild(graphView); - graphView->add(quantizerNode); - } - } -} - -// PARAM QUANTIZERS INSERTION - -static void setupParamQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits) -{ - const auto matches = SinglePassGraphMatching(graphView).match("(Conv2D#|FC#)"); - - std::pair<int, int> signedRange = {-std::pow(2, nbBits - 1), std::pow(2, nbBits - 1) - 1}; - - for (const auto& match : matches) - { - auto linearNode = match.graph->rootNode(); - - // TODO : double check this, and use createUniqueName() - auto quantizerName = makeUniqueName(linearNode->name() + "_lsq_p", graphView); - auto quantizerNode = LSQ(signedRange, quantizerName); - - // Init the step-size using the node call stack - - quantizerNode->addBeforeForward([quantizerNode](){ return initStepSize(quantizerNode); }); - - // Insert the quantizer in the graphView - - graphView->insertParent(linearNode, quantizerNode, 1, 0, 0); - } -} - -void QuantLSQ::setupQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits) -{ - setupInputQuantizers(graphView, nbBits); - setupParamQuantizers(graphView, nbBits); -} - -void QuantLSQ::devLSQ(std::shared_ptr<Tensor> tensor) -{ - float mean = (tensor->mean()).get<float> (0); - std::cout << " MEAN = " << mean << std::endl; -} -} */ \ No newline at end of file -- GitLab From e3a715178125f72d648edeeb7aaafb1c6b0c5e87 Mon Sep 17 00:00:00 2001 From: bhalimi <benjamin.halimi@cea.fr> Date: Mon, 27 Jan 2025 13:04:58 +0000 Subject: [PATCH 22/60] refactor the LSQ code --- include/aidge/quantization/QAT/QAT_LSQ.hpp | 1 - python_binding/pybind_QAT_LSQ.cpp | 5 - src/QAT/QAT_LSQ.cpp | 235 +++------------------ 3 files changed, 30 
insertions(+), 211 deletions(-) diff --git a/include/aidge/quantization/QAT/QAT_LSQ.hpp b/include/aidge/quantization/QAT/QAT_LSQ.hpp index f33a7c6..b9d8b33 100644 --- a/include/aidge/quantization/QAT/QAT_LSQ.hpp +++ b/include/aidge/quantization/QAT/QAT_LSQ.hpp @@ -27,7 +27,6 @@ namespace QuantLSQ { */ void setupQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits); -void devLSQ(std::shared_ptr<Tensor> tensor); } } diff --git a/python_binding/pybind_QAT_LSQ.cpp b/python_binding/pybind_QAT_LSQ.cpp index 0dd4267..4bba3b6 100644 --- a/python_binding/pybind_QAT_LSQ.cpp +++ b/python_binding/pybind_QAT_LSQ.cpp @@ -23,11 +23,6 @@ void init_QAT_LSQ(py::module &m) { auto mQuantLSQ = m.def_submodule("lsq"); - mQuantLSQ.def("setup_quantizers", &QuantLSQ::setupQuantizers, py::arg("network"), py::arg("nb_bits")); - mQuantLSQ.def("dev_lsq", &QuantLSQ::devLSQ, py::arg("tensor")); - - //mQuantLSQ.def("insert_and_init_quantizers", &QuantLSQ::insertAndInitQuantizers, py::arg("network"), py::arg("nb_bits"), py::arg("calibration_data")); - } } // namespace Aidge diff --git a/src/QAT/QAT_LSQ.cpp b/src/QAT/QAT_LSQ.cpp index 66e8ec7..80e8a05 100644 --- a/src/QAT/QAT_LSQ.cpp +++ b/src/QAT/QAT_LSQ.cpp @@ -21,25 +21,50 @@ #include "aidge/graph/Matching.hpp" #include "aidge/recipes/QuantRecipes.hpp" -namespace Aidge { + +namespace Aidge +{ static float getTensorAbsMean(std::shared_ptr<Tensor> tensor) { + //std::cout << " GET TENSOR ABS MEAN " << std::endl; auto valueTensor = (*tensor).abs().mean(); std::shared_ptr<Tensor> fallback; const Tensor& localTensor = valueTensor.refCastFrom(fallback, DataType::Float32, "cpu"); return localTensor.get<float>(0); } +static float getTensorStd(std::shared_ptr<Tensor> tensor) +{ + auto valueTensor = (*tensor); + + auto skewedTensor = valueTensor - valueTensor.mean(); + auto squaredTensor = skewedTensor * skewedTensor; + auto varianceTensor = squaredTensor.mean(); + + std::shared_ptr<Tensor> fallback; + auto localTensor = varianceTensor.refCastFrom(fallback, DataType::Float32, "cpu"); + + float variance = localTensor.get<float>(0); + return std::sqrt(variance); +} + + // INIT THE STEP SIZE OF A QUANTIZER NODE static bool initStepSize(std::shared_ptr<Node> quantizer) { const auto quantizerOp = std::static_pointer_cast<LSQ_Op>(quantizer->getOperator()); - float inputAbsMean = getTensorAbsMean(quantizerOp->getInput(0)); + // This formula is the one proposed in the paper ... + + // float inputAbsMean = getTensorAbsMean(quantizerOp->getInput(0)); + // float stepSize = 2.0f * (inputAbsMean / std::sqrt(quantizerOp->range().second)); - float stepSize = 2.0f * (inputAbsMean / std::sqrt(quantizerOp->range().second)); + // .. but this formula seems to work better !!! 
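To make the two competing initializations above concrete (illustrative numbers, assuming a zero-mean, roughly Gaussian input, for which E[|x|] ~= 0.8 * std):

// nbBits = 8  =>  Qp = 127
// paper formula: 2 * E[|x|] / sqrt(127)  ~=  0.18 * E[|x|]  ~=  0.14 * std
// this variant:  8 * std / 127           ~=  0.063 * std

So the replacement starts from a step size a bit less than half as large, i.e. a tighter initial quantization grid around zero.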
+ + float inputStd = getTensorStd(quantizerOp->getInput(0)); + float stepSize = 8.0f * (inputStd / (quantizerOp->range().second)); auto stepSizeTensor = std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}})); @@ -56,8 +81,6 @@ static bool initStepSize(std::shared_ptr<Node> quantizer) return false; } -// INPUT QUANTIZERS INSERTION - static void setupInputQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits) { const auto matches = SinglePassGraphMatching(graphView).match("(Conv2D#|FC#)"); @@ -137,207 +160,9 @@ static void setupParamQuantizers(std::shared_ptr<GraphView> graphView, size_t nb void QuantLSQ::setupQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits) { + sanitizeNodeNames(graphView); setupInputQuantizers(graphView, nbBits); setupParamQuantizers(graphView, nbBits); } -void QuantLSQ::devLSQ(std::shared_ptr<Tensor> tensor) -{ - float mean = (tensor->mean()).get<float> (0); - std::cout << " MEAN = " << mean << std::endl; -} -} - -/* - -namespace Aidge { - -static void insertQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits, float stepSize) -{ - const auto matches = SinglePassGraphMatching(graphView).match("(Conv2D#|FC#)"); - - for (const auto& match : matches) - { - auto linearNode = match.graph->rootNode(); - - std::pair<int, int> signedRange = {-std::pow(2, nbBits - 1), std::pow(2, nbBits - 1) - 1}; - std::pair<int, int> unsignedRange = {0, std::pow(2, nbBits) - 1}; - - // INPUT QUANTIZERS INSERTION - - // TODO : double check this, and use createUniqueName() - auto inputQuantizerName = makeUniqueName(linearNode->name() + "_lsq_i", graphView); - auto inputQuantizerNode = LSQ(signedRange, inputQuantizerName); - - // Set the step size - - auto inputStepSizeOp = inputQuantizerNode->getParent(1)->getOperator(); - auto inputStepSizeTensor = std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}})); - inputStepSizeOp->setOutput(0, inputStepSizeTensor); - - // Absorb the ReLU when possible ... - - // XXX is this safe ??? - bool nodeHasParent = static_cast<bool> (linearNode->getParents()[0]); - // bool nodeHasParent = (linearNode->getParents().size() != 0); - - if (nodeHasParent) { - auto parentNode = linearNode->getParents()[0]; - if (parentNode->type() == "ReLU") { - auto inputQuantizerOp = std::static_pointer_cast<LSQ_Op> (inputQuantizerNode->getOperator()); - inputQuantizerOp->range() = unsignedRange; - graphView->replace({parentNode}, {}); - } - } - - // We need to handle the case where the linear node is the first one ... 
- - if (nodeHasParent) { - graphView->insertParent(linearNode, inputQuantizerNode, 0, 0, 0); - } else { - inputQuantizerNode->addChild(graphView); - graphView->add(inputQuantizerNode); - } - - // PARAM QUANTIZERS INSERTION - - // TODO : double check this, and use createUniqueName() - auto paramQuantizerName = makeUniqueName(linearNode->name() + "_lsq_p", graphView); - auto paramQuantizerNode = LSQ(signedRange, paramQuantizerName); - graphView->insertParent(linearNode, paramQuantizerNode, 1, 0, 0); - - // Set the step size - - auto paramStepSizeOp = paramQuantizerNode->getParent(1)->getOperator(); - auto paramStepSizeTensor = std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}})); - paramStepSizeOp->setOutput(0, paramStepSizeTensor); - } - -} - -static float getTensorAbsMean(std::shared_ptr<Tensor> tensor) -{ - auto backend = tensor->backend(); - if (backend == "cuda") - tensor->setBackend("cpu"); - - float acc = 0; - float* castedTensor = static_cast<float *> (tensor->getImpl()->rawPtr()); - for(std::size_t i = 0; i < tensor->size(); i++) - acc += std::abs(castedTensor[i]); - acc /= static_cast<float> (tensor->size()); - - if (backend == "cuda") - tensor->setBackend("cuda"); - - return acc; -} - -static std::map<std::string, float> collectInputStats(std::shared_ptr<GraphView> graphView, std::shared_ptr<Tensor> calibrationData, bool useCuda) -{ - // Propagate the calibration tensor - - SequentialScheduler scheduler(graphView); - scheduler.resetScheduling(); - scheduler.forward(true, {calibrationData}); - - // Store the input tensor statistics - - if (useCuda) - graphView->setBackend("cpu"); - - std::map<std::string, float> inputStats; - for (auto node : graphView->getNodes()) - { - if (node->type() == "FC" || node->type() == "Conv2D") // TODO: use graph matching !!! - { - const auto op = std::static_pointer_cast<LSQ_Op>(node->getOperator()); - float inputAbsMean = getTensorAbsMean(op->getInput(0)); - inputStats.insert(std::make_pair(node->name(), inputAbsMean)); - std::cout << node->name() << " -> " << inputAbsMean << std::endl; - } - } - - if (useCuda) - graphView->setBackend("cuda"); - - return inputStats; -} - -static std::map<std::string, float> collectParamStats(std::shared_ptr<GraphView> graphView, bool useCuda) -{ - if (useCuda) - graphView->setBackend("cpu"); - - std::map<std::string, float> paramStats; - for (auto node : graphView->getNodes()) - { - if (node->type() == "FC" || node->type() == "Conv2D") // TODO: use graph matching !!! 
- { - const auto op = std::static_pointer_cast<LSQ_Op>(node->getOperator()); - float paramAbsMean = getTensorAbsMean(op->getInput(1)); - paramStats.insert(std::make_pair(node->name(), paramAbsMean)); - std::cout << node->name() << " -> " << paramAbsMean << std::endl; - } - } - - if (useCuda) - graphView->setBackend("cuda"); - - return paramStats; -} - -static void adjustQuantizersStepSizes(std::shared_ptr<GraphView> graphView, std::map<std::string, float> inputStats, std::map<std::string, float> paramStats) -{ - const auto matches = SinglePassGraphMatching(graphView).match("(Conv2D#|FC#)"); - - for (const auto& match : matches) - { - auto linearNode = match.graph->rootNode(); - - // INPUT QUANTIZERS STEP-SIZES - - auto inputQuantNode = linearNode->getParent(0); - auto inputQuantOp = std::static_pointer_cast<LSQ_Op>(inputQuantNode->getOperator()); - - float absMean = inputStats[linearNode->name()]; - float stepSize = 2.0f * (absMean / std::sqrt(inputQuantOp->range().second)); - - auto inputStepSizeOp = inputQuantNode->getParent(1)->getOperator(); - // XXX inputStepSizeOp->setOutput(0, std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}}))); - auto inputStepSizeTensor = std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}})); - inputStepSizeOp->setOutput(0, inputStepSizeTensor); - - // PARAM QUANTIZERS STEP-SIZES - - auto paramQuantNode = linearNode->getParent(1); - auto paramQuantOp = std::static_pointer_cast<LSQ_Op>(paramQuantNode->getOperator()); - - absMean = paramStats[linearNode->name()]; - stepSize = 2.0f * (absMean / std::sqrt(paramQuantOp->range().second)); - - auto paramStepSizeOp = paramQuantNode->getParent(1)->getOperator(); - // XXX paramStepSizeOp->setOutput(0, std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}}))); - auto paramStepSizeTensor = std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}})); - paramStepSizeOp->setOutput(0, paramStepSizeTensor); - } -} - -void QuantLSQ::insertAndInitQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits, std::shared_ptr<Tensor> calibrationData) -{ - bool useCuda = (calibrationData->backend() == "cuda"); - - // Collect the tensor statisics - auto inputStats = collectInputStats(graphView, calibrationData, useCuda); - - auto paramStats = collectParamStats(graphView, useCuda); - - // Insert the quantizers - insertQuantizers(graphView, nbBits, 1.0); - - // Adjust the quantizers step-sizes - adjustQuantizersStepSizes(graphView, inputStats, paramStats); -} -} - -*/ \ No newline at end of file +} \ No newline at end of file -- GitLab From 96e095d4f55962c3b7989a85abd8652d13956f2f Mon Sep 17 00:00:00 2001 From: bhalimi <benjamin.halimi@cea.fr> Date: Mon, 27 Jan 2025 13:07:12 +0000 Subject: [PATCH 23/60] remove commented code --- src/backend/cuda/operator/LSQImpl.cpp | 20 +------------------- 1 file changed, 1 insertion(+), 19 deletions(-) diff --git a/src/backend/cuda/operator/LSQImpl.cpp b/src/backend/cuda/operator/LSQImpl.cpp index c66bd8a..fa45f21 100644 --- a/src/backend/cuda/operator/LSQImpl.cpp +++ b/src/backend/cuda/operator/LSQImpl.cpp @@ -52,19 +52,6 @@ void Aidge::LSQImpl_cuda::backward() { std::shared_ptr<Tensor> gra_int1 = op_.getInput(1)->grad(); std::shared_ptr<Tensor> gra_out0 = op_.getOutput(0)->grad(); - // XXX -/* - size_t tmp; - - cudaDeviceSetLimit(cudaLimitStackSize, 2048); - cudaDeviceGetLimit(&tmp, cudaLimitStackSize ); - printf(" stack limit = %ld \n", tmp); - - cudaDeviceSetLimit(cudaLimitMallocHeapSize, 100000000); - cudaDeviceGetLimit(&tmp, cudaLimitMallocHeapSize); - printf(" heap limit = %ld \n", 
tmp); -*/ - if (gra_int0->size() > mWorkspaceSize) { // std::cout << " reallocation " << sizeof(gra_int0) << " " << gra_int0->size() << std::endl; if (mWorkspace != nullptr) { @@ -87,12 +74,7 @@ void Aidge::LSQImpl_cuda::backward() { gra_int0->getImpl()->rawPtr(), gra_int1->getImpl()->rawPtr(), mWorkspace); -/* - gra_int1->setBackend("cpu"); - float *castedTensor = static_cast<float *> (gra_int1->getImpl()->rawPtr()); - std::cout << castedTensor[0] << std::endl; - gra_int1->setBackend("cuda"); -*/ + } Aidge::LSQImpl_cuda::~LSQImpl_cuda() { -- GitLab From 474fe56eb058fb31ff26ed311a0fac015901eb73 Mon Sep 17 00:00:00 2001 From: bhalimi <benjamin.halimi@cea.fr> Date: Mon, 27 Jan 2025 14:14:40 +0000 Subject: [PATCH 24/60] complete the PTQ float to double migration --- include/aidge/quantization/PTQ/CLE.hpp | 2 +- src/PTQ/CLE.cpp | 18 +++++++++--------- src/PTQ/PTQ.cpp | 4 ++-- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/include/aidge/quantization/PTQ/CLE.hpp b/include/aidge/quantization/PTQ/CLE.hpp index d94b6e9..77eaf7f 100644 --- a/include/aidge/quantization/PTQ/CLE.hpp +++ b/include/aidge/quantization/PTQ/CLE.hpp @@ -30,7 +30,7 @@ namespace Aidge * @param graphView The GraphView to process. * @param targetDelta the stopping criterion (typical value : 0.01) */ - void crossLayerEqualization(std::shared_ptr<GraphView> graphView, float targetDelta = 0.01); + void crossLayerEqualization(std::shared_ptr<GraphView> graphView, double targetDelta = 0.01); } diff --git a/src/PTQ/CLE.cpp b/src/PTQ/CLE.cpp index 2c6e374..aac0073 100644 --- a/src/PTQ/CLE.cpp +++ b/src/PTQ/CLE.cpp @@ -38,13 +38,13 @@ static std::shared_ptr<Tensor> getBiasTensor(std::shared_ptr<Node> node) return std::static_pointer_cast<OperatorTensor>(node->getOperator())->getInput(2); } -static void rescaleTensor(std::shared_ptr<Tensor> tensor, float scaling) +static void rescaleTensor(std::shared_ptr<Tensor> tensor, double scaling) { auto mulOp = Mul_Op(); mulOp.setDataType(tensor->dataType()); mulOp.setBackend(tensor->backend()); - std::shared_ptr<Aidge::Tensor> scalingTensor = std::make_shared<Aidge::Tensor>(Aidge::Array1D<float, 1> {scaling}); + std::shared_ptr<Aidge::Tensor> scalingTensor = std::make_shared<Aidge::Tensor>(Aidge::Array1D<double, 1> {scaling}); scalingTensor->setDataType(tensor->dataType()); scalingTensor->setBackend(tensor->backend()); @@ -94,7 +94,7 @@ static double getTensorAbsoluteMax(std::shared_ptr<Tensor> tensor) return flatTensor->get<double>(maxIndex); } -void crossLayerEqualization(std::shared_ptr<GraphView> graphView, float targetDelta) +void crossLayerEqualization(std::shared_ptr<GraphView> graphView, double targetDelta) { std::vector<std::shared_ptr<Node>> nodeVector = retrieveNodeVector(graphView); @@ -116,7 +116,7 @@ void crossLayerEqualization(std::shared_ptr<GraphView> graphView, float targetDe if (isAffine(node)) affineNodeVector.push_back(node); - float maxRangeDelta; + double maxRangeDelta; do { @@ -131,18 +131,18 @@ void crossLayerEqualization(std::shared_ptr<GraphView> graphView, float targetDe std::shared_ptr<Node> n1 = affineNodeVector[i]; std::shared_ptr<Node> n2 = affineNodeVector[i+1]; - float r1 = getTensorAbsoluteMax(getWeightTensor(n1)); - float r2 = getTensorAbsoluteMax(getWeightTensor(n2)); + double r1 = getTensorAbsoluteMax(getWeightTensor(n1)); + double r2 = getTensorAbsoluteMax(getWeightTensor(n2)); - float s1 = std::sqrt(r1 * r2) / r1; - float s2 = std::sqrt(r1 * r2) / r2; + double s1 = std::sqrt(r1 * r2) / r1; + double s2 = std::sqrt(r1 * r2) / r2; 
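+        // Note that s1 * s2 == 1, so rescaling the weights of n1 by s1 and
+        // those of n2 by s2 leaves the composition of the two (piecewise-linear)
+        // layers unchanged, provided the bias of n1 is rescaled by s1 as well,
+        // which is done below. Both weight ranges converge to sqrt(r1 * r2):
+        // for instance r1 = 4 and r2 = 1 give s1 = 0.5 and s2 = 2, so both
+        // ranges become 2.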
rescaleTensor(getWeightTensor(n1), s1); rescaleTensor(getWeightTensor(n2), s2); rescaleTensor(getBiasTensor(n1), s1); - float rangeDelta = std::abs(r1 - r2); + double rangeDelta = std::abs(r1 - r2); if (rangeDelta > maxRangeDelta) maxRangeDelta = rangeDelta; } diff --git a/src/PTQ/PTQ.cpp b/src/PTQ/PTQ.cpp index 88e7ac8..3b156e7 100644 --- a/src/PTQ/PTQ.cpp +++ b/src/PTQ/PTQ.cpp @@ -72,13 +72,13 @@ bool checkArchitecture(std::shared_ptr<GraphView> graphView) return true; } -static void rescaleTensor(std::shared_ptr<Tensor> tensor, float scaling) +static void rescaleTensor(std::shared_ptr<Tensor> tensor, double scaling) { auto mulOp = Mul_Op(); mulOp.setDataType(tensor->dataType()); mulOp.setBackend(tensor->backend()); - std::shared_ptr<Aidge::Tensor> scalingTensor = std::make_shared<Aidge::Tensor>(Aidge::Array1D<float, 1> {scaling}); + std::shared_ptr<Aidge::Tensor> scalingTensor = std::make_shared<Aidge::Tensor>(Aidge::Array1D<double, 1> {scaling}); scalingTensor->setDataType(tensor->dataType()); scalingTensor->setBackend(tensor->backend()); -- GitLab From d8ea1014323f4a8e8616132313df2bd155790067 Mon Sep 17 00:00:00 2001 From: bhalimi <benjamin.halimi@cea.fr> Date: Mon, 27 Jan 2025 14:35:45 +0000 Subject: [PATCH 25/60] replace the couts with logs --- src/PTQ/CLE.cpp | 4 ---- src/PTQ/Clipping.cpp | 2 +- src/PTQ/PTQ.cpp | 13 ++++++------- src/QAT/QAT_FixedQ.cpp | 6 +++--- src/QAT/QAT_LSQ.cpp | 3 +-- 5 files changed, 11 insertions(+), 17 deletions(-) diff --git a/src/PTQ/CLE.cpp b/src/PTQ/CLE.cpp index aac0073..e6bcbc0 100644 --- a/src/PTQ/CLE.cpp +++ b/src/PTQ/CLE.cpp @@ -122,10 +122,6 @@ void crossLayerEqualization(std::shared_ptr<GraphView> graphView, double targetD { maxRangeDelta = 0.0; - //std::cout << " ----- " << std::endl; - //for (std::shared_ptr<Node> node : affineNodeVector) - // std::cout << getTensorAbsoluteMax(getWeightTensor(node)) << std::endl; - for (size_t i = 0; i < (affineNodeVector.size() - 1); i++) { std::shared_ptr<Node> n1 = affineNodeVector[i]; diff --git a/src/PTQ/Clipping.cpp b/src/PTQ/Clipping.cpp index 57ad7a8..66b0ab3 100644 --- a/src/PTQ/Clipping.cpp +++ b/src/PTQ/Clipping.cpp @@ -26,7 +26,7 @@ std::map<std::string, std::vector<int>> computeHistograms(std::map<std::string, std::shared_ptr<Node> firstNode = retrieveNodeVector(graphView)[0]; - //std::cout << " COMPUTING HISTOGRAMS ... " << std::endl; + // Log::debug(" COMPUTING HISTOGRAMS ... 
"); std::map<std::string, std::vector<int>> histograms; diff --git a/src/PTQ/PTQ.cpp b/src/PTQ/PTQ.cpp index 3b156e7..073e5e0 100644 --- a/src/PTQ/PTQ.cpp +++ b/src/PTQ/PTQ.cpp @@ -987,7 +987,6 @@ void performSingleShiftApproximation(std::shared_ptr<GraphView> graphView, bool static void printScalingFactors(std::shared_ptr<GraphView> graphView) { - Log::info(" === SCALING FACTORS === "); for (auto node : retrieveNodeVector(graphView)) if (node->type() == "Scaling" || node->type() == "Quantizer") { @@ -1020,7 +1019,7 @@ static void printRanges(std::shared_ptr<GraphView> graphView, std::map<std::stri auto scheduling = scheduler.getStaticScheduling(); for (auto node : scheduling) if (node->type() == "Scaling") - std::cout << node->name() << " range = " << valueRanges[node->name()] << std::endl; + Log::info(" {} range = {} ", node->name(), valueRanges[node->name()]); } void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits, std::vector<std::shared_ptr<Tensor>> inputDataSet, Clipping clippingMode, bool noQuant, bool optimizeSigns, bool singleShift, bool useCuda, bool verbose) @@ -1049,13 +1048,13 @@ void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits, Log::info(" Computing the value ranges ..."); std::map<std::string, double> valueRanges = computeRanges(graphView, inputDataSet, true, useCuda); - //std::cout << " === RANGES (BEFORE ADJUST) ===" << std::endl; + //Log::info(" === RANGES (BEFORE ADJUST) ==="); //printRanges(graphView, valueRanges); Log::info(" Optimizing the clipping values ..."); valueRanges = adjustRanges(clippingMode, valueRanges, nbBits, graphView, inputDataSet, useCuda, verbose); - //std::cout << " === RANGES (AFTER ADJUST) ===" << std::endl; + //Log::info(" === RANGES (AFTER ADJUST) ==="); //printRanges(graphView, valueRanges); Log::info(" Normalizing the activations ..."); @@ -1076,7 +1075,7 @@ void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits, if (verbose) printScalingFactors(graphView); - //std::cout << " === SCALINGS (BEFORE CAST) ===" << std::endl; + //Log::info(" === SCALINGS (BEFORE CAST) ==="); //printScalingFactors(graphView); setupDataType(graphView, inputDataSet, initialDataType); @@ -1084,7 +1083,7 @@ void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits, if (useCuda) graphView->setBackend("cuda"); - //std::cout << " === SCALINGS (AFTER CAST) ===" << std::endl; + //Log::info(" === SCALINGS (AFTER CAST) ==="); //printScalingFactors(graphView); Log::info(" Reseting the scheduler ..."); @@ -1124,7 +1123,7 @@ void clearBiases(std::shared_ptr<GraphView> graphView) void devPTQ(std::shared_ptr<GraphView> graphView) { for (std::shared_ptr<Node> node : graphView->getNodes()) - std::cout << " UUU : " << node->name() << std::endl; + Log::info(" UUU : {}", node->name()); } } diff --git a/src/QAT/QAT_FixedQ.cpp b/src/QAT/QAT_FixedQ.cpp index d22074f..6ada532 100644 --- a/src/QAT/QAT_FixedQ.cpp +++ b/src/QAT/QAT_FixedQ.cpp @@ -91,7 +91,7 @@ static std::map<std::string, float> collectInputStats(std::shared_ptr<GraphView> const auto op = std::static_pointer_cast<FixedQ_Op>(node->getOperator()); float inputStd = getTensorStd(op->getInput(0)); inputStats.insert(std::make_pair(node->name(), inputStd)); - std::cout << node->name() << " -> " << inputStd << std::endl; + Log::info(" {} -> {} ", node->name(), inputStd); } } @@ -108,7 +108,7 @@ static std::map<std::string, float> collectParamStats(std::shared_ptr<GraphView> const auto op = 
std::static_pointer_cast<FixedQ_Op>(node->getOperator()); float paramStd = getTensorStd(op->getInput(1)); paramStats.insert(std::make_pair(node->name(), paramStd)); - std::cout << node->name() << " -> " << paramStd << std::endl; + Log::info(" {} -> {} ", node->name(), paramStd); } } @@ -156,7 +156,7 @@ void QuantFixedQ::devQAT(std::shared_ptr<GraphView> graphView) scheduler.generateScheduling(); auto s = scheduler.getStaticScheduling(); for (std::shared_ptr<Node> node : s) - std::cout << " name : " << node->name() << std::endl; + Log::info(" name : {} ", node->name()); } } \ No newline at end of file diff --git a/src/QAT/QAT_LSQ.cpp b/src/QAT/QAT_LSQ.cpp index 80e8a05..0508fc7 100644 --- a/src/QAT/QAT_LSQ.cpp +++ b/src/QAT/QAT_LSQ.cpp @@ -27,7 +27,6 @@ namespace Aidge static float getTensorAbsMean(std::shared_ptr<Tensor> tensor) { - //std::cout << " GET TENSOR ABS MEAN " << std::endl; auto valueTensor = (*tensor).abs().mean(); std::shared_ptr<Tensor> fallback; const Tensor& localTensor = valueTensor.refCastFrom(fallback, DataType::Float32, "cpu"); @@ -76,7 +75,7 @@ static bool initStepSize(std::shared_ptr<Node> quantizer) stepSizeProducer->getOperator()->setOutput(0, stepSizeTensor); - std::cout << " [ INIT STEP SIZE = " << stepSize << " ] " << std::endl; + Log::info(" [ INIT STEP SIZE = {} ] ", stepSize); return false; } -- GitLab From 06c57eaaa1136a17d9935a2368a6357ecbad1947 Mon Sep 17 00:00:00 2001 From: bhalimi <benjamin.halimi@cea.fr> Date: Mon, 27 Jan 2025 14:53:30 +0000 Subject: [PATCH 26/60] minor change --- src/recipes/QuantRecipes.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/recipes/QuantRecipes.cpp b/src/recipes/QuantRecipes.cpp index 562948c..7f01b24 100644 --- a/src/recipes/QuantRecipes.cpp +++ b/src/recipes/QuantRecipes.cpp @@ -58,11 +58,11 @@ void insertBatchNormNodes(std::shared_ptr<GraphView> graphView) if (parentNode->type() == "Conv2D") { std::shared_ptr<Conv_Op<2>> convOperator = std::static_pointer_cast<Conv_Op<2>> (parentNode->getOperator()); - int nb_channels = convOperator->getInput(1)->dims()[0]; - std::cout << " NB CHANNELS = " << nb_channels << std::endl; // TODO : remove this ... 
+ int nbChannels = convOperator->getInput(1)->dims()[0]; + Log::info(" NB CHANNELS = {} ", nbChannels); std::string batchnormNodeName = makeUniqueName(parentNode->name() + "_BN", graphView); - std::shared_ptr<Node> batchnormNode = BatchNorm<2>(nb_channels, 1e-5, 0.1, false, batchnormNodeName); + std::shared_ptr<Node> batchnormNode = BatchNorm<2>(nbChannels, 1e-5, 0.1, false, batchnormNodeName); batchnormNode->getOperator()->setDataType(DataType::Float32); batchnormNode->getOperator()->setBackend("cpu"); -- GitLab From d9c551fd838a2783d4311294d0476348e2ba7cf3 Mon Sep 17 00:00:00 2001 From: bhalimi <benjamin.halimi@cea.fr> Date: Mon, 27 Jan 2025 15:03:44 +0000 Subject: [PATCH 27/60] move the PTQMetaOps files --- include/aidge/{quantization/PTQ => operator}/PTQMetaOps.hpp | 0 src/PTQ/PTQ.cpp | 2 +- src/{PTQ => operator}/PTQMetaOps.cpp | 0 3 files changed, 1 insertion(+), 1 deletion(-) rename include/aidge/{quantization/PTQ => operator}/PTQMetaOps.hpp (100%) rename src/{PTQ => operator}/PTQMetaOps.cpp (100%) diff --git a/include/aidge/quantization/PTQ/PTQMetaOps.hpp b/include/aidge/operator/PTQMetaOps.hpp similarity index 100% rename from include/aidge/quantization/PTQ/PTQMetaOps.hpp rename to include/aidge/operator/PTQMetaOps.hpp diff --git a/src/PTQ/PTQ.cpp b/src/PTQ/PTQ.cpp index 073e5e0..09b039f 100644 --- a/src/PTQ/PTQ.cpp +++ b/src/PTQ/PTQ.cpp @@ -12,7 +12,7 @@ #include "aidge/quantization/PTQ/CLE.hpp" #include "aidge/quantization/PTQ/Clipping.hpp" #include "aidge/quantization/PTQ/PTQ.hpp" -#include "aidge/quantization/PTQ/PTQMetaOps.hpp" +#include "aidge/operator/PTQMetaOps.hpp" #include "aidge/data/Tensor.hpp" diff --git a/src/PTQ/PTQMetaOps.cpp b/src/operator/PTQMetaOps.cpp similarity index 100% rename from src/PTQ/PTQMetaOps.cpp rename to src/operator/PTQMetaOps.cpp -- GitLab From 7aff7e0fc383009a282c81153d7f3d72525b5d08 Mon Sep 17 00:00:00 2001 From: bhalimi <benjamin.halimi@cea.fr> Date: Tue, 28 Jan 2025 10:06:53 +0000 Subject: [PATCH 28/60] fix an include --- src/operator/PTQMetaOps.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/operator/PTQMetaOps.cpp b/src/operator/PTQMetaOps.cpp index 152a3b0..a079ed6 100644 --- a/src/operator/PTQMetaOps.cpp +++ b/src/operator/PTQMetaOps.cpp @@ -9,13 +9,12 @@ * ********************************************************************************/ -#include "aidge/quantization/PTQ/PTQMetaOps.hpp" +#include "aidge/operator/PTQMetaOps.hpp" #include <array> #include <memory> #include <utility> -//Operator #include "aidge/operator/Clip.hpp" #include "aidge/operator/Mul.hpp" #include "aidge/operator/Round.hpp" -- GitLab From 5840b845139be68ed2837b34f7339d0382d08c08 Mon Sep 17 00:00:00 2001 From: Maxence Naud <maxence.naud@cea.fr> Date: Wed, 29 Jan 2025 16:41:43 +0000 Subject: [PATCH 29/60] [Upd] standardization of some files --- aidge_quantization/unit_tests/test_ptq.py | 22 +++--- include/aidge/operator/FixedQ.hpp | 43 ++++++------ include/aidge/operator/LSQ.hpp | 6 +- include/aidge/operator/SAT/DoReFa.hpp | 67 ++++++++++++------- include/aidge/operator/SAT/TanhClamp.hpp | 33 +++------ include/aidge/quantization/PTQ/CLE.hpp | 30 +++++---- include/aidge/quantization/PTQ/Clipping.hpp | 22 +++--- include/aidge/quantization/PTQ/PTQ.hpp | 27 ++++---- include/aidge/quantization/PTQ/PTQMetaOps.hpp | 18 ++--- include/aidge/quantization/QAT/QAT_FixedQ.hpp | 10 +-- include/aidge/quantization/QAT/QAT_LSQ.hpp | 20 +++--- src/PTQ/CLE.cpp | 40 +++++++---- src/PTQ/PTQMetaOps.cpp | 22 +++--- src/operator/FixedQ.cpp | 20 ++++++ 
src/operator/SAT/DoReFa.cpp | 33 +++++++-- src/operator/SAT/TanhClamp.cpp | 22 +++++- 16 files changed, 259 insertions(+), 176 deletions(-) diff --git a/aidge_quantization/unit_tests/test_ptq.py b/aidge_quantization/unit_tests/test_ptq.py index dfdedd8..56080bf 100644 --- a/aidge_quantization/unit_tests/test_ptq.py +++ b/aidge_quantization/unit_tests/test_ptq.py @@ -21,7 +21,7 @@ ACCURACIES = (95.4, 94.4) # (97.9, 97.7) NB_BITS = 4 # -------------------------------------------------------------- -# UTILS +# UTILS # -------------------------------------------------------------- def propagate(model, scheduler, sample): @@ -50,7 +50,7 @@ def compute_accuracy(model, samples, labels): # -------------------------------------------------------------- class test_ptq(unittest.TestCase): - + def setUp(self): # load the samples / labels (numpy) @@ -70,19 +70,20 @@ class test_ptq(unittest.TestCase): def tearDown(self): pass - + def test_model(self): Log.set_console_level(Level.Info) # compute the base accuracy accuracy = compute_accuracy(self.model, self.samples[0:NB_SAMPLES], self.labels) self.assertAlmostEqual(accuracy * 100, ACCURACIES[0], msg='base accuracy does not meet the baseline !', delta=0.1) - + def test_quant_model(self): - Log.set_console_level(Level.Info) + Log.set_console_level(Level.Debug) # create the calibration dataset + tensors = [] for sample in self.samples[0:NB_SAMPLES]: sample = prepare_sample(sample) @@ -91,14 +92,13 @@ class test_ptq(unittest.TestCase): # quantize the model - aidge_quantization.quantize_network( - self.model, - NB_BITS, - tensors, - clipping_mode=aidge_quantization.Clipping.MSE, + self.model, + NB_BITS, + tensors, + clipping_mode=aidge_quantization.Clipping.MSE, no_quantization=False, - optimize_signs=True, + optimize_signs=True, single_shift=False ) diff --git a/include/aidge/operator/FixedQ.hpp b/include/aidge/operator/FixedQ.hpp index 96a52b4..3d46dcf 100644 --- a/include/aidge/operator/FixedQ.hpp +++ b/include/aidge/operator/FixedQ.hpp @@ -9,11 +9,12 @@ * ********************************************************************************/ -#ifndef AIDGE_CORE_OPERATOR_FIXEDQ_H_ -#define AIDGE_CORE_OPERATOR_FIXEDQ_H_ +#ifndef AIDGE_QUANTIZATION_OPERATOR_FIXEDQ_H_ +#define AIDGE_QUANTIZATION_OPERATOR_FIXEDQ_H_ -#include <cassert> +#include <cstddef> // std::size_t #include <memory> +#include <string> #include <vector> #include "aidge/backend/OperatorImpl.hpp" @@ -21,8 +22,8 @@ #include "aidge/operator/OperatorTensor.hpp" #include "aidge/utils/ErrorHandling.hpp" #include "aidge/utils/Registrar.hpp" -#include "aidge/utils/Types.h" #include "aidge/utils/StaticAttributes.hpp" +#include "aidge/utils/Types.h" namespace Aidge { @@ -43,24 +44,20 @@ private: public: - FixedQ_Op(std::size_t nbBits, float span, bool isOutputUnsigned) : - OperatorTensor(Type, {InputCategory::Data}, 1), - mAttributes(std::make_shared<Attributes_>(attr<FixedQAttr::NbBits>(nbBits), attr<FixedQAttr::Span>(span), attr<FixedQAttr::IsOutputUnsigned>(isOutputUnsigned))) + FixedQ_Op(std::size_t nbBits = 8, float span = 4.0f, bool isOutputUnsigned = false) : + OperatorTensor(Type, {InputCategory::Data}, 1), + mAttributes(std::make_shared<Attributes_>( + attr<FixedQAttr::NbBits>(nbBits), + attr<FixedQAttr::Span>(span), + attr<FixedQAttr::IsOutputUnsigned>(isOutputUnsigned))) {} /** - * @brief Copy-constructor. Copy the operator attributes and its output tensor(s), but not its input tensors (the new operator has no input associated). + * @brief Copy-constructor. 
Copy the operator attributes and its output + * tensor(s), but not its input tensors (the new operator has no input associated). * @param op Operator to copy. */ - FixedQ_Op(const FixedQ_Op& op) - : OperatorTensor(op), mAttributes(op.mAttributes) - { - if (op.mImpl){ - SET_IMPL_MACRO(FixedQ_Op, *this, op.backend()); - }else{ - mImpl = nullptr; - } - } + FixedQ_Op(const FixedQ_Op& op); /** * @brief Clone the operator using its copy-constructor. @@ -88,14 +85,16 @@ public: }; -inline std::shared_ptr<Node> FixedQ(std::size_t nbBits = 8, float span = 4.0f, bool isOutputUnsigned = false, const std::string& name = "") { - return std::make_shared<Node>(std::make_shared<FixedQ_Op>(nbBits, span, isOutputUnsigned), name); -} -} +std::shared_ptr<Node> FixedQ(std::size_t nbBits = 8, + float span = 4.0f, + bool isOutputUnsigned = false, + const std::string& name = ""); + +} // namespace Aidge namespace { template <> const char* const EnumStrings<Aidge::FixedQAttr>::data[] = {"nb_bits", "span", "is_output_unsigned"}; } -#endif /* AIDGE_CORE_OPERATOR_FIXEDQ_H_ */ +#endif /* AIDGE_QUANTIZATION_OPERATOR_FIXEDQ_H_ */ diff --git a/include/aidge/operator/LSQ.hpp b/include/aidge/operator/LSQ.hpp index eb266bc..970c476 100644 --- a/include/aidge/operator/LSQ.hpp +++ b/include/aidge/operator/LSQ.hpp @@ -9,8 +9,8 @@ * ********************************************************************************/ -#ifndef AIDGE_CORE_OPERATOR_LSQ_H_ -#define AIDGE_CORE_OPERATOR_LSQ_H_ +#ifndef AIDGE_QUANTIZATION_OPERATOR_LSQ_H_ +#define AIDGE_QUANTIZATION_OPERATOR_LSQ_H_ #include <cassert> #include <memory> @@ -105,4 +105,4 @@ template <> const char *const EnumStrings<Aidge::LSQAttr>::data[] = {"range"}; } -#endif /* AIDGE_CORE_OPERATOR_LSQ_H_ */ +#endif /* AIDGE_QUANTIZATION_OPERATOR_LSQ_H_ */ diff --git a/include/aidge/operator/SAT/DoReFa.hpp b/include/aidge/operator/SAT/DoReFa.hpp index 92ce167..d168c38 100644 --- a/include/aidge/operator/SAT/DoReFa.hpp +++ b/include/aidge/operator/SAT/DoReFa.hpp @@ -9,17 +9,15 @@ * ********************************************************************************/ -#ifndef AIDGE_CORE_OPERATOR_DOREFA_H_ -#define AIDGE_CORE_OPERATOR_DOREFA_H_ +#ifndef AIDGE_QUANTIZATION_OPERATOR_SAT_DOREFA_H_ +#define AIDGE_QUANTIZATION_OPERATOR_SAT_DOREFA_H_ -#include <cassert> #include <memory> #include <vector> #include "aidge/backend/OperatorImpl.hpp" #include "aidge/graph/Node.hpp" #include "aidge/operator/OperatorTensor.hpp" -#include "aidge/utils/ErrorHandling.hpp" #include "aidge/utils/Registrar.hpp" #include "aidge/utils/StaticAttributes.hpp" #include "aidge/utils/Types.h" @@ -43,12 +41,17 @@ public: static const std::string Type; private: - using Attributes_ = StaticAttributes<DoReFaAttr, size_t, DoReFaMode>; + using Attributes_ = StaticAttributes<DoReFaAttr, std::size_t, DoReFaMode>; template <DoReFaAttr e> using attr = typename Attributes_::template attr<e>; const std::shared_ptr<Attributes_> mAttributes; public: - DoReFa_Op(size_t range = 255, DoReFaMode mode = DoReFaMode::Default) + /** + * @brief Constructor for DoReFa_Op + * @param range The quantization range (default: 255) + * @param mode The quantization mode (default: Default) + */ + DoReFa_Op(std::size_t range = 255, DoReFaMode mode = DoReFaMode::Default) : OperatorTensor(Type, {InputCategory::Param}, 1), mAttributes(std::make_shared<Attributes_>( attr<DoReFaAttr::Range>(range), @@ -59,30 +62,34 @@ public: * @brief Copy-constructor. 
Copy the operator attributes and its output tensor(s), but not its input tensors (the new operator has no input associated). * @param op Operator to copy. */ - DoReFa_Op(const DoReFa_Op& op) - : OperatorTensor(op), - mAttributes(op.mAttributes) - { - if (op.mImpl){ - SET_IMPL_MACRO(DoReFa_Op, *this, op.backend()); - }else{ - mImpl = nullptr; - } - } + DoReFa_Op(const DoReFa_Op& op); /** * @brief Clone the operator using its copy-constructor. * @see Operator::DoReFa_Op + * @return std::shared_ptr<Operator> A deep copy of the operator */ - std::shared_ptr<Operator> clone() const override { - return std::make_shared<DoReFa_Op>(*this); - } + std::shared_ptr<Operator> clone() const override; + /** + * @brief Get available backends for this operator + * @return std::set<std::string> Set of supported backend names + */ std::set<std::string> getAvailableBackends() const override final; + + /** + * @brief Set the backend for this operator + * @param name Backend name + * @param device Device index (default: 0) + */ void setBackend(const std::string& name, DeviceIdx_t device = 0) override final; + /** + * @brief Get operator attributes + * @return std::shared_ptr<Attributes> Shared pointer to operator attributes + */ inline std::shared_ptr<Attributes> attributes() const override { return mAttributes; } - inline size_t& range() const noexcept { return mAttributes->getAttr<DoReFaAttr::Range>(); } + inline std::size_t& range() const noexcept { return mAttributes->getAttr<DoReFaAttr::Range>(); } inline DoReFaMode& mode() const noexcept { return mAttributes->getAttr<DoReFaAttr::Mode>(); } static const std::vector<std::string> getInputsName(){ @@ -93,10 +100,20 @@ public: } }; -inline std::shared_ptr<Node> DoReFa(size_t range = 255, DoReFaMode mode = DoReFaMode::Default, const std::string& name = "") { - return std::make_shared<Node>(std::make_shared<DoReFa_Op>(range, mode), name); -} -} +/** + * @brief Factory function to create a DoReFa operator node + * + * @param range Quantization range (default: 255) + * @param mode Quantization mode (default: Default) + * @param name Node name (default: empty) + * + * @return std::shared_ptr<Node> Shared pointer to the created node + */ +std::shared_ptr<Node> DoReFa(std::size_t range = 255, + DoReFaMode mode = DoReFaMode::Default, + const std::string& name = ""); + +} // namespace Aidge namespace { template <> @@ -106,4 +123,4 @@ template <> const char *const EnumStrings<Aidge::DoReFaMode>::data[] = {"default", "symmetric", "asymmetric", "full_range"}; } -#endif /* AIDGE_CORE_OPERATOR_DOREFA_H_ */ +#endif /* AIDGE_QUANTIZATION_OPERATOR_SAT_DOREFA_H_ */ diff --git a/include/aidge/operator/SAT/TanhClamp.hpp b/include/aidge/operator/SAT/TanhClamp.hpp index def43b8..9d99d70 100644 --- a/include/aidge/operator/SAT/TanhClamp.hpp +++ b/include/aidge/operator/SAT/TanhClamp.hpp @@ -9,20 +9,18 @@ * ********************************************************************************/ -#ifndef AIDGE_CORE_OPERATOR_TANHCLAMP_H_ -#define AIDGE_CORE_OPERATOR_TANHCLAMP_H_ +#ifndef AIDGE_QUANTIZATION_OPERATOR_SAT_TANHCLAMP_H_ +#define AIDGE_QUANTIZATION_OPERATOR_SAT_TANHCLAMP_H_ -#include <cassert> #include <memory> +#include <set> +#include <string> #include <vector> #include "aidge/backend/OperatorImpl.hpp" #include "aidge/graph/Node.hpp" #include "aidge/operator/OperatorTensor.hpp" -#include "aidge/operator/Producer.hpp" -#include "aidge/utils/ErrorHandling.hpp" #include "aidge/utils/Registrar.hpp" -#include "aidge/utils/StaticAttributes.hpp" #include "aidge/utils/Types.h" namespace 
Aidge {
@@ -44,23 +42,13 @@ public:
      * @brief Copy-constructor. Copy the operator attributes and its output tensor(s), but not its input tensors (the new operator has no input associated).
      * @param op Operator to copy.
      */
-    TanhClamp_Op(const TanhClamp_Op& op)
-        : OperatorTensor(op)
-    {
-        if (op.mImpl){
-            SET_IMPL_MACRO(TanhClamp_Op, *this, op.backend());
-        }else{
-            mImpl = nullptr;
-        }
-    }
+    TanhClamp_Op(const TanhClamp_Op& op);
 
     /**
      * @brief Clone the operator using its copy-constructor.
      * @see Operator::TanhClamp_Op
      */
-    std::shared_ptr<Operator> clone() const override {
-        return std::make_shared<TanhClamp_Op>(*this);
-    }
+    std::shared_ptr<Operator> clone() const override;
 
     bool forwardDims(bool allowDataDependency = false) override final;
     std::set<std::string> getAvailableBackends() const override final;
@@ -75,9 +63,8 @@ public:
     }
 };
 
-inline std::shared_ptr<Node> TanhClamp(const std::string& name = "") {
-    return std::make_shared<Node>(std::make_shared<TanhClamp_Op>(), name);
-}
-}
+std::shared_ptr<Node> TanhClamp(const std::string& name = "");
+
+} // namespace Aidge
 
-#endif /* AIDGE_CORE_OPERATOR_TANHCLAMP_H_ */
+#endif /* AIDGE_QUANTIZATION_OPERATOR_SAT_TANHCLAMP_H_ */
diff --git a/include/aidge/quantization/PTQ/CLE.hpp b/include/aidge/quantization/PTQ/CLE.hpp
index 77eaf7f..f4dc073 100644
--- a/include/aidge/quantization/PTQ/CLE.hpp
+++ b/include/aidge/quantization/PTQ/CLE.hpp
@@ -9,29 +9,33 @@
  *
 ********************************************************************************/
 
-#ifndef AIDGE_QUANTIZATION_PTQ_CLE_H_
-#define AIDGE_QUANTIZATION_PTQ_CLE_H_
+#ifndef AIDGE_QUANTIZATION_QUANTIZATION_PTQ_CLE_H_
+#define AIDGE_QUANTIZATION_QUANTIZATION_PTQ_CLE_H_
 
-//#include <cstdint>
-//#include <map>
-//#include <memory>
-//#include <string>
-//#include <vector>
+#include <memory>
 
-#include "aidge/data/Tensor.hpp"
 #include "aidge/graph/GraphView.hpp"
 
 namespace Aidge
 {
 
     /**
-     * @brief Equalize the ranges of the nodes parameters by proceding iteratively.
-     * Can only be applied to single branch networks (otherwise does not edit the graphView).
+     * @brief Equalize the ranges of the nodes parameters by proceeding iteratively.
+     * Can only be applied to single branch networks (otherwise does not edit the GraphView).
+     *
+     * Cross Layer Equalization (CLE) is used to balance the weights between consecutive
+     * layers to improve quantization performance. It works by iteratively scaling weights
+     * and biases of adjacent layers while preserving the overall function of the network.
+     *
+     * @note The operation modifies weights and biases in-place but preserves the mathematical
+     * function computed by the network.
+     *
      * @param graphView The GraphView to process.
-     * @param targetDelta the stopping criterion (typical value : 0.01)
+     * @param targetDelta the stopping criterion (typical value : 0.01). Smaller values lead
+     * to more precise equalization but may require more iterations.
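+     *
+     * Illustrative use (assuming an already imported, single-branch model):
+     *   crossLayerEqualization(graphView);         // default targetDelta of 0.01
+     *   crossLayerEqualization(graphView, 0.001);  // tighter equalization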
     */
    void crossLayerEqualization(std::shared_ptr<GraphView> graphView, double targetDelta = 0.01);
 
-}
+} // namespace Aidge
 
-#endif /* AIDGE_QUANTIZATION_PTQ_CLE_H_ */
\ No newline at end of file
+#endif /* AIDGE_QUANTIZATION_QUANTIZATION_PTQ_CLE_H_ */
diff --git a/include/aidge/quantization/PTQ/Clipping.hpp b/include/aidge/quantization/PTQ/Clipping.hpp
index d0622f4..3f65c42 100644
--- a/include/aidge/quantization/PTQ/Clipping.hpp
+++ b/include/aidge/quantization/PTQ/Clipping.hpp
@@ -9,14 +9,14 @@
  *
 ********************************************************************************/
 
-#ifndef AIDGE_QUANTIZATION_PTQ_CLIP_H_
-#define AIDGE_QUANTIZATION_PTQ_CLIP_H_
+#ifndef AIDGE_QUANTIZATION_QUANTIZATION_PTQ_CLIP_H_
+#define AIDGE_QUANTIZATION_QUANTIZATION_PTQ_CLIP_H_
 
-//#include <cstdint>
-//#include <map>
-//#include <memory>
-//#include <string>
-//#include <vector>
+#include <cstdint>  // std::uint8_t
+#include <map>
+#include <memory>
+#include <string>
+#include <vector>
 
 #include "aidge/data/Tensor.hpp"
 #include "aidge/graph/GraphView.hpp"
@@ -56,9 +56,9 @@ namespace Aidge
    double computeKLClipping(std::vector<int> histogram, std::uint8_t nbBits);
 
    /**
-     * @brief Return a corrected map of the provided activation ranges.
-     * To do so compute the optimal clipping values for every node and multiply the input ranges by those values.
-     * The method used to compute the clippings can be eihter 'MSE', 'AA', 'KL' or 'MAX'.
+     * @brief Return a corrected map of the provided activation ranges.
+     * To do so compute the optimal clipping values for every node and multiply the input ranges by those values.
+     * The method used to compute the clippings can be either 'MSE', 'AA', 'KL' or 'MAX'.
     * @param clippingMode The method used to compute the optimal clippings.
     * @param valueRanges The map associating each affine node to its output range.
     * @param nbBits The quantization number of bits.
@@ -71,5 +71,5 @@
 
 }
 
-#endif /* AIDGE_QUANTIZATION_PTQ_CLIP_H_ */
+#endif /* AIDGE_QUANTIZATION_QUANTIZATION_PTQ_CLIP_H_ */
diff --git a/include/aidge/quantization/PTQ/PTQ.hpp b/include/aidge/quantization/PTQ/PTQ.hpp
index d2b8b7f..4fc38bc 100644
--- a/include/aidge/quantization/PTQ/PTQ.hpp
+++ b/include/aidge/quantization/PTQ/PTQ.hpp
@@ -9,16 +9,19 @@
  *
 ********************************************************************************/
 
-#ifndef AIDGE_QUANTIZATION_PTQ_PTQ_H_
-#define AIDGE_QUANTIZATION_PTQ_PTQ_H_
+#ifndef AIDGE_QUANTIZATION_QUANTIZATION_PTQ_PTQ_H_
+#define AIDGE_QUANTIZATION_QUANTIZATION_PTQ_PTQ_H_
 
-//#include <cstdint>
-//#include <map>
-//#include <memory>
-//#include <string>
-//#include <vector>
+#include <cstdint>  // std::uint8_t
+#include <map>
+#include <memory>
+#include <set>
+#include <string>
+#include <utility>  // std::pair
+#include <vector>
 
 #include "aidge/data/Tensor.hpp"
+#include "aidge/quantization/PTQ/Clipping.hpp"
 #include "aidge/graph/GraphView.hpp"
 
 namespace Aidge {
@@ -104,12 +107,12 @@
     * @brief Normalize the activations of each affine node so that they fit in the [-1:1] range.
     * This is done by reconfiguring the scaling nodes, as well as rescaling the weights and biases tensors.
     * @param graphView The GraphView containing the affine nodes.
-     * @param valueRanges The node output value ranges computed over the calibration dataset.
+     * @param valueRanges The node output value ranges computed over the calibration dataset.
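+     *
+     * Roughly speaking, dividing each node's output by its calibrated range r
+     * brings its activations into [-1, 1]; the scaling nodes, weights and biases
+     * are adjusted together so that the overall network function is preserved.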
*/ void normalizeActivations(std::shared_ptr<GraphView> graphView, std::map<std::string, double> valueRanges); /** - * @brief For each node, compute the sign of its input and output values. + * @brief For each node, compute the sign of its input and output values. * The goal of the routine is to maximize the number of unsigned IOs in order to double the value resolution when possible. * @param graphView The GraphView to analyze. * @param verbose Whether to print the sign map or not. @@ -135,7 +138,7 @@ namespace Aidge { * @param clippingMode: Type of the clipping optimization. Can be either 'MAX', 'MSE', 'AA' or 'KL'. * @param applyRounding Whether to apply the rounding operations or not. * @param optimizeSigns Whether to take account of the IO signs of the operators or not. - * @param singleShift Whether to convert the scaling factors into powers of two. If true the approximations are compensated using the previous nodes weights. + * @param singleShift Whether to convert the scaling factors into powers of two. If true the approximations are compensated using the previous nodes weights. * @param verbose Whether to print internal informations about the quantization process. */ void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits, std::vector<std::shared_ptr<Tensor>> inputDataSet, Clipping clippingMode, bool applyRounding, bool optimizeSigns, bool singleShift, bool useCuda, bool verbose); @@ -157,8 +160,8 @@ namespace Aidge { * @brief Developement and test routine. * @param graphView The GraphView under test. */ - void devPTQ(std::shared_ptr<GraphView> graphView); + void devPTQ(std::shared_ptr<GraphView> graphView); } -#endif /* AIDGE_QUANTIZATION_PTQ_PTQ_H_ */ +#endif /* AIDGE_QUANTIZATION_QUANTIZATION_PTQ_PTQ_H_ */ diff --git a/include/aidge/quantization/PTQ/PTQMetaOps.hpp b/include/aidge/quantization/PTQ/PTQMetaOps.hpp index 62fac87..b9bad0d 100644 --- a/include/aidge/quantization/PTQ/PTQMetaOps.hpp +++ b/include/aidge/quantization/PTQ/PTQMetaOps.hpp @@ -8,22 +8,14 @@ * SPDX-License-Identifier: EPL-2.0 * ********************************************************************************/ -#ifndef AIDGE_QUANTIZATION_PTQ_PTQMETAOPS_H_ -#define AIDGE_QUANTIZATION_PTQ_PTQMETAOPS_H_ +#ifndef AIDGE_QUANTIZATION_QUANTIZATION_PTQ_PTQMETAOPS_H_ +#define AIDGE_QUANTIZATION_QUANTIZATION_PTQ_PTQMETAOPS_H_ -#include <array> #include <memory> #include <string> -#include <utility> - -#include "aidge/operator/Clip.hpp" -#include "aidge/operator/Mul.hpp" -#include "aidge/operator/Round.hpp" #include "aidge/graph/GraphView.hpp" #include "aidge/graph/Node.hpp" -#include "aidge/graph/OpArgs.hpp" // Sequential -#include "aidge/operator/MetaOperator.hpp" namespace Aidge { @@ -55,7 +47,7 @@ std::shared_ptr<Aidge::Node> Scaling(double scalingFactor, const std::string& na void updateScalingFactor(std::shared_ptr<Aidge::Node> MetaOpNode, double newScalingFactor); /// @brief Retrieves the current scaling factor of a PTQ meta-operator node. -/// This function returns the scaling factor associated with the specified PTQ meta-operator node, +/// This function returns the scaling factor associated with the specified PTQ meta-operator node, /// allowing inspection of the current scalar applied in the [Mul] operation. /// /// @param MetaOpNode A shared pointer to the PTQ meta-operator node whose scaling factor is being queried. 
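 ///
 /// Illustrative use on a PTQ meta-operator node (e.g. one built by Scaling() or Quantizer()):
 ///   double s = getScalingFactor(metaOpNode);
 ///   updateScalingFactor(metaOpNode, s * 0.5);  // halve the applied scalar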
@@ -66,7 +58,7 @@ double getScalingFactor(std::shared_ptr<Aidge::Node> MetaOpNode); /// This function modifies the clip range of a Quantizer node, allowing adjustment of the range within which values are clipped /// in the [Clip] operation of the Quantizer sequence. /// -/// @param QuantizerNode A shared pointer to the Quantizer node whose clip range is being set. +/// @param QuantizerNode A shared pointer to the Quantizer node whose clip range is being set. /// This node should have been created using the Quantizer function. /// @param min The minimum value for the clip range. Values below this will be clipped to this minimum. /// @param max The maximum value for the clip range. Values above this will be clipped to this maximum. @@ -75,4 +67,4 @@ void setClipRange(std::shared_ptr<Aidge::Node> QuantizerNode, double min, double } -#endif /* AIDGE_QUANTIZATION_PTQ_PTQMETAOPS_H_ */ +#endif /* AIDGE_QUANTIZATION_QUANTIZATION_PTQ_PTQMETAOPS_H_ */ diff --git a/include/aidge/quantization/QAT/QAT_FixedQ.hpp b/include/aidge/quantization/QAT/QAT_FixedQ.hpp index ecbe742..6a2aa24 100644 --- a/include/aidge/quantization/QAT/QAT_FixedQ.hpp +++ b/include/aidge/quantization/QAT/QAT_FixedQ.hpp @@ -9,8 +9,10 @@ * ********************************************************************************/ -#ifndef AIDGE_QUANTIZATION_QAT_FIXEDQ_H_ -#define AIDGE_QUANTIZATION_QAT_FIXEDQ_H_ +#ifndef AIDGE_QUANTIZATION_QUANTIZATION_QAT_FIXEDQ_H_ +#define AIDGE_QUANTIZATION_QUANTIZATION_QAT_FIXEDQ_H_ + +#include <memory> #include "aidge/graph/Node.hpp" #include "aidge/graph/GraphView.hpp" @@ -41,10 +43,10 @@ void insertAndInitQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits * @brief Developement and test routine. * @param graphView The GraphView under test. */ -void devQAT(std::shared_ptr<GraphView> graphView); +void devQAT(std::shared_ptr<GraphView> graphView); } } -#endif /* AIDGE_QUANTIZATION_QAT_FIXEDQ_H_ */ +#endif /* AIDGE_QUANTIZATION_QUANTIZATION_QAT_FIXEDQ_H_ */ diff --git a/include/aidge/quantization/QAT/QAT_LSQ.hpp b/include/aidge/quantization/QAT/QAT_LSQ.hpp index 4970be0..a44c71b 100644 --- a/include/aidge/quantization/QAT/QAT_LSQ.hpp +++ b/include/aidge/quantization/QAT/QAT_LSQ.hpp @@ -9,12 +9,14 @@ * ********************************************************************************/ -#ifndef AIDGE_QUANTIZATION_QAT_LSQ_H_ -#define AIDGE_QUANTIZATION_QAT_LSQ_H_ +#ifndef AIDGE_QUANTIZATION_QUANTIZATION_QAT_LSQ_H_ +#define AIDGE_QUANTIZATION_QUANTIZATION_QAT_LSQ_H_ + +#include <cstddef> // std::size_t +#include <memory> -#include "aidge/graph/Node.hpp" -#include "aidge/graph/GraphView.hpp" #include "aidge/data/Tensor.hpp" +#include "aidge/graph/GraphView.hpp" namespace Aidge { namespace QuantLSQ { @@ -25,7 +27,7 @@ namespace QuantLSQ { * @param nbBits Number of quantization bits. * @param span Fixed output span of the quantizers. */ -void insertQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits, float step_size); +void insertQuantizers(std::shared_ptr<GraphView> graphView, std::size_t nbBits, float step_size); /** * @brief Given a GraphView with parameters properly initialized and some calibration data, @@ -35,10 +37,10 @@ void insertQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits, float * @param calibrationData Calibration data used to adjust the spans. * @param scale Multiplicative constant applied to the spans. 
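 *
 * Illustrative call, assuming an 8-bit setup and a calibration tensor `calib`:
 *   QuantLSQ::insertAndInitQuantizers(graphView, 8, calib);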
*/ -void insertAndInitQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits, std::shared_ptr<Tensor> calibrationData); +void insertAndInitQuantizers(std::shared_ptr<GraphView> graphView, std::size_t nbBits, std::shared_ptr<Tensor> calibrationData); -} -} +} // namespace QuantLSQ +} // namespace Aidge -#endif /* AIDGE_QUANTIZATION_QAT_LSQ_H_ */ +#endif /* AIDGE_QUANTIZATION_QUANTIZATION_QAT_LSQ_H_ */ diff --git a/src/PTQ/CLE.cpp b/src/PTQ/CLE.cpp index 2c81815..5265d9c 100644 --- a/src/PTQ/CLE.cpp +++ b/src/PTQ/CLE.cpp @@ -10,14 +10,19 @@ ********************************************************************************/ #include "aidge/quantization/PTQ/CLE.hpp" + +#include <cmath> // std::abs, std::fabs, std::sqrt +#include <cstddef> // std::size_t +#include <memory> +#include <vector> + #include "aidge/quantization/PTQ/Clipping.hpp" -#include "aidge/quantization/PTQ/PTQ.hpp" +#include "aidge/quantization/PTQ/PTQ.hpp" // retrieveNodeVector #include "aidge/graph/GraphView.hpp" -#include "aidge/scheduler/SequentialScheduler.hpp" -#include "aidge/scheduler/Scheduler.hpp" -#include "aidge/utils/Log.hpp" +#include "aidge/graph/Node.hpp" #include "aidge/operator/OperatorTensor.hpp" +#include "aidge/utils/Log.hpp" namespace Aidge { @@ -42,13 +47,13 @@ static void rescaleTensor(std::shared_ptr<Tensor> tensor, double scaling) castedTensor[i] *= scaling; } -static double getTensorAbsoluteMax(std::shared_ptr <Tensor> tensor) +static double getTensorAbsoluteMax(std::shared_ptr<Tensor> tensor) { // Get the tensor data pointer and edit it double * castedTensor = static_cast<double*> (tensor->getImpl()->rawPtr()); // Get the tensor absolute max value - double maxValue = 0.0f; + double maxValue = 0.0; for(std::size_t i = 0; i < tensor->size(); ++i) { if(std::fabs(castedTensor[i]) > maxValue) { maxValue = std::fabs(castedTensor[i]); @@ -62,15 +67,14 @@ void crossLayerEqualization(std::shared_ptr<GraphView> graphView, double targetD std::vector<std::shared_ptr<Node>> nodeVector = retrieveNodeVector(graphView); // Check if the CLE can be applied ... - for (std::shared_ptr<Node> node : nodeVector) if (node->getChildren().size() > 1) { - Log::info(" Network have multiple branches, skipping the CLE ... "); + Log::notice("Network have multiple branches, skipping the CLE ... "); return; - } + } - Log::info(" Applying the Cross-Layer Equalization ... "); + Log::info("Applying the Cross-Layer Equalization ... "); // Get the vector of affine nodes @@ -79,17 +83,22 @@ void crossLayerEqualization(std::shared_ptr<GraphView> graphView, double targetD if (isAffine(node)) affineNodeVector.push_back(node); + if (affineNodeVector.empty()) { + Log::notice("No affine nodes found in the network. CLE cannot be applied."); + return; + } double maxRangeDelta; + int iteration = 0; - do + do { + ++iteration; maxRangeDelta = 0.0; - //std::cout << " ----- " << std::endl; //for (std::shared_ptr<Node> node : affineNodeVector) // std::cout << getTensorAbsoluteMax(getWeightTensor(node)) << std::endl; - - for (size_t i = 0; i < (affineNodeVector.size() - 1); i++) + + for (std::size_t i = 0; i < (affineNodeVector.size() - 1); i++) { std::shared_ptr<Node> n1 = affineNodeVector[i]; std::shared_ptr<Node> n2 = affineNodeVector[i+1]; @@ -111,6 +120,9 @@ void crossLayerEqualization(std::shared_ptr<GraphView> graphView, double targetD } } while (maxRangeDelta > targetDelta); + + Log::notice("CLE completed after {} iterations. 
Final max range delta: {:.6f}", + iteration, maxRangeDelta); } } \ No newline at end of file diff --git a/src/PTQ/PTQMetaOps.cpp b/src/PTQ/PTQMetaOps.cpp index 527d853..77018c2 100644 --- a/src/PTQ/PTQMetaOps.cpp +++ b/src/PTQ/PTQMetaOps.cpp @@ -11,8 +11,8 @@ #include "aidge/quantization/PTQ/PTQMetaOps.hpp" -#include <array> #include <memory> +#include <string> #include <utility> //Operator @@ -32,7 +32,7 @@ #include "aidge/utils/Log.hpp" -namespace Aidge +namespace Aidge { std::shared_ptr<Node> Quantizer(double scalingFactor, double clipMin, double clipMax, const std::string& name) @@ -46,19 +46,19 @@ std::shared_ptr<Node> Quantizer(double scalingFactor, double clipMin, double cli // connect the scaling factor producer std::shared_ptr<Tensor> scalingFactorTensor = std::make_shared<Tensor>(Array1D<double, 1> {scalingFactor}); - std::shared_ptr<Node> scalingFactorProducer = addProducer<1>(mulNode, 1, {1}, "ScalingFactor"); + std::shared_ptr<Node> scalingFactorProducer = addProducer<1>(mulNode, 1, {1}, "ScalingFactor"); scalingFactorProducer->getOperator()->setOutput(0, scalingFactorTensor); - + // create the metaop graph std::shared_ptr<GraphView> graphView = Sequential({mulNode, roundNode, clipNode}); std::shared_ptr<GraphView> connectedGraphView = getConnectedGraphView(mulNode); // XXX why not use the graphView ??? - // return the metaop + // return the metaop std::shared_ptr<Node> metaopNode = MetaOperator("Quantizer", connectedGraphView, {}, name); // XXX alternative prototype - return metaopNode; + return metaopNode; } std::shared_ptr<Node> Scaling(double scalingFactor, const std::string& name) @@ -67,7 +67,7 @@ std::shared_ptr<Node> Scaling(double scalingFactor, const std::string& name) std::shared_ptr<Node> mulNode = Mul((!name.empty()) ? name + "_Scaling" : ""); - std::shared_ptr<Node> scalingFactorProducer = addProducer<1>(mulNode, 1, {1}, "ScalingFactor"); + std::shared_ptr<Node> scalingFactorProducer = addProducer<1>(mulNode, 1, {1}, "ScalingFactor"); scalingFactorProducer->getOperator()->setOutput(0, scalingFactorTensor); std::shared_ptr<GraphView> graphView = Sequential({mulNode}); @@ -96,7 +96,7 @@ void updateScalingFactor(std::shared_ptr<Node> metaOpNode, double scalingFactor) std::shared_ptr<Tensor> scalingFactorTensor = std::make_shared<Tensor>(Array1D<double, 1> {scalingFactor}); std::shared_ptr<MetaOperator_Op> metaOp = std::static_pointer_cast<MetaOperator_Op>(metaOpNode->getOperator()); - + std::shared_ptr<Node> mulNode = getSubNode(metaOp->getMicroGraph(), "Mul"); if (!mulNode) @@ -113,7 +113,7 @@ double getScalingFactor(std::shared_ptr<Node> MetaOpNode) } std::shared_ptr<MetaOperator_Op> metaOp = std::static_pointer_cast<MetaOperator_Op>(MetaOpNode->getOperator()); - + std::shared_ptr<Node> mulNode = getSubNode(metaOp->getMicroGraph(), "Mul"); if (!mulNode) { @@ -123,8 +123,8 @@ double getScalingFactor(std::shared_ptr<Node> MetaOpNode) auto scalingFactorTensor = std::static_pointer_cast<OperatorTensor>(mulNode->getOperator())->getInput(1); std::shared_ptr<Tensor> fallback; - const Tensor& localTensor = scalingFactorTensor->refCastFrom(fallback, DataType::Float64, "cpu"); - + const Tensor& localTensor = scalingFactorTensor->refCastFrom(fallback, DataType::Float64, "cpu"); + return localTensor.get<double>(0); } diff --git a/src/operator/FixedQ.cpp b/src/operator/FixedQ.cpp index 8791740..9828ce9 100644 --- a/src/operator/FixedQ.cpp +++ b/src/operator/FixedQ.cpp @@ -20,6 +20,17 @@ const std::string Aidge::FixedQ_Op::Type = "FixedQ"; +Aidge::FixedQ_Op::FixedQ_Op(const 
Aidge::FixedQ_Op& op) + : OperatorTensor(op), + mAttributes(op.mAttributes) +{ + if (op.mImpl){ + SET_IMPL_MACRO(FixedQ_Op, *this, op.backend()); + }else{ + mImpl = nullptr; + } +} + std::set<std::string> Aidge::FixedQ_Op::getAvailableBackends() const { return Registrar<FixedQ_Op>::getKeys(); } @@ -28,3 +39,12 @@ void Aidge::FixedQ_Op::setBackend(const std::string& name, DeviceIdx_t device) { SET_IMPL_MACRO(FixedQ_Op, *this, name); mOutputs[0]->setBackend(name, device); } + +//////////////////////////////////////////////////////////////////////////////// + +std::shared_ptr<Aidge::Node> Aidge::FixedQ(std::size_t nbBits, + float span, + bool isOutputUnsigned, + const std::string& name) { + return std::make_shared<Node>(std::make_shared<FixedQ_Op>(nbBits, span, isOutputUnsigned), name); +} \ No newline at end of file diff --git a/src/operator/SAT/DoReFa.cpp b/src/operator/SAT/DoReFa.cpp index b6124ba..426e330 100644 --- a/src/operator/SAT/DoReFa.cpp +++ b/src/operator/SAT/DoReFa.cpp @@ -17,13 +17,38 @@ #include "aidge/data/Tensor.hpp" #include "aidge/utils/Types.h" -const std::string Aidge::DoReFa_Op::Type = "DoReFa"; +namespace Aidge { -std::set<std::string> Aidge::DoReFa_Op::getAvailableBackends() const { +const std::string DoReFa_Op::Type = "DoReFa"; + +DoReFa_Op::DoReFa_Op(const DoReFa_Op& op) + : OperatorTensor(op), + mAttributes(op.mAttributes) +{ + if (op.mImpl) { + SET_IMPL_MACRO(DoReFa_Op, *this, op.backend()); + } else { + mImpl = nullptr; + } +} + +std::shared_ptr<Operator> DoReFa_Op::clone() const { + return std::make_shared<DoReFa_Op>(*this); +} + +std::set<std::string> DoReFa_Op::getAvailableBackends() const { return Registrar<DoReFa_Op>::getKeys(); } -void Aidge::DoReFa_Op::setBackend(const std::string& name, DeviceIdx_t device) { +void DoReFa_Op::setBackend(const std::string& name, DeviceIdx_t device) { SET_IMPL_MACRO(DoReFa_Op, *this, name); mOutputs[0]->setBackend(name, device); -} \ No newline at end of file +} + +//////////////////////////////////////////////////////////////////////////////// + +std::shared_ptr<Node> DoReFa(size_t range, DoReFaMode mode, const std::string& name) { + return std::make_shared<Node>(std::make_shared<DoReFa_Op>(range, mode), name); +} + +} // namespace Aidge \ No newline at end of file diff --git a/src/operator/SAT/TanhClamp.cpp b/src/operator/SAT/TanhClamp.cpp index 2b8d63d..a03fc7d 100644 --- a/src/operator/SAT/TanhClamp.cpp +++ b/src/operator/SAT/TanhClamp.cpp @@ -20,6 +20,20 @@ const std::string Aidge::TanhClamp_Op::Type = "TanhClamp"; +Aidge::TanhClamp_Op::TanhClamp_Op(const Aidge::TanhClamp_Op& op) + : OperatorTensor(op) +{ + if (op.mImpl) { + SET_IMPL_MACRO(TanhClamp_Op, *this, op.backend()); + } else { + mImpl = nullptr; + } +} + +std::shared_ptr<Aidge::Operator> Aidge::TanhClamp_Op::clone() const { + return std::make_shared<TanhClamp_Op>(*this); +} + bool Aidge::TanhClamp_Op::forwardDims(bool /*allowDataDependency*/) { if (inputsAssociated()) { @@ -40,5 +54,11 @@ void Aidge::TanhClamp_Op::setBackend(const std::string& name, DeviceIdx_t device mOutputs[0]->setBackend(name, device); // Scale output is always on CPU for now - mOutputs[1]->setBackend("cpu"); // XXX why ? + mOutputs[1]->setBackend("cpu"); // XXX why ? 
+} + +//////////////////////////////////////////////////////////////////////////////// + +std::shared_ptr<Aidge::Node> Aidge::TanhClamp(const std::string& name) { + return std::make_shared<Node>(std::make_shared<TanhClamp_Op>(), name); } \ No newline at end of file -- GitLab From eba01977f3e1fde1bfe162310e981e5b87f6da7f Mon Sep 17 00:00:00 2001 From: NAUD Maxence <maxence.naud@cea.fr> Date: Wed, 29 Jan 2025 22:26:39 +0000 Subject: [PATCH 30/60] Change Python minimum version 3.7 -> 3.8 --- pyproject.toml | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index deb91c7..c7cd4c2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ description="Quantization algorithms to compress aidge networks." dependencies = [ "numpy>=1.21.6", ] -requires-python = ">= 3.7" +requires-python = ">= 3.8" readme = "README.md" license = { file = "LICENSE" } classifiers = [ @@ -56,6 +56,19 @@ test-command = "pytest {package}/aidge_quantization/unit_tests" # "cp39-win_amd64", # "cp310-win_amd64", # ] +# PYLINT +[tool.pylint.main] +# A comma-separated list of package or module names from where C extensions may +# be loaded. Extensions are loading into the active Python interpreter and may +# run arbitrary code. +extension-pkg-allow-list = ["aidge_core", "aidge_backend_cpu", "aidge_quantization", "onnx"] +# Files or directories to be skipped. They should be base names, not paths. +ignore = ["CVS"] +# List of module names for which member attributes should not be checked (useful +# for modules/projects where namespaces are manipulated during runtime and thus +# existing member attributes cannot be deduced by static analysis). It supports +# qualified module names, as well as Unix pattern matching. +ignored-modules = ["aidge_core", "aidge_backend_cpu", "aidge_quantization", "onnx"] ## AIDGE DEPENDENCIES DECLARATION [tool.cibuildwheel.environment] AIDGE_DEPENDENCIES = "aidge_core aidge_backend_cpu aidge_onnx" # format => "dep_1 dep_2 ... 
dep_n" -- GitLab From 6109a9d6eeb5d025da27cb56c1e2927e3b2add59 Mon Sep 17 00:00:00 2001 From: NAUD Maxence <maxence.naud@cea.fr> Date: Wed, 29 Jan 2025 22:27:21 +0000 Subject: [PATCH 31/60] UPD: 'setup.py' to access compilation options from environment variables set by 'setup.sh' --- setup.py | 55 ++++++++++++++++++++++++++++++++++++------------------- 1 file changed, 36 insertions(+), 19 deletions(-) diff --git a/setup.py b/setup.py index 8774d01..1bfc0ac 100644 --- a/setup.py +++ b/setup.py @@ -37,6 +37,7 @@ class AidgePkgBuild(build_ext): # This lists the number of processors available on the machine # The compilation will use half of them max_jobs = str(ceil(multiprocessing.cpu_count() / 2)) + max_jobs = os.environ.get("AIDGE_NB_PROC", max_jobs) cwd = pathlib.Path().absolute() @@ -51,14 +52,20 @@ class AidgePkgBuild(build_ext): package_prefix = build_lib if not self.editable_mode else SETUP_DIR pybind_install_prefix = (package_prefix / PROJECT_NAME).absolute() - os.chdir(str(build_temp)) - - compile_type = os.environ.get("AIDGE_PYTHON_BUILD_TYPE", "Release") install_path = ( os.path.join(sys.prefix, "lib", "libAidge") if "AIDGE_INSTALL" not in os.environ else os.environ["AIDGE_INSTALL"] ) + + # Read environment variables for CMake options + c_compiler = os.environ.get("AIDGE_C_COMPILER", "gcc") + cxx_compiler = os.environ.get("AIDGE_CXX_COMPILER", "g++") + build_type = os.environ.get("AIDGE_BUILD_TYPE", "Release") + asan = os.environ.get("AIDGE_ASAN", "OFF") + with_cuda = os.environ.get("AIDGE_WITH_CUDA", "OFF") + cmake_arch = os.environ.get("AIDGE_CMAKE_ARCH", "") + build_gen = os.environ.get("AIDGE_BUILD_GEN", "") build_gen_opts = ( ["-G", build_gen] @@ -67,26 +74,36 @@ class AidgePkgBuild(build_ext): ) test_onoff = os.environ.get("AIDGE_BUILD_TEST", "OFF") - self.spawn( - [ - "cmake", - *build_gen_opts, - str(cwd), - f"-DTEST={test_onoff}", - f"-DCMAKE_INSTALL_PREFIX:PATH={install_path}", - f"-DCMAKE_BUILD_TYPE={compile_type}", - "-DPYBIND=ON", - f"-DPYBIND_INSTALL_PREFIX:PATH={pybind_install_prefix}", - "-DCMAKE_EXPORT_COMPILE_COMMANDS=ON", - "-DCOVERAGE=OFF", - ] - ) + os.chdir(str(build_temp)) + + cmake_cmd = [ + "cmake", + *build_gen_opts, + str(cwd), + f"-DTEST={test_onoff}", + f"-DCMAKE_INSTALL_PREFIX:PATH={install_path}", + f"-DCMAKE_BUILD_TYPE={build_type}", + f"-DCMAKE_C_COMPILER={c_compiler}", + f"-DCMAKE_CXX_COMPILER={cxx_compiler}", + f"-DENABLE_ASAN={asan}", + f"-DCUDA={with_cuda}", + "-DPYBIND=ON", + f"-DPYBIND_INSTALL_PREFIX:PATH={pybind_install_prefix}", + "-DCMAKE_EXPORT_COMPILE_COMMANDS=1", + "-DCOVERAGE=OFF", + ] + + # Append architecture-specific arguments if provided + if cmake_arch: + cmake_cmd.append(cmake_arch) + + self.spawn(cmake_cmd) if not self.dry_run: self.spawn( - ["cmake", "--build", ".", "--config", compile_type, "-j", max_jobs] + ["cmake", "--build", ".", "--config", build_type, "-j", max_jobs] ) - self.spawn(["cmake", "--install", ".", "--config", compile_type]) + self.spawn(["cmake", "--install", ".", "--config", build_type]) os.chdir(str(cwd)) -- GitLab From e5f28102bedaf39193b95a172891a782d5977330 Mon Sep 17 00:00:00 2001 From: NAUD Maxence <maxence.naud@cea.fr> Date: Wed, 29 Jan 2025 23:48:19 +0000 Subject: [PATCH 32/60] FEAT: unit-tests/CMakeLists.txt add minimum version for Catch2 --- unit_tests/CMakeLists.txt | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/unit_tests/CMakeLists.txt b/unit_tests/CMakeLists.txt index 9d9f815..cfdbf0a 100644 --- a/unit_tests/CMakeLists.txt +++ 
b/unit_tests/CMakeLists.txt
@@ -1,12 +1,23 @@
-Include(FetchContent)
+# Catch2 configuration
+set(CATCH2_MIN_VERSION 3.3.0)

-FetchContent_Declare(
-    Catch2
-    GIT_REPOSITORY https://github.com/catchorg/Catch2.git
-    GIT_TAG        v3.0.1 # or a later release
-)
+# Try to find a system-installed Catch2
+find_package(Catch2 ${CATCH2_MIN_VERSION} QUIET)

-FetchContent_MakeAvailable(Catch2)
+if(NOT Catch2_FOUND)
+    message(STATUS "Catch2 not found in system, retrieving from git")
+    Include(FetchContent)
+
+    FetchContent_Declare(
+        Catch2
+        GIT_REPOSITORY https://github.com/catchorg/Catch2.git
+        GIT_TAG        devel # development branch, not a tagged release
+    )
+    FetchContent_MakeAvailable(Catch2)
+    message(STATUS "Fetched Catch2 version ${Catch2_VERSION}")
+else()
+    message(STATUS "Using system Catch2 version ${Catch2_VERSION}")
+endif()

 file(GLOB_RECURSE src_files "*.cpp")

-- GitLab

From ec304b0c321ad42c3856f9097ec407dc7e6d8877 Mon Sep 17 00:00:00 2001
From: NAUD Maxence <maxence.naud@cea.fr>
Date: Wed, 29 Jan 2025 23:50:48 +0000
Subject: [PATCH 33/60] UPD: CMakeLists.txt enforce C++14 and try to reorder sections

---
 CMakeLists.txt | 172 +++++++++++++++++++++++++------------------------
 1 file changed, 89 insertions(+), 83 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 80c5ae7..b3c6d45 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,15 +1,22 @@
 # CMake >= 3.18 is required for good support of FindCUDAToolkit
-cmake_minimum_required(VERSION 3.18) # XXX 3.18
-set(CXX_STANDARD 14)
+cmake_minimum_required(VERSION 3.18)

-file(STRINGS "${CMAKE_SOURCE_DIR}/version.txt" version)
+set(CMAKE_CXX_STANDARD 14)
+set(CMAKE_CXX_STANDARD_REQUIRED ON)
+set(CMAKE_CXX_EXTENSIONS OFF)
+
+# Read project metadata
 file(STRINGS "${CMAKE_SOURCE_DIR}/project_name.txt" project)
+message(STATUS "Project name: ${project}")

+file(STRINGS "${CMAKE_SOURCE_DIR}/version.txt" version)
 # Parse version.txt to retrieve Major, Minor and Patch
 string(REGEX MATCH "([0-9]+)\\.([0-9]+)\\.([0-9]+)" _ MATCHES ${version})
 set(PROJECT_VERSION_MAJOR ${CMAKE_MATCH_1})
 set(PROJECT_VERSION_MINOR ${CMAKE_MATCH_2})
 set(PROJECT_VERSION_PATCH ${CMAKE_MATCH_3})
+message(STATUS "Project version: ${version}")
+
 # Retrieve latest git commit
 execute_process(
@@ -19,17 +26,25 @@ execute_process(
     OUTPUT_STRIP_TRAILING_WHITESPACE
     ERROR_QUIET
 )
-
-message(STATUS "Project name: ${project}")
-message(STATUS "Project version: ${version}")
 message(STATUS "Latest git commit: ${GIT_COMMIT_HASH}")
-message(STATUS "Creating ${CMAKE_CURRENT_SOURCE_DIR}/include/aidge/quantization_version.h")

 project(${project}
         VERSION ${version}
         DESCRIPTION "Quantization methods for the Aidge framework."
         LANGUAGES CXX)
+
+if(NOT $ENV{AIDGE_INSTALL} STREQUAL "")
+    set(CMAKE_INSTALL_PREFIX $ENV{AIDGE_INSTALL})
+    list(APPEND CMAKE_PREFIX_PATH $ENV{AIDGE_INSTALL})
+    message(WARNING "Env var AIDGE_INSTALL detected : $ENV{AIDGE_INSTALL}. Set CMAKE_INSTALL_PREFIX to AIDGE_INSTALL & added to CMAKE_PREFIX_PATH"
+            "\n\tCMAKE_INSTALL_PREFIX = ${CMAKE_INSTALL_PREFIX}"
+            "\n\tCMAKE_PREFIX_PATH = ${CMAKE_PREFIX_PATH}")
+endif()
+
+message(STATUS "Creating ${CMAKE_CURRENT_SOURCE_DIR}/include/aidge/quantization_version.h")
+
+
 # Note: Using configure_file later in the code makes the version variables get lost...
 # I tried to set them in the internal cache, but it failed.
 # Current code is working, but there might be a scope issue.
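 # (One possible workaround, sketched here but not tried: re-read version.txt
 #  right before the configure_file() call below, so the variables cannot be
 #  lost to scoping, e.g.:
 #    file(STRINGS "${CMAKE_SOURCE_DIR}/version.txt" version)
 #    string(REGEX MATCH "([0-9]+)\\.([0-9]+)\\.([0-9]+)" _ MATCHES ${version})
 #  )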
@@ -39,21 +54,12 @@ configure_file( "${CMAKE_CURRENT_SOURCE_DIR}/include/aidge/quantization_version.h" ) -# Note : project name is {project} and python module name is also {project} -set(module_name _${project}) # target name -set(pybind_module_name ${CMAKE_PROJECT_NAME}) # name of submodule for python bindings - -set(CXX_STANDARD 14) - -############################################## -# Import utils CMakeLists -set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake") ############################################## # Define options -option(PYBIND "python binding" ON) +option(PYBIND "python binding" OFF) option(WERROR "Warning as error" OFF) -option(TEST "Enable tests" ON) +option(TEST "Enable tests" OFF) option(COVERAGE "Enable coverage" OFF) option(CUDA "Enable CUDA backend" OFF) # XXX OFF option(ENABLE_ASAN "Enable ASan (AddressSanitizer) for runtime analysis of memory use (over/underflow, memory leak, ...)" OFF) @@ -61,74 +67,55 @@ option(ENABLE_ASAN "Enable ASan (AddressSanitizer) for runtime analysis of memor ############################################## # Import utils CMakeLists set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake") -include(PybindModuleCreation) if(CMAKE_COMPILER_IS_GNUCXX AND COVERAGE) Include(CodeCoverage) endif() +# Set variables if(CUDA) enable_language(CUDA) - message(STATUS "Cuda compiler version = ${CMAKE_CUDA_COMPILER_VERSION}") # Define a preprocessor macro with the Cuda compiler version add_definitions(-DCUDA_COMPILER_VERSION="${CMAKE_CUDA_COMPILER_VERSION}") endif() -if(NOT $ENV{AIDGE_INSTALL} STREQUAL "") - set(CMAKE_INSTALL_PREFIX $ENV{AIDGE_INSTALL}) - list(APPEND CMAKE_PREFIX_PATH $ENV{AIDGE_INSTALL}) - message(WARNING "Env var AIDGE_INSTALL detected : $ENV{AIDGE_INSTALL}. Set CMAKE_INSTALL_PREFIX to AIDGE_INSTALL & added to CMAKE_PREFIX_PATH" - "\n\tCMAKE_INSTALL_PREFIX = ${CMAKE_INSTALL_PREFIX}" - "\n\tCMAKE_PREFIX_PATH = ${CMAKE_PREFIX_PATH}") +# Source files +if(CUDA) + file(GLOB_RECURSE src_files "src/*.cpp" "src/*.cu") +else() + file(GLOB_RECURSE src_files "src/*.cpp") endif() -# ############################################## -# Find system dependencies +# Header files +file(GLOB_RECURSE inc_files "include/*.hpp") -if(CUDA) - find_package(CUDAToolkit REQUIRED) -endif() +# Note: cxx project name is {CMAKE_PROJECT_NAME} and python module name is also {CMAKE_PROJECT_NAME} +set(module_name _${CMAKE_PROJECT_NAME}) # target name +add_library(${module_name} ${src_files} ${inc_files}) +set(pybind_module_name ${CMAKE_PROJECT_NAME}) # name of submodule for python bindings -############################################## -# Find system dependencies +# Dependencies and linking find_package(aidge_core REQUIRED) find_package(aidge_backend_cpu REQUIRED) +target_link_libraries(${module_name} + PUBLIC + _aidge_core + _aidge_backend_cpu +) if(CUDA) + find_package(CUDAToolkit REQUIRED) find_package(aidge_backend_cuda REQUIRED) -endif() - -############################################## -# Create target and set properties - -if(CUDA) - file(GLOB_RECURSE src_files "src/*.cpp" "src/*.cu") - file(GLOB_RECURSE inc_files "include/*.hpp") - - add_library(${module_name} ${src_files} ${inc_files}) target_link_libraries(${module_name} PUBLIC - _aidge_core # _ is added because we link the target not the project - _aidge_backend_cpu - # _aidge_backend_cuda # XXX CUDA::cudart CUDA::cublas cudnn ) -else() - file(GLOB_RECURSE src_files "src/*.cpp") - file(GLOB_RECURSE inc_files "include/*.hpp") - - add_library(${module_name} ${src_files} 
${inc_files}) - target_link_libraries(${module_name} - PUBLIC - _aidge_core # _ is added because we link the target not the project - _aidge_backend_cpu - ) endif() -#Set target properties +# Include directories target_include_directories(${module_name} PUBLIC $<INSTALL_INTERFACE:include> @@ -137,6 +124,7 @@ target_include_directories(${module_name} ${CMAKE_CURRENT_SOURCE_DIR}/src ) +# Compilation settings if(CUDA) if(NOT DEFINED CMAKE_CUDA_STANDARD) set(CMAKE_CUDA_STANDARD 14) @@ -157,23 +145,44 @@ if (PYBIND) generate_python_binding(${pybind_module_name} ${module_name}) endif() -# XXX HERE !!! -target_link_libraries(${module_name} PRIVATE fmt::fmt) target_compile_features(${module_name} PRIVATE cxx_std_14) +target_link_libraries(${module_name} PRIVATE fmt::fmt) +#################################### +# Compilation options and warnings target_compile_options(${module_name} PRIVATE + # Options for Clang, AppleClang, and GCC compilers $<$<COMPILE_LANGUAGE:CPP>:$<$<OR:$<CXX_COMPILER_ID:Clang>,$<CXX_COMPILER_ID:AppleClang>,$<CXX_COMPILER_ID:GNU>>: - -Wall -Wextra -Wold-style-cast -Winline -pedantic -Werror=narrowing -Wshadow $<$<BOOL:${WERROR}>:-Werror>>>) + -Wall # Enable all warnings + -Wextra # Enable extra warnings + -Wold-style-cast # Warn about C-style casts + -Winline # Warn if inline expansion fails + -pedantic # Enforce strict ISO C++ standards + -Werror=narrowing # Treat narrowing conversions as errors + -Wshadow # Warn about variable shadowing + $<$<BOOL:${WERROR}>:-Werror> # Optionally treat warnings as errors + >> +) + +# Additional MSVC-specific warning level +target_compile_options(${module_name} PRIVATE + $<$<CXX_COMPILER_ID:MSVC>: + /W4 # Warning level 4 (highest for MSVC) + > +) + +# CUDA-specific compile options if(CUDA) target_compile_options(${module_name} PRIVATE $<$<COMPILE_LANGUAGE:CUDA>: - -Wall>) + -Wall # Enable all warnings for CUDA + > + ) endif() -target_compile_options(${module_name} PRIVATE - $<$<CXX_COMPILER_ID:MSVC>: - /W4>) +# Coverage flags for GCC if(CMAKE_COMPILER_IS_GNUCXX AND COVERAGE) + include(CodeCoverage) append_coverage_compiler_flags() endif() @@ -183,29 +192,31 @@ endif() include(GNUInstallDirs) set(INSTALL_CONFIGDIR ${CMAKE_INSTALL_LIBDIR}/cmake/${project}) +# Install the library target install(TARGETS ${module_name} EXPORT ${project}-targets - LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} - ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} - RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} + RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} ) +# Install header files install(DIRECTORY include/ DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) -#Export the targets to a script - +# Export targets for other projects to use install(EXPORT ${project}-targets - FILE "${project}-targets.cmake" - DESTINATION ${INSTALL_CONFIGDIR} - COMPONENT ${module_name} + FILE "${project}-targets.cmake" + DESTINATION ${INSTALL_CONFIGDIR} + COMPONENT ${module_name} ) -if (PYBIND) +# Python binding installation +if(PYBIND) install(TARGETS ${pybind_module_name} DESTINATION ${PYBIND_INSTALL_PREFIX} ) endif() -#Create a ConfigVersion.cmake file +# Create and install CMake configuration files include(CMakePackageConfigHelpers) write_basic_package_version_file( "${CMAKE_CURRENT_BINARY_DIR}/${project}-config-version.cmake" @@ -218,15 +229,14 @@ configure_package_config_file("${project}-config.cmake.in" INSTALL_DESTINATION ${INSTALL_CONFIGDIR} ) -#Install the config, configversion and custom find modules +# Install 
CMake configuration files install(FILES "${CMAKE_CURRENT_BINARY_DIR}/${project}-config.cmake" "${CMAKE_CURRENT_BINARY_DIR}/${project}-config-version.cmake" DESTINATION ${INSTALL_CONFIGDIR} ) -############################################## -## Exporting from the build tree +# Export from build tree export(EXPORT ${project}-targets FILE "${CMAKE_CURRENT_BINARY_DIR}/${project}-targets.cmake") @@ -234,10 +244,6 @@ export(EXPORT ${project}-targets ############################################## ## Add test if(TEST) - if (AIDGE_REQUIRES_PYTHON AND NOT AIDGE_PYTHON_HAS_EMBED) - message(WARNING "Skipping compilation of tests: missing Python embedded interpreter") - else() - enable_testing() - add_subdirectory(unit_tests) - endif() + enable_testing() + add_subdirectory(unit_tests) endif() -- GitLab From b4d50dfb37aab037ea18f1f86fc5b820dba39408 Mon Sep 17 00:00:00 2001 From: NAUD Maxence <maxence.naud@cea.fr> Date: Wed, 29 Jan 2025 23:52:56 +0000 Subject: [PATCH 34/60] ADD: basic test --- unit_tests/Test_QuantPTQ.cpp | 50 +++++++++++++++++------------------- 1 file changed, 24 insertions(+), 26 deletions(-) diff --git a/unit_tests/Test_QuantPTQ.cpp b/unit_tests/Test_QuantPTQ.cpp index 36377e8..e7211ce 100644 --- a/unit_tests/Test_QuantPTQ.cpp +++ b/unit_tests/Test_QuantPTQ.cpp @@ -1,21 +1,19 @@ -// #include <catch2/catch_test_macros.hpp> - -// #include "aidge/data/Tensor.hpp" -// #include "aidge/backend/TensorImpl.hpp" -// #include "aidge/backend/cpu.hpp" -// #include "aidge/operator/Conv.hpp" -// #include "aidge/operator/Scaling.hpp" -// #include "aidge/operator/GenericOperator.hpp" -// #include "aidge/graph/GraphView.hpp" -// #include "aidge/QuantPTQ.hpp" -// #include "aidge/scheduler/Scheduler.hpp" -// #include "aidge/hook/OutputRange.hpp" -// #include "aidge/operator/Producer.hpp" - -// #include <unordered_map> - -// using namespace Aidge; -// //using namespace Aidge_HELPER; +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. 
+ * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#include <catch2/catch_test_macros.hpp> + +TEST_CASE("[tmp] basic test") { + REQUIRE(true == true); +} // TEST_CASE("[aidge_module_template/ref_cpp/quantization] PTQ : Quantize Graph") { @@ -79,7 +77,7 @@ // std::shared_ptr<Tensor> myInput = // std::make_shared<Tensor>( -// Array4D<float,2,3,5,5> { +// Array4D<float,2,3,5,5> { // { // { // {{ 0., 1., 2., 3., 4.}, @@ -124,7 +122,7 @@ // ); // auto dataProvider = Producer(myInput, "dataProvider"); -// Tensor myOutput = Array4D<float,2,4,3,3> { +// Tensor myOutput = Array4D<float,2,4,3,3> { // { // { // {{ 15226., 15577., 15928.}, @@ -188,9 +186,9 @@ // "%f" // "\n", // max_output_conv); - + // } - + // float max_output_relu = std::static_pointer_cast<OutputRange>(myReLU1->getOperator()->getHook("output_range"))->getOutput(0); // if(verbose) { // printf("[hook] OutputRange(forward) :: ReLU output max: " @@ -222,10 +220,10 @@ // "\n", // (nodePtr->type()).c_str(), (nodePtr->name()).c_str()); // } -// } - +// } + // SequentialScheduler scheduler_v2(g1); - + // scheduler_v2.forward(); // scheduler_v2.generateScheduling(false); // std::vector<std::shared_ptr<Node>> ordered_graph_view_v2 = scheduler_v2.getStaticScheduling(); @@ -242,7 +240,7 @@ // "\n", // (nodePtr->type()).c_str(), (nodePtr->name()).c_str()); // } -// } +// } // } \ No newline at end of file -- GitLab From e01454400c872b2eb402ad5a9741f81506bac3e3 Mon Sep 17 00:00:00 2001 From: bhalimi <benjamin.halimi@cea.fr> Date: Fri, 31 Jan 2025 15:25:10 +0000 Subject: [PATCH 35/60] enable the cuda backend --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index b3c6d45..17dd74a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -61,7 +61,7 @@ option(PYBIND "python binding" OFF) option(WERROR "Warning as error" OFF) option(TEST "Enable tests" OFF) option(COVERAGE "Enable coverage" OFF) -option(CUDA "Enable CUDA backend" OFF) # XXX OFF +option(CUDA "Enable CUDA backend" ON) # XXX OFF option(ENABLE_ASAN "Enable ASan (AddressSanitizer) for runtime analysis of memory use (over/underflow, memory leak, ...)" OFF) ############################################## -- GitLab From 134827717cebaa2fb6e952c356117b764b6eb06b Mon Sep 17 00:00:00 2001 From: bhalimi <benjamin.halimi@cea.fr> Date: Fri, 31 Jan 2025 15:26:07 +0000 Subject: [PATCH 36/60] remove unused log --- src/PTQ/CLE.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/PTQ/CLE.cpp b/src/PTQ/CLE.cpp index cbfb91f..40b9e42 100644 --- a/src/PTQ/CLE.cpp +++ b/src/PTQ/CLE.cpp @@ -124,7 +124,6 @@ void crossLayerEqualization(std::shared_ptr<GraphView> graphView, double targetD do { - ++iteration; maxRangeDelta = 0.0; for (size_t i = 0; i < (affineNodeVector.size() - 1); i++) @@ -149,9 +148,6 @@ void crossLayerEqualization(std::shared_ptr<GraphView> graphView, double targetD } } while (maxRangeDelta > targetDelta); - - Log::notice("CLE completed after {} iterations. 
Final max range delta: {:.6f}", - iteration, maxRangeDelta); } } \ No newline at end of file -- GitLab From bccee6f45385093c984110635f83bc798a183cf1 Mon Sep 17 00:00:00 2001 From: bhalimi <benjamin.halimi@cea.fr> Date: Tue, 11 Feb 2025 15:50:16 +0000 Subject: [PATCH 37/60] handle PaddedConv2Ds in the QAT and BatchNorm insertion code --- setup.py | 2 +- src/QAT/QAT_LSQ.cpp | 8 ++++++-- src/recipes/QuantRecipes.cpp | 24 ++++++++---------------- 3 files changed, 15 insertions(+), 19 deletions(-) diff --git a/setup.py b/setup.py index 1bfc0ac..cde7c1e 100644 --- a/setup.py +++ b/setup.py @@ -63,7 +63,7 @@ class AidgePkgBuild(build_ext): cxx_compiler = os.environ.get("AIDGE_CXX_COMPILER", "g++") build_type = os.environ.get("AIDGE_BUILD_TYPE", "Release") asan = os.environ.get("AIDGE_ASAN", "OFF") - with_cuda = os.environ.get("AIDGE_WITH_CUDA", "OFF") + with_cuda = os.environ.get("AIDGE_WITH_CUDA", "ON") # default could be "OFF" cmake_arch = os.environ.get("AIDGE_CMAKE_ARCH", "") build_gen = os.environ.get("AIDGE_BUILD_GEN", "") diff --git a/src/QAT/QAT_LSQ.cpp b/src/QAT/QAT_LSQ.cpp index 0508fc7..ff1c44a 100644 --- a/src/QAT/QAT_LSQ.cpp +++ b/src/QAT/QAT_LSQ.cpp @@ -82,12 +82,14 @@ static bool initStepSize(std::shared_ptr<Node> quantizer) static void setupInputQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits) { - const auto matches = SinglePassGraphMatching(graphView).match("(Conv2D#|FC#)"); + const auto matches = SinglePassGraphMatching(graphView).match("(Conv2D#|PaddedConv2D#|FC#)"); for (const auto& match : matches) { auto linearNode = match.graph->rootNode(); + // Log::notice(" SET INPUT QUANTIZER : {} ", linearNode->type()); + std::pair<int, int> signedRange = {-std::pow(2, nbBits - 1), std::pow(2, nbBits - 1) - 1}; std::pair<int, int> unsignedRange = {0, std::pow(2, nbBits) - 1}; @@ -135,7 +137,7 @@ static void setupInputQuantizers(std::shared_ptr<GraphView> graphView, size_t nb static void setupParamQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits) { - const auto matches = SinglePassGraphMatching(graphView).match("(Conv2D#|FC#)"); + const auto matches = SinglePassGraphMatching(graphView).match("(Conv2D#|PaddedConv2D#|FC#)"); std::pair<int, int> signedRange = {-std::pow(2, nbBits - 1), std::pow(2, nbBits - 1) - 1}; @@ -143,6 +145,8 @@ static void setupParamQuantizers(std::shared_ptr<GraphView> graphView, size_t nb { auto linearNode = match.graph->rootNode(); + // Log::notice(" SET PARAM QUANTIZER : {} ", linearNode->type()); + // TODO : double check this, and use createUniqueName() auto quantizerName = makeUniqueName(linearNode->name() + "_lsq_p", graphView); auto quantizerNode = LSQ(signedRange, quantizerName); diff --git a/src/recipes/QuantRecipes.cpp b/src/recipes/QuantRecipes.cpp index 7f01b24..f03eb46 100644 --- a/src/recipes/QuantRecipes.cpp +++ b/src/recipes/QuantRecipes.cpp @@ -9,24 +9,13 @@ * ********************************************************************************/ -/* -#include "aidge/data/Tensor.hpp" -#include "aidge/graph/GraphView.hpp" -#include "aidge/graph/Node.hpp" -#include "aidge/scheduler/SequentialScheduler.hpp" -#include "aidge/scheduler/Scheduler.hpp" -#include "aidge/utils/Log.hpp" - -#include "aidge/operator/Producer.hpp" -#include "aidge/operator/Mul.hpp" -#include "aidge/operator/ReLU.hpp" -#include "aidge/operator/Scaling.hpp" -*/ #include "aidge/operator/Conv.hpp" #include "aidge/operator/BatchNorm.hpp" //#include "aidge/quantization/PTQ/PTQ.hpp" #include "aidge/recipes/QuantRecipes.hpp" +#include "aidge/graph/Node.hpp" + 
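// Note on the PaddedConv2D handling below (a hedged reading of the change):
// "PaddedConv2D" is presumably a meta-operator wrapping Pad2D and Conv2D, so
// its operator cannot be down-cast to Conv_Op<2>; reading the weights through
// the generic OperatorTensor interface works for both node types, e.g.:
//
//     auto op = std::static_pointer_cast<OperatorTensor>(node->getOperator());
//     int nbChannels = op->getInput(1)->dims()[0]; // dim 0 of the weight tensor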
namespace Aidge
{

@@ -55,11 +44,13 @@ void insertBatchNormNodes(std::shared_ptr<GraphView> graphView)
 {
     for (std::shared_ptr<Node> parentNode : graphView->getNodes())
     {
-        if (parentNode->type() == "Conv2D")
+        // TODO : use graph matching
+
+        if (parentNode->type() == "Conv2D" || parentNode->type() == "PaddedConv2D")
         {
-            std::shared_ptr<Conv_Op<2>> convOperator = std::static_pointer_cast<Conv_Op<2>> (parentNode->getOperator());
+            std::shared_ptr<OperatorTensor> convOperator = std::static_pointer_cast<OperatorTensor> (parentNode->getOperator());
             int nbChannels = convOperator->getInput(1)->dims()[0];
-            Log::info(" NB CHANNELS = {} ", nbChannels);
+            Log::notice(" NB CHANNELS = {} ", nbChannels);

             std::string batchnormNodeName = makeUniqueName(parentNode->name() + "_BN", graphView);
             std::shared_ptr<Node> batchnormNode = BatchNorm<2>(nbChannels, 1e-5, 0.1, false, batchnormNodeName);
@@ -118,6 +109,7 @@ std::string makeUniqueName(std::string baseName, std::shared_ptr<GraphView> grap
     return newName;
 }

+
 void sanitizeNodeNames(std::shared_ptr<GraphView> graphView)
 {
     for (std::shared_ptr<Node> node : graphView->getNodes())
-- GitLab

From 16c8b22ba3319d1c0300fd590f91077c173e653a Mon Sep 17 00:00:00 2001
From: bhalimi <benjamin.halimi@cea.fr>
Date: Fri, 14 Feb 2025 13:28:57 +0000
Subject: [PATCH 38/60] minor changes

---
 include/aidge/quantization/PTQ/PTQ.hpp | 8 ++++----
 python_binding/pybind_PTQ.cpp          | 4 ++--
 src/QAT/QAT_LSQ.cpp                    | 2 +-
 3 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/include/aidge/quantization/PTQ/PTQ.hpp b/include/aidge/quantization/PTQ/PTQ.hpp
index 4fc38bc..bfe671e 100644
--- a/include/aidge/quantization/PTQ/PTQ.hpp
+++ b/include/aidge/quantization/PTQ/PTQ.hpp
@@ -124,11 +124,11 @@ namespace Aidge {
     * @brief Quantize an already normalized (in terms of parameters and activations) network.
     * @param graphView The GraphView to be quantized.
     * @param nbBits The desired number of bits of the quantization.
-    * @param applyRounding Whether to apply the rounding operations or not.
+    * @param noQuant Whether to skip the rounding operations or not.
     * @param optimizeSigns Whether to take into account the IO signs of the operators or not.
     * @param verbose Whether to print the sign map or not.
     */
-    void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits, bool applyRounding, bool optimizeSigns, bool verbose);
+    void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits, bool noQuant, bool optimizeSigns, bool verbose);

    /**
     * @brief Main quantization routine. Performs every step of the quantization pipeline.
     * @param graphView The GraphView to be quantized.
     * @param nbBits The desired number of bits of the quantization.
     * @param inputDataSet The input dataset on which the value ranges are computed.
     * @param clippingMode Type of the clipping optimization. Can be either 'MAX', 'MSE', 'AA' or 'KL'.
-    * @param applyRounding Whether to apply the rounding operations or not.
+    * @param noQuant Whether to skip the rounding operations or not.
     * @param optimizeSigns Whether to take into account the IO signs of the operators or not.
     * @param singleShift Whether to convert the scaling factors into powers of two. If true, the approximations are compensated using the previous nodes' weights.
     * @param verbose Whether to print internal information about the quantization process.
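     * @note An illustrative call (placeholder values, shown only to make the
     *       parameter order concrete):
     *       quantizeNetwork(graphView, 8, inputDataSet, Clipping::MSE, false, true, false, false, false);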
*/
-    void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits, std::vector<std::shared_ptr<Tensor>> inputDataSet, Clipping clippingMode, bool applyRounding, bool optimizeSigns, bool singleShift, bool useCuda, bool verbose);
+    void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits, std::vector<std::shared_ptr<Tensor>> inputDataSet, Clipping clippingMode, bool noQuant, bool optimizeSigns, bool singleShift, bool useCuda, bool verbose);

    /**
     * @brief Compute the weight ranges of every affine node. Provided for debugging purposes.

diff --git a/python_binding/pybind_PTQ.cpp b/python_binding/pybind_PTQ.cpp
index b5193bd..1de7976 100644
--- a/python_binding/pybind_PTQ.cpp
+++ b/python_binding/pybind_PTQ.cpp
@@ -78,7 +78,7 @@ void init_PTQ(py::module &m) {
     :param value_ranges: list of float.
     )mydelimiter");

-    m.def("quantize_normalized_network", &quantizeNormalizedNetwork, py::arg("network"), py::arg("nb_bits"), py::arg("no_quant")=false, py::arg("optimize_signs"), py::arg("verbose") = false,
+    m.def("quantize_normalized_network", &quantizeNormalizedNetwork, py::arg("network"), py::arg("nb_bits"), py::arg("no_quantization")=false, py::arg("optimize_signs"), py::arg("verbose") = false,
    R"mydelimiter(
    Quantize an already normalized (in terms of parameters and activations) network.
    :param network: The GraphView to be quantized.
@@ -93,7 +93,7 @@ void init_PTQ(py::module &m) {
    :type verbose: bool
    )mydelimiter");

-    m.def("quantize_network", &quantizeNetwork ,py::arg("network"), py::arg("nb_bits"), py::arg("input_dataset"), py::arg("clipping_mode") = Clipping::MAX , py::arg("no_quantization") = true, py::arg("optimize_signs") = false, py::arg("single_shift") = false, py::arg("use_cuda") = false, py::arg("verbose") = false,
+    m.def("quantize_network", &quantizeNetwork, py::arg("network"), py::arg("nb_bits"), py::arg("input_dataset"), py::arg("clipping_mode") = Clipping::MAX, py::arg("no_quantization") = false, py::arg("optimize_signs") = false, py::arg("single_shift") = false, py::arg("use_cuda") = false, py::arg("verbose") = false,
    R"mydelimiter(
    Main quantization routine. Performs every step of the quantization pipeline.
    :param network: The GraphView to be quantized.
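    Example (an illustrative sketch; assumes `model` and `samples` are a
    loaded GraphView and a list of calibration tensors prepared by the
    caller, and that the module is imported as `aidge_quantization`):

        aidge_quantization.quantize_network(model, 8, samples,
            clipping_mode=aidge_quantization.Clipping.MSE,
            no_quantization=False, optimize_signs=True,
            single_shift=False, use_cuda=False, verbose=False)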
diff --git a/src/QAT/QAT_LSQ.cpp b/src/QAT/QAT_LSQ.cpp index ff1c44a..da09d62 100644 --- a/src/QAT/QAT_LSQ.cpp +++ b/src/QAT/QAT_LSQ.cpp @@ -75,7 +75,7 @@ static bool initStepSize(std::shared_ptr<Node> quantizer) stepSizeProducer->getOperator()->setOutput(0, stepSizeTensor); - Log::info(" [ INIT STEP SIZE = {} ] ", stepSize); + Log::notice(" [ INIT STEP SIZE = {} ] ", stepSize); return false; } -- GitLab From 22e47ad9fb629f85ed4f1c5fa981c1d195c0201b Mon Sep 17 00:00:00 2001 From: bhalimi <benjamin.halimi@cea.fr> Date: Fri, 14 Feb 2025 13:43:58 +0000 Subject: [PATCH 39/60] use the scalar tensor constructor --- src/PTQ/CLE.cpp | 2 +- src/PTQ/PTQ.cpp | 4 ++-- src/QAT/QAT_LSQ.cpp | 3 ++- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/PTQ/CLE.cpp b/src/PTQ/CLE.cpp index 40b9e42..28858d0 100644 --- a/src/PTQ/CLE.cpp +++ b/src/PTQ/CLE.cpp @@ -49,7 +49,7 @@ static void rescaleTensor(std::shared_ptr<Tensor> tensor, double scaling) mulOp.setDataType(tensor->dataType()); mulOp.setBackend(tensor->backend()); - std::shared_ptr<Aidge::Tensor> scalingTensor = std::make_shared<Aidge::Tensor>(Aidge::Array1D<double, 1> {scaling}); + std::shared_ptr<Aidge::Tensor> scalingTensor = std::make_shared<Aidge::Tensor>(scaling); scalingTensor->setDataType(tensor->dataType()); scalingTensor->setBackend(tensor->backend()); diff --git a/src/PTQ/PTQ.cpp b/src/PTQ/PTQ.cpp index 09b039f..7c29ee0 100644 --- a/src/PTQ/PTQ.cpp +++ b/src/PTQ/PTQ.cpp @@ -78,7 +78,7 @@ static void rescaleTensor(std::shared_ptr<Tensor> tensor, double scaling) mulOp.setDataType(tensor->dataType()); mulOp.setBackend(tensor->backend()); - std::shared_ptr<Aidge::Tensor> scalingTensor = std::make_shared<Aidge::Tensor>(Aidge::Array1D<double, 1> {scaling}); + std::shared_ptr<Aidge::Tensor> scalingTensor = std::make_shared<Aidge::Tensor>(scaling); scalingTensor->setDataType(tensor->dataType()); scalingTensor->setBackend(tensor->backend()); @@ -932,7 +932,7 @@ static void insertCompensationNodes(std::shared_ptr<GraphView> graphView, std::u // Add the coeff producer to the multiplier node std::shared_ptr<Node> coeffProducer = addProducer(mulNode, 1, {1}, ""); - std::shared_ptr<Tensor> coeffTensor = std::make_shared<Tensor>(Array1D<double, 1> {signedMax}); + std::shared_ptr<Tensor> coeffTensor = std::make_shared<Tensor>(signedMax); coeffProducer->getOperator()->setOutput(0, coeffTensor); coeffProducer->getOperator()->setDataType(DataType::Float64); diff --git a/src/QAT/QAT_LSQ.cpp b/src/QAT/QAT_LSQ.cpp index da09d62..6eae077 100644 --- a/src/QAT/QAT_LSQ.cpp +++ b/src/QAT/QAT_LSQ.cpp @@ -65,7 +65,8 @@ static bool initStepSize(std::shared_ptr<Node> quantizer) float inputStd = getTensorStd(quantizerOp->getInput(0)); float stepSize = 8.0f * (inputStd / (quantizerOp->range().second)); - auto stepSizeTensor = std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}})); + // TODO : use the scalar constructor + auto stepSizeTensor = std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}})); // XXX Manage backend here ? 
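+    // (Per the TODO above, the scalar variant would presumably read
+    //  `auto stepSizeTensor = std::make_shared<Tensor>(stepSize);`,
+    //  mirroring the change this patch makes to the PTQ helpers.)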
stepSizeTensor->setBackend(quantizerOp->getInput(0)->backend());

-- GitLab

From 4260a27622bfd2a41dfd420614520b1de288a46c Mon Sep 17 00:00:00 2001
From: bhalimi <benjamin.halimi@cea.fr>
Date: Wed, 8 Jan 2025 10:32:06 +0000
Subject: [PATCH 40/60] improve tensor manipulation routines + enhance insertCompensationNodes

---
 src/PTQ/CLE.cpp     |  71 +++++++++++++++++------
 src/PTQ/PTQ.cpp     | 137 ++++++++++++++++++++------------------
 src/QAT/QAT_LSQ.cpp |   9 +--
 3 files changed, 137 insertions(+), 80 deletions(-)

diff --git a/src/PTQ/CLE.cpp b/src/PTQ/CLE.cpp
index 5265d9c..63d3b45 100644
--- a/src/PTQ/CLE.cpp
+++ b/src/PTQ/CLE.cpp
@@ -24,6 +24,12 @@
 #include "aidge/operator/OperatorTensor.hpp"
 #include "aidge/utils/Log.hpp"

+#include "aidge/operator/Mul.hpp"
+#include "aidge/operator/ArgMax.hpp"
+#include "aidge/operator/Abs.hpp"
+#include "aidge/operator/Reshape.hpp"
+#include "aidge/operator/Round.hpp"
+
 namespace Aidge
 {

@@ -39,27 +45,58 @@ static std::shared_ptr<Tensor> getBiasTensor(std::shared_ptr<Node> node)

 static void rescaleTensor(std::shared_ptr<Tensor> tensor, double scaling)
 {
-    // Get the tensor data pointer
-    double * castedTensor = static_cast<double *> (tensor->getImpl()->rawPtr());
-
-    // Rescale the tensor
-    for(std::size_t i = 0; i < tensor->size(); i++)
-        castedTensor[i] *= scaling;
+    auto mulOp = Mul_Op();
+    mulOp.setDataType(tensor->dataType());
+    mulOp.setBackend(tensor->backend());
+
+    std::shared_ptr<Aidge::Tensor> scalingTensor = std::make_shared<Aidge::Tensor>(Aidge::Array1D<float, 1> {scaling});
+    scalingTensor->setDataType(tensor->dataType());
+    scalingTensor->setBackend(tensor->backend());
+
+    mulOp.associateInput(0, tensor);
+    mulOp.associateInput(1, scalingTensor);
+
+    mulOp.forward();
+
+    auto outTensor = mulOp.getOutput(0);
+    *tensor = *outTensor;
+    //tensor->copyCast(*outTensor);
 }

+// TODO : make the retrieval of argmax values backend independent (refCastFrom)
 static double getTensorAbsoluteMax(std::shared_ptr<Tensor> tensor)
 {
-    // Get the tensor data pointer and edit it
-    double * castedTensor = static_cast<double*> (tensor->getImpl()->rawPtr());
-
-    // Get the tensor absolute max value
-    double maxValue = 0.0;
-    for(std::size_t i = 0; i < tensor->size(); ++i) {
-        if(std::fabs(castedTensor[i]) > maxValue) {
-            maxValue = std::fabs(castedTensor[i]);
-        }
-    }
-    return maxValue;
+    // get the abs tensor
+
+    std::shared_ptr<Tensor> absTensor = std::make_shared<Tensor>(tensor->abs());
+
+    // flatten the abs tensor
+
+    std::int64_t nbElement = tensor->size();
+
+    auto reshapeOp = Reshape_Op({nbElement});
+    reshapeOp.setDataType(tensor->dataType());
+    reshapeOp.setBackend(tensor->backend());
+
+    reshapeOp.associateInput(0, absTensor);
+    reshapeOp.forward();
+    std::shared_ptr<Tensor> flatTensor = reshapeOp.getOutput(0);
+
+    // Get the argmax
+
+    auto argmaxOp = ArgMax_Op(0, true, false);
+    argmaxOp.setDataType(tensor->dataType());
+    argmaxOp.setBackend(tensor->backend());
+
+    argmaxOp.associateInput(0, flatTensor);
+    argmaxOp.forward();
+    std::shared_ptr<Tensor> argmaxTensor = argmaxOp.getOutput(0);
+
+    // Return the max
+
+    int maxIndex = std::round(argmaxTensor->get<double>(0));
+
+    return flatTensor->get<double>(maxIndex);
 }

 void crossLayerEqualization(std::shared_ptr<GraphView> graphView, double targetDelta)

diff --git a/src/PTQ/PTQ.cpp b/src/PTQ/PTQ.cpp
index 0e26313..6e0b29e 100644
--- a/src/PTQ/PTQ.cpp
+++ b/src/PTQ/PTQ.cpp
@@ -28,6 +28,12 @@
 #include "aidge/operator/BatchNorm.hpp"
 #include "aidge/operator/Conv.hpp"

+#include "aidge/operator/ArgMax.hpp"
+#include "aidge/operator/Abs.hpp" +#include "aidge/operator/Reshape.hpp" +#include "aidge/operator/Round.hpp" + + #include "aidge/recipes/Recipes.hpp" #include "aidge/recipes/QuantRecipes.hpp" @@ -66,51 +72,75 @@ bool checkArchitecture(std::shared_ptr<GraphView> graphView) return true; } -static void fillTensor(std::shared_ptr<Tensor> tensor, double value) +static void rescaleTensor(std::shared_ptr<Tensor> tensor, float scaling) { - // Get the tensor data pointer - double * castedTensor = static_cast <double *> (tensor->getImpl()->rawPtr()); + auto mulOp = Mul_Op(); + mulOp.setDataType(tensor->dataType()); + mulOp.setBackend(tensor->backend()); - // Fill the tensor - for(std::size_t i = 0; i < tensor->size(); i++) - castedTensor[i] = value; -} + std::shared_ptr<Aidge::Tensor> scalingTensor = std::make_shared<Aidge::Tensor>(Aidge::Array1D<float, 1> {scaling}); + scalingTensor->setDataType(tensor->dataType()); + scalingTensor->setBackend(tensor->backend()); -static void rescaleTensor(std::shared_ptr<Tensor> tensor, double scaling) -{ - // Get the tensor data pointer - double * castedTensor = static_cast <double *> (tensor->getImpl()->rawPtr()); + mulOp.associateInput(0, tensor); + mulOp.associateInput(1, scalingTensor); - // Rescale the tensor - for(std::size_t i = 0; i < tensor->size(); i++) - castedTensor[i] *= scaling; + mulOp.forward(); + + auto outTensor = mulOp.getOutput(0); + *tensor = *outTensor; } static void roundTensor(std::shared_ptr<Tensor> tensor) { - // Get the tensor data pointer - double * castedTensor = static_cast <double *> (tensor->getImpl()->rawPtr()); + auto roundOp = Round_Op(); + roundOp.setDataType(tensor->dataType()); + roundOp.setBackend(tensor->backend()); - // Rescale the tensor - for(std::size_t i = 0; i < tensor->size(); i++) - castedTensor[i] = std::nearbyint(castedTensor[i]);//Round + roundOp.associateInput(0, tensor); + roundOp.forward(); + + auto outTensor = roundOp.getOutput(0); + *tensor = *outTensor; } -static double getTensorAbsoluteMax(std::shared_ptr <Tensor> tensor) +// TODO : make the retreival of argmax values backend independant (refCastFrom) +static double getTensorAbsoluteMax(std::shared_ptr<Tensor> tensor) { - // Get the tensor data pointer and edit it - double * castedTensor = static_cast<double*>(tensor->getImpl()->rawPtr()); - - // Get the tensor absolute max value - double maxValue = 0.0f; - for(std::size_t i = 0; i < tensor->size(); ++i) { - if(std::fabs(castedTensor[i]) > maxValue) { - maxValue = std::fabs(castedTensor[i]); - } - } - return maxValue; + // get the abs tensor + + std::shared_ptr<Tensor> absTensor = std::make_shared<Tensor>(tensor->abs()); + + // flatten the abs tensor + + std::int64_t nbElement = tensor->size(); + + auto reshapeOp = Reshape_Op({nbElement}); + reshapeOp.setDataType(tensor->dataType()); + reshapeOp.setBackend(tensor->backend()); + + reshapeOp.associateInput(0, absTensor); + reshapeOp.forward(); + std::shared_ptr<Tensor> flatTensor = reshapeOp.getOutput(0); + + // Get the argmax + + auto argmaxOp = ArgMax_Op(0, true, false); + argmaxOp.setDataType(tensor->dataType()); + argmaxOp.setBackend(tensor->backend()); + + argmaxOp.associateInput(0, flatTensor); + argmaxOp.forward(); + std::shared_ptr<Tensor> argmaxTensor = argmaxOp.getOutput(0); + + // Return the max + + int maxIndex = std::round(argmaxTensor->get<double>(0)); + + return flatTensor->get<double>(maxIndex); } + // TODO : pass nodeVector by reference ... 
static std::vector<std::shared_ptr<Node>> removeMatchingNodes(std::vector<std::shared_ptr<Node>> nodeVector, std::string nodeType)
 {
@@ -876,50 +906,42 @@ static void insertCompensationNodes(std::shared_ptr<GraphView> graphView, std::u

     for (std::shared_ptr<Node> node : nodeVector)
     {
-        // A merging node is always followed by a scaling node at this point ...
+        // A merging node is always followed by a Quantizer node at this point

         if (node->type() == "Quantizer")
         {
+            // check if the Quantizer is a residual one, and insert a compensation node if so ...
+
            bool prevNodeIsForking = ((node->getParent(0))->getChildren().size() > 1);
            bool prevNodeIsAffine = isAffine(node->getParent(0));
            bool insertNode = prevNodeIsForking || !prevNodeIsAffine;

            if (insertNode)
            {
-                // create and insert the multplicative node
+                // create and insert the multiplicative node before the Quantizer

                std::string mulNodeName = makeUniqueName(node->name() + "_Mul", graphView);
                std::shared_ptr<Node> mulNode = Mul(mulNodeName);
-
                mulNode->getOperator()->setDataType(DataType::Float64); // getDataType(parentNode)
                mulNode->getOperator()->setBackend("cpu");

                graphView->insertParent(node, mulNode, 0, 0, 0);

-                // create and insert the producer node
-
-                std::shared_ptr<Tensor> inputTensor = std::static_pointer_cast<Tensor> (mulNode->getOperator()->getRawInput(0));
-                std::shared_ptr<Tensor> coeffTensor = std::make_shared<Tensor>();
+                // Add the coeff producer to the multiplier node

-                coeffTensor->setDataType(DataType::Float64); // getDataType(parentNode)
-                coeffTensor->setBackend("cpu");
+                std::shared_ptr<Node> coeffProducer = addProducer(mulNode, 1, {1}, "");
+                std::shared_ptr<Tensor> coeffTensor = std::make_shared<Tensor>(Array1D<double, 1> {signedMax});
+                coeffProducer->getOperator()->setOutput(0, coeffTensor);

-                coeffTensor->resize(inputTensor->dims());
-                fillTensor(coeffTensor, 1);
+                coeffProducer->getOperator()->setDataType(DataType::Float64);
+                coeffProducer->getOperator()->setBackend("cpu");

-                std::shared_ptr<Node> producerNode = Producer(coeffTensor, makeUniqueName("coeff", graphView));
-                producerNode->addChild(mulNode);
-                graphView->add(producerNode);
+                graphView->add(coeffProducer); // needed ?

-                // rescale the coeffs and edit scaling factor
+                // Adapt the scaling factor value accordingly

-                fillTensor(coeffTensor, signedMax);
-
-                double currScalingFactor = getScalingFactor(node); // XXX bad naming !
+                double currScalingFactor = getScalingFactor(node);
                updateScalingFactor(node, currScalingFactor / signedMax);
-
-                // TODO : double check this !!!
- //std::cout << getTensorAbsoluteMax(coeffTensor) << std::endl; } } } @@ -931,7 +953,8 @@ void performSingleShiftApproximation(std::shared_ptr<GraphView> graphView, bool for (std::shared_ptr<Node> node : nodeVector) { - // Use A meatoperator of type Scaling of MulCompensation instead + // TODO : use Compensation nodes instead of Mul nodes + if (isAffine(node) || (node->type() == "Mul")) { std::shared_ptr<Node> scalingNode = (*node->getChildren().begin()); @@ -940,7 +963,7 @@ void performSingleShiftApproximation(std::shared_ptr<GraphView> graphView, bool double approx = std::pow(2, std::ceil(std::log2(base))); - updateScalingFactor(scalingNode,approx); + updateScalingFactor(scalingNode, approx); double ratio = base / approx; @@ -954,7 +977,7 @@ void performSingleShiftApproximation(std::shared_ptr<GraphView> graphView, bool std::shared_ptr<Tensor> biasTensor = getBiasTensor(node); rescaleTensor(biasTensor, ratio); if (!noQuant) - roundTensor(biasTensor); + roundTensor(biasTensor); } } } @@ -1058,8 +1081,8 @@ void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits, if (useCuda) graphView->setBackend("cuda"); - //std::cout << " === SCALINGS (AFTER CAST) ===" << std::endl; - //printScalingFactors(graphView); + std::cout << " === SCALINGS (AFTER CAST) ===" << std::endl; + printScalingFactors(graphView); Log::info(" Reseting the scheduler ..."); SequentialScheduler scheduler(graphView); diff --git a/src/QAT/QAT_LSQ.cpp b/src/QAT/QAT_LSQ.cpp index 9b51e84..a09dbb2 100644 --- a/src/QAT/QAT_LSQ.cpp +++ b/src/QAT/QAT_LSQ.cpp @@ -89,19 +89,16 @@ void QuantLSQ::insertQuantizers(std::shared_ptr<GraphView> graphView, size_t nbB static float getTensorAbsMean(std::shared_ptr<Tensor> tensor) { auto backend = tensor->backend(); + if (backend == "cuda") tensor->setBackend("cpu"); - float acc = 0; - float* castedTensor = static_cast<float *> (tensor->getImpl()->rawPtr()); - for(std::size_t i = 0; i < tensor->size(); i++) - acc += std::abs(castedTensor[i]); - acc /= static_cast<float> (tensor->size()); + float value = (*tensor).abs().mean().get<float>(0); if (backend == "cuda") tensor->setBackend("cuda"); - return acc; + return value; } static std::map<std::string, float> collectInputStats(std::shared_ptr<GraphView> graphView, std::shared_ptr<Tensor> calibrationData, bool useCuda) -- GitLab From 8a91f5210a0dc0be26a0491bcd39420bf2d9f1fe Mon Sep 17 00:00:00 2001 From: bhalimi <benjamin.halimi@cea.fr> Date: Wed, 8 Jan 2025 10:37:27 +0000 Subject: [PATCH 41/60] comment verbose --- src/PTQ/PTQ.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/PTQ/PTQ.cpp b/src/PTQ/PTQ.cpp index 6e0b29e..7f750f0 100644 --- a/src/PTQ/PTQ.cpp +++ b/src/PTQ/PTQ.cpp @@ -1081,8 +1081,8 @@ void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits, if (useCuda) graphView->setBackend("cuda"); - std::cout << " === SCALINGS (AFTER CAST) ===" << std::endl; - printScalingFactors(graphView); + //std::cout << " === SCALINGS (AFTER CAST) ===" << std::endl; + //printScalingFactors(graphView); Log::info(" Reseting the scheduler ..."); SequentialScheduler scheduler(graphView); -- GitLab From f1323476ae0d66a18efb299aeb04a398d09515c7 Mon Sep 17 00:00:00 2001 From: bhalimi <benjamin.halimi@cea.fr> Date: Wed, 8 Jan 2025 16:27:21 +0000 Subject: [PATCH 42/60] minor change --- src/PTQ/PTQ.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/PTQ/PTQ.cpp b/src/PTQ/PTQ.cpp index 7f750f0..3677ae0 100644 --- a/src/PTQ/PTQ.cpp +++ b/src/PTQ/PTQ.cpp @@ -215,6 +215,8 @@ void 
prepareNetwork(std::shared_ptr<GraphView> graphView)
 {
     removeFlatten(graphView);

+    sanitizeNodeNames(graphView);
+
     bool containsBatchNorm = false;
     std::vector<std::shared_ptr<Node>> nodeVector = retrieveNodeVector(graphView);

@@ -1078,6 +1080,7 @@ void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits,
     //printScalingFactors(graphView);

     setupDataType(graphView, inputDataSet, initialDataType);
+
     if (useCuda)
         graphView->setBackend("cuda");

-- GitLab

From 878bb4cfda4bacffb963828ebbad2456e3a702cc Mon Sep 17 00:00:00 2001
From: bhalimi <benjamin.halimi@cea.fr>
Date: Mon, 13 Jan 2025 13:01:34 +0000
Subject: [PATCH 43/60] rework the LSQ code

---
 include/aidge/quantization/QAT/QAT_LSQ.hpp |  18 +-
 python_binding/pybind_QAT_LSQ.cpp          |   5 +-
 src/QAT/QAT_LSQ.cpp                        | 204 +++++++-------------
 3 files changed, 77 insertions(+), 150 deletions(-)

diff --git a/include/aidge/quantization/QAT/QAT_LSQ.hpp b/include/aidge/quantization/QAT/QAT_LSQ.hpp
index a44c71b..9827ee2 100644
--- a/include/aidge/quantization/QAT/QAT_LSQ.hpp
+++ b/include/aidge/quantization/QAT/QAT_LSQ.hpp
@@ -22,22 +22,14 @@ namespace Aidge {
 namespace QuantLSQ {

 /**
- * @brief Insert the LSQ quantizer nodes in a given GraphView
- * @param graphView The GraphView containing the graph to quantize.
+ * @brief Given a GraphView with parameters properly initialized, insert
+ * the LSQ quantizer nodes, and set up the adjustment of their step-sizes.
 * @param graphView The GraphView containing the network to quantize.
 * @param nbBits Number of quantization bits.
- * @param span Fixed output span of the quantizers.
 */
-void insertQuantizers(std::shared_ptr<GraphView> graphView, std::size_t nbBits, float step_size);
+void setupQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits);

-/**
- * @brief Given a GraphView with parameters properly initialized and some calibration data,
- * insert the LSQ quantizer nodes, and adjust their step-sizes.
- * @param graphView The GraphView containing the graph to quantize.
- * @param nbBits Number of quantization bits.
- * @param calibrationData Calibration data used to adjust the spans.
- * @param scale Multiplicative constant applied to the spans.
- */ -void insertAndInitQuantizers(std::shared_ptr<GraphView> graphView, std::size_t nbBits, std::shared_ptr<Tensor> calibrationData); +void devLSQ(std::shared_ptr<Tensor> tensor); } // namespace QuantLSQ } // namespace Aidge diff --git a/python_binding/pybind_QAT_LSQ.cpp b/python_binding/pybind_QAT_LSQ.cpp index 206985e..0b9fcc2 100644 --- a/python_binding/pybind_QAT_LSQ.cpp +++ b/python_binding/pybind_QAT_LSQ.cpp @@ -23,8 +23,9 @@ void init_QAT_LSQ(py::module &m) { auto mQuantLSQ = m.def_submodule("lsq"); - mQuantLSQ.def("insert_quantizers", &QuantLSQ::insertQuantizers, py::arg("network"), py::arg("nb_bits"), py::arg("step_size")); + mQuantLSQ.def("setup_quantizers", &QuantLSQ::setupQuantizers, py::arg("network"), py::arg("nb_bits")); + + mQuantLSQ.def("dev_lsq", &QuantLSQ::devLSQ, py::arg("tensor")); - mQuantLSQ.def("insert_and_init_quantizers", &QuantLSQ::insertAndInitQuantizers, py::arg("network"), py::arg("nb_bits"), py::arg("calibration_data")); } } // namespace Aidge diff --git a/src/QAT/QAT_LSQ.cpp b/src/QAT/QAT_LSQ.cpp index a09dbb2..04f2027 100644 --- a/src/QAT/QAT_LSQ.cpp +++ b/src/QAT/QAT_LSQ.cpp @@ -23,7 +23,42 @@ namespace Aidge { -void QuantLSQ::insertQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits, float stepSize) +static float getTensorAbsMean(std::shared_ptr<Tensor> tensor) +{ + auto valueTensor = (*tensor).abs().mean(); + std::shared_ptr<Tensor> fallback; + const Tensor& localTensor = valueTensor.refCastFrom(fallback, DataType::Float32, "cpu"); + return localTensor.get<float>(0); +} + +// INIT THE STEP SIZE OF A QUANTIZER NODE + +static bool initStepSize(std::shared_ptr<Node> quantizer) +{ + const auto quantizerOp = std::static_pointer_cast<LSQ_Op>(quantizer->getOperator()); + + float inputAbsMean = getTensorAbsMean(quantizerOp->getInput(0)); + + float stepSize = 2.0f * (inputAbsMean / std::sqrt(quantizerOp->range().second)); + + auto stepSizeTensor = std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}})); + + // XXX Manage backend here ? 
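+    // (Background note: the 2 * mean(|x|) / sqrt(Q_P) expression above is the
+    //  step-size initialization from the LSQ paper; for 8-bit signed data,
+    //  Q_P = 127, so an abs-mean of 0.5 gives a step size of roughly 0.089.)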
+ stepSizeTensor->setBackend(quantizerOp->getInput(0)->backend()); + stepSizeTensor->setDataType(quantizerOp->getInput(0)->dataType()); + + auto stepSizeProducer = quantizer->getParent(1); + + stepSizeProducer->getOperator()->setOutput(0, stepSizeTensor); + + std::cout << " [ INIT STEP SIZE = " << stepSize << " ] " << std::endl; + + return false; +} + +// INPUT QUANTIZERS INSERTION + +static void setupInputQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits) { const auto matches = SinglePassGraphMatching(graphView).match("(Conv2D#|FC#)"); @@ -34,177 +69,76 @@ void QuantLSQ::insertQuantizers(std::shared_ptr<GraphView> graphView, size_t nbB std::pair<int, int> signedRange = {-std::pow(2, nbBits - 1), std::pow(2, nbBits - 1) - 1}; std::pair<int, int> unsignedRange = {0, std::pow(2, nbBits) - 1}; - // INPUT QUANTIZERS INSERTION + // Create the input quantizer node - // TODO : double check this, and use createUniqueName() - auto inputQuantizerName = makeUniqueName(linearNode->name() + "_lsq_i", graphView); - auto inputQuantizerNode = LSQ(signedRange, inputQuantizerName); + auto quantizerName = makeUniqueName(linearNode->name() + "_lsq_i", graphView); + auto quantizerNode = LSQ(signedRange, quantizerName); - // Set the step size + // Init the step-size using the node call stack - auto inputStepSizeOp = inputQuantizerNode->getParent(1)->getOperator(); - auto inputStepSizeTensor = std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}})); - inputStepSizeOp->setOutput(0, inputStepSizeTensor); + quantizerNode->addBeforeForward([quantizerNode](){ return initStepSize(quantizerNode); }); // Absorb the ReLU when possible ... - // XXX is this safe ??? - bool nodeHasParent = static_cast<bool> (linearNode->getParents()[0]); - // bool nodeHasParent = (linearNode->getParents().size() != 0); + bool nodeHasParent = static_cast<bool> (linearNode->getParents()[0]); // XXX is this safe ? if (nodeHasParent) { auto parentNode = linearNode->getParents()[0]; if (parentNode->type() == "ReLU") { - auto inputQuantizerOp = std::static_pointer_cast<LSQ_Op> (inputQuantizerNode->getOperator()); - inputQuantizerOp->range() = unsignedRange; + auto quantizerOp = std::static_pointer_cast<LSQ_Op> (quantizerNode->getOperator()); + quantizerOp->range() = unsignedRange; graphView->replace({parentNode}, {}); } } - // We need to handle the case where the linear node is the first one ... + // Insert the quantizer in the graphView ... 
+ // (We need to handle the case where the linear node is the first one) if (nodeHasParent) { - graphView->insertParent(linearNode, inputQuantizerNode, 0, 0, 0); + graphView->insertParent(linearNode, quantizerNode, 0, 0, 0); } else { - inputQuantizerNode->addChild(graphView); - graphView->add(inputQuantizerNode); + quantizerNode->addChild(graphView); + graphView->add(quantizerNode); } - - // PARAM QUANTIZERS INSERTION - - // TODO : double check this, and use createUniqueName() - auto paramQuantizerName = makeUniqueName(linearNode->name() + "_lsq_p", graphView); - auto paramQuantizerNode = LSQ(signedRange, paramQuantizerName); - graphView->insertParent(linearNode, paramQuantizerNode, 1, 0, 0); - - // Set the step size - - auto paramStepSizeOp = paramQuantizerNode->getParent(1)->getOperator(); - auto paramStepSizeTensor = std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}})); - paramStepSizeOp->setOutput(0, paramStepSizeTensor); } - } -static float getTensorAbsMean(std::shared_ptr<Tensor> tensor) -{ - auto backend = tensor->backend(); - - if (backend == "cuda") - tensor->setBackend("cpu"); - - float value = (*tensor).abs().mean().get<float>(0); - - if (backend == "cuda") - tensor->setBackend("cuda"); - - return value; -} +// PARAM QUANTIZERS INSERTION -static std::map<std::string, float> collectInputStats(std::shared_ptr<GraphView> graphView, std::shared_ptr<Tensor> calibrationData, bool useCuda) +static void setupParamQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits) { - // Propagate the calibration tensor + const auto matches = SinglePassGraphMatching(graphView).match("(Conv2D#|FC#)"); - SequentialScheduler scheduler(graphView); - scheduler.resetScheduling(); - scheduler.forward(true, {calibrationData}); + std::pair<int, int> signedRange = {-std::pow(2, nbBits - 1), std::pow(2, nbBits - 1) - 1}; - // Store the input tensor statistics + for (const auto& match : matches) + { + auto linearNode = match.graph->rootNode(); - if (useCuda) - graphView->setBackend("cpu"); + // TODO : double check this, and use createUniqueName() + auto quantizerName = makeUniqueName(linearNode->name() + "_lsq_p", graphView); + auto quantizerNode = LSQ(signedRange, quantizerName); - std::map<std::string, float> inputStats; - for (auto node : graphView->getNodes()) - { - if (node->type() == "FC" || node->type() == "Conv2D") // TODO: use graph matching !!! - { - const auto op = std::static_pointer_cast<LSQ_Op>(node->getOperator()); - float inputAbsMean = getTensorAbsMean(op->getInput(0)); - inputStats.insert(std::make_pair(node->name(), inputAbsMean)); - fmt::println("{} -> {}", node->name(), inputAbsMean); - } - } + // Init the step-size using the node call stack - if (useCuda) - graphView->setBackend("cuda"); + quantizerNode->addBeforeForward([quantizerNode](){ return initStepSize(quantizerNode); }); - return inputStats; -} + // Insert the quantizer in the graphView -static std::map<std::string, float> collectParamStats(std::shared_ptr<GraphView> graphView, bool useCuda) -{ - if (useCuda) - graphView->setBackend("cpu"); - - std::map<std::string, float> paramStats; - for (auto node : graphView->getNodes()) - { - if (node->type() == "FC" || node->type() == "Conv2D") // TODO: use graph matching !!! 
- { - const auto op = std::static_pointer_cast<LSQ_Op>(node->getOperator()); - float paramAbsMean = getTensorAbsMean(op->getInput(1)); - paramStats.insert(std::make_pair(node->name(), paramAbsMean)); - fmt::println("{} -> {}", node->name(), paramAbsMean); - } + graphView->insertParent(linearNode, quantizerNode, 1, 0, 0); } - - if (useCuda) - graphView->setBackend("cuda"); - - return paramStats; } -static void adjustQuantizersStepSizes(std::shared_ptr<GraphView> graphView, std::map<std::string, float> inputStats, std::map<std::string, float> paramStats) +void QuantLSQ::setupQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits) { - const auto matches = SinglePassGraphMatching(graphView).match("(Conv2D#|FC#)"); - - for (const auto& match : matches) - { - auto linearNode = match.graph->rootNode(); - - // INPUT QUANTIZERS STEP-SIZES - - auto inputQuantNode = linearNode->getParent(0); - auto inputQuantOp = std::static_pointer_cast<LSQ_Op>(inputQuantNode->getOperator()); - - float absMean = inputStats[linearNode->name()]; - float stepSize = 2.0f * (absMean / std::sqrt(inputQuantOp->range().second)); - - auto inputStepSizeOp = inputQuantNode->getParent(1)->getOperator(); - // XXX inputStepSizeOp->setOutput(0, std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}}))); - auto inputStepSizeTensor = std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}})); - inputStepSizeOp->setOutput(0, inputStepSizeTensor); - - // PARAM QUANTIZERS STEP-SIZES - - auto paramQuantNode = linearNode->getParent(1); - auto paramQuantOp = std::static_pointer_cast<LSQ_Op>(paramQuantNode->getOperator()); - - absMean = paramStats[linearNode->name()]; - stepSize = 2.0f * (absMean / std::sqrt(paramQuantOp->range().second)); - - auto paramStepSizeOp = paramQuantNode->getParent(1)->getOperator(); - // XXX paramStepSizeOp->setOutput(0, std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}}))); - auto paramStepSizeTensor = std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}})); - paramStepSizeOp->setOutput(0, paramStepSizeTensor); - } + setupInputQuantizers(graphView, nbBits); + setupParamQuantizers(graphView, nbBits); } -void QuantLSQ::insertAndInitQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits, std::shared_ptr<Tensor> calibrationData) +void QuantLSQ::devLSQ(std::shared_ptr<Tensor> tensor) { - bool useCuda = (calibrationData->backend() == "cuda"); - - // Collect the tensor statisics - auto inputStats = collectInputStats(graphView, calibrationData, useCuda); - - auto paramStats = collectParamStats(graphView, useCuda); - - // Insert the quantizers - insertQuantizers(graphView, nbBits, 1.0); - - // Adjust the quantizers step-sizes - adjustQuantizersStepSizes(graphView, inputStats, paramStats); + float mean = (tensor->mean()).get<float> (0); + std::cout << " MEAN = " << mean << std::endl; } } \ No newline at end of file -- GitLab From 712bdd8a6fe9699e5f8abf4312a3d2cfff081ce2 Mon Sep 17 00:00:00 2001 From: bhalimi <benjamin.halimi@cea.fr> Date: Tue, 21 Jan 2025 12:28:19 +0000 Subject: [PATCH 44/60] rework the ReLU handling --- src/QAT/QAT_LSQ.cpp | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/src/QAT/QAT_LSQ.cpp b/src/QAT/QAT_LSQ.cpp index 04f2027..f9ce554 100644 --- a/src/QAT/QAT_LSQ.cpp +++ b/src/QAT/QAT_LSQ.cpp @@ -82,13 +82,19 @@ static void setupInputQuantizers(std::shared_ptr<GraphView> graphView, size_t nb bool nodeHasParent = static_cast<bool> (linearNode->getParents()[0]); // XXX is this safe ? 
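    // (Presumably yes: getParents() keeps one slot per input and holds a null
    //  shared_ptr when that input is unconnected, so the bool cast above acts
    //  as a presence test for the first parent.)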
- if (nodeHasParent) { - auto parentNode = linearNode->getParents()[0]; - if (parentNode->type() == "ReLU") { + if (nodeHasParent) + { + bool allParentsAreReLU = true; + for (auto parentNode : linearNode->getParents()) + if (parentNode->type() != "ReLU") + allParentsAreReLU = false; + + if (allParentsAreReLU) { auto quantizerOp = std::static_pointer_cast<LSQ_Op> (quantizerNode->getOperator()); quantizerOp->range() = unsignedRange; - graphView->replace({parentNode}, {}); } + + // TODO : remove the ReLUs when possible } // Insert the quantizer in the graphView ... -- GitLab From 352026d34e883b39e91b622865cbec36c6df48d5 Mon Sep 17 00:00:00 2001 From: bhalimi <benjamin.halimi@cea.fr> Date: Tue, 21 Jan 2025 15:14:32 +0000 Subject: [PATCH 45/60] revert changes for debug --- include/aidge/quantization/QAT/QAT_LSQ.hpp | 6 +- python_binding/pybind_QAT_LSQ.cpp | 4 +- src/QAT/QAT_LSQ.cpp | 199 ++++++++++++++++++++- 3 files changed, 204 insertions(+), 5 deletions(-) diff --git a/include/aidge/quantization/QAT/QAT_LSQ.hpp b/include/aidge/quantization/QAT/QAT_LSQ.hpp index 9827ee2..4dc7048 100644 --- a/include/aidge/quantization/QAT/QAT_LSQ.hpp +++ b/include/aidge/quantization/QAT/QAT_LSQ.hpp @@ -27,9 +27,11 @@ namespace QuantLSQ { * @param graphView The GraphView containing the network to quantize. * @param nbBits Number of quantization bits. */ -void setupQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits); -void devLSQ(std::shared_ptr<Tensor> tensor); +//void setupQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits); +//void devLSQ(std::shared_ptr<Tensor> tensor); + +void insertAndInitQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits, std::shared_ptr<Tensor> calibrationData); } // namespace QuantLSQ } // namespace Aidge diff --git a/python_binding/pybind_QAT_LSQ.cpp b/python_binding/pybind_QAT_LSQ.cpp index 0b9fcc2..cb5b7f0 100644 --- a/python_binding/pybind_QAT_LSQ.cpp +++ b/python_binding/pybind_QAT_LSQ.cpp @@ -23,9 +23,11 @@ void init_QAT_LSQ(py::module &m) { auto mQuantLSQ = m.def_submodule("lsq"); +/* mQuantLSQ.def("setup_quantizers", &QuantLSQ::setupQuantizers, py::arg("network"), py::arg("nb_bits")); - mQuantLSQ.def("dev_lsq", &QuantLSQ::devLSQ, py::arg("tensor")); +*/ + mQuantLSQ.def("insert_and_init_quantizers", &QuantLSQ::insertAndInitQuantizers, py::arg("network"), py::arg("nb_bits"), py::arg("calibration_data")); } } // namespace Aidge diff --git a/src/QAT/QAT_LSQ.cpp b/src/QAT/QAT_LSQ.cpp index f9ce554..e52bafb 100644 --- a/src/QAT/QAT_LSQ.cpp +++ b/src/QAT/QAT_LSQ.cpp @@ -21,6 +21,201 @@ #include "aidge/graph/Matching.hpp" #include "aidge/recipes/QuantRecipes.hpp" + +namespace Aidge { + +static void insertQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits, float stepSize) +{ + const auto matches = SinglePassGraphMatching(graphView).match("(Conv2D#|FC#)"); + + for (const auto& match : matches) + { + auto linearNode = match.graph->rootNode(); + + std::pair<int, int> signedRange = {-std::pow(2, nbBits - 1), std::pow(2, nbBits - 1) - 1}; + std::pair<int, int> unsignedRange = {0, std::pow(2, nbBits) - 1}; + + // INPUT QUANTIZERS INSERTION + + // TODO : double check this, and use createUniqueName() + auto inputQuantizerName = makeUniqueName(linearNode->name() + "_lsq_i", graphView); + auto inputQuantizerNode = LSQ(signedRange, inputQuantizerName); + + // Set the step size + + auto inputStepSizeOp = inputQuantizerNode->getParent(1)->getOperator(); + auto inputStepSizeTensor = std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}})); + 
inputStepSizeOp->setOutput(0, inputStepSizeTensor); + + // Absorb the ReLU when possible ... + + // XXX is this safe ??? + bool nodeHasParent = static_cast<bool> (linearNode->getParents()[0]); + // bool nodeHasParent = (linearNode->getParents().size() != 0); + + if (nodeHasParent) { + auto parentNode = linearNode->getParents()[0]; + if (parentNode->type() == "ReLU") { + auto inputQuantizerOp = std::static_pointer_cast<LSQ_Op> (inputQuantizerNode->getOperator()); + inputQuantizerOp->range() = unsignedRange; + graphView->replace({parentNode}, {}); + } + } + + // We need to handle the case where the linear node is the first one ... + + if (nodeHasParent) { + graphView->insertParent(linearNode, inputQuantizerNode, 0, 0, 0); + } else { + inputQuantizerNode->addChild(graphView); + graphView->add(inputQuantizerNode); + } + + // PARAM QUANTIZERS INSERTION + + // TODO : double check this, and use createUniqueName() + auto paramQuantizerName = makeUniqueName(linearNode->name() + "_lsq_p", graphView); + auto paramQuantizerNode = LSQ(signedRange, paramQuantizerName); + graphView->insertParent(linearNode, paramQuantizerNode, 1, 0, 0); + + // Set the step size + + auto paramStepSizeOp = paramQuantizerNode->getParent(1)->getOperator(); + auto paramStepSizeTensor = std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}})); + paramStepSizeOp->setOutput(0, paramStepSizeTensor); + } + +} + +static float getTensorAbsMean(std::shared_ptr<Tensor> tensor) +{ + auto backend = tensor->backend(); + if (backend == "cuda") + tensor->setBackend("cpu"); + + float acc = 0; + float* castedTensor = static_cast<float *> (tensor->getImpl()->rawPtr()); + for(std::size_t i = 0; i < tensor->size(); i++) + acc += std::abs(castedTensor[i]); + acc /= static_cast<float> (tensor->size()); + + if (backend == "cuda") + tensor->setBackend("cuda"); + + return acc; +} + +static std::map<std::string, float> collectInputStats(std::shared_ptr<GraphView> graphView, std::shared_ptr<Tensor> calibrationData, bool useCuda) +{ + // Propagate the calibration tensor + + SequentialScheduler scheduler(graphView); + scheduler.resetScheduling(); + scheduler.forward(true, {calibrationData}); + + // Store the input tensor statistics + + if (useCuda) + graphView->setBackend("cpu"); + + std::map<std::string, float> inputStats; + for (auto node : graphView->getNodes()) + { + if (node->type() == "FC" || node->type() == "Conv2D") // TODO: use graph matching !!! + { + const auto op = std::static_pointer_cast<LSQ_Op>(node->getOperator()); + float inputAbsMean = getTensorAbsMean(op->getInput(0)); + inputStats.insert(std::make_pair(node->name(), inputAbsMean)); + std::cout << node->name() << " -> " << inputAbsMean << std::endl; + } + } + + if (useCuda) + graphView->setBackend("cuda"); + + return inputStats; +} + +static std::map<std::string, float> collectParamStats(std::shared_ptr<GraphView> graphView, bool useCuda) +{ + if (useCuda) + graphView->setBackend("cpu"); + + std::map<std::string, float> paramStats; + for (auto node : graphView->getNodes()) + { + if (node->type() == "FC" || node->type() == "Conv2D") // TODO: use graph matching !!! 
+ { + const auto op = std::static_pointer_cast<LSQ_Op>(node->getOperator()); + float paramAbsMean = getTensorAbsMean(op->getInput(1)); + paramStats.insert(std::make_pair(node->name(), paramAbsMean)); + std::cout << node->name() << " -> " << paramAbsMean << std::endl; + } + } + + if (useCuda) + graphView->setBackend("cuda"); + + return paramStats; +} + +static void adjustQuantizersStepSizes(std::shared_ptr<GraphView> graphView, std::map<std::string, float> inputStats, std::map<std::string, float> paramStats) +{ + const auto matches = SinglePassGraphMatching(graphView).match("(Conv2D#|FC#)"); + + for (const auto& match : matches) + { + auto linearNode = match.graph->rootNode(); + + // INPUT QUANTIZERS STEP-SIZES + + auto inputQuantNode = linearNode->getParent(0); + auto inputQuantOp = std::static_pointer_cast<LSQ_Op>(inputQuantNode->getOperator()); + + float absMean = inputStats[linearNode->name()]; + float stepSize = 2.0f * (absMean / std::sqrt(inputQuantOp->range().second)); + + auto inputStepSizeOp = inputQuantNode->getParent(1)->getOperator(); + // XXX inputStepSizeOp->setOutput(0, std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}}))); + auto inputStepSizeTensor = std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}})); + inputStepSizeOp->setOutput(0, inputStepSizeTensor); + + // PARAM QUANTIZERS STEP-SIZES + + auto paramQuantNode = linearNode->getParent(1); + auto paramQuantOp = std::static_pointer_cast<LSQ_Op>(paramQuantNode->getOperator()); + + absMean = paramStats[linearNode->name()]; + stepSize = 2.0f * (absMean / std::sqrt(paramQuantOp->range().second)); + + auto paramStepSizeOp = paramQuantNode->getParent(1)->getOperator(); + // XXX paramStepSizeOp->setOutput(0, std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}}))); + auto paramStepSizeTensor = std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}})); + paramStepSizeOp->setOutput(0, paramStepSizeTensor); + } +} + +void QuantLSQ::insertAndInitQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits, std::shared_ptr<Tensor> calibrationData) +{ + bool useCuda = (calibrationData->backend() == "cuda"); + + // Collect the tensor statisics + auto inputStats = collectInputStats(graphView, calibrationData, useCuda); + + auto paramStats = collectParamStats(graphView, useCuda); + + // Insert the quantizers + insertQuantizers(graphView, nbBits, 1.0); + + // Adjust the quantizers step-sizes + adjustQuantizersStepSizes(graphView, inputStats, paramStats); +} +} + + +/* + XXX XXX XXX + namespace Aidge { static float getTensorAbsMean(std::shared_ptr<Tensor> tensor) @@ -146,5 +341,5 @@ void QuantLSQ::devLSQ(std::shared_ptr<Tensor> tensor) float mean = (tensor->mean()).get<float> (0); std::cout << " MEAN = " << mean << std::endl; } - -} \ No newline at end of file +} +*/ \ No newline at end of file -- GitLab From e6d14185f40985e5f79ecfcbd06bd81cc0c8255e Mon Sep 17 00:00:00 2001 From: bhalimi <benjamin.halimi@cea.fr> Date: Wed, 22 Jan 2025 12:47:59 +0000 Subject: [PATCH 46/60] re-apply the LSQ changes --- include/aidge/quantization/QAT/QAT_LSQ.hpp | 6 +- python_binding/pybind_QAT_LSQ.cpp | 6 +- src/QAT/QAT_LSQ.cpp | 258 ++++++++++----------- 3 files changed, 133 insertions(+), 137 deletions(-) diff --git a/include/aidge/quantization/QAT/QAT_LSQ.hpp b/include/aidge/quantization/QAT/QAT_LSQ.hpp index 4dc7048..68ce8e7 100644 --- a/include/aidge/quantization/QAT/QAT_LSQ.hpp +++ b/include/aidge/quantization/QAT/QAT_LSQ.hpp @@ -28,10 +28,8 @@ namespace QuantLSQ { * @param nbBits Number of quantization bits. 
*/ -//void setupQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits); -//void devLSQ(std::shared_ptr<Tensor> tensor); - -void insertAndInitQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits, std::shared_ptr<Tensor> calibrationData); +void setupQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits); +void devLSQ(std::shared_ptr<Tensor> tensor); } // namespace QuantLSQ } // namespace Aidge diff --git a/python_binding/pybind_QAT_LSQ.cpp b/python_binding/pybind_QAT_LSQ.cpp index cb5b7f0..0dd4267 100644 --- a/python_binding/pybind_QAT_LSQ.cpp +++ b/python_binding/pybind_QAT_LSQ.cpp @@ -23,11 +23,11 @@ void init_QAT_LSQ(py::module &m) { auto mQuantLSQ = m.def_submodule("lsq"); -/* + mQuantLSQ.def("setup_quantizers", &QuantLSQ::setupQuantizers, py::arg("network"), py::arg("nb_bits")); mQuantLSQ.def("dev_lsq", &QuantLSQ::devLSQ, py::arg("tensor")); -*/ - mQuantLSQ.def("insert_and_init_quantizers", &QuantLSQ::insertAndInitQuantizers, py::arg("network"), py::arg("nb_bits"), py::arg("calibration_data")); + + //mQuantLSQ.def("insert_and_init_quantizers", &QuantLSQ::insertAndInitQuantizers, py::arg("network"), py::arg("nb_bits"), py::arg("calibration_data")); } } // namespace Aidge diff --git a/src/QAT/QAT_LSQ.cpp b/src/QAT/QAT_LSQ.cpp index e52bafb..66e8ec7 100644 --- a/src/QAT/QAT_LSQ.cpp +++ b/src/QAT/QAT_LSQ.cpp @@ -21,6 +21,134 @@ #include "aidge/graph/Matching.hpp" #include "aidge/recipes/QuantRecipes.hpp" +namespace Aidge { + +static float getTensorAbsMean(std::shared_ptr<Tensor> tensor) +{ + auto valueTensor = (*tensor).abs().mean(); + std::shared_ptr<Tensor> fallback; + const Tensor& localTensor = valueTensor.refCastFrom(fallback, DataType::Float32, "cpu"); + return localTensor.get<float>(0); +} + +// INIT THE STEP SIZE OF A QUANTIZER NODE + +static bool initStepSize(std::shared_ptr<Node> quantizer) +{ + const auto quantizerOp = std::static_pointer_cast<LSQ_Op>(quantizer->getOperator()); + + float inputAbsMean = getTensorAbsMean(quantizerOp->getInput(0)); + + float stepSize = 2.0f * (inputAbsMean / std::sqrt(quantizerOp->range().second)); + + auto stepSizeTensor = std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}})); + + // XXX Manage backend here ? + stepSizeTensor->setBackend(quantizerOp->getInput(0)->backend()); + stepSizeTensor->setDataType(quantizerOp->getInput(0)->dataType()); + + auto stepSizeProducer = quantizer->getParent(1); + + stepSizeProducer->getOperator()->setOutput(0, stepSizeTensor); + + std::cout << " [ INIT STEP SIZE = " << stepSize << " ] " << std::endl; + + return false; +} + +// INPUT QUANTIZERS INSERTION + +static void setupInputQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits) +{ + const auto matches = SinglePassGraphMatching(graphView).match("(Conv2D#|FC#)"); + + for (const auto& match : matches) + { + auto linearNode = match.graph->rootNode(); + + std::pair<int, int> signedRange = {-std::pow(2, nbBits - 1), std::pow(2, nbBits - 1) - 1}; + std::pair<int, int> unsignedRange = {0, std::pow(2, nbBits) - 1}; + + // Create the input quantizer node + + auto quantizerName = makeUniqueName(linearNode->name() + "_lsq_i", graphView); + auto quantizerNode = LSQ(signedRange, quantizerName); + + // Init the step-size using the node call stack + + quantizerNode->addBeforeForward([quantizerNode](){ return initStepSize(quantizerNode); }); + + // Absorb the ReLU when possible ... + + bool nodeHasParent = static_cast<bool> (linearNode->getParents()[0]); // XXX is this safe ? 
+ + if (nodeHasParent) + { + bool allParentsAreReLU = true; + for (auto parentNode : linearNode->getParents()) + if (parentNode->type() != "ReLU") + allParentsAreReLU = false; + + if (allParentsAreReLU) { + auto quantizerOp = std::static_pointer_cast<LSQ_Op> (quantizerNode->getOperator()); + quantizerOp->range() = unsignedRange; + } + + // TODO : remove the ReLUs when possible + } + + // Insert the quantizer in the graphView ... + // (We need to handle the case where the linear node is the first one) + + if (nodeHasParent) { + graphView->insertParent(linearNode, quantizerNode, 0, 0, 0); + } else { + quantizerNode->addChild(graphView); + graphView->add(quantizerNode); + } + } +} + +// PARAM QUANTIZERS INSERTION + +static void setupParamQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits) +{ + const auto matches = SinglePassGraphMatching(graphView).match("(Conv2D#|FC#)"); + + std::pair<int, int> signedRange = {-std::pow(2, nbBits - 1), std::pow(2, nbBits - 1) - 1}; + + for (const auto& match : matches) + { + auto linearNode = match.graph->rootNode(); + + // TODO : double check this, and use createUniqueName() + auto quantizerName = makeUniqueName(linearNode->name() + "_lsq_p", graphView); + auto quantizerNode = LSQ(signedRange, quantizerName); + + // Init the step-size using the node call stack + + quantizerNode->addBeforeForward([quantizerNode](){ return initStepSize(quantizerNode); }); + + // Insert the quantizer in the graphView + + graphView->insertParent(linearNode, quantizerNode, 1, 0, 0); + } +} + +void QuantLSQ::setupQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits) +{ + setupInputQuantizers(graphView, nbBits); + setupParamQuantizers(graphView, nbBits); +} + +void QuantLSQ::devLSQ(std::shared_ptr<Tensor> tensor) +{ + float mean = (tensor->mean()).get<float> (0); + std::cout << " MEAN = " << mean << std::endl; +} +} + +/* namespace Aidge { @@ -212,134 +340,4 @@ void QuantLSQ::insertAndInitQuantizers(std::shared_ptr<GraphView> graphView, siz } } - -/* - XXX XXX XXX - -namespace Aidge { - -static float getTensorAbsMean(std::shared_ptr<Tensor> tensor) -{ - auto valueTensor = (*tensor).abs().mean(); - std::shared_ptr<Tensor> fallback; - const Tensor& localTensor = valueTensor.refCastFrom(fallback, DataType::Float32, "cpu"); - return localTensor.get<float>(0); -} - -// INIT THE STEP SIZE OF A QUANTIZER NODE - -static bool initStepSize(std::shared_ptr<Node> quantizer) -{ - const auto quantizerOp = std::static_pointer_cast<LSQ_Op>(quantizer->getOperator()); - - float inputAbsMean = getTensorAbsMean(quantizerOp->getInput(0)); - - float stepSize = 2.0f * (inputAbsMean / std::sqrt(quantizerOp->range().second)); - - auto stepSizeTensor = std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}})); - - // XXX Manage backend here ? 
- stepSizeTensor->setBackend(quantizerOp->getInput(0)->backend()); - stepSizeTensor->setDataType(quantizerOp->getInput(0)->dataType()); - - auto stepSizeProducer = quantizer->getParent(1); - - stepSizeProducer->getOperator()->setOutput(0, stepSizeTensor); - - std::cout << " [ INIT STEP SIZE = " << stepSize << " ] " << std::endl; - - return false; -} - -// INPUT QUANTIZERS INSERTION - -static void setupInputQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits) -{ - const auto matches = SinglePassGraphMatching(graphView).match("(Conv2D#|FC#)"); - - for (const auto& match : matches) - { - auto linearNode = match.graph->rootNode(); - - std::pair<int, int> signedRange = {-std::pow(2, nbBits - 1), std::pow(2, nbBits - 1) - 1}; - std::pair<int, int> unsignedRange = {0, std::pow(2, nbBits) - 1}; - - // Create the input quantizer node - - auto quantizerName = makeUniqueName(linearNode->name() + "_lsq_i", graphView); - auto quantizerNode = LSQ(signedRange, quantizerName); - - // Init the step-size using the node call stack - - quantizerNode->addBeforeForward([quantizerNode](){ return initStepSize(quantizerNode); }); - - // Absorb the ReLU when possible ... - - bool nodeHasParent = static_cast<bool> (linearNode->getParents()[0]); // XXX is this safe ? - - if (nodeHasParent) - { - bool allParentsAreReLU = true; - for (auto parentNode : linearNode->getParents()) - if (parentNode->type() != "ReLU") - allParentsAreReLU = false; - - if (allParentsAreReLU) { - auto quantizerOp = std::static_pointer_cast<LSQ_Op> (quantizerNode->getOperator()); - quantizerOp->range() = unsignedRange; - } - - // TODO : remove the ReLUs when possible - } - - // Insert the quantizer in the graphView ... - // (We need to handle the case where the linear node is the first one) - - if (nodeHasParent) { - graphView->insertParent(linearNode, quantizerNode, 0, 0, 0); - } else { - quantizerNode->addChild(graphView); - graphView->add(quantizerNode); - } - } -} - -// PARAM QUANTIZERS INSERTION - -static void setupParamQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits) -{ - const auto matches = SinglePassGraphMatching(graphView).match("(Conv2D#|FC#)"); - - std::pair<int, int> signedRange = {-std::pow(2, nbBits - 1), std::pow(2, nbBits - 1) - 1}; - - for (const auto& match : matches) - { - auto linearNode = match.graph->rootNode(); - - // TODO : double check this, and use createUniqueName() - auto quantizerName = makeUniqueName(linearNode->name() + "_lsq_p", graphView); - auto quantizerNode = LSQ(signedRange, quantizerName); - - // Init the step-size using the node call stack - - quantizerNode->addBeforeForward([quantizerNode](){ return initStepSize(quantizerNode); }); - - // Insert the quantizer in the graphView - - graphView->insertParent(linearNode, quantizerNode, 1, 0, 0); - } -} - -void QuantLSQ::setupQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits) -{ - setupInputQuantizers(graphView, nbBits); - setupParamQuantizers(graphView, nbBits); -} - -void QuantLSQ::devLSQ(std::shared_ptr<Tensor> tensor) -{ - float mean = (tensor->mean()).get<float> (0); - std::cout << " MEAN = " << mean << std::endl; -} -} */ \ No newline at end of file -- GitLab From 7307439f9a8e33c919ddf9372538c70aa3d1ff0e Mon Sep 17 00:00:00 2001 From: bhalimi <benjamin.halimi@cea.fr> Date: Mon, 27 Jan 2025 13:04:58 +0000 Subject: [PATCH 47/60] refactor the LSQ code --- include/aidge/quantization/QAT/QAT_LSQ.hpp | 1 - python_binding/pybind_QAT_LSQ.cpp | 5 - src/QAT/QAT_LSQ.cpp | 235 +++------------------ 3 files changed, 30 
insertions(+), 211 deletions(-) diff --git a/include/aidge/quantization/QAT/QAT_LSQ.hpp b/include/aidge/quantization/QAT/QAT_LSQ.hpp index 68ce8e7..922187a 100644 --- a/include/aidge/quantization/QAT/QAT_LSQ.hpp +++ b/include/aidge/quantization/QAT/QAT_LSQ.hpp @@ -29,7 +29,6 @@ namespace QuantLSQ { */ void setupQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits); -void devLSQ(std::shared_ptr<Tensor> tensor); } // namespace QuantLSQ } // namespace Aidge diff --git a/python_binding/pybind_QAT_LSQ.cpp b/python_binding/pybind_QAT_LSQ.cpp index 0dd4267..4bba3b6 100644 --- a/python_binding/pybind_QAT_LSQ.cpp +++ b/python_binding/pybind_QAT_LSQ.cpp @@ -23,11 +23,6 @@ void init_QAT_LSQ(py::module &m) { auto mQuantLSQ = m.def_submodule("lsq"); - mQuantLSQ.def("setup_quantizers", &QuantLSQ::setupQuantizers, py::arg("network"), py::arg("nb_bits")); - mQuantLSQ.def("dev_lsq", &QuantLSQ::devLSQ, py::arg("tensor")); - - //mQuantLSQ.def("insert_and_init_quantizers", &QuantLSQ::insertAndInitQuantizers, py::arg("network"), py::arg("nb_bits"), py::arg("calibration_data")); - } } // namespace Aidge diff --git a/src/QAT/QAT_LSQ.cpp b/src/QAT/QAT_LSQ.cpp index 66e8ec7..80e8a05 100644 --- a/src/QAT/QAT_LSQ.cpp +++ b/src/QAT/QAT_LSQ.cpp @@ -21,25 +21,50 @@ #include "aidge/graph/Matching.hpp" #include "aidge/recipes/QuantRecipes.hpp" -namespace Aidge { + +namespace Aidge +{ static float getTensorAbsMean(std::shared_ptr<Tensor> tensor) { + //std::cout << " GET TENSOR ABS MEAN " << std::endl; auto valueTensor = (*tensor).abs().mean(); std::shared_ptr<Tensor> fallback; const Tensor& localTensor = valueTensor.refCastFrom(fallback, DataType::Float32, "cpu"); return localTensor.get<float>(0); } +static float getTensorStd(std::shared_ptr<Tensor> tensor) +{ + auto valueTensor = (*tensor); + + auto skewedTensor = valueTensor - valueTensor.mean(); + auto squaredTensor = skewedTensor * skewedTensor; + auto varianceTensor = squaredTensor.mean(); + + std::shared_ptr<Tensor> fallback; + auto localTensor = varianceTensor.refCastFrom(fallback, DataType::Float32, "cpu"); + + float variance = localTensor.get<float>(0); + return std::sqrt(variance); +} + + // INIT THE STEP SIZE OF A QUANTIZER NODE static bool initStepSize(std::shared_ptr<Node> quantizer) { const auto quantizerOp = std::static_pointer_cast<LSQ_Op>(quantizer->getOperator()); - float inputAbsMean = getTensorAbsMean(quantizerOp->getInput(0)); + // This formula is the one proposed in the paper ... + + // float inputAbsMean = getTensorAbsMean(quantizerOp->getInput(0)); + // float stepSize = 2.0f * (inputAbsMean / std::sqrt(quantizerOp->range().second)); - float stepSize = 2.0f * (inputAbsMean / std::sqrt(quantizerOp->range().second)); + // .. but this formula seems to work better !!! 
+ + float inputStd = getTensorStd(quantizerOp->getInput(0)); + float stepSize = 8.0f * (inputStd / (quantizerOp->range().second)); auto stepSizeTensor = std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}})); @@ -56,8 +81,6 @@ static bool initStepSize(std::shared_ptr<Node> quantizer) return false; } -// INPUT QUANTIZERS INSERTION - static void setupInputQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits) { const auto matches = SinglePassGraphMatching(graphView).match("(Conv2D#|FC#)"); @@ -137,207 +160,9 @@ static void setupParamQuantizers(std::shared_ptr<GraphView> graphView, size_t nb void QuantLSQ::setupQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits) { + sanitizeNodeNames(graphView); setupInputQuantizers(graphView, nbBits); setupParamQuantizers(graphView, nbBits); } -void QuantLSQ::devLSQ(std::shared_ptr<Tensor> tensor) -{ - float mean = (tensor->mean()).get<float> (0); - std::cout << " MEAN = " << mean << std::endl; -} -} - -/* - -namespace Aidge { - -static void insertQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits, float stepSize) -{ - const auto matches = SinglePassGraphMatching(graphView).match("(Conv2D#|FC#)"); - - for (const auto& match : matches) - { - auto linearNode = match.graph->rootNode(); - - std::pair<int, int> signedRange = {-std::pow(2, nbBits - 1), std::pow(2, nbBits - 1) - 1}; - std::pair<int, int> unsignedRange = {0, std::pow(2, nbBits) - 1}; - - // INPUT QUANTIZERS INSERTION - - // TODO : double check this, and use createUniqueName() - auto inputQuantizerName = makeUniqueName(linearNode->name() + "_lsq_i", graphView); - auto inputQuantizerNode = LSQ(signedRange, inputQuantizerName); - - // Set the step size - - auto inputStepSizeOp = inputQuantizerNode->getParent(1)->getOperator(); - auto inputStepSizeTensor = std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}})); - inputStepSizeOp->setOutput(0, inputStepSizeTensor); - - // Absorb the ReLU when possible ... - - // XXX is this safe ??? - bool nodeHasParent = static_cast<bool> (linearNode->getParents()[0]); - // bool nodeHasParent = (linearNode->getParents().size() != 0); - - if (nodeHasParent) { - auto parentNode = linearNode->getParents()[0]; - if (parentNode->type() == "ReLU") { - auto inputQuantizerOp = std::static_pointer_cast<LSQ_Op> (inputQuantizerNode->getOperator()); - inputQuantizerOp->range() = unsignedRange; - graphView->replace({parentNode}, {}); - } - } - - // We need to handle the case where the linear node is the first one ... 
- - if (nodeHasParent) { - graphView->insertParent(linearNode, inputQuantizerNode, 0, 0, 0); - } else { - inputQuantizerNode->addChild(graphView); - graphView->add(inputQuantizerNode); - } - - // PARAM QUANTIZERS INSERTION - - // TODO : double check this, and use createUniqueName() - auto paramQuantizerName = makeUniqueName(linearNode->name() + "_lsq_p", graphView); - auto paramQuantizerNode = LSQ(signedRange, paramQuantizerName); - graphView->insertParent(linearNode, paramQuantizerNode, 1, 0, 0); - - // Set the step size - - auto paramStepSizeOp = paramQuantizerNode->getParent(1)->getOperator(); - auto paramStepSizeTensor = std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}})); - paramStepSizeOp->setOutput(0, paramStepSizeTensor); - } - -} - -static float getTensorAbsMean(std::shared_ptr<Tensor> tensor) -{ - auto backend = tensor->backend(); - if (backend == "cuda") - tensor->setBackend("cpu"); - - float acc = 0; - float* castedTensor = static_cast<float *> (tensor->getImpl()->rawPtr()); - for(std::size_t i = 0; i < tensor->size(); i++) - acc += std::abs(castedTensor[i]); - acc /= static_cast<float> (tensor->size()); - - if (backend == "cuda") - tensor->setBackend("cuda"); - - return acc; -} - -static std::map<std::string, float> collectInputStats(std::shared_ptr<GraphView> graphView, std::shared_ptr<Tensor> calibrationData, bool useCuda) -{ - // Propagate the calibration tensor - - SequentialScheduler scheduler(graphView); - scheduler.resetScheduling(); - scheduler.forward(true, {calibrationData}); - - // Store the input tensor statistics - - if (useCuda) - graphView->setBackend("cpu"); - - std::map<std::string, float> inputStats; - for (auto node : graphView->getNodes()) - { - if (node->type() == "FC" || node->type() == "Conv2D") // TODO: use graph matching !!! - { - const auto op = std::static_pointer_cast<LSQ_Op>(node->getOperator()); - float inputAbsMean = getTensorAbsMean(op->getInput(0)); - inputStats.insert(std::make_pair(node->name(), inputAbsMean)); - std::cout << node->name() << " -> " << inputAbsMean << std::endl; - } - } - - if (useCuda) - graphView->setBackend("cuda"); - - return inputStats; -} - -static std::map<std::string, float> collectParamStats(std::shared_ptr<GraphView> graphView, bool useCuda) -{ - if (useCuda) - graphView->setBackend("cpu"); - - std::map<std::string, float> paramStats; - for (auto node : graphView->getNodes()) - { - if (node->type() == "FC" || node->type() == "Conv2D") // TODO: use graph matching !!! 
- { - const auto op = std::static_pointer_cast<LSQ_Op>(node->getOperator()); - float paramAbsMean = getTensorAbsMean(op->getInput(1)); - paramStats.insert(std::make_pair(node->name(), paramAbsMean)); - std::cout << node->name() << " -> " << paramAbsMean << std::endl; - } - } - - if (useCuda) - graphView->setBackend("cuda"); - - return paramStats; -} - -static void adjustQuantizersStepSizes(std::shared_ptr<GraphView> graphView, std::map<std::string, float> inputStats, std::map<std::string, float> paramStats) -{ - const auto matches = SinglePassGraphMatching(graphView).match("(Conv2D#|FC#)"); - - for (const auto& match : matches) - { - auto linearNode = match.graph->rootNode(); - - // INPUT QUANTIZERS STEP-SIZES - - auto inputQuantNode = linearNode->getParent(0); - auto inputQuantOp = std::static_pointer_cast<LSQ_Op>(inputQuantNode->getOperator()); - - float absMean = inputStats[linearNode->name()]; - float stepSize = 2.0f * (absMean / std::sqrt(inputQuantOp->range().second)); - - auto inputStepSizeOp = inputQuantNode->getParent(1)->getOperator(); - // XXX inputStepSizeOp->setOutput(0, std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}}))); - auto inputStepSizeTensor = std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}})); - inputStepSizeOp->setOutput(0, inputStepSizeTensor); - - // PARAM QUANTIZERS STEP-SIZES - - auto paramQuantNode = linearNode->getParent(1); - auto paramQuantOp = std::static_pointer_cast<LSQ_Op>(paramQuantNode->getOperator()); - - absMean = paramStats[linearNode->name()]; - stepSize = 2.0f * (absMean / std::sqrt(paramQuantOp->range().second)); - - auto paramStepSizeOp = paramQuantNode->getParent(1)->getOperator(); - // XXX paramStepSizeOp->setOutput(0, std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}}))); - auto paramStepSizeTensor = std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}})); - paramStepSizeOp->setOutput(0, paramStepSizeTensor); - } -} - -void QuantLSQ::insertAndInitQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits, std::shared_ptr<Tensor> calibrationData) -{ - bool useCuda = (calibrationData->backend() == "cuda"); - - // Collect the tensor statisics - auto inputStats = collectInputStats(graphView, calibrationData, useCuda); - - auto paramStats = collectParamStats(graphView, useCuda); - - // Insert the quantizers - insertQuantizers(graphView, nbBits, 1.0); - - // Adjust the quantizers step-sizes - adjustQuantizersStepSizes(graphView, inputStats, paramStats); -} -} - -*/ \ No newline at end of file +} \ No newline at end of file -- GitLab From 064fef3b9b7392bb700b631d117348a27136965c Mon Sep 17 00:00:00 2001 From: bhalimi <benjamin.halimi@cea.fr> Date: Mon, 27 Jan 2025 13:07:12 +0000 Subject: [PATCH 48/60] remove commented code --- src/backend/cuda/operator/LSQImpl.cpp | 20 +------------------- 1 file changed, 1 insertion(+), 19 deletions(-) diff --git a/src/backend/cuda/operator/LSQImpl.cpp b/src/backend/cuda/operator/LSQImpl.cpp index c66bd8a..fa45f21 100644 --- a/src/backend/cuda/operator/LSQImpl.cpp +++ b/src/backend/cuda/operator/LSQImpl.cpp @@ -52,19 +52,6 @@ void Aidge::LSQImpl_cuda::backward() { std::shared_ptr<Tensor> gra_int1 = op_.getInput(1)->grad(); std::shared_ptr<Tensor> gra_out0 = op_.getOutput(0)->grad(); - // XXX -/* - size_t tmp; - - cudaDeviceSetLimit(cudaLimitStackSize, 2048); - cudaDeviceGetLimit(&tmp, cudaLimitStackSize ); - printf(" stack limit = %ld \n", tmp); - - cudaDeviceSetLimit(cudaLimitMallocHeapSize, 100000000); - cudaDeviceGetLimit(&tmp, cudaLimitMallocHeapSize); - printf(" heap limit = %ld \n", 
tmp); -*/ - if (gra_int0->size() > mWorkspaceSize) { // std::cout << " reallocation " << sizeof(gra_int0) << " " << gra_int0->size() << std::endl; if (mWorkspace != nullptr) { @@ -87,12 +74,7 @@ void Aidge::LSQImpl_cuda::backward() { gra_int0->getImpl()->rawPtr(), gra_int1->getImpl()->rawPtr(), mWorkspace); -/* - gra_int1->setBackend("cpu"); - float *castedTensor = static_cast<float *> (gra_int1->getImpl()->rawPtr()); - std::cout << castedTensor[0] << std::endl; - gra_int1->setBackend("cuda"); -*/ + } Aidge::LSQImpl_cuda::~LSQImpl_cuda() { -- GitLab From 95ba99c966b6dd3002bbef24d20fb43cd9d437ed Mon Sep 17 00:00:00 2001 From: bhalimi <benjamin.halimi@cea.fr> Date: Mon, 27 Jan 2025 14:14:40 +0000 Subject: [PATCH 49/60] complete the PTQ float to double migration --- src/PTQ/CLE.cpp | 7 +------ src/PTQ/PTQ.cpp | 4 ++-- 2 files changed, 3 insertions(+), 8 deletions(-) diff --git a/src/PTQ/CLE.cpp b/src/PTQ/CLE.cpp index 63d3b45..c47c619 100644 --- a/src/PTQ/CLE.cpp +++ b/src/PTQ/CLE.cpp @@ -49,7 +49,7 @@ static void rescaleTensor(std::shared_ptr<Tensor> tensor, double scaling) mulOp.setDataType(tensor->dataType()); mulOp.setBackend(tensor->backend()); - std::shared_ptr<Aidge::Tensor> scalingTensor = std::make_shared<Aidge::Tensor>(Aidge::Array1D<float, 1> {scaling}); + std::shared_ptr<Aidge::Tensor> scalingTensor = std::make_shared<Aidge::Tensor>(Aidge::Array1D<double, 1> {scaling}); scalingTensor->setDataType(tensor->dataType()); scalingTensor->setBackend(tensor->backend()); @@ -120,12 +120,7 @@ void crossLayerEqualization(std::shared_ptr<GraphView> graphView, double targetD if (isAffine(node)) affineNodeVector.push_back(node); - if (affineNodeVector.empty()) { - Log::notice("No affine nodes found in the network. CLE cannot be applied."); - return; - } double maxRangeDelta; - int iteration = 0; do { diff --git a/src/PTQ/PTQ.cpp b/src/PTQ/PTQ.cpp index 3677ae0..e510880 100644 --- a/src/PTQ/PTQ.cpp +++ b/src/PTQ/PTQ.cpp @@ -72,13 +72,13 @@ bool checkArchitecture(std::shared_ptr<GraphView> graphView) return true; } -static void rescaleTensor(std::shared_ptr<Tensor> tensor, float scaling) +static void rescaleTensor(std::shared_ptr<Tensor> tensor, double scaling) { auto mulOp = Mul_Op(); mulOp.setDataType(tensor->dataType()); mulOp.setBackend(tensor->backend()); - std::shared_ptr<Aidge::Tensor> scalingTensor = std::make_shared<Aidge::Tensor>(Aidge::Array1D<float, 1> {scaling}); + std::shared_ptr<Aidge::Tensor> scalingTensor = std::make_shared<Aidge::Tensor>(Aidge::Array1D<double, 1> {scaling}); scalingTensor->setDataType(tensor->dataType()); scalingTensor->setBackend(tensor->backend()); -- GitLab From a30e2e52999b5874008971bde3005840295382c0 Mon Sep 17 00:00:00 2001 From: bhalimi <benjamin.halimi@cea.fr> Date: Mon, 27 Jan 2025 14:35:45 +0000 Subject: [PATCH 50/60] replace the couts with logs --- src/PTQ/CLE.cpp | 7 ++----- src/PTQ/Clipping.cpp | 2 +- src/PTQ/PTQ.cpp | 13 ++++++------- src/QAT/QAT_FixedQ.cpp | 6 +++--- src/QAT/QAT_LSQ.cpp | 3 +-- 5 files changed, 13 insertions(+), 18 deletions(-) diff --git a/src/PTQ/CLE.cpp b/src/PTQ/CLE.cpp index c47c619..cbfb91f 100644 --- a/src/PTQ/CLE.cpp +++ b/src/PTQ/CLE.cpp @@ -126,11 +126,8 @@ void crossLayerEqualization(std::shared_ptr<GraphView> graphView, double targetD { ++iteration; maxRangeDelta = 0.0; - //std::cout << " ----- " << std::endl; - //for (std::shared_ptr<Node> node : affineNodeVector) - // std::cout << getTensorAbsoluteMax(getWeightTensor(node)) << std::endl; - - for (std::size_t i = 0; i < (affineNodeVector.size() - 1); i++) + + for 
(size_t i = 0; i < (affineNodeVector.size() - 1); i++) { std::shared_ptr<Node> n1 = affineNodeVector[i]; std::shared_ptr<Node> n2 = affineNodeVector[i+1]; diff --git a/src/PTQ/Clipping.cpp b/src/PTQ/Clipping.cpp index 57ad7a8..66b0ab3 100644 --- a/src/PTQ/Clipping.cpp +++ b/src/PTQ/Clipping.cpp @@ -26,7 +26,7 @@ std::map<std::string, std::vector<int>> computeHistograms(std::map<std::string, std::shared_ptr<Node> firstNode = retrieveNodeVector(graphView)[0]; - //std::cout << " COMPUTING HISTOGRAMS ... " << std::endl; + // Log::debug(" COMPUTING HISTOGRAMS ... "); std::map<std::string, std::vector<int>> histograms; diff --git a/src/PTQ/PTQ.cpp b/src/PTQ/PTQ.cpp index e510880..073e5e0 100644 --- a/src/PTQ/PTQ.cpp +++ b/src/PTQ/PTQ.cpp @@ -987,7 +987,6 @@ void performSingleShiftApproximation(std::shared_ptr<GraphView> graphView, bool static void printScalingFactors(std::shared_ptr<GraphView> graphView) { - Log::info(" === SCALING FACTORS === "); for (auto node : retrieveNodeVector(graphView)) if (node->type() == "Scaling" || node->type() == "Quantizer") { @@ -1020,7 +1019,7 @@ static void printRanges(std::shared_ptr<GraphView> graphView, std::map<std::stri auto scheduling = scheduler.getStaticScheduling(); for (auto node : scheduling) if (node->type() == "Scaling") - fmt::println("{} range = {}", node->name(), valueRanges[node->name()]); + Log::info(" {} range = {} ", node->name(), valueRanges[node->name()]); } void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits, std::vector<std::shared_ptr<Tensor>> inputDataSet, Clipping clippingMode, bool noQuant, bool optimizeSigns, bool singleShift, bool useCuda, bool verbose) @@ -1049,13 +1048,13 @@ void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits, Log::info(" Computing the value ranges ..."); std::map<std::string, double> valueRanges = computeRanges(graphView, inputDataSet, true, useCuda); - //std::cout << " === RANGES (BEFORE ADJUST) ===" << std::endl; + //Log::info(" === RANGES (BEFORE ADJUST) ==="); //printRanges(graphView, valueRanges); Log::info(" Optimizing the clipping values ..."); valueRanges = adjustRanges(clippingMode, valueRanges, nbBits, graphView, inputDataSet, useCuda, verbose); - //std::cout << " === RANGES (AFTER ADJUST) ===" << std::endl; + //Log::info(" === RANGES (AFTER ADJUST) ==="); //printRanges(graphView, valueRanges); Log::info(" Normalizing the activations ..."); @@ -1076,7 +1075,7 @@ void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits, if (verbose) printScalingFactors(graphView); - //std::cout << " === SCALINGS (BEFORE CAST) ===" << std::endl; + //Log::info(" === SCALINGS (BEFORE CAST) ==="); //printScalingFactors(graphView); setupDataType(graphView, inputDataSet, initialDataType); @@ -1084,7 +1083,7 @@ void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits, if (useCuda) graphView->setBackend("cuda"); - //std::cout << " === SCALINGS (AFTER CAST) ===" << std::endl; + //Log::info(" === SCALINGS (AFTER CAST) ==="); //printScalingFactors(graphView); Log::info(" Reseting the scheduler ..."); @@ -1124,7 +1123,7 @@ void clearBiases(std::shared_ptr<GraphView> graphView) void devPTQ(std::shared_ptr<GraphView> graphView) { for (std::shared_ptr<Node> node : graphView->getNodes()) - fmt::println(" UUU : {}", node->name()); + Log::info(" UUU : {}", node->name()); } } diff --git a/src/QAT/QAT_FixedQ.cpp b/src/QAT/QAT_FixedQ.cpp index 9160b4a..6ada532 100644 --- a/src/QAT/QAT_FixedQ.cpp +++ b/src/QAT/QAT_FixedQ.cpp @@ -91,7 +91,7 @@ 
static std::map<std::string, float> collectInputStats(std::shared_ptr<GraphView> const auto op = std::static_pointer_cast<FixedQ_Op>(node->getOperator()); float inputStd = getTensorStd(op->getInput(0)); inputStats.insert(std::make_pair(node->name(), inputStd)); - fmt::println("{} -> {}", node->name(), inputStd); + Log::info(" {} -> {} ", node->name(), inputStd); } } @@ -108,7 +108,7 @@ static std::map<std::string, float> collectParamStats(std::shared_ptr<GraphView> const auto op = std::static_pointer_cast<FixedQ_Op>(node->getOperator()); float paramStd = getTensorStd(op->getInput(1)); paramStats.insert(std::make_pair(node->name(), paramStd)); - fmt::println("{} -> {}", node->name(), paramStd); + Log::info(" {} -> {} ", node->name(), paramStd); } } @@ -156,7 +156,7 @@ void QuantFixedQ::devQAT(std::shared_ptr<GraphView> graphView) scheduler.generateScheduling(); auto s = scheduler.getStaticScheduling(); for (std::shared_ptr<Node> node : s) - fmt::println(" name : {}", node->name()); + Log::info(" name : {} ", node->name()); } } \ No newline at end of file diff --git a/src/QAT/QAT_LSQ.cpp b/src/QAT/QAT_LSQ.cpp index 80e8a05..0508fc7 100644 --- a/src/QAT/QAT_LSQ.cpp +++ b/src/QAT/QAT_LSQ.cpp @@ -27,7 +27,6 @@ namespace Aidge static float getTensorAbsMean(std::shared_ptr<Tensor> tensor) { - //std::cout << " GET TENSOR ABS MEAN " << std::endl; auto valueTensor = (*tensor).abs().mean(); std::shared_ptr<Tensor> fallback; const Tensor& localTensor = valueTensor.refCastFrom(fallback, DataType::Float32, "cpu"); @@ -76,7 +75,7 @@ static bool initStepSize(std::shared_ptr<Node> quantizer) stepSizeProducer->getOperator()->setOutput(0, stepSizeTensor); - std::cout << " [ INIT STEP SIZE = " << stepSize << " ] " << std::endl; + Log::info(" [ INIT STEP SIZE = {} ] ", stepSize); return false; } -- GitLab From 6a52ae3d95cf9108e27ea1c80095cb8ac75ef943 Mon Sep 17 00:00:00 2001 From: bhalimi <benjamin.halimi@cea.fr> Date: Mon, 27 Jan 2025 14:53:30 +0000 Subject: [PATCH 51/60] minor change --- src/recipes/QuantRecipes.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/recipes/QuantRecipes.cpp b/src/recipes/QuantRecipes.cpp index 6e1dcdb..7f01b24 100644 --- a/src/recipes/QuantRecipes.cpp +++ b/src/recipes/QuantRecipes.cpp @@ -58,11 +58,11 @@ void insertBatchNormNodes(std::shared_ptr<GraphView> graphView) if (parentNode->type() == "Conv2D") { std::shared_ptr<Conv_Op<2>> convOperator = std::static_pointer_cast<Conv_Op<2>> (parentNode->getOperator()); - int nb_channels = convOperator->getInput(1)->dims()[0]; - fmt::println(" NB CHANNELS = {}", nb_channels); // TODO : remove this ... 
+ int nbChannels = convOperator->getInput(1)->dims()[0]; + Log::info(" NB CHANNELS = {} ", nbChannels); std::string batchnormNodeName = makeUniqueName(parentNode->name() + "_BN", graphView); - std::shared_ptr<Node> batchnormNode = BatchNorm<2>(nb_channels, 1e-5, 0.1, false, batchnormNodeName); + std::shared_ptr<Node> batchnormNode = BatchNorm<2>(nbChannels, 1e-5, 0.1, false, batchnormNodeName); batchnormNode->getOperator()->setDataType(DataType::Float32); batchnormNode->getOperator()->setBackend("cpu"); -- GitLab From 2353215b2678de012bd0f256449a436c47cc4dac Mon Sep 17 00:00:00 2001 From: bhalimi <benjamin.halimi@cea.fr> Date: Mon, 27 Jan 2025 15:03:44 +0000 Subject: [PATCH 52/60] move the PTQMetaOps files --- include/aidge/{quantization/PTQ => operator}/PTQMetaOps.hpp | 0 src/PTQ/PTQ.cpp | 2 +- src/{PTQ => operator}/PTQMetaOps.cpp | 0 3 files changed, 1 insertion(+), 1 deletion(-) rename include/aidge/{quantization/PTQ => operator}/PTQMetaOps.hpp (100%) rename src/{PTQ => operator}/PTQMetaOps.cpp (100%) diff --git a/include/aidge/quantization/PTQ/PTQMetaOps.hpp b/include/aidge/operator/PTQMetaOps.hpp similarity index 100% rename from include/aidge/quantization/PTQ/PTQMetaOps.hpp rename to include/aidge/operator/PTQMetaOps.hpp diff --git a/src/PTQ/PTQ.cpp b/src/PTQ/PTQ.cpp index 073e5e0..09b039f 100644 --- a/src/PTQ/PTQ.cpp +++ b/src/PTQ/PTQ.cpp @@ -12,7 +12,7 @@ #include "aidge/quantization/PTQ/CLE.hpp" #include "aidge/quantization/PTQ/Clipping.hpp" #include "aidge/quantization/PTQ/PTQ.hpp" -#include "aidge/quantization/PTQ/PTQMetaOps.hpp" +#include "aidge/operator/PTQMetaOps.hpp" #include "aidge/data/Tensor.hpp" diff --git a/src/PTQ/PTQMetaOps.cpp b/src/operator/PTQMetaOps.cpp similarity index 100% rename from src/PTQ/PTQMetaOps.cpp rename to src/operator/PTQMetaOps.cpp -- GitLab From a0245411c756cd56e31e1b2addf9837520d9ea1f Mon Sep 17 00:00:00 2001 From: bhalimi <benjamin.halimi@cea.fr> Date: Tue, 28 Jan 2025 10:06:53 +0000 Subject: [PATCH 53/60] fix an include --- src/operator/PTQMetaOps.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/operator/PTQMetaOps.cpp b/src/operator/PTQMetaOps.cpp index 77018c2..56245da 100644 --- a/src/operator/PTQMetaOps.cpp +++ b/src/operator/PTQMetaOps.cpp @@ -9,13 +9,12 @@ * ********************************************************************************/ -#include "aidge/quantization/PTQ/PTQMetaOps.hpp" +#include "aidge/operator/PTQMetaOps.hpp" #include <memory> #include <string> #include <utility> -//Operator #include "aidge/operator/Clip.hpp" #include "aidge/operator/Mul.hpp" #include "aidge/operator/Round.hpp" -- GitLab From 31385b7462bf438d0049eda771161ee5e9e55141 Mon Sep 17 00:00:00 2001 From: bhalimi <benjamin.halimi@cea.fr> Date: Fri, 31 Jan 2025 15:25:10 +0000 Subject: [PATCH 54/60] enable the cuda backend --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index b3c6d45..17dd74a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -61,7 +61,7 @@ option(PYBIND "python binding" OFF) option(WERROR "Warning as error" OFF) option(TEST "Enable tests" OFF) option(COVERAGE "Enable coverage" OFF) -option(CUDA "Enable CUDA backend" OFF) # XXX OFF +option(CUDA "Enable CUDA backend" ON) # XXX OFF option(ENABLE_ASAN "Enable ASan (AddressSanitizer) for runtime analysis of memory use (over/underflow, memory leak, ...)" OFF) ############################################## -- GitLab From e4332f40767585a27e494445887088a52f2d711b Mon Sep 17 00:00:00 2001 From: bhalimi 
<benjamin.halimi@cea.fr> Date: Fri, 31 Jan 2025 15:26:07 +0000 Subject: [PATCH 55/60] remove unused log --- src/PTQ/CLE.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/PTQ/CLE.cpp b/src/PTQ/CLE.cpp index cbfb91f..40b9e42 100644 --- a/src/PTQ/CLE.cpp +++ b/src/PTQ/CLE.cpp @@ -124,7 +124,6 @@ void crossLayerEqualization(std::shared_ptr<GraphView> graphView, double targetD do { - ++iteration; maxRangeDelta = 0.0; for (size_t i = 0; i < (affineNodeVector.size() - 1); i++) @@ -149,9 +148,6 @@ void crossLayerEqualization(std::shared_ptr<GraphView> graphView, double targetD } } while (maxRangeDelta > targetDelta); - - Log::notice("CLE completed after {} iterations. Final max range delta: {:.6f}", - iteration, maxRangeDelta); } } \ No newline at end of file -- GitLab From d7df89e8eadfcc5889dda15d3350fefeb69d7823 Mon Sep 17 00:00:00 2001 From: bhalimi <benjamin.halimi@cea.fr> Date: Tue, 11 Feb 2025 15:50:16 +0000 Subject: [PATCH 56/60] handle PaddedConv2Ds in the QAT and BatchNorm insertion code --- setup.py | 2 +- src/QAT/QAT_LSQ.cpp | 8 ++++++-- src/recipes/QuantRecipes.cpp | 24 ++++++++---------------- 3 files changed, 15 insertions(+), 19 deletions(-) diff --git a/setup.py b/setup.py index 1bfc0ac..cde7c1e 100644 --- a/setup.py +++ b/setup.py @@ -63,7 +63,7 @@ class AidgePkgBuild(build_ext): cxx_compiler = os.environ.get("AIDGE_CXX_COMPILER", "g++") build_type = os.environ.get("AIDGE_BUILD_TYPE", "Release") asan = os.environ.get("AIDGE_ASAN", "OFF") - with_cuda = os.environ.get("AIDGE_WITH_CUDA", "OFF") + with_cuda = os.environ.get("AIDGE_WITH_CUDA", "ON") # default could be "OFF" cmake_arch = os.environ.get("AIDGE_CMAKE_ARCH", "") build_gen = os.environ.get("AIDGE_BUILD_GEN", "") diff --git a/src/QAT/QAT_LSQ.cpp b/src/QAT/QAT_LSQ.cpp index 0508fc7..ff1c44a 100644 --- a/src/QAT/QAT_LSQ.cpp +++ b/src/QAT/QAT_LSQ.cpp @@ -82,12 +82,14 @@ static bool initStepSize(std::shared_ptr<Node> quantizer) static void setupInputQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits) { - const auto matches = SinglePassGraphMatching(graphView).match("(Conv2D#|FC#)"); + const auto matches = SinglePassGraphMatching(graphView).match("(Conv2D#|PaddedConv2D#|FC#)"); for (const auto& match : matches) { auto linearNode = match.graph->rootNode(); + // Log::notice(" SET INPUT QUANTIZER : {} ", linearNode->type()); + std::pair<int, int> signedRange = {-std::pow(2, nbBits - 1), std::pow(2, nbBits - 1) - 1}; std::pair<int, int> unsignedRange = {0, std::pow(2, nbBits) - 1}; @@ -135,7 +137,7 @@ static void setupInputQuantizers(std::shared_ptr<GraphView> graphView, size_t nb static void setupParamQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits) { - const auto matches = SinglePassGraphMatching(graphView).match("(Conv2D#|FC#)"); + const auto matches = SinglePassGraphMatching(graphView).match("(Conv2D#|PaddedConv2D#|FC#)"); std::pair<int, int> signedRange = {-std::pow(2, nbBits - 1), std::pow(2, nbBits - 1) - 1}; @@ -143,6 +145,8 @@ static void setupParamQuantizers(std::shared_ptr<GraphView> graphView, size_t nb { auto linearNode = match.graph->rootNode(); + // Log::notice(" SET PARAM QUANTIZER : {} ", linearNode->type()); + // TODO : double check this, and use createUniqueName() auto quantizerName = makeUniqueName(linearNode->name() + "_lsq_p", graphView); auto quantizerNode = LSQ(signedRange, quantizerName); diff --git a/src/recipes/QuantRecipes.cpp b/src/recipes/QuantRecipes.cpp index 7f01b24..f03eb46 100644 --- a/src/recipes/QuantRecipes.cpp +++ b/src/recipes/QuantRecipes.cpp @@ -9,24 
+9,13 @@ * ********************************************************************************/ -/* -#include "aidge/data/Tensor.hpp" -#include "aidge/graph/GraphView.hpp" -#include "aidge/graph/Node.hpp" -#include "aidge/scheduler/SequentialScheduler.hpp" -#include "aidge/scheduler/Scheduler.hpp" -#include "aidge/utils/Log.hpp" - -#include "aidge/operator/Producer.hpp" -#include "aidge/operator/Mul.hpp" -#include "aidge/operator/ReLU.hpp" -#include "aidge/operator/Scaling.hpp" -*/ #include "aidge/operator/Conv.hpp" #include "aidge/operator/BatchNorm.hpp" //#include "aidge/quantization/PTQ/PTQ.hpp" #include "aidge/recipes/QuantRecipes.hpp" +#include "aidge/graph/Node.hpp" + namespace Aidge { @@ -55,11 +44,13 @@ void insertBatchNormNodes(std::shared_ptr<GraphView> graphView) { for (std::shared_ptr<Node> parentNode : graphView->getNodes()) { - if (parentNode->type() == "Conv2D") + // TODO : use graph matching + + if (parentNode->type() == "Conv2D" || parentNode->type() == "PaddedConv2D") { - std::shared_ptr<Conv_Op<2>> convOperator = std::static_pointer_cast<Conv_Op<2>> (parentNode->getOperator()); + std::shared_ptr<OperatorTensor> convOperator = std::static_pointer_cast<OperatorTensor> (parentNode->getOperator()); int nbChannels = convOperator->getInput(1)->dims()[0]; - Log::info(" NB CHANNELS = {} ", nbChannels); + Log::notice(" NB CHANNELS = {} ", nbChannels); std::string batchnormNodeName = makeUniqueName(parentNode->name() + "_BN", graphView); std::shared_ptr<Node> batchnormNode = BatchNorm<2>(nbChannels, 1e-5, 0.1, false, batchnormNodeName); @@ -118,6 +109,7 @@ std::string makeUniqueName(std::string baseName, std::shared_ptr<GraphView> grap return newName; } + void sanitizeNodeNames(std::shared_ptr<GraphView> graphView) { for (std::shared_ptr<Node> node : graphView->getNodes()) -- GitLab From 3237e7a73e31ef1b66c554d7f8af5c7fbb8fbe66 Mon Sep 17 00:00:00 2001 From: bhalimi <benjamin.halimi@cea.fr> Date: Fri, 14 Feb 2025 13:28:57 +0000 Subject: [PATCH 57/60] minor changes --- include/aidge/quantization/PTQ/PTQ.hpp | 8 ++++---- python_binding/pybind_PTQ.cpp | 4 ++-- src/QAT/QAT_LSQ.cpp | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/include/aidge/quantization/PTQ/PTQ.hpp b/include/aidge/quantization/PTQ/PTQ.hpp index 4fc38bc..bfe671e 100644 --- a/include/aidge/quantization/PTQ/PTQ.hpp +++ b/include/aidge/quantization/PTQ/PTQ.hpp @@ -124,11 +124,11 @@ namespace Aidge { * @brief Quantize an already normalized (in term of parameters and activations) network. * @param graphView The GraphView to be quantized. * @param nbBits The desired number of bits of the quantization. - * @param applyRounding Whether to apply the rounding operations or not. + * @param noQuant Whether to apply the rounding operations or not. * @param optimizeSigns Whether to take account of the IO signs of the operators or not. * @param verbose Whether to print the sign map or not. */ - void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits, bool applyRounding, bool optimizeSigns, bool verbose); + void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits, bool noQuant, bool optimizeSigns, bool verbose); /** * @brief Main quantization routine. Performs every step of the quantization pipeline. @@ -136,12 +136,12 @@ namespace Aidge { * @param nbBits The desired number of bits of the quantization. * @param inputDataSet The input dataset on which the value ranges are computed. * @param clippingMode: Type of the clipping optimization. 
Can be either 'MAX', 'MSE', 'AA' or 'KL'. - * @param applyRounding Whether to apply the rounding operations or not. + * @param noQuant Whether to apply the rounding operations or not. * @param optimizeSigns Whether to take account of the IO signs of the operators or not. * @param singleShift Whether to convert the scaling factors into powers of two. If true the approximations are compensated using the previous nodes weights. * @param verbose Whether to print internal informations about the quantization process. */ - void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits, std::vector<std::shared_ptr<Tensor>> inputDataSet, Clipping clippingMode, bool applyRounding, bool optimizeSigns, bool singleShift, bool useCuda, bool verbose); + void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits, std::vector<std::shared_ptr<Tensor>> inputDataSet, Clipping clippingMode, bool noQuant, bool optimizeSigns, bool singleShift, bool useCuda, bool verbose); /** * @brief Compute the weight ranges of every affine node. Provided for debugging purposes. diff --git a/python_binding/pybind_PTQ.cpp b/python_binding/pybind_PTQ.cpp index b5193bd..1de7976 100644 --- a/python_binding/pybind_PTQ.cpp +++ b/python_binding/pybind_PTQ.cpp @@ -78,7 +78,7 @@ void init_PTQ(py::module &m) { :type value_ranges: list of float. )mydelimiter"); - m.def("quantize_normalized_network", &quantizeNormalizedNetwork, py::arg("network"), py::arg("nb_bits"), py::arg("no_quant")=false, py::arg("optimize_signs"), py::arg("verbose") = false, + m.def("quantize_normalized_network", &quantizeNormalizedNetwork, py::arg("network"), py::arg("nb_bits"), py::arg("no_quantization")=false, py::arg("optimize_signs"), py::arg("verbose") = false, R"mydelimiter( Quantize an already normalized (in term of parameters and activations) network. :param network: The GraphView to be quantized. @@ -93,7 +93,7 @@ void init_PTQ(py::module &m) { :type verbose: bool )mydelimiter"); - m.def("quantize_network", &quantizeNetwork ,py::arg("network"), py::arg("nb_bits"), py::arg("input_dataset"), py::arg("clipping_mode") = Clipping::MAX , py::arg("no_quantization") = true, py::arg("optimize_signs") = false, py::arg("single_shift") = false, py::arg("use_cuda") = false, py::arg("verbose") = false, + m.def("quantize_network", &quantizeNetwork ,py::arg("network"), py::arg("nb_bits"), py::arg("input_dataset"), py::arg("clipping_mode") = Clipping::MAX , py::arg("no_quantization") = false, py::arg("optimize_signs") = false, py::arg("single_shift") = false, py::arg("use_cuda") = false, py::arg("verbose") = false, R"mydelimiter( Main quantization routine. Performs every step of the quantization pipeline. :param network: The GraphView to be quantized. 
diff --git a/src/QAT/QAT_LSQ.cpp b/src/QAT/QAT_LSQ.cpp index ff1c44a..da09d62 100644 --- a/src/QAT/QAT_LSQ.cpp +++ b/src/QAT/QAT_LSQ.cpp @@ -75,7 +75,7 @@ static bool initStepSize(std::shared_ptr<Node> quantizer) stepSizeProducer->getOperator()->setOutput(0, stepSizeTensor); - Log::info(" [ INIT STEP SIZE = {} ] ", stepSize); + Log::notice(" [ INIT STEP SIZE = {} ] ", stepSize); return false; } -- GitLab From c43e17242f1dc5974a43aa748b953eced73031f3 Mon Sep 17 00:00:00 2001 From: bhalimi <benjamin.halimi@cea.fr> Date: Fri, 14 Feb 2025 13:43:58 +0000 Subject: [PATCH 58/60] use the scalar tensor constructor --- src/PTQ/CLE.cpp | 2 +- src/PTQ/PTQ.cpp | 4 ++-- src/QAT/QAT_LSQ.cpp | 3 ++- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/PTQ/CLE.cpp b/src/PTQ/CLE.cpp index 40b9e42..28858d0 100644 --- a/src/PTQ/CLE.cpp +++ b/src/PTQ/CLE.cpp @@ -49,7 +49,7 @@ static void rescaleTensor(std::shared_ptr<Tensor> tensor, double scaling) mulOp.setDataType(tensor->dataType()); mulOp.setBackend(tensor->backend()); - std::shared_ptr<Aidge::Tensor> scalingTensor = std::make_shared<Aidge::Tensor>(Aidge::Array1D<double, 1> {scaling}); + std::shared_ptr<Aidge::Tensor> scalingTensor = std::make_shared<Aidge::Tensor>(scaling); scalingTensor->setDataType(tensor->dataType()); scalingTensor->setBackend(tensor->backend()); diff --git a/src/PTQ/PTQ.cpp b/src/PTQ/PTQ.cpp index 09b039f..7c29ee0 100644 --- a/src/PTQ/PTQ.cpp +++ b/src/PTQ/PTQ.cpp @@ -78,7 +78,7 @@ static void rescaleTensor(std::shared_ptr<Tensor> tensor, double scaling) mulOp.setDataType(tensor->dataType()); mulOp.setBackend(tensor->backend()); - std::shared_ptr<Aidge::Tensor> scalingTensor = std::make_shared<Aidge::Tensor>(Aidge::Array1D<double, 1> {scaling}); + std::shared_ptr<Aidge::Tensor> scalingTensor = std::make_shared<Aidge::Tensor>(scaling); scalingTensor->setDataType(tensor->dataType()); scalingTensor->setBackend(tensor->backend()); @@ -932,7 +932,7 @@ static void insertCompensationNodes(std::shared_ptr<GraphView> graphView, std::u // Add the coeff producer to the multiplier node std::shared_ptr<Node> coeffProducer = addProducer(mulNode, 1, {1}, ""); - std::shared_ptr<Tensor> coeffTensor = std::make_shared<Tensor>(Array1D<double, 1> {signedMax}); + std::shared_ptr<Tensor> coeffTensor = std::make_shared<Tensor>(signedMax); coeffProducer->getOperator()->setOutput(0, coeffTensor); coeffProducer->getOperator()->setDataType(DataType::Float64); diff --git a/src/QAT/QAT_LSQ.cpp b/src/QAT/QAT_LSQ.cpp index da09d62..6eae077 100644 --- a/src/QAT/QAT_LSQ.cpp +++ b/src/QAT/QAT_LSQ.cpp @@ -65,7 +65,8 @@ static bool initStepSize(std::shared_ptr<Node> quantizer) float inputStd = getTensorStd(quantizerOp->getInput(0)); float stepSize = 8.0f * (inputStd / (quantizerOp->range().second)); - auto stepSizeTensor = std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}})); + // TODO : use the scalar constructor + auto stepSizeTensor = std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}})); // XXX Manage backend here ? 
stepSizeTensor->setBackend(quantizerOp->getInput(0)->backend()); -- GitLab From ba6d6d8b62e3c4ccd98d0e6aa7108d4fceffbd23 Mon Sep 17 00:00:00 2001 From: Olivier BICHLER <olivier.bichler@cea.fr> Date: Mon, 17 Feb 2025 16:09:29 +0100 Subject: [PATCH 59/60] Fix Coverage related issue --- CMakeLists.txt | 1 - include/aidge/backend/cpu/operator/FixedQImpl_kernels.hpp | 8 ++++---- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 17dd74a..afb882a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -182,7 +182,6 @@ endif() # Coverage flags for GCC if(CMAKE_COMPILER_IS_GNUCXX AND COVERAGE) - include(CodeCoverage) append_coverage_compiler_flags() endif() diff --git a/include/aidge/backend/cpu/operator/FixedQImpl_kernels.hpp b/include/aidge/backend/cpu/operator/FixedQImpl_kernels.hpp index 9d7a106..935d8f0 100644 --- a/include/aidge/backend/cpu/operator/FixedQImpl_kernels.hpp +++ b/include/aidge/backend/cpu/operator/FixedQImpl_kernels.hpp @@ -23,7 +23,7 @@ void FixedQImpl_cpu_forward_kernel( std::size_t nbBits, float span_, bool isOutputUnsigned, - std::size_t inputLenght, + std::size_t inputLength, const void* input_, void* output_) { @@ -40,7 +40,7 @@ void FixedQImpl_cpu_forward_kernel( const I* input = static_cast<const I*>(input_); O* output = static_cast<O*>(output_); - for (std::size_t i = 0; i < inputLenght; ++i) { + for (std::size_t i = 0; i < inputLength; ++i) { I clipped = std::max(lower, std::min(input[i], upper)); output[i] = std::round(clipped / stepSize) * stepSize; } @@ -49,14 +49,14 @@ void FixedQImpl_cpu_forward_kernel( template <class GI, class GO> void FixedQImpl_cpu_backward_kernel( - const std::size_t inputLenght, + const std::size_t inputLength, const void* grad_output_, void* grad_input_) { const GO* grad_output = static_cast<const GO*>(grad_output_); GI* grad_input = static_cast<GI*>(grad_input_); - for (std::size_t i = 0; i < inputLenght; ++i) { + for (std::size_t i = 0; i < inputLength; ++i) { // Straight Through Estimator grad_input[i] = grad_output[i]; } -- GitLab From 68e85246993cb6bf2ef64ed14b4f791581c3ca6f Mon Sep 17 00:00:00 2001 From: Olivier BICHLER <olivier.bichler@cea.fr> Date: Mon, 17 Feb 2025 16:14:00 +0100 Subject: [PATCH 60/60] Fixed bad merge --- pyproject.toml | 5 ----- 1 file changed, 5 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index bf4155e..088200e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -9,7 +9,6 @@ readme = "README.md" license = { file = "LICENSE" } classifiers = [ "Development Status :: 2 - Pre-Alpha", -<<<<<<< HEAD "Intended Audience :: Developers", "Intended Audience :: Education", "Intended Audience :: Science/Research", @@ -28,10 +27,6 @@ classifiers = [ "Topic :: Scientific/Engineering :: Artificial Intelligence", "Topic :: Software Development" ] -======= - "Programming Language :: Python :: 3" - ] ->>>>>>> 22e47ad9fb629f85ed4f1c5fa981c1d195c0201b dynamic = ["version"] #Â defined by pbr [build-system] -- GitLab
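
Notes on the series:

Patches 44 and 46 switch an input quantizer to the unsigned range whenever every parent of the matched Conv2D/FC node is a ReLU. The rationale: post-ReLU activations are non-negative, so half of the signed grid would encode values that never occur, and the unsigned range doubles the resolution at the same bit width. A minimal sketch of the two ranges as a function of nbBits (the helper names are illustrative, not part of the series):

    #include <cstddef>
    #include <utility>

    // e.g. nbBits = 8 gives {-128, 127} and {0, 255}
    std::pair<int, int> makeSignedRange(std::size_t nbBits) {
        return { -(1 << (nbBits - 1)), (1 << (nbBits - 1)) - 1 };
    }
    std::pair<int, int> makeUnsignedRange(std::size_t nbBits) {
        return { 0, (1 << nbBits) - 1 };
    }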
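Patch 44 also leaves two open questions in comments: the "// XXX is this safe ?" on dereferencing getParents()[0], and the parent scan that keeps iterating after the first non-ReLU parent is found. Both could be addressed together with an early-exit loop that treats a missing parent as non-ReLU; a possible form (a sketch, not what the series commits):

    // Early-exit variant of the allParentsAreReLU scan; a null parent
    // (dangling input slot) counts as "not a ReLU".
    bool allParentsAreReLU = true;
    for (const auto& parentNode : linearNode->getParents()) {
        if (!parentNode || parentNode->type() != "ReLU") {
            allParentsAreReLU = false;
            break;
        }
    }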
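Patch 47 replaces the step-size initialization taken from the LSQ paper, s = 2 * E[|x|] / sqrt(Qmax), with s = 8 * std(x) / Qmax (the in-code comment notes the second "seems to work better"). The difference is easy to read off: the second form fixes the representable span at Qmax * s = 8 * std(x), tying the grid directly to the spread of the tensor, while the paper formula scales with the mean absolute value and only weakly (1/sqrt(Qmax)) with the range. Side by side, with illustrative names (qMax standing for range().second):

    #include <cmath>

    // LSQ paper initialization: s = 2 * E[|x|] / sqrt(Qmax)
    float paperStepSize(float absMean, int qMax) {
        return 2.0f * absMean / std::sqrt(static_cast<float>(qMax));
    }

    // Variant kept by the series: s = 8 * std(x) / Qmax
    float stdStepSize(float stdDev, int qMax) {
        return 8.0f * stdDev / static_cast<float>(qMax);
    }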
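Patch 49 drops the empty-affineNodeVector guard from crossLayerEqualization (and patch 55 later removes the iteration counter and the final log). If the function can be reached with a graph that has no affine nodes, the guard was load-bearing: size() is unsigned, so the loop bound size() - 1 wraps around instead of becoming -1. A minimal reproduction of the wrap-around:

    #include <cstddef>
    #include <vector>

    int main() {
        std::vector<int> v;  // empty, like a graph with no affine nodes
        // v.size() - 1 wraps to SIZE_MAX, so this loop spins ~2^64 times
        // instead of zero (in CLE.cpp it would also index affineNodeVector
        // out of bounds).
        for (std::size_t i = 0; i < v.size() - 1; i++) { /* ... */ }
        return 0;
    }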
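Patches 54 and 56 flip the build defaults to CUDA: option(CUDA "Enable CUDA backend" ON) in CMakeLists.txt and AIDGE_WITH_CUDA defaulting to "ON" in setup.py. The leftover markers ("# XXX OFF", "# default could be \"OFF\"") suggest this is a temporary toggle for the LSQ/CUDA work; until it is reverted, a CPU-only build needs -DCUDA=OFF at CMake configure time, or AIDGE_WITH_CUDA=OFF in the environment for the Python build.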
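Patch 57 renames the applyRounding parameter to noQuant in PTQ.hpp but keeps the description "Whether to apply the rounding operations or not", whose sense is now inverted (noQuant = true means rounding is skipped); as committed, the doc line for both quantizeNormalizedNetwork and quantizeNetwork reads backwards. The same patch does fix a real inconsistency on the binding side: no_quantization in quantize_network now defaults to false, matching quantize_normalized_network.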
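Patch 58 replaces the one-element Array1D tensors used for scaling coefficients with the scalar Tensor constructor in CLE.cpp and PTQ.cpp, presumably producing a rank-0 tensor whose data type follows the C++ argument type (double here); QAT_LSQ.cpp keeps the Array1D form behind a TODO, since its step size is a float. The shape of the change, side by side (variable names illustrative; these lines use the Aidge types from the patches and are a reading aid rather than standalone code):

    // Before: a rank-1 tensor holding a single element
    auto before = std::make_shared<Tensor>(Array1D<double, 1>{scaling});

    // After: the scalar constructor adopted by patch 58
    auto after = std::make_shared<Tensor>(scaling);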