From 87f5891aa200535fd5b8744defed0e1eb00bf73c Mon Sep 17 00:00:00 2001 From: bhalimi <benjamin.halimi@cea.fr> Date: Wed, 8 Jan 2025 10:32:06 +0000 Subject: [PATCH 01/26] improve tensor manipulation routines + enhance insertCompensationNodes --- src/PTQ/CLE.cpp | 73 +++++++++++++++++------ src/PTQ/PTQ.cpp | 137 ++++++++++++++++++++++++++------------------ src/QAT/QAT_LSQ.cpp | 9 +-- 3 files changed, 138 insertions(+), 81 deletions(-) diff --git a/src/PTQ/CLE.cpp b/src/PTQ/CLE.cpp index 2c81815..0fe9575 100644 --- a/src/PTQ/CLE.cpp +++ b/src/PTQ/CLE.cpp @@ -19,6 +19,12 @@ #include "aidge/utils/Log.hpp" #include "aidge/operator/OperatorTensor.hpp" +#include "aidge/operator/Mul.hpp" +#include "aidge/operator/ArgMax.hpp" +#include "aidge/operator/Abs.hpp" +#include "aidge/operator/Reshape.hpp" +#include "aidge/operator/Round.hpp" + namespace Aidge { @@ -34,27 +40,58 @@ static std::shared_ptr<Tensor> getBiasTensor(std::shared_ptr<Node> node) static void rescaleTensor(std::shared_ptr<Tensor> tensor, double scaling) { - // Get the tensor data pointer - double * castedTensor = static_cast<double *> (tensor->getImpl()->rawPtr()); - - // Rescale the tensor - for(std::size_t i = 0; i < tensor->size(); i++) - castedTensor[i] *= scaling; + auto mulOp = Mul_Op(); + mulOp.setDataType(tensor->dataType()); + mulOp.setBackend(tensor->backend()); + + std::shared_ptr<Aidge::Tensor> scalingTensor = std::make_shared<Aidge::Tensor>(Aidge::Array1D<float, 1> {scaling}); + scalingTensor->setDataType(tensor->dataType()); + scalingTensor->setBackend(tensor->backend()); + + mulOp.associateInput(0, tensor); + mulOp.associateInput(1, scalingTensor); + + mulOp.forward(); + + auto outTensor = mulOp.getOutput(0); + *tensor = *outTensor; + //tensor->copyCast(*outTensor); } -static double getTensorAbsoluteMax(std::shared_ptr <Tensor> tensor) +// TODO : make the retreival of argmax values backend independant (refCastFrom) +static double getTensorAbsoluteMax(std::shared_ptr<Tensor> tensor) { - // Get 
the tensor data pointer and edit it - double * castedTensor = static_cast<double*> (tensor->getImpl()->rawPtr()); - - // Get the tensor absolute max value - double maxValue = 0.0f; - for(std::size_t i = 0; i < tensor->size(); ++i) { - if(std::fabs(castedTensor[i]) > maxValue) { - maxValue = std::fabs(castedTensor[i]); - } - } - return maxValue; + // get the abs tensor + + std::shared_ptr<Tensor> absTensor = std::make_shared<Tensor>(tensor->abs()); + + // flatten the abs tensor + + std::int64_t nbElement = tensor->size(); + + auto reshapeOp = Reshape_Op({nbElement}); + reshapeOp.setDataType(tensor->dataType()); + reshapeOp.setBackend(tensor->backend()); + + reshapeOp.associateInput(0, absTensor); + reshapeOp.forward(); + std::shared_ptr<Tensor> flatTensor = reshapeOp.getOutput(0); + + // Get the argmax + + auto argmaxOp = ArgMax_Op(0, true, false); + argmaxOp.setDataType(tensor->dataType()); + argmaxOp.setBackend(tensor->backend()); + + argmaxOp.associateInput(0, flatTensor); + argmaxOp.forward(); + std::shared_ptr<Tensor> argmaxTensor = argmaxOp.getOutput(0); + + // Return the max + + int maxIndex = std::round(argmaxTensor->get<double>(0)); + + return flatTensor->get<double>(maxIndex); } void crossLayerEqualization(std::shared_ptr<GraphView> graphView, double targetDelta) diff --git a/src/PTQ/PTQ.cpp b/src/PTQ/PTQ.cpp index 0e26313..6e0b29e 100644 --- a/src/PTQ/PTQ.cpp +++ b/src/PTQ/PTQ.cpp @@ -28,6 +28,12 @@ #include "aidge/operator/BatchNorm.hpp" #include "aidge/operator/Conv.hpp" +#include "aidge/operator/ArgMax.hpp" +#include "aidge/operator/Abs.hpp" +#include "aidge/operator/Reshape.hpp" +#include "aidge/operator/Round.hpp" + + #include "aidge/recipes/Recipes.hpp" #include "aidge/recipes/QuantRecipes.hpp" @@ -66,51 +72,75 @@ bool checkArchitecture(std::shared_ptr<GraphView> graphView) return true; } -static void fillTensor(std::shared_ptr<Tensor> tensor, double value) +static void rescaleTensor(std::shared_ptr<Tensor> tensor, float scaling) { - // Get the 
tensor data pointer - double * castedTensor = static_cast <double *> (tensor->getImpl()->rawPtr()); + auto mulOp = Mul_Op(); + mulOp.setDataType(tensor->dataType()); + mulOp.setBackend(tensor->backend()); - // Fill the tensor - for(std::size_t i = 0; i < tensor->size(); i++) - castedTensor[i] = value; -} + std::shared_ptr<Aidge::Tensor> scalingTensor = std::make_shared<Aidge::Tensor>(Aidge::Array1D<float, 1> {scaling}); + scalingTensor->setDataType(tensor->dataType()); + scalingTensor->setBackend(tensor->backend()); -static void rescaleTensor(std::shared_ptr<Tensor> tensor, double scaling) -{ - // Get the tensor data pointer - double * castedTensor = static_cast <double *> (tensor->getImpl()->rawPtr()); + mulOp.associateInput(0, tensor); + mulOp.associateInput(1, scalingTensor); - // Rescale the tensor - for(std::size_t i = 0; i < tensor->size(); i++) - castedTensor[i] *= scaling; + mulOp.forward(); + + auto outTensor = mulOp.getOutput(0); + *tensor = *outTensor; } static void roundTensor(std::shared_ptr<Tensor> tensor) { - // Get the tensor data pointer - double * castedTensor = static_cast <double *> (tensor->getImpl()->rawPtr()); + auto roundOp = Round_Op(); + roundOp.setDataType(tensor->dataType()); + roundOp.setBackend(tensor->backend()); - // Rescale the tensor - for(std::size_t i = 0; i < tensor->size(); i++) - castedTensor[i] = std::nearbyint(castedTensor[i]);//Round + roundOp.associateInput(0, tensor); + roundOp.forward(); + + auto outTensor = roundOp.getOutput(0); + *tensor = *outTensor; } -static double getTensorAbsoluteMax(std::shared_ptr <Tensor> tensor) +// TODO : make the retreival of argmax values backend independant (refCastFrom) +static double getTensorAbsoluteMax(std::shared_ptr<Tensor> tensor) { - // Get the tensor data pointer and edit it - double * castedTensor = static_cast<double*>(tensor->getImpl()->rawPtr()); - - // Get the tensor absolute max value - double maxValue = 0.0f; - for(std::size_t i = 0; i < tensor->size(); ++i) { - 
if(std::fabs(castedTensor[i]) > maxValue) { - maxValue = std::fabs(castedTensor[i]); - } - } - return maxValue; + // get the abs tensor + + std::shared_ptr<Tensor> absTensor = std::make_shared<Tensor>(tensor->abs()); + + // flatten the abs tensor + + std::int64_t nbElement = tensor->size(); + + auto reshapeOp = Reshape_Op({nbElement}); + reshapeOp.setDataType(tensor->dataType()); + reshapeOp.setBackend(tensor->backend()); + + reshapeOp.associateInput(0, absTensor); + reshapeOp.forward(); + std::shared_ptr<Tensor> flatTensor = reshapeOp.getOutput(0); + + // Get the argmax + + auto argmaxOp = ArgMax_Op(0, true, false); + argmaxOp.setDataType(tensor->dataType()); + argmaxOp.setBackend(tensor->backend()); + + argmaxOp.associateInput(0, flatTensor); + argmaxOp.forward(); + std::shared_ptr<Tensor> argmaxTensor = argmaxOp.getOutput(0); + + // Return the max + + int maxIndex = std::round(argmaxTensor->get<double>(0)); + + return flatTensor->get<double>(maxIndex); } + // TODO : pass nodeVector by reference ... static std::vector<std::shared_ptr<Node>> removeMatchingNodes(std::vector<std::shared_ptr<Node>> nodeVector, std::string nodeType) { @@ -876,50 +906,42 @@ static void insertCompensationNodes(std::shared_ptr<GraphView> graphView, std::u for (std::shared_ptr<Node> node : nodeVector) { - // A merging node is always followed by a scaling node at this point ... + // A merging node is always followed by a Quantizer node at this point if (node->type() == "Quantizer") { + // check if the Quantizer is a residual one, and insert a compensation node if so ... 
+ bool prevNodeIsForking = ((node->getParent(0))->getChildren().size() > 1); bool prevNodeIsAffine = isAffine(node->getParent(0)); bool insertNode = prevNodeIsForking || !prevNodeIsAffine; if (insertNode) { - // create and insert the multplicative node + // create and insert the multplicative node before the Quantizer std::string mulNodeName = makeUniqueName(node->name() + "_Mul", graphView); std::shared_ptr<Node> mulNode = Mul(mulNodeName); - mulNode->getOperator()->setDataType(DataType::Float64); // getDataType(parentNode) mulNode->getOperator()->setBackend("cpu"); graphView->insertParent(node, mulNode, 0, 0, 0); - // create and insert the producer node - - std::shared_ptr<Tensor> inputTensor = std::static_pointer_cast<Tensor> (mulNode->getOperator()->getRawInput(0)); - std::shared_ptr<Tensor> coeffTensor = std::make_shared<Tensor>(); + // Add the coeff producer to the multiplier node - coeffTensor->setDataType(DataType::Float64); // getDataType(parentNode) - coeffTensor->setBackend("cpu"); + std::shared_ptr<Node> coeffProducer = addProducer(mulNode, 1, {1}, ""); + std::shared_ptr<Tensor> coeffTensor = std::make_shared<Tensor>(Array1D<double, 1> {signedMax}); + coeffProducer->getOperator()->setOutput(0, coeffTensor); - coeffTensor->resize(inputTensor->dims()); - fillTensor(coeffTensor, 1); + coeffProducer->getOperator()->setDataType(DataType::Float64); + coeffProducer->getOperator()->setBackend("cpu"); - std::shared_ptr<Node> producerNode = Producer(coeffTensor, makeUniqueName("coeff", graphView)); - producerNode->addChild(mulNode); - graphView->add(producerNode); + graphView->add(coeffProducer); // needed ? - // rescale the coeffs and edit scaling factor + // Adapt the scaling factor value accordingly - fillTensor(coeffTensor, signedMax); - - double currScalingFactor = getScalingFactor(node); // XXX bad naming ! + double currScalingFactor = getScalingFactor(node); updateScalingFactor(node, currScalingFactor / signedMax); - - // TODO : double check this !!! 
- //std::cout << getTensorAbsoluteMax(coeffTensor) << std::endl; } } } @@ -931,7 +953,8 @@ void performSingleShiftApproximation(std::shared_ptr<GraphView> graphView, bool for (std::shared_ptr<Node> node : nodeVector) { - // Use A meatoperator of type Scaling of MulCompensation instead + // TODO : use Compensation nodes instead of Mul nodes + if (isAffine(node) || (node->type() == "Mul")) { std::shared_ptr<Node> scalingNode = (*node->getChildren().begin()); @@ -940,7 +963,7 @@ void performSingleShiftApproximation(std::shared_ptr<GraphView> graphView, bool double approx = std::pow(2, std::ceil(std::log2(base))); - updateScalingFactor(scalingNode,approx); + updateScalingFactor(scalingNode, approx); double ratio = base / approx; @@ -954,7 +977,7 @@ void performSingleShiftApproximation(std::shared_ptr<GraphView> graphView, bool std::shared_ptr<Tensor> biasTensor = getBiasTensor(node); rescaleTensor(biasTensor, ratio); if (!noQuant) - roundTensor(biasTensor); + roundTensor(biasTensor); } } } @@ -1058,8 +1081,8 @@ void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits, if (useCuda) graphView->setBackend("cuda"); - //std::cout << " === SCALINGS (AFTER CAST) ===" << std::endl; - //printScalingFactors(graphView); + std::cout << " === SCALINGS (AFTER CAST) ===" << std::endl; + printScalingFactors(graphView); Log::info(" Reseting the scheduler ..."); SequentialScheduler scheduler(graphView); diff --git a/src/QAT/QAT_LSQ.cpp b/src/QAT/QAT_LSQ.cpp index 9b51e84..a09dbb2 100644 --- a/src/QAT/QAT_LSQ.cpp +++ b/src/QAT/QAT_LSQ.cpp @@ -89,19 +89,16 @@ void QuantLSQ::insertQuantizers(std::shared_ptr<GraphView> graphView, size_t nbB static float getTensorAbsMean(std::shared_ptr<Tensor> tensor) { auto backend = tensor->backend(); + if (backend == "cuda") tensor->setBackend("cpu"); - float acc = 0; - float* castedTensor = static_cast<float *> (tensor->getImpl()->rawPtr()); - for(std::size_t i = 0; i < tensor->size(); i++) - acc += std::abs(castedTensor[i]); - acc 
/= static_cast<float> (tensor->size()); + float value = (*tensor).abs().mean().get<float>(0); if (backend == "cuda") tensor->setBackend("cuda"); - return acc; + return value; } static std::map<std::string, float> collectInputStats(std::shared_ptr<GraphView> graphView, std::shared_ptr<Tensor> calibrationData, bool useCuda) -- GitLab From 261345f10db68b69077bef647fd645196c18baf3 Mon Sep 17 00:00:00 2001 From: bhalimi <benjamin.halimi@cea.fr> Date: Wed, 8 Jan 2025 10:37:27 +0000 Subject: [PATCH 02/26] comment verbose --- src/PTQ/PTQ.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/PTQ/PTQ.cpp b/src/PTQ/PTQ.cpp index 6e0b29e..7f750f0 100644 --- a/src/PTQ/PTQ.cpp +++ b/src/PTQ/PTQ.cpp @@ -1081,8 +1081,8 @@ void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits, if (useCuda) graphView->setBackend("cuda"); - std::cout << " === SCALINGS (AFTER CAST) ===" << std::endl; - printScalingFactors(graphView); + //std::cout << " === SCALINGS (AFTER CAST) ===" << std::endl; + //printScalingFactors(graphView); Log::info(" Reseting the scheduler ..."); SequentialScheduler scheduler(graphView); -- GitLab From 227a9c7e575656ffc7094c0b4e66a42c931d54ee Mon Sep 17 00:00:00 2001 From: bhalimi <benjamin.halimi@cea.fr> Date: Wed, 8 Jan 2025 16:27:21 +0000 Subject: [PATCH 03/26] minor change --- src/PTQ/PTQ.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/PTQ/PTQ.cpp b/src/PTQ/PTQ.cpp index 7f750f0..3677ae0 100644 --- a/src/PTQ/PTQ.cpp +++ b/src/PTQ/PTQ.cpp @@ -215,6 +215,8 @@ void prepareNetwork(std::shared_ptr<GraphView> graphView) { removeFlatten(graphView); + sanitizeNodeNames(graphView); + bool containsBatchNorm = false; std::vector<std::shared_ptr<Node>> nodeVector = retrieveNodeVector(graphView); @@ -1078,6 +1080,7 @@ void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits, //printScalingFactors(graphView); setupDataType(graphView, inputDataSet, initialDataType); + if (useCuda) 
graphView->setBackend("cuda"); -- GitLab From 9998b41f2a26ef738e1fbb829540b6c36dd2a0d3 Mon Sep 17 00:00:00 2001 From: bhalimi <benjamin.halimi@cea.fr> Date: Mon, 13 Jan 2025 13:01:34 +0000 Subject: [PATCH 04/26] rework the LSQ code --- include/aidge/quantization/QAT/QAT_LSQ.hpp | 18 +- python_binding/pybind_QAT_LSQ.cpp | 5 +- src/QAT/QAT_LSQ.cpp | 204 +++++++-------------- 3 files changed, 77 insertions(+), 150 deletions(-) diff --git a/include/aidge/quantization/QAT/QAT_LSQ.hpp b/include/aidge/quantization/QAT/QAT_LSQ.hpp index 4970be0..d7d03ca 100644 --- a/include/aidge/quantization/QAT/QAT_LSQ.hpp +++ b/include/aidge/quantization/QAT/QAT_LSQ.hpp @@ -20,22 +20,14 @@ namespace Aidge { namespace QuantLSQ { /** - * @brief Insert the LSQ quantizer nodes in a given GraphView - * @param graphView The GraphView containing the graph to quantize. + * @brief Given a GraphView with parameters properly initialized, insert + * the LSQ quantizer nodes, and setup the adjustment their step-sizes. + * @param graphView The GraphView containing the network to quantize. * @param nbBits Number of quantization bits. - * @param span Fixed output span of the quantizers. */ -void insertQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits, float step_size); +void setupQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits); -/** - * @brief Given a GraphView with parameters properly initialized and some calibration data, - * insert the LSQ quantizer nodes, and adjust their step-sizes. - * @param graphView The GraphView containing the graph to quantize. - * @param nbBits Number of quantization bits. - * @param calibrationData Calibration data used to adjust the spans. - * @param scale Multiplicative constant applied to the spans. 
- */ -void insertAndInitQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits, std::shared_ptr<Tensor> calibrationData); +void devLSQ(std::shared_ptr<Tensor> tensor); } } diff --git a/python_binding/pybind_QAT_LSQ.cpp b/python_binding/pybind_QAT_LSQ.cpp index 206985e..0b9fcc2 100644 --- a/python_binding/pybind_QAT_LSQ.cpp +++ b/python_binding/pybind_QAT_LSQ.cpp @@ -23,8 +23,9 @@ void init_QAT_LSQ(py::module &m) { auto mQuantLSQ = m.def_submodule("lsq"); - mQuantLSQ.def("insert_quantizers", &QuantLSQ::insertQuantizers, py::arg("network"), py::arg("nb_bits"), py::arg("step_size")); + mQuantLSQ.def("setup_quantizers", &QuantLSQ::setupQuantizers, py::arg("network"), py::arg("nb_bits")); + + mQuantLSQ.def("dev_lsq", &QuantLSQ::devLSQ, py::arg("tensor")); - mQuantLSQ.def("insert_and_init_quantizers", &QuantLSQ::insertAndInitQuantizers, py::arg("network"), py::arg("nb_bits"), py::arg("calibration_data")); } } // namespace Aidge diff --git a/src/QAT/QAT_LSQ.cpp b/src/QAT/QAT_LSQ.cpp index a09dbb2..04f2027 100644 --- a/src/QAT/QAT_LSQ.cpp +++ b/src/QAT/QAT_LSQ.cpp @@ -23,7 +23,42 @@ namespace Aidge { -void QuantLSQ::insertQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits, float stepSize) +static float getTensorAbsMean(std::shared_ptr<Tensor> tensor) +{ + auto valueTensor = (*tensor).abs().mean(); + std::shared_ptr<Tensor> fallback; + const Tensor& localTensor = valueTensor.refCastFrom(fallback, DataType::Float32, "cpu"); + return localTensor.get<float>(0); +} + +// INIT THE STEP SIZE OF A QUANTIZER NODE + +static bool initStepSize(std::shared_ptr<Node> quantizer) +{ + const auto quantizerOp = std::static_pointer_cast<LSQ_Op>(quantizer->getOperator()); + + float inputAbsMean = getTensorAbsMean(quantizerOp->getInput(0)); + + float stepSize = 2.0f * (inputAbsMean / std::sqrt(quantizerOp->range().second)); + + auto stepSizeTensor = std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}})); + + // XXX Manage backend here ? 
+ stepSizeTensor->setBackend(quantizerOp->getInput(0)->backend()); + stepSizeTensor->setDataType(quantizerOp->getInput(0)->dataType()); + + auto stepSizeProducer = quantizer->getParent(1); + + stepSizeProducer->getOperator()->setOutput(0, stepSizeTensor); + + std::cout << " [ INIT STEP SIZE = " << stepSize << " ] " << std::endl; + + return false; +} + +// INPUT QUANTIZERS INSERTION + +static void setupInputQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits) { const auto matches = SinglePassGraphMatching(graphView).match("(Conv2D#|FC#)"); @@ -34,177 +69,76 @@ void QuantLSQ::insertQuantizers(std::shared_ptr<GraphView> graphView, size_t nbB std::pair<int, int> signedRange = {-std::pow(2, nbBits - 1), std::pow(2, nbBits - 1) - 1}; std::pair<int, int> unsignedRange = {0, std::pow(2, nbBits) - 1}; - // INPUT QUANTIZERS INSERTION + // Create the input quantizer node - // TODO : double check this, and use createUniqueName() - auto inputQuantizerName = makeUniqueName(linearNode->name() + "_lsq_i", graphView); - auto inputQuantizerNode = LSQ(signedRange, inputQuantizerName); + auto quantizerName = makeUniqueName(linearNode->name() + "_lsq_i", graphView); + auto quantizerNode = LSQ(signedRange, quantizerName); - // Set the step size + // Init the step-size using the node call stack - auto inputStepSizeOp = inputQuantizerNode->getParent(1)->getOperator(); - auto inputStepSizeTensor = std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}})); - inputStepSizeOp->setOutput(0, inputStepSizeTensor); + quantizerNode->addBeforeForward([quantizerNode](){ return initStepSize(quantizerNode); }); // Absorb the ReLU when possible ... - // XXX is this safe ??? - bool nodeHasParent = static_cast<bool> (linearNode->getParents()[0]); - // bool nodeHasParent = (linearNode->getParents().size() != 0); + bool nodeHasParent = static_cast<bool> (linearNode->getParents()[0]); // XXX is this safe ? 
if (nodeHasParent) { auto parentNode = linearNode->getParents()[0]; if (parentNode->type() == "ReLU") { - auto inputQuantizerOp = std::static_pointer_cast<LSQ_Op> (inputQuantizerNode->getOperator()); - inputQuantizerOp->range() = unsignedRange; + auto quantizerOp = std::static_pointer_cast<LSQ_Op> (quantizerNode->getOperator()); + quantizerOp->range() = unsignedRange; graphView->replace({parentNode}, {}); } } - // We need to handle the case where the linear node is the first one ... + // Insert the quantizer in the graphView ... + // (We need to handle the case where the linear node is the first one) if (nodeHasParent) { - graphView->insertParent(linearNode, inputQuantizerNode, 0, 0, 0); + graphView->insertParent(linearNode, quantizerNode, 0, 0, 0); } else { - inputQuantizerNode->addChild(graphView); - graphView->add(inputQuantizerNode); + quantizerNode->addChild(graphView); + graphView->add(quantizerNode); } - - // PARAM QUANTIZERS INSERTION - - // TODO : double check this, and use createUniqueName() - auto paramQuantizerName = makeUniqueName(linearNode->name() + "_lsq_p", graphView); - auto paramQuantizerNode = LSQ(signedRange, paramQuantizerName); - graphView->insertParent(linearNode, paramQuantizerNode, 1, 0, 0); - - // Set the step size - - auto paramStepSizeOp = paramQuantizerNode->getParent(1)->getOperator(); - auto paramStepSizeTensor = std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}})); - paramStepSizeOp->setOutput(0, paramStepSizeTensor); } - } -static float getTensorAbsMean(std::shared_ptr<Tensor> tensor) -{ - auto backend = tensor->backend(); - - if (backend == "cuda") - tensor->setBackend("cpu"); - - float value = (*tensor).abs().mean().get<float>(0); - - if (backend == "cuda") - tensor->setBackend("cuda"); - - return value; -} +// PARAM QUANTIZERS INSERTION -static std::map<std::string, float> collectInputStats(std::shared_ptr<GraphView> graphView, std::shared_ptr<Tensor> calibrationData, bool useCuda) +static void 
setupParamQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits) { - // Propagate the calibration tensor + const auto matches = SinglePassGraphMatching(graphView).match("(Conv2D#|FC#)"); - SequentialScheduler scheduler(graphView); - scheduler.resetScheduling(); - scheduler.forward(true, {calibrationData}); + std::pair<int, int> signedRange = {-std::pow(2, nbBits - 1), std::pow(2, nbBits - 1) - 1}; - // Store the input tensor statistics + for (const auto& match : matches) + { + auto linearNode = match.graph->rootNode(); - if (useCuda) - graphView->setBackend("cpu"); + // TODO : double check this, and use createUniqueName() + auto quantizerName = makeUniqueName(linearNode->name() + "_lsq_p", graphView); + auto quantizerNode = LSQ(signedRange, quantizerName); - std::map<std::string, float> inputStats; - for (auto node : graphView->getNodes()) - { - if (node->type() == "FC" || node->type() == "Conv2D") // TODO: use graph matching !!! - { - const auto op = std::static_pointer_cast<LSQ_Op>(node->getOperator()); - float inputAbsMean = getTensorAbsMean(op->getInput(0)); - inputStats.insert(std::make_pair(node->name(), inputAbsMean)); - fmt::println("{} -> {}", node->name(), inputAbsMean); - } - } + // Init the step-size using the node call stack - if (useCuda) - graphView->setBackend("cuda"); + quantizerNode->addBeforeForward([quantizerNode](){ return initStepSize(quantizerNode); }); - return inputStats; -} + // Insert the quantizer in the graphView -static std::map<std::string, float> collectParamStats(std::shared_ptr<GraphView> graphView, bool useCuda) -{ - if (useCuda) - graphView->setBackend("cpu"); - - std::map<std::string, float> paramStats; - for (auto node : graphView->getNodes()) - { - if (node->type() == "FC" || node->type() == "Conv2D") // TODO: use graph matching !!! 
- { - const auto op = std::static_pointer_cast<LSQ_Op>(node->getOperator()); - float paramAbsMean = getTensorAbsMean(op->getInput(1)); - paramStats.insert(std::make_pair(node->name(), paramAbsMean)); - fmt::println("{} -> {}", node->name(), paramAbsMean); - } + graphView->insertParent(linearNode, quantizerNode, 1, 0, 0); } - - if (useCuda) - graphView->setBackend("cuda"); - - return paramStats; } -static void adjustQuantizersStepSizes(std::shared_ptr<GraphView> graphView, std::map<std::string, float> inputStats, std::map<std::string, float> paramStats) +void QuantLSQ::setupQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits) { - const auto matches = SinglePassGraphMatching(graphView).match("(Conv2D#|FC#)"); - - for (const auto& match : matches) - { - auto linearNode = match.graph->rootNode(); - - // INPUT QUANTIZERS STEP-SIZES - - auto inputQuantNode = linearNode->getParent(0); - auto inputQuantOp = std::static_pointer_cast<LSQ_Op>(inputQuantNode->getOperator()); - - float absMean = inputStats[linearNode->name()]; - float stepSize = 2.0f * (absMean / std::sqrt(inputQuantOp->range().second)); - - auto inputStepSizeOp = inputQuantNode->getParent(1)->getOperator(); - // XXX inputStepSizeOp->setOutput(0, std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}}))); - auto inputStepSizeTensor = std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}})); - inputStepSizeOp->setOutput(0, inputStepSizeTensor); - - // PARAM QUANTIZERS STEP-SIZES - - auto paramQuantNode = linearNode->getParent(1); - auto paramQuantOp = std::static_pointer_cast<LSQ_Op>(paramQuantNode->getOperator()); - - absMean = paramStats[linearNode->name()]; - stepSize = 2.0f * (absMean / std::sqrt(paramQuantOp->range().second)); - - auto paramStepSizeOp = paramQuantNode->getParent(1)->getOperator(); - // XXX paramStepSizeOp->setOutput(0, std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}}))); - auto paramStepSizeTensor = std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}})); - 
paramStepSizeOp->setOutput(0, paramStepSizeTensor); - } + setupInputQuantizers(graphView, nbBits); + setupParamQuantizers(graphView, nbBits); } -void QuantLSQ::insertAndInitQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits, std::shared_ptr<Tensor> calibrationData) +void QuantLSQ::devLSQ(std::shared_ptr<Tensor> tensor) { - bool useCuda = (calibrationData->backend() == "cuda"); - - // Collect the tensor statisics - auto inputStats = collectInputStats(graphView, calibrationData, useCuda); - - auto paramStats = collectParamStats(graphView, useCuda); - - // Insert the quantizers - insertQuantizers(graphView, nbBits, 1.0); - - // Adjust the quantizers step-sizes - adjustQuantizersStepSizes(graphView, inputStats, paramStats); + float mean = (tensor->mean()).get<float> (0); + std::cout << " MEAN = " << mean << std::endl; } } \ No newline at end of file -- GitLab From 4f1169676c6d3845d35416a4e3f0e3e98e7d9700 Mon Sep 17 00:00:00 2001 From: Noam ZERAH <noam.zerah@cea.fr> Date: Wed, 8 Jan 2025 16:07:59 +0000 Subject: [PATCH 05/26] Adding the isScaling tag in the PTQ pipeline in order to replace the previous and now deprecated Scaling Metaoperator --- aidge_quantization/_version.py | 4 + include/aidge/quantization/PTQ/PTQMetaOps.hpp | 14 +-- include/aidge/quantization_version.h | 6 +- python_binding/pybind_PTQ.cpp | 9 ++ src/PTQ/Clipping.cpp | 2 +- src/PTQ/PTQ.cpp | 88 ++++++++++++------- src/PTQ/PTQMetaOps.cpp | 39 ++++---- 7 files changed, 101 insertions(+), 61 deletions(-) create mode 100644 aidge_quantization/_version.py diff --git a/aidge_quantization/_version.py b/aidge_quantization/_version.py new file mode 100644 index 0000000..d4ec20e --- /dev/null +++ b/aidge_quantization/_version.py @@ -0,0 +1,4 @@ +# file generated by setuptools_scm +# don't change, don't track in version control +__version__ = version = '0.2.1.dev60+g8044e79.d20250106' +__version_tuple__ = version_tuple = (0, 2, 1, 'dev60', 'g8044e79.d20250106') diff --git 
a/include/aidge/quantization/PTQ/PTQMetaOps.hpp b/include/aidge/quantization/PTQ/PTQMetaOps.hpp index 62fac87..a8028c6 100644 --- a/include/aidge/quantization/PTQ/PTQMetaOps.hpp +++ b/include/aidge/quantization/PTQ/PTQMetaOps.hpp @@ -37,13 +37,13 @@ namespace Aidge { /// @return A shared pointer to an instance of the meta-operator node. std::shared_ptr<Aidge::Node> Quantizer(double scalingFactor, double clipMin, double clipMax, const std::string& name); -/// @brief The purpose of Scaling is to encapsulate the Mul operator and tag it as a PTQ node rather than a regular Mul operator. -/// Therefore, this meta-operator consists solely of a [Mul] operation. -/// -/// @param scalingFactor The scaling factor to apply to the input (a scalar to multiply the input with). -/// @param name The name of the meta-operator node created. -/// @return A shared pointer to an instance of the scaling node. -std::shared_ptr<Aidge::Node> Scaling(double scalingFactor, const std::string& name = ""); +/// @brief Updates the scaling factor of a "Mul" node in a graph if the node is marked as a scaling node. +/// This function multiplies the existing scaling factor by a given coefficient. It verifies that the node is of the correct type ("Mul") +/// and has the `isScaling` attribute. If these conditions are not met, a warning is logged. +/// @param node A shared pointer to an `Aidge::Node` object representing the node to modify. +/// @param coeff A double representing the multiplication coefficient to apply to the scaling factor. +void multiplyScalingFactor(std::shared_ptr<Aidge::Node> node, double coeff); + /// @brief Updates the scaling factor of a PTQ meta-operator node, allowing for dynamic adjustment of the scaling parameter. /// This function sets a new scaling factor for a specified meta-operator node, modifying the scalar applied in the [Mul] operation. 
diff --git a/include/aidge/quantization_version.h b/include/aidge/quantization_version.h index 546263a..f14a045 100644 --- a/include/aidge/quantization_version.h +++ b/include/aidge/quantization_version.h @@ -3,9 +3,9 @@ namespace Aidge { static constexpr const int PROJECT_VERSION_MAJOR = 0; -static constexpr const int PROJECT_VERSION_MINOR = 2; +static constexpr const int PROJECT_VERSION_MINOR = 3; static constexpr const int PROJECT_VERSION_PATCH = 0; -static constexpr const char * PROJECT_VERSION = "0.2.0"; -static constexpr const char * PROJECT_GIT_HASH = "f50c860"; +static constexpr const char * PROJECT_VERSION = "0.3.0"; +static constexpr const char * PROJECT_GIT_HASH = "8c89214"; } #endif // VERSION_H diff --git a/python_binding/pybind_PTQ.cpp b/python_binding/pybind_PTQ.cpp index b5193bd..7f7c57d 100644 --- a/python_binding/pybind_PTQ.cpp +++ b/python_binding/pybind_PTQ.cpp @@ -17,6 +17,7 @@ #include "aidge/quantization/PTQ/Clipping.hpp" #include "aidge/quantization/PTQ/CLE.hpp" #include "aidge/quantization/PTQ/PTQ.hpp" +#include "aidge/quantization/PTQ/PTQMetaOps.hpp" #include "aidge/graph/GraphView.hpp" @@ -48,6 +49,14 @@ void init_PTQ(py::module &m) { :type network: :py:class:`aidge_core.GraphView` )mydelimiter"); + m.def( "multiply_scaling_factor",&multiplyScalingFactor,py::arg("node"), py::arg("coeff") + R"mydelimiter( + Updates the scaling factor of a "Mul" node in a graph if the node is marked as a scaling node. This function multiplies the existing scaling factor by a given coefficient. + :param node: A node representing the node to modify. + :param coeff: A floating value representing the multiplication coefficient to apply to the scaling factor. + )mydelimiter" + ); + m.def("normalize_parameters", &normalizeParameters, py::arg("network"), R"mydelimiter( Normalize the parameters of each parametrized node, so that they fit in the [-1:1] range. 
diff --git a/src/PTQ/Clipping.cpp b/src/PTQ/Clipping.cpp index 57ad7a8..1901e38 100644 --- a/src/PTQ/Clipping.cpp +++ b/src/PTQ/Clipping.cpp @@ -222,7 +222,7 @@ std::map<std::string, double> adjustRanges(Clipping clippingMode, std::map<std:: for (std::shared_ptr<Node> node : graphView->getNodes()) { - if (node->type() == "Scaling") + if (node->attributes()->hasAttr("isScaling")) { std::vector<int> histogram = histograms[node->name()]; diff --git a/src/PTQ/PTQ.cpp b/src/PTQ/PTQ.cpp index 3677ae0..2d431f6 100644 --- a/src/PTQ/PTQ.cpp +++ b/src/PTQ/PTQ.cpp @@ -264,12 +264,19 @@ void insertResidualNodes(std::shared_ptr<GraphView> graphView) Log::info(" ### inserting multiplicative node ..."); std::string residualNodeName = makeUniqueName(parentNode->name() + "_Res", graphView); - std::shared_ptr<Node> residualNode = Scaling(1.0, residualNodeName); + std::shared_ptr<Node> residualNode = Mul(residualNodeName); + residualNode->attributes()->addAttr("isScaling", 0.0); + + //Adding the SF as a producer of the node + std::shared_ptr<Tensor> scalingFactorTensor = std::make_shared<Tensor>(Array1D<double, 1> {1.0}); + std::shared_ptr<Node> scalingFactorProducer = addProducer(residualNode, 1, {1}, "ScalingFactor"); + scalingFactorProducer->getOperator()->setOutput(0, scalingFactorTensor); - residualNode->getOperator()->setDataType(DataType::Float64); //getDataType(parentNode) + residualNode->getOperator()->setDataType(DataType::Float64); // getDataType(parentNode) residualNode->getOperator()->setBackend("cpu"); graphView->insertParent(node, residualNode, i, 0, 0); + graphView->add(scalingFactorProducer); } } } @@ -295,7 +302,16 @@ void insertScalingNodes(std::shared_ptr<GraphView> graphView) if (isAffine(parentNode) || isMerging(parentNode)) { std::string scalingNodeName = makeUniqueName(parentNode->name() + "_Scaling", graphView); - std::shared_ptr<Node> scalingNode = Scaling(1.0, scalingNodeName); + //std::shared_ptr<Node> scalingNode = Scaling(1.0, scalingNodeName); + + 
//Adding Mul operator with tag "isScaling" + std::shared_ptr<Aidge::Node> scalingNode = Mul(scalingNodeName); + scalingNode->attributes()->addAttr("isScaling",0.0); + + //Adding the SF as a producer of the node + std::shared_ptr<Tensor> scalingFactorTensor = std::make_shared<Tensor>(Array1D<double, 1> {1.0}); + std::shared_ptr<Node> scalingFactorProducer = addProducer(scalingNode, 1, {1}, "ScalingFactor"); + scalingFactorProducer->getOperator()->setOutput(0, scalingFactorTensor); scalingNode->getOperator()->setDataType(DataType::Float64); // getDataType(parentNode) scalingNode->getOperator()->setBackend("cpu"); @@ -320,12 +336,14 @@ void insertScalingNodes(std::shared_ptr<GraphView> graphView) for (std::size_t i = 0; i < nextNodes.size(); i++) scalingNode->addChild(nextNodes[i], 0, inputIndices[i]); + graphView->add(scalingFactorProducer); graphView->add(scalingNode); } else { // Log::info(" last node reached ! "); parentNode->addChild(scalingNode, 0, 0); + graphView->add(scalingFactorProducer); graphView->add(scalingNode); } } @@ -335,7 +353,7 @@ void insertScalingNodes(std::shared_ptr<GraphView> graphView) static std::shared_ptr<Node> getPreviousScalingNode(std::shared_ptr<Node> mergingNode) { std::shared_ptr<Node> currNode = mergingNode; - while(currNode->type() != "Scaling") + while(!currNode->attributes()->hasAttr("isScaling")) { if (currNode->getParents().size() == 0) { @@ -378,7 +396,7 @@ void normalizeParameters(std::shared_ptr<GraphView> graphView) for (std::shared_ptr<Node> node : nodeVector) { // Scaling nodes still have a ratio of 1, so they are seamless ... 
- if (node->type() == "ReLU" || node->type() == "Scaling" || isSeamless(node)) + if (node->type() == "ReLU" || node->attributes()->hasAttr("isScaling") || isSeamless(node)) { if (node != firstNode) { @@ -439,8 +457,9 @@ void normalizeParameters(std::shared_ptr<GraphView> graphView) std::shared_ptr<Node> scalingNode = getPreviousScalingNode(mergingNode); - double currScalingFactor = getScalingFactor(scalingNode); - updateScalingFactor(scalingNode, currScalingFactor / rescaling); + //double currScalingFactor = getScalingFactor(scalingNode); + //updateScalingFactor(scalingNode, currScalingFactor / rescaling); + multiplyScalingFactor(scalingNode,1/rescaling); accumulatedRatios[mergingNode->name()] /= rescaling; // optional ... } @@ -465,7 +484,7 @@ std::map<std::string, double> computeRanges(std::shared_ptr<GraphView> graphView std::set<std::shared_ptr<Node>> nodeSet = graphView->getNodes(); for (std::shared_ptr<Node> node : nodeSet) { - if ((scalingNodesOnly && (node->type() == "Scaling")) || (!scalingNodesOnly && (node->type() != "Producer"))) + if ((scalingNodesOnly && (node->attributes()->hasAttr("isScaling"))) || (!scalingNodesOnly && (node->type() != "Producer"))) { std::shared_ptr<Operator> nodeOperator = node->getOperator(); std::shared_ptr<Tensor> valueTensor = std::static_pointer_cast<Tensor> (nodeOperator->getRawOutput(0)); @@ -487,7 +506,7 @@ std::map<std::string, double> computeRanges(std::shared_ptr<GraphView> graphView // std::shared_ptr<Node> inputNode = getFirstNode(graphView); for (std::shared_ptr<Node> node : nodeSet) - if ((scalingNodesOnly && (node->type() == "Scaling")) || (!scalingNodesOnly && (node->type() != "Producer"))) + if ((scalingNodesOnly && (node->attributes()->hasAttr("isScaling"))) || (!scalingNodesOnly && (node->type() != "Producer"))) valueRanges.insert(std::make_pair(node->name(), 0)); if (useCuda) @@ -514,7 +533,7 @@ std::map<std::string, double> computeRanges(std::shared_ptr<GraphView> graphView std::map<std::string, double> 
sampleRanges; for (std::shared_ptr<Node> node : nodeSet) { - if ((scalingNodesOnly && (node->type() == "Scaling")) || (!scalingNodesOnly && (node->type() != "Producer"))) + if ((scalingNodesOnly && (node->attributes()->hasAttr("isScaling"))) || (!scalingNodesOnly && (node->type() != "Producer"))) { std::shared_ptr<Operator> nodeOperator = node->getOperator(); std::shared_ptr<Tensor> valueTensor = std::static_pointer_cast<Tensor> (nodeOperator->getRawOutput(0)); @@ -536,7 +555,7 @@ std::map<std::string, double> computeRanges(std::shared_ptr<GraphView> graphView for (std::shared_ptr<Node> node : nodeSet) { - if ((scalingNodesOnly && (node->type() == "Scaling")) || (!scalingNodesOnly && (node->type() != "Producer"))) + if ((scalingNodesOnly && (node->attributes()->hasAttr("isScaling"))) || (!scalingNodesOnly && (node->type() != "Producer"))) { std::string nodeName = node->name(); if (sampleRanges[nodeName] > valueRanges[nodeName]) @@ -589,7 +608,7 @@ void normalizeActivations(std::shared_ptr<GraphView> graphView, std::map<std::st // Here prevNode is either a 'Affine' or a 'Merging' // => do not split the cases, just handle the bias ... - if (node->type() == "Scaling") + if (node->attributes()->hasAttr("isScaling")) { // retrieve the previous scaling factor ... std::shared_ptr<Node> prevNode = node->getParent(0); @@ -598,8 +617,9 @@ void normalizeActivations(std::shared_ptr<GraphView> graphView, std::map<std::st // ValueRanges must contains all the scaling nodes !!! 
double scalingFactor = valueRanges[node->name()]; - double currScalingFactor = getScalingFactor(node); - updateScalingFactor(node, currScalingFactor / (scalingFactor / prevScalingFactor)); + //double currScalingFactor = getScalingFactor(node); + //updateScalingFactor(node, currScalingFactor / (scalingFactor / prevScalingFactor)); + multiplyScalingFactor(node,1/(scalingFactor / prevScalingFactor)); scalingFactors[node->name()] = scalingFactor; @@ -642,8 +662,9 @@ void normalizeActivations(std::shared_ptr<GraphView> graphView, std::map<std::st std::shared_ptr<Node> scalingNode = getPreviousScalingNode(mergingNode); //Log::info(" SCALING NODE : {} {}", scalingNode->type(), scalingNode->name()); - double currScalingFactor = getScalingFactor(scalingNode); - updateScalingFactor(scalingNode, currScalingFactor * rescaling); + //double currScalingFactor = getScalingFactor(scalingNode); + //updateScalingFactor(scalingNode, currScalingFactor * rescaling); + multiplyScalingFactor(scalingNode,rescaling) ; } } } @@ -679,7 +700,7 @@ std::map<std::string, std::pair<bool, bool>> computeSignMap(std::shared_ptr<Grap signMap[node->name()].second = false; } - if (node->type() == "Scaling") + if (node->attributes()->hasAttr("isScaling")) { signMap[node->name()].second = false; @@ -726,7 +747,7 @@ std::map<std::string, std::pair<bool, bool>> computeSignMap(std::shared_ptr<Grap // Arbitration : Signed type wins ! for(std::shared_ptr<Node> parent : parentNodes) { - while (parent->type() != "Scaling") + while (!parent->attributes()->hasAttr("isScaling")) { signMap[parent->name()] = std::make_pair(false, false); // We are on a branch so nodes always have 1 parent ... @@ -842,8 +863,9 @@ void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_ std::shared_ptr<Node> scalingNode = *(node->getChildren().begin()); // Assert if scalingNode is a Scaling ... 
- double currScalingFactor = getScalingFactor(scalingNode); - updateScalingFactor(scalingNode, currScalingFactor * rescaling); + // double currScalingFactor = getScalingFactor(scalingNode); + // updateScalingFactor(scalingNode, currScalingFactor * rescaling); + multiplyScalingFactor(scalingNode,rescaling) ; } if (isMerging(node)) @@ -858,23 +880,27 @@ void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_ std::shared_ptr<Node> scalingNode = *(node->getChildren().begin()); // Assert if scalingNode is a Scaling ... - double currScalingFactor = getScalingFactor(scalingNode); // XXX bad naming - updateScalingFactor(scalingNode, currScalingFactor * rescaling); + // double currScalingFactor = getScalingFactor(scalingNode); // XXX bad naming + // updateScalingFactor(scalingNode, currScalingFactor * rescaling); + multiplyScalingFactor(scalingNode,rescaling) ; } // Handle the Scaling Nodes ... - if (node->type() == "Scaling") + if (node->attributes()->hasAttr("isScaling")) { if (!noQuant) { // Replace the Scaling Node by Quantizer + auto scalingFactorTensor = std::static_pointer_cast<OperatorTensor>(node->getOperator())->getInput(1); + std::shared_ptr<Tensor> fallback; + const Tensor& localTensor = scalingFactorTensor->refCastFrom(fallback, DataType::Float64, "cpu"); + double old_sf = localTensor.get<double>(0);//!\\ - std::shared_ptr<Node> quantizerNode = Quantizer(getScalingFactor(node), -(signedMax + 1), signedMax, node->name()); + std::shared_ptr<Node> quantizerNode = Quantizer(old_sf, -(signedMax + 1), signedMax, node->name()); quantizerNode->getOperator()->setDataType(DataType::Float64); // getDataType(parentNode) quantizerNode->getOperator()->setBackend("cpu"); - - graphView->replace({node}, {quantizerNode}); + graphView->replace({node,node->getParent(1)}, {quantizerNode}); if (optimizeSigns) { @@ -888,6 +914,7 @@ void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_ double currScalingFactor = 
getScalingFactor(quantizerNode); updateScalingFactor(quantizerNode, currScalingFactor * rescaling); + if(outputIsUnsigned) { @@ -965,7 +992,7 @@ void performSingleShiftApproximation(std::shared_ptr<GraphView> graphView, bool double approx = std::pow(2, std::ceil(std::log2(base))); - updateScalingFactor(scalingNode, approx); + updateScalingFactor(scalingNode,approx); double ratio = base / approx; @@ -989,7 +1016,7 @@ static void printScalingFactors(std::shared_ptr<GraphView> graphView) { Log::info(" === SCALING FACTORS === "); for (auto node : retrieveNodeVector(graphView)) - if (node->type() == "Scaling" || node->type() == "Quantizer") + if (node->attributes()->hasAttr("isScaling") || node->type() == "Quantizer") { double scalingFactor = getScalingFactor(node); Log::info(" {:.6f} ({})", scalingFactor, node->name()); @@ -1019,8 +1046,8 @@ static void printRanges(std::shared_ptr<GraphView> graphView, std::map<std::stri auto scheduling = scheduler.getStaticScheduling(); for (auto node : scheduling) - if (node->type() == "Scaling") - fmt::println("{} range = {}", node->name(), valueRanges[node->name()]); + if (node->attributes()->hasAttr("isScaling")) + std::cout << node->name() << " range = " << valueRanges[node->name()] << std::endl; } void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits, std::vector<std::shared_ptr<Tensor>> inputDataSet, Clipping clippingMode, bool noQuant, bool optimizeSigns, bool singleShift, bool useCuda, bool verbose) @@ -1042,7 +1069,6 @@ void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits, insertScalingNodes(graphView); crossLayerEqualization(graphView); - Log::info(" Normalizing the parameters ..."); normalizeParameters(graphView); diff --git a/src/PTQ/PTQMetaOps.cpp b/src/PTQ/PTQMetaOps.cpp index 527d853..4c17f9b 100644 --- a/src/PTQ/PTQMetaOps.cpp +++ b/src/PTQ/PTQMetaOps.cpp @@ -61,23 +61,6 @@ std::shared_ptr<Node> Quantizer(double scalingFactor, double clipMin, double cli return 
metaopNode; } -std::shared_ptr<Node> Scaling(double scalingFactor, const std::string& name) -{ - std::shared_ptr<Tensor> scalingFactorTensor = std::make_shared<Tensor>(Array1D<double, 1> {scalingFactor}); - - std::shared_ptr<Node> mulNode = Mul((!name.empty()) ? name + "_Scaling" : ""); - - std::shared_ptr<Node> scalingFactorProducer = addProducer<1>(mulNode, 1, {1}, "ScalingFactor"); - scalingFactorProducer->getOperator()->setOutput(0, scalingFactorTensor); - - std::shared_ptr<GraphView> graphView = Sequential({mulNode}); - std::shared_ptr<GraphView> connectedGraphView = getConnectedGraphView(mulNode); - - NodePtr metaopNode = MetaOperator("Scaling", connectedGraphView, {}, name); - - return metaopNode; -} - static std::shared_ptr<Node> getSubNode(std::shared_ptr<GraphView> graphView, std::string nodeType) { std::shared_ptr<Node> mulNode = nullptr; @@ -88,9 +71,27 @@ static std::shared_ptr<Node> getSubNode(std::shared_ptr<GraphView> graphView, st return mulNode; } +void multiplyScalingFactor(std::shared_ptr<Aidge::Node> node,double coeff) +{ + if(node->type() == "Mul" && node->attributes()->hasAttr("isScaling")) + { + auto scalingFactorTensor = std::static_pointer_cast<OperatorTensor>(node->getOperator())->getInput(1); + std::shared_ptr<Tensor> fallback; + const Tensor& localTensor = scalingFactorTensor->refCastFrom(fallback, DataType::Float64, "cpu"); + double previousScalingFactor = localTensor.get<double>(0); + std::shared_ptr<Tensor> finalTensor = std::make_shared<Tensor>(Array1D<double, 1> {previousScalingFactor * coeff}); + node->input(1).first->getOperator()->setOutput(0, finalTensor); + } + else + { + Log::warn(" Cannot update the scaling factor on Node of type {} with no scaling tag", node->type()); + } +} + + void updateScalingFactor(std::shared_ptr<Node> metaOpNode, double scalingFactor) { - if(metaOpNode->type() != "Scaling" && metaOpNode->type() != "Quantizer") + if(metaOpNode->type() != "Quantizer") Log::warn(" Cannot update the scaling factor on 
Node of type {}", metaOpNode->type()); std::shared_ptr<Tensor> scalingFactorTensor = std::make_shared<Tensor>(Array1D<double, 1> {scalingFactor}); @@ -107,7 +108,7 @@ void updateScalingFactor(std::shared_ptr<Node> metaOpNode, double scalingFactor) double getScalingFactor(std::shared_ptr<Node> MetaOpNode) { - if (MetaOpNode->type() != "Scaling" && MetaOpNode->type() != "Quantizer") { + if (MetaOpNode->type() != "Quantizer") { Log::warn(" Cannot get the scaling factor on Node of type {}", MetaOpNode->type()); return 0; } -- GitLab From a98dbceaad16441d7449022992f3885332e7aaf4 Mon Sep 17 00:00:00 2001 From: Noam ZERAH <noam.zerah@cea.fr> Date: Mon, 13 Jan 2025 15:43:30 +0000 Subject: [PATCH 06/26] Refactoring Scaling Metaop deletions by removing old getScalingFactor and updateScalingFactor; Adding clear tag isCompensation for Mul used as compensations nodes --- .../PTQ => operator}/PTQMetaOps.hpp | 8 ---- include/aidge/quantization/PTQ/PTQ.hpp | 8 ++++ include/aidge/quantization_version.h | 2 +- python_binding/pybind_PTQ.cpp | 2 +- src/PTQ/PTQ.cpp | 37 +++++++++++-------- src/{PTQ => operator}/PTQMetaOps.cpp | 18 +-------- 6 files changed, 33 insertions(+), 42 deletions(-) rename include/aidge/{quantization/PTQ => operator}/PTQMetaOps.hpp (86%) rename src/{PTQ => operator}/PTQMetaOps.cpp (84%) diff --git a/include/aidge/quantization/PTQ/PTQMetaOps.hpp b/include/aidge/operator/PTQMetaOps.hpp similarity index 86% rename from include/aidge/quantization/PTQ/PTQMetaOps.hpp rename to include/aidge/operator/PTQMetaOps.hpp index a8028c6..22fb71e 100644 --- a/include/aidge/quantization/PTQ/PTQMetaOps.hpp +++ b/include/aidge/operator/PTQMetaOps.hpp @@ -37,14 +37,6 @@ namespace Aidge { /// @return A shared pointer to an instance of the meta-operator node. std::shared_ptr<Aidge::Node> Quantizer(double scalingFactor, double clipMin, double clipMax, const std::string& name); -/// @brief Updates the scaling factor of a "Mul" node in a graph if the node is marked as a scaling node. 
-/// This function multiplies the existing scaling factor by a given coefficient. It verifies that the node is of the correct type ("Mul") -/// and has the `isScaling` attribute. If these conditions are not met, a warning is logged. -/// @param node A shared pointer to an `Aidge::Node` object representing the node to modify. -/// @param coeff A double representing the multiplication coefficient to apply to the scaling factor. -void multiplyScalingFactor(std::shared_ptr<Aidge::Node> node, double coeff); - - /// @brief Updates the scaling factor of a PTQ meta-operator node, allowing for dynamic adjustment of the scaling parameter. /// This function sets a new scaling factor for a specified meta-operator node, modifying the scalar applied in the [Mul] operation. /// The meta-operator node must be a PTQ-specific operator, such as a Quantizer or Scaling node. diff --git a/include/aidge/quantization/PTQ/PTQ.hpp b/include/aidge/quantization/PTQ/PTQ.hpp index d2b8b7f..e7cbddd 100644 --- a/include/aidge/quantization/PTQ/PTQ.hpp +++ b/include/aidge/quantization/PTQ/PTQ.hpp @@ -74,6 +74,14 @@ namespace Aidge { */ bool checkArchitecture(std::shared_ptr<GraphView> graphView); + /** + * @brief This function multiplies the existing scaling factor by a given coefficient. It verifies that the node is of the correct type ("Mul") + * and has the `isScaling` attribute. If these conditions are not met, a warning is logged. + * @param node A shared pointer to an `Aidge::Node` object representing the node to modify. + * @param coeff A double representing the multiplication coefficient to apply to the scaling factor. 
+ */ + void multiplyScalingFactor(std::shared_ptr<Aidge::Node> node, double coeff); + void prepareNetwork(std::shared_ptr<GraphView> graphView); diff --git a/include/aidge/quantization_version.h b/include/aidge/quantization_version.h index f14a045..740621a 100644 --- a/include/aidge/quantization_version.h +++ b/include/aidge/quantization_version.h @@ -6,6 +6,6 @@ static constexpr const int PROJECT_VERSION_MAJOR = 0; static constexpr const int PROJECT_VERSION_MINOR = 3; static constexpr const int PROJECT_VERSION_PATCH = 0; static constexpr const char * PROJECT_VERSION = "0.3.0"; -static constexpr const char * PROJECT_GIT_HASH = "8c89214"; +static constexpr const char * PROJECT_GIT_HASH = "b4af1ce"; } #endif // VERSION_H diff --git a/python_binding/pybind_PTQ.cpp b/python_binding/pybind_PTQ.cpp index 7f7c57d..2c25dc6 100644 --- a/python_binding/pybind_PTQ.cpp +++ b/python_binding/pybind_PTQ.cpp @@ -49,7 +49,7 @@ void init_PTQ(py::module &m) { :type network: :py:class:`aidge_core.GraphView` )mydelimiter"); - m.def( "multiply_scaling_factor",&multiplyScalingFactor,py::arg("node"), py::arg("coeff") + m.def( "multiply_scaling_factor",&multiplyScalingFactor,py::arg("node"), py::arg("coeff"), R"mydelimiter( Updates the scaling factor of a "Mul" node in a graph if the node is marked as a scaling node. This function multiplies the existing scaling factor by a given coefficient. :param node: A node representing the node to modify. 
diff --git a/src/PTQ/PTQ.cpp b/src/PTQ/PTQ.cpp index 2d431f6..23d9f01 100644 --- a/src/PTQ/PTQ.cpp +++ b/src/PTQ/PTQ.cpp @@ -12,7 +12,7 @@ #include "aidge/quantization/PTQ/CLE.hpp" #include "aidge/quantization/PTQ/Clipping.hpp" #include "aidge/quantization/PTQ/PTQ.hpp" -#include "aidge/quantization/PTQ/PTQMetaOps.hpp" +#include "aidge/operator/PTQMetaOps.hpp" #include "aidge/data/Tensor.hpp" @@ -72,6 +72,23 @@ bool checkArchitecture(std::shared_ptr<GraphView> graphView) return true; } +void multiplyScalingFactor(std::shared_ptr<Aidge::Node> node,double coeff) +{ + if(node->type() == "Mul" && node->attributes()->hasAttr("isScaling")) + { + auto scalingFactorTensor = std::static_pointer_cast<OperatorTensor>(node->getOperator())->getInput(1); + std::shared_ptr<Tensor> fallback; + const Tensor& localTensor = scalingFactorTensor->refCastFrom(fallback, DataType::Float64, "cpu"); + double previousScalingFactor = localTensor.get<double>(0); + std::shared_ptr<Tensor> finalTensor = std::make_shared<Tensor>(Array1D<double, 1> {previousScalingFactor * coeff}); + node->input(1).first->getOperator()->setOutput(0, finalTensor); + } + else + { + Log::warn(" Cannot update the scaling factor on Node of type {} with no scaling tag", node->type()); + } +} + static void rescaleTensor(std::shared_ptr<Tensor> tensor, float scaling) { auto mulOp = Mul_Op(); @@ -457,8 +474,6 @@ void normalizeParameters(std::shared_ptr<GraphView> graphView) std::shared_ptr<Node> scalingNode = getPreviousScalingNode(mergingNode); - //double currScalingFactor = getScalingFactor(scalingNode); - //updateScalingFactor(scalingNode, currScalingFactor / rescaling); multiplyScalingFactor(scalingNode,1/rescaling); accumulatedRatios[mergingNode->name()] /= rescaling; // optional ... @@ -617,8 +632,6 @@ void normalizeActivations(std::shared_ptr<GraphView> graphView, std::map<std::st // ValueRanges must contains all the scaling nodes !!! 
double scalingFactor = valueRanges[node->name()]; - //double currScalingFactor = getScalingFactor(node); - //updateScalingFactor(node, currScalingFactor / (scalingFactor / prevScalingFactor)); multiplyScalingFactor(node,1/(scalingFactor / prevScalingFactor)); scalingFactors[node->name()] = scalingFactor; @@ -661,9 +674,7 @@ void normalizeActivations(std::shared_ptr<GraphView> graphView, std::map<std::st std::shared_ptr<Node> scalingNode = getPreviousScalingNode(mergingNode); //Log::info(" SCALING NODE : {} {}", scalingNode->type(), scalingNode->name()); - - //double currScalingFactor = getScalingFactor(scalingNode); - //updateScalingFactor(scalingNode, currScalingFactor * rescaling); + multiplyScalingFactor(scalingNode,rescaling) ; } } @@ -863,8 +874,6 @@ void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_ std::shared_ptr<Node> scalingNode = *(node->getChildren().begin()); // Assert if scalingNode is a Scaling ... - // double currScalingFactor = getScalingFactor(scalingNode); - // updateScalingFactor(scalingNode, currScalingFactor * rescaling); multiplyScalingFactor(scalingNode,rescaling) ; } @@ -880,8 +889,6 @@ void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_ std::shared_ptr<Node> scalingNode = *(node->getChildren().begin()); // Assert if scalingNode is a Scaling ... 
- // double currScalingFactor = getScalingFactor(scalingNode); // XXX bad naming - // updateScalingFactor(scalingNode, currScalingFactor * rescaling); multiplyScalingFactor(scalingNode,rescaling) ; } @@ -951,6 +958,8 @@ static void insertCompensationNodes(std::shared_ptr<GraphView> graphView, std::u std::string mulNodeName = makeUniqueName(node->name() + "_Mul", graphView); std::shared_ptr<Node> mulNode = Mul(mulNodeName); + + mulNode->attributes()->addAttr("isCompensation",0.0); mulNode->getOperator()->setDataType(DataType::Float64); // getDataType(parentNode) mulNode->getOperator()->setBackend("cpu"); @@ -982,9 +991,7 @@ void performSingleShiftApproximation(std::shared_ptr<GraphView> graphView, bool for (std::shared_ptr<Node> node : nodeVector) { - // TODO : use Compensation nodes instead of Mul nodes - - if (isAffine(node) || (node->type() == "Mul")) + if (isAffine(node) || (node->type() == "Mul" && node->attributes()->hasAttr("isCompensation"))) { std::shared_ptr<Node> scalingNode = (*node->getChildren().begin()); diff --git a/src/PTQ/PTQMetaOps.cpp b/src/operator/PTQMetaOps.cpp similarity index 84% rename from src/PTQ/PTQMetaOps.cpp rename to src/operator/PTQMetaOps.cpp index 4c17f9b..facfed2 100644 --- a/src/PTQ/PTQMetaOps.cpp +++ b/src/operator/PTQMetaOps.cpp @@ -9,7 +9,7 @@ * ********************************************************************************/ -#include "aidge/quantization/PTQ/PTQMetaOps.hpp" +#include "aidge/operator/PTQMetaOps.hpp" #include <array> #include <memory> @@ -71,22 +71,6 @@ static std::shared_ptr<Node> getSubNode(std::shared_ptr<GraphView> graphView, st return mulNode; } -void multiplyScalingFactor(std::shared_ptr<Aidge::Node> node,double coeff) -{ - if(node->type() == "Mul" && node->attributes()->hasAttr("isScaling")) - { - auto scalingFactorTensor = std::static_pointer_cast<OperatorTensor>(node->getOperator())->getInput(1); - std::shared_ptr<Tensor> fallback; - const Tensor& localTensor = 
scalingFactorTensor->refCastFrom(fallback, DataType::Float64, "cpu"); - double previousScalingFactor = localTensor.get<double>(0); - std::shared_ptr<Tensor> finalTensor = std::make_shared<Tensor>(Array1D<double, 1> {previousScalingFactor * coeff}); - node->input(1).first->getOperator()->setOutput(0, finalTensor); - } - else - { - Log::warn(" Cannot update the scaling factor on Node of type {} with no scaling tag", node->type()); - } -} void updateScalingFactor(std::shared_ptr<Node> metaOpNode, double scalingFactor) -- GitLab From 48427337c51e5e257d2794d304af0bd5b777529b Mon Sep 17 00:00:00 2001 From: Noam ZERAH <noam.zerah@cea.fr> Date: Mon, 13 Jan 2025 15:56:11 +0000 Subject: [PATCH 07/26] Changing include in python bindings --- python_binding/pybind_PTQ.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/python_binding/pybind_PTQ.cpp b/python_binding/pybind_PTQ.cpp index 2c25dc6..61a3cb9 100644 --- a/python_binding/pybind_PTQ.cpp +++ b/python_binding/pybind_PTQ.cpp @@ -17,8 +17,6 @@ #include "aidge/quantization/PTQ/Clipping.hpp" #include "aidge/quantization/PTQ/CLE.hpp" #include "aidge/quantization/PTQ/PTQ.hpp" -#include "aidge/quantization/PTQ/PTQMetaOps.hpp" - #include "aidge/graph/GraphView.hpp" namespace py = pybind11; -- GitLab From 496491774df40049dcb9e11640514ba0de7956e2 Mon Sep 17 00:00:00 2001 From: Noam ZERAH <noam.zerah@cea.fr> Date: Wed, 15 Jan 2025 11:05:21 +0000 Subject: [PATCH 08/26] rebasing with dev --- include/aidge/quantization_version.h | 2 +- src/PTQ/PTQ.cpp | 48 ++++++++++++---------------- 2 files changed, 22 insertions(+), 28 deletions(-) diff --git a/include/aidge/quantization_version.h b/include/aidge/quantization_version.h index 740621a..d773aa8 100644 --- a/include/aidge/quantization_version.h +++ b/include/aidge/quantization_version.h @@ -6,6 +6,6 @@ static constexpr const int PROJECT_VERSION_MAJOR = 0; static constexpr const int PROJECT_VERSION_MINOR = 3; static constexpr const int PROJECT_VERSION_PATCH = 0; static constexpr const 
char * PROJECT_VERSION = "0.3.0"; -static constexpr const char * PROJECT_GIT_HASH = "b4af1ce"; +static constexpr const char * PROJECT_GIT_HASH = "94747bf"; } #endif // VERSION_H diff --git a/src/PTQ/PTQ.cpp b/src/PTQ/PTQ.cpp index 23d9f01..9dee442 100644 --- a/src/PTQ/PTQ.cpp +++ b/src/PTQ/PTQ.cpp @@ -283,6 +283,7 @@ void insertResidualNodes(std::shared_ptr<GraphView> graphView) std::string residualNodeName = makeUniqueName(parentNode->name() + "_Res", graphView); std::shared_ptr<Node> residualNode = Mul(residualNodeName); residualNode->attributes()->addAttr("isScaling", 0.0); + residualNode->attributes()->addAttr("isResidual", 0.0); //Adding the SF as a producer of the node std::shared_ptr<Tensor> scalingFactorTensor = std::make_shared<Tensor>(Array1D<double, 1> {1.0}); @@ -944,43 +945,36 @@ static void insertCompensationNodes(std::shared_ptr<GraphView> graphView, std::u { // A merging node is always followed by a Quantizer node at this point - if (node->type() == "Quantizer") + if (node->type() == "Quantizer" && node->attributes()->hasAttr("isResidual")) { // check if the Quantizer is a residual one, and insert a compensation node if so ... 
+ // create and insert the multplicative node before the Quantizer - bool prevNodeIsForking = ((node->getParent(0))->getChildren().size() > 1); - bool prevNodeIsAffine = isAffine(node->getParent(0)); - bool insertNode = prevNodeIsForking || !prevNodeIsAffine; - - if (insertNode) - { - // create and insert the multplicative node before the Quantizer - - std::string mulNodeName = makeUniqueName(node->name() + "_Mul", graphView); - std::shared_ptr<Node> mulNode = Mul(mulNodeName); - - mulNode->attributes()->addAttr("isCompensation",0.0); - mulNode->getOperator()->setDataType(DataType::Float64); // getDataType(parentNode) - mulNode->getOperator()->setBackend("cpu"); + std::string mulNodeName = makeUniqueName(node->name() + "_Mul", graphView); + std::shared_ptr<Node> mulNode = Mul(mulNodeName); + + mulNode->attributes()->addAttr("isCompensation",0.0); + mulNode->getOperator()->setDataType(DataType::Float64); // getDataType(parentNode) + mulNode->getOperator()->setBackend("cpu"); - graphView->insertParent(node, mulNode, 0, 0, 0); + graphView->insertParent(node, mulNode, 0, 0, 0); - // Add the coeff producer to the multiplier node + // Add the coeff producer to the multiplier node - std::shared_ptr<Node> coeffProducer = addProducer(mulNode, 1, {1}, ""); - std::shared_ptr<Tensor> coeffTensor = std::make_shared<Tensor>(Array1D<double, 1> {signedMax}); - coeffProducer->getOperator()->setOutput(0, coeffTensor); + std::shared_ptr<Node> coeffProducer = addProducer(mulNode, 1, {1}, ""); + std::shared_ptr<Tensor> coeffTensor = std::make_shared<Tensor>(Array1D<double, 1> {signedMax}); + coeffProducer->getOperator()->setOutput(0, coeffTensor); - coeffProducer->getOperator()->setDataType(DataType::Float64); - coeffProducer->getOperator()->setBackend("cpu"); + coeffProducer->getOperator()->setDataType(DataType::Float64); + coeffProducer->getOperator()->setBackend("cpu"); - graphView->add(coeffProducer); // needed ? + graphView->add(coeffProducer); // needed ? 
+ // Adapt the scaling factor value accordingly + + double currScalingFactor = getScalingFactor(node); + updateScalingFactor(node, currScalingFactor / signedMax); + } } } -- GitLab From c9adaf08fdbbddd76a76e60d5811c2cc77660138 Mon Sep 17 00:00:00 2001 From: Noam ZERAH <noam.zerah@cea.fr> Date: Mon, 20 Jan 2025 15:53:11 +0000 Subject: [PATCH 09/26] Fixing isResidual bug in SSA when using tag; replacement of std::cout with Log::debug --- include/aidge/quantization_version.h | 2 +- src/PTQ/PTQ.cpp | 14 +++++++------- src/QAT/QAT_LSQ.cpp | 5 ++--- 3 files changed, 10 insertions(+), 11 deletions(-) diff --git a/include/aidge/quantization_version.h b/include/aidge/quantization_version.h index d773aa8..429e4bd 100644 --- a/include/aidge/quantization_version.h +++ b/include/aidge/quantization_version.h @@ -6,6 +6,6 @@ static constexpr const int PROJECT_VERSION_MAJOR = 0; static constexpr const int PROJECT_VERSION_MINOR = 3; static constexpr const int PROJECT_VERSION_PATCH = 0; static constexpr const char * PROJECT_VERSION = "0.3.0"; -static constexpr const char * PROJECT_GIT_HASH = "94747bf"; +static constexpr const char * PROJECT_GIT_HASH = "e464870"; } #endif // VERSION_H diff --git a/src/PTQ/PTQ.cpp b/src/PTQ/PTQ.cpp index 9dee442..a81b2b7 100644 --- a/src/PTQ/PTQ.cpp +++ b/src/PTQ/PTQ.cpp @@ -14,7 +14,6 @@ #include "aidge/quantization/PTQ/PTQ.hpp" #include "aidge/operator/PTQMetaOps.hpp" - #include "aidge/data/Tensor.hpp" #include "aidge/graph/GraphView.hpp" #include "aidge/graph/Node.hpp" @@ -945,8 +944,9 @@ static void insertCompensationNodes(std::shared_ptr<GraphView> graphView, std::u { // A merging node is always followed by a Quantizer node at this point - if (node->type() == "Quantizer" && node->attributes()->hasAttr("isResidual")) + if (node->type() == "Quantizer" && 
(node->attributes()->hasAttr("isResidual") || !isAffine(node->getParent(0)))) { + // check if the Quantizer is a residual one, and insert a compensation node if so ... // create and insert the multplicative node before the Quantizer @@ -1048,7 +1048,7 @@ static void printRanges(std::shared_ptr<GraphView> graphView, std::map<std::stri auto scheduling = scheduler.getStaticScheduling(); for (auto node : scheduling) if (node->attributes()->hasAttr("isScaling")) - std::cout << node->name() << " range = " << valueRanges[node->name()] << std::endl; + Log::debug("{} range = {}",node->name(),valueRanges[node->name()]); } void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits, std::vector<std::shared_ptr<Tensor>> inputDataSet, Clipping clippingMode, bool noQuant, bool optimizeSigns, bool singleShift, bool useCuda, bool verbose) @@ -1076,13 +1076,13 @@ void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits, Log::info(" Computing the value ranges ..."); std::map<std::string, double> valueRanges = computeRanges(graphView, inputDataSet, true, useCuda); - //std::cout << " === RANGES (BEFORE ADJUST) ===" << std::endl; + //Log:debug("=== RANGES (BEFORE ADJUST) ==="); //printRanges(graphView, valueRanges); Log::info(" Optimizing the clipping values ..."); valueRanges = adjustRanges(clippingMode, valueRanges, nbBits, graphView, inputDataSet, useCuda, verbose); - //std::cout << " === RANGES (AFTER ADJUST) ===" << std::endl; + //Log:debug("=== RANGES (AFTER ADJUST) ==="); //printRanges(graphView, valueRanges); Log::info(" Normalizing the activations ..."); @@ -1103,7 +1103,7 @@ void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits, if (verbose) printScalingFactors(graphView); - //std::cout << " === SCALINGS (BEFORE CAST) ===" << std::endl; + //Log::debug(" === SCALINGS (BEFORE CAST) ==="); //printScalingFactors(graphView); setupDataType(graphView, inputDataSet, initialDataType); @@ -1111,7 +1111,7 @@ void 
quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits, if (useCuda) graphView->setBackend("cuda"); - //std::cout << " === SCALINGS (AFTER CAST) ===" << std::endl; + //Log::debug(" === SCALINGS (AFTER CAST) ==="); //printScalingFactors(graphView); Log::info(" Reseting the scheduler ..."); diff --git a/src/QAT/QAT_LSQ.cpp b/src/QAT/QAT_LSQ.cpp index 04f2027..8a42770 100644 --- a/src/QAT/QAT_LSQ.cpp +++ b/src/QAT/QAT_LSQ.cpp @@ -13,7 +13,6 @@ #include "aidge/operator/LSQ.hpp" #include "aidge/operator/ReLU.hpp" - #include "aidge/data/Tensor.hpp" #include "aidge/graph/GraphView.hpp" #include "aidge/scheduler/SequentialScheduler.hpp" @@ -51,7 +50,7 @@ static bool initStepSize(std::shared_ptr<Node> quantizer) stepSizeProducer->getOperator()->setOutput(0, stepSizeTensor); - std::cout << " [ INIT STEP SIZE = " << stepSize << " ] " << std::endl; + Log::debug("[ INIT STEP SIZE = {} ]",stepSize); return false; } @@ -138,7 +137,7 @@ void QuantLSQ::setupQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBi void QuantLSQ::devLSQ(std::shared_ptr<Tensor> tensor) { float mean = (tensor->mean()).get<float> (0); - std::cout << " MEAN = " << mean << std::endl; + Log::debug("MEAN = {}",mean); } } \ No newline at end of file -- GitLab From f1eb07af4e073ace093647ae7d80e4481d2eb9aa Mon Sep 17 00:00:00 2001 From: Noam ZERAH <noam.zerah@cea.fr> Date: Fri, 17 Jan 2025 15:29:47 +0000 Subject: [PATCH 10/26] Starting Work on adding Scaling Nodes (Tagged Mul) below Producers --- aidge_quantization/_version.py | 2 +- include/aidge/quantization/PTQ/PTQ.hpp | 1 + src/PTQ/CLE.cpp | 43 +++++- src/PTQ/PTQ.cpp | 178 +++++++++++++++++++++---- 4 files changed, 193 insertions(+), 31 deletions(-) diff --git a/aidge_quantization/_version.py b/aidge_quantization/_version.py index d4ec20e..2d34d35 100644 --- a/aidge_quantization/_version.py +++ b/aidge_quantization/_version.py @@ -1,4 +1,4 @@ # file generated by setuptools_scm # don't change, don't track in version control 
__version__ = version = '0.2.1.dev60+g8044e79.d20250106' -__version_tuple__ = version_tuple = (0, 2, 1, 'dev60', 'g8044e79.d20250106') +__version_tuple__ = version_tuple = (0, 2, 1, 'dev60', 'g8044e79.d20250106') \ No newline at end of file diff --git a/include/aidge/quantization/PTQ/PTQ.hpp b/include/aidge/quantization/PTQ/PTQ.hpp index e7cbddd..74a49c8 100644 --- a/include/aidge/quantization/PTQ/PTQ.hpp +++ b/include/aidge/quantization/PTQ/PTQ.hpp @@ -66,6 +66,7 @@ namespace Aidge { * @return The scheduled vector of nodes */ std::vector<std::shared_ptr<Node>> retrieveNodeVector(std::shared_ptr<GraphView> graphView, bool newSchedule = true, bool verbose = false); + bool insertScalingBelowProducer(std::shared_ptr<Node> node,double sf, std::shared_ptr<GraphView> graphView); /** * @brief Determine whether an input GraphView can be quantized or not. diff --git a/src/PTQ/CLE.cpp b/src/PTQ/CLE.cpp index 0fe9575..d0383eb 100644 --- a/src/PTQ/CLE.cpp +++ b/src/PTQ/CLE.cpp @@ -130,17 +130,48 @@ void crossLayerEqualization(std::shared_ptr<GraphView> graphView, double targetD { std::shared_ptr<Node> n1 = affineNodeVector[i]; std::shared_ptr<Node> n2 = affineNodeVector[i+1]; + std::cout << "CLE\n"; + std::cout << "node name is: " << n1->name() << std::endl; + std::cout << "node name is: " << n2->name() << std::endl; + std::cout << "node parent name is: " << n1->name() << std::endl; + std::cout << "node parent name is: " << n2->name() << std::endl; + + std::shared_ptr<Aidge::Tensor> n1localTensor, n2localTensor; + if(n1->getParent(1)->attributes()->hasAttr("isProducerScaling")) + { + std::static_pointer_cast<OperatorTensor>(n1->getParent(1)->getOperator())->getOutput(0)->print(); + n1localTensor = std::static_pointer_cast<OperatorTensor>(n1->getParent(1)->getOperator())->getOutput(0); + } + else + { + n1localTensor = getWeightTensor(n1); + } + + if(n2->getParent(1)->attributes()->hasAttr("isProducerScaling")) + { + n2localTensor = 
std::static_pointer_cast<OperatorTensor>(n2->getParent(1)->getOperator())->getOutput(0); + + } + else + { + n2localTensor = getWeightTensor(n2); + } + + double r1 = getTensorAbsoluteMax(n1localTensor); + double r2 = getTensorAbsoluteMax(n2localTensor); + std::cout << "valeur: " << r1 <<std::endl; + std::cout << "valeur: " << r2 <<std::endl; - double r1 = getTensorAbsoluteMax(getWeightTensor(n1)); - double r2 = getTensorAbsoluteMax(getWeightTensor(n2)); double s1 = std::sqrt(r1 * r2) / r1; double s2 = std::sqrt(r1 * r2) / r2; - rescaleTensor(getWeightTensor(n1), s1); - rescaleTensor(getWeightTensor(n2), s2); - - rescaleTensor(getBiasTensor(n1), s1); + //rescaleTensor(getWeightTensor(n1), s1); + insertScalingBelowProducer(n1->getParent(1),s1,graphView); + //rescaleTensor(getWeightTensor(n2), s2); + insertScalingBelowProducer(n2->getParent(1),s2,graphView); + //rescaleTensor(getBiasTensor(n1), s1); + insertScalingBelowProducer(n1->getParent(2),s1,graphView); double rangeDelta = std::abs(r1 - r2); if (rangeDelta > maxRangeDelta) diff --git a/src/PTQ/PTQ.cpp b/src/PTQ/PTQ.cpp index a81b2b7..25e5f20 100644 --- a/src/PTQ/PTQ.cpp +++ b/src/PTQ/PTQ.cpp @@ -54,6 +54,120 @@ bool isMerging(std::shared_ptr<Node> node) { return (mergingNodeTypes.find(node->type()) != mergingNodeTypes.end()); } +static int getInputIndex(std::shared_ptr<Node> node, std::shared_ptr<Node> parentNode) +{ + int index = 0; + while (node->getParent(index) != parentNode) + index++; + return index; +} + +void multiplyScalingFactor(std::shared_ptr<Aidge::Node> node,double coeff) +{ + if(node->type() == "Mul" && node->attributes()->hasAttr("isProducerScaling")) + { + auto scalingFactorTensor = std::static_pointer_cast<OperatorTensor>(node->getOperator())->getInput(1); + std::shared_ptr<Tensor> fallback; + const Tensor& localTensor = scalingFactorTensor->refCastFrom(fallback, DataType::Float64, "cpu"); + double previousScalingFactor = localTensor.get<double>(0); + std::shared_ptr<Tensor> finalTensor = 
std::make_shared<Tensor>(Array1D<double, 1> {previousScalingFactor * coeff}); + node->input(1).first->getOperator()->setOutput(0, finalTensor); + } + else + { + Log::warn(" Cannot update the scaling factor on Node of type {} with no scaling tag", node->type()); + } +} +bool insertRoundBelowProducer(std::shared_ptr<Node> node,std::shared_ptr<GraphView> graphView) +{ + std::shared_ptr<Aidge::Node> roundNode = Round(node->name() + "_Round"); + roundNode->getOperator()->setDataType(DataType::Float64); // getDataType(parentNode) + roundNode->getOperator()->setBackend("cpu"); + + if (node->getChildren().size() > 0) + { + // SCALING NODE INSERTION + + // We always have one output from Affine and Add nodes, but possibly multiple childs + std::vector<std::shared_ptr<Node>> nextNodes = node->getChildren(0); + + // For each node in nextNodes store the connexion index + std::vector<int> inputIndices(nextNodes.size()); + for (std::size_t i = 0; i < nextNodes.size(); i++) + inputIndices[i] = getInputIndex(nextNodes[i], node); + + for (std::shared_ptr<Node> nextNode : nextNodes) + node->removeChild(nextNode, 0); + + node->addChild(roundNode, 0, 0); + + for (std::size_t i = 0; i < nextNodes.size(); i++) + roundNode->addChild(nextNodes[i], 0, inputIndices[i]); + graphView->add(roundNode); + } + else + { + Log::warn("Unusual producer "); + node->addChild(roundNode, 0, 0); + graphView->add(roundNode); + } + return true; +} +bool insertScalingBelowProducer(std::shared_ptr<Node> node,double sf, std::shared_ptr<GraphView> graphView) +{ + if(node->attributes()->hasAttr("isProducerScaling")) + { + multiplyScalingFactor(node,sf); + return true; + } + if(node->type() != "Producer") + { + Log::warn(" Cannot apply a scaling factor on a node which is not a producer", node->type()); + return false; + } + std::string scalingNodeName = makeUniqueName(node->name() + "_ProducerScaling", graphView); + + std::shared_ptr<Aidge::Node> scalingNode = Mul(scalingNodeName); + 
scalingNode->attributes()->addAttr("isProducerScaling",0.0); + + std::shared_ptr<Tensor> scalingFactorTensor = std::make_shared<Tensor>(Array1D<double, 1> {sf}); + std::shared_ptr<Node> scalingFactorProducer = addProducer(scalingNode, 1, {1}, "Factor"); + scalingFactorProducer->getOperator()->setOutput(0, scalingFactorTensor); + graphView->add(scalingFactorProducer); + + scalingNode->getOperator()->setDataType(DataType::Float64); // getDataType(parentNode) + scalingNode->getOperator()->setBackend("cpu"); + + if (node->getChildren().size() > 0) + { + // SCALING NODE INSERTION + + // We always have one output from Affine and Add nodes, but possibly multiple childs + std::vector<std::shared_ptr<Node>> nextNodes = node->getChildren(0); + + // For each node in nextNodes store the connexion index + std::vector<int> inputIndices(nextNodes.size()); + for (std::size_t i = 0; i < nextNodes.size(); i++) + inputIndices[i] = getInputIndex(nextNodes[i], node); + + for (std::shared_ptr<Node> nextNode : nextNodes) + node->removeChild(nextNode, 0); + + node->addChild(scalingNode, 0, 0); + + for (std::size_t i = 0; i < nextNodes.size(); i++) + scalingNode->addChild(nextNodes[i], 0, inputIndices[i]); + + graphView->add(scalingNode); + } + else + { + Log::warn("Unusual producer "); + node->addChild(scalingNode, 0, 0); + graphView->add(scalingNode); + } + return true; +} bool checkArchitecture(std::shared_ptr<GraphView> graphView) { @@ -167,6 +281,15 @@ static std::vector<std::shared_ptr<Node>> removeMatchingNodes(std::vector<std::s return remainingNodes; } +static std::vector<std::shared_ptr<Node>> removeProdScalingNodes(std::vector<std::shared_ptr<Node>> nodeVector) +{ + std::vector<std::shared_ptr<Node>> remainingNodes; + for (std::shared_ptr<Node> node : nodeVector) + if (!node->attributes()->hasAttr("isProducerScaling")) + remainingNodes.push_back(node); + + return remainingNodes; +} static void fixScheduling(std::vector<std::shared_ptr<Node>>& nodeVector) { @@ -211,6 +334,7 @@ 
std::vector<std::shared_ptr<Node>> retrieveNodeVector(std::shared_ptr<GraphView> fixScheduling(nodeVector); nodeVector = removeMatchingNodes(nodeVector, "Producer"); + nodeVector = removeProdScalingNodes(nodeVector); if (verbose) { @@ -300,13 +424,6 @@ void insertResidualNodes(std::shared_ptr<GraphView> graphView) } } -static int getInputIndex(std::shared_ptr<Node> node, std::shared_ptr<Node> parentNode) -{ - int index = 0; - while (node->getParent(index) != parentNode) - index++; - return index; -} void insertScalingNodes(std::shared_ptr<GraphView> graphView) { @@ -429,7 +546,8 @@ void normalizeParameters(std::shared_ptr<GraphView> graphView) std::shared_ptr<Tensor> weightTensor = getWeightTensor(node); double scaling = getTensorAbsoluteMax(weightTensor); double ratio = 1.0 / scaling; - rescaleTensor(weightTensor, ratio); + //rescaleTensor(weightTensor, ratio); + insertScalingBelowProducer(node->getParent(1),ratio,graphView); // Accumulate the ratio if (node == firstNode) @@ -447,7 +565,8 @@ void normalizeParameters(std::shared_ptr<GraphView> graphView) if (nodeHasBias(node)) { std::shared_ptr<Tensor> biasTensor = getBiasTensor(node); - rescaleTensor(biasTensor, accumulatedRatios[node->name()] ); + //rescaleTensor(biasTensor, accumulatedRatios[node->name()] ); + insertScalingBelowProducer(node->getParent(2),accumulatedRatios[node->name()],graphView); } } @@ -606,7 +725,7 @@ void normalizeActivations(std::shared_ptr<GraphView> graphView, std::map<std::st for (std::shared_ptr<Node> node : nodeVector) { // Seamless scaling factor propagation ... - + if (isAffine(node) || isSeamless(node) || node->type() == "ReLU") { if (node == firstNode) @@ -620,11 +739,13 @@ void normalizeActivations(std::shared_ptr<GraphView> graphView, std::map<std::st } } + // Here prevNode is either a 'Affine' or a 'Merging' // => do not split the cases, just handle the bias ... if (node->attributes()->hasAttr("isScaling")) { + // retrieve the previous scaling factor ... 
std::shared_ptr<Node> prevNode = node->getParent(0); double prevScalingFactor = scalingFactors[prevNode->name()]; @@ -640,11 +761,13 @@ void normalizeActivations(std::shared_ptr<GraphView> graphView, std::map<std::st if (isAffine(prevNode)) { + bool prevNodeHasBias = nodeHasBias(prevNode); if (prevNodeHasBias) - { + { std::shared_ptr<Tensor> biasTensor = getBiasTensor(prevNode); - rescaleTensor(biasTensor, 1.0 / prevScalingFactor); + //rescaleTensor(biasTensor, 1.0 / prevScalingFactor); + insertScalingBelowProducer(prevNode->getParent(2),1.0 / prevScalingFactor,graphView); } } } @@ -842,10 +965,12 @@ void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_ // Rescale the weight tensor std::shared_ptr<Tensor> weightTensor = getWeightTensor(node); - rescaleTensor(weightTensor, signedMax); + //rescaleTensor(weightTensor, signedMax); + insertScalingBelowProducer(node->getParent(1),signedMax,graphView); if (!noQuant) - roundTensor(weightTensor); + insertRoundBelowProducer(node->getParent(1),graphView); + //roundTensor(weightTensor); // Rescale the bias tensor @@ -856,10 +981,12 @@ void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_ std::shared_ptr<Tensor> biasTensor = getBiasTensor(node); - rescaleTensor(biasTensor, rescaling); + //rescaleTensor(biasTensor, rescaling); + insertScalingBelowProducer(node->getParent(2),rescaling,graphView); if (!noQuant) - roundTensor(biasTensor); + insertRoundBelowProducer(node->getParent(2),graphView); + //roundTensor(biasTensor); } // Compensate the rescaling using the next Scaling node @@ -997,17 +1124,20 @@ void performSingleShiftApproximation(std::shared_ptr<GraphView> graphView, bool double ratio = base / approx; - std::shared_ptr<Tensor> weightTensor = getWeightTensor(node); - rescaleTensor(weightTensor, ratio); + //std::shared_ptr<Tensor> weightTensor = getWeightTensor(node); + //rescaleTensor(weightTensor, ratio); + 
insertScalingBelowProducer(node->getParent(1),ratio,graphView); if (!noQuant) - roundTensor(weightTensor); + insertRoundBelowProducer(node->getParent(1),graphView); if (nodeHasBias(node)) { - std::shared_ptr<Tensor> biasTensor = getBiasTensor(node); - rescaleTensor(biasTensor, ratio); + //std::shared_ptr<Tensor> biasTensor = getBiasTensor(node); + //rescaleTensor(biasTensor, ratio); + insertScalingBelowProducer(node->getParent(2),ratio,graphView); + if (!noQuant) - roundTensor(biasTensor); + insertRoundBelowProducer(node->getParent(2),graphView); } } } @@ -1084,7 +1214,6 @@ void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits, //Log:debug("=== RANGES (AFTER ADJUST) ==="); //printRanges(graphView, valueRanges); - Log::info(" Normalizing the activations ..."); normalizeActivations(graphView, valueRanges); @@ -1143,7 +1272,8 @@ void clearBiases(std::shared_ptr<GraphView> graphView) for (std::shared_ptr<Node> node : graphView->getNodes()) { if (node->type() == "FC" || node->type() == "Conv2D") { std::shared_ptr<Tensor> biasTensor = std::static_pointer_cast<OperatorTensor>(node->getOperator())->getInput(2); - rescaleTensor(biasTensor, 0); + //rescaleTensor(biasTensor, 0); + insertScalingBelowProducer(node->getParent(2),0,graphView); } } } -- GitLab From cf51e87cbaf34ea4372a2cfdf64ce9d32b3bfc28 Mon Sep 17 00:00:00 2001 From: Noam ZERAH <noam.zerah@cea.fr> Date: Mon, 20 Jan 2025 14:22:55 +0000 Subject: [PATCH 11/26] Correction the Single Shift Approximation error with the new method for updating weight and bias --- include/aidge/quantization_version.h | 2 +- src/PTQ/PTQ.cpp | 13 ++++++++++++- src/operator/PTQMetaOps.cpp | 16 ++++++++-------- 3 files changed, 21 insertions(+), 10 deletions(-) diff --git a/include/aidge/quantization_version.h b/include/aidge/quantization_version.h index 429e4bd..37853e3 100644 --- a/include/aidge/quantization_version.h +++ b/include/aidge/quantization_version.h @@ -6,6 +6,6 @@ static constexpr const int 
PROJECT_VERSION_MAJOR = 0; static constexpr const int PROJECT_VERSION_MINOR = 3; static constexpr const int PROJECT_VERSION_PATCH = 0; static constexpr const char * PROJECT_VERSION = "0.3.0"; -static constexpr const char * PROJECT_GIT_HASH = "e464870"; +static constexpr const char * PROJECT_GIT_HASH = "03286c7"; } #endif // VERSION_H diff --git a/src/PTQ/PTQ.cpp b/src/PTQ/PTQ.cpp index 25e5f20..fe2aef4 100644 --- a/src/PTQ/PTQ.cpp +++ b/src/PTQ/PTQ.cpp @@ -111,18 +111,27 @@ bool insertRoundBelowProducer(std::shared_ptr<Node> node,std::shared_ptr<GraphVi node->addChild(roundNode, 0, 0); graphView->add(roundNode); } + roundNode->attributes()->addAttr("isProducerRounding",0.0); return true; } bool insertScalingBelowProducer(std::shared_ptr<Node> node,double sf, std::shared_ptr<GraphView> graphView) { + if(node->attributes()->hasAttr("isProducerRounding")) + { + //In this case we 'bump' the node to the one above him (an actual ProducerScaling) + // because the round node is not usable (only used when SSA is enabled) + node = node->getParent(0); + } if(node->attributes()->hasAttr("isProducerScaling")) { + // We accumulate the multiples scaling factors by multiplying the SF of the ProducerScaling node + // (adding new nodes each time would make the graph unusable) multiplyScalingFactor(node,sf); return true; } if(node->type() != "Producer") { - Log::warn(" Cannot apply a scaling factor on a node which is not a producer", node->type()); + Log::warn(" Cannot apply a scaling factor on a node which is not a producer on a node of type {} whose name is {}", node->type(),node->name()); return false; } std::string scalingNodeName = makeUniqueName(node->name() + "_ProducerScaling", graphView); @@ -1126,6 +1135,7 @@ void performSingleShiftApproximation(std::shared_ptr<GraphView> graphView, bool //std::shared_ptr<Tensor> weightTensor = getWeightTensor(node); //rescaleTensor(weightTensor, ratio); + Log::warn("A\n"); insertScalingBelowProducer(node->getParent(1),ratio,graphView); if 
(!noQuant) insertRoundBelowProducer(node->getParent(1),graphView); @@ -1134,6 +1144,7 @@ void performSingleShiftApproximation(std::shared_ptr<GraphView> graphView, bool { //std::shared_ptr<Tensor> biasTensor = getBiasTensor(node); //rescaleTensor(biasTensor, ratio); + Log::warn("B\n"); insertScalingBelowProducer(node->getParent(2),ratio,graphView); if (!noQuant) diff --git a/src/operator/PTQMetaOps.cpp b/src/operator/PTQMetaOps.cpp index facfed2..105d4e8 100644 --- a/src/operator/PTQMetaOps.cpp +++ b/src/operator/PTQMetaOps.cpp @@ -75,8 +75,8 @@ static std::shared_ptr<Node> getSubNode(std::shared_ptr<GraphView> graphView, st void updateScalingFactor(std::shared_ptr<Node> metaOpNode, double scalingFactor) { - if(metaOpNode->type() != "Quantizer") - Log::warn(" Cannot update the scaling factor on Node of type {}", metaOpNode->type()); + if(metaOpNode->type() != "Scaling" && metaOpNode->type() != "Quantizer") + Log::warn("Cannot update the scaling factor on Node of type {}", metaOpNode->type()); std::shared_ptr<Tensor> scalingFactorTensor = std::make_shared<Tensor>(Array1D<double, 1> {scalingFactor}); @@ -85,15 +85,15 @@ void updateScalingFactor(std::shared_ptr<Node> metaOpNode, double scalingFactor) std::shared_ptr<Node> mulNode = getSubNode(metaOp->getMicroGraph(), "Mul"); if (!mulNode) - Log::warn(" Invalid PTQ MetaOperator, no Mul node found inside ! "); + Log::warn("Invalid PTQ MetaOperator, no Mul node found inside ! 
"); mulNode->input(1).first->getOperator()->setOutput(0, scalingFactorTensor); } double getScalingFactor(std::shared_ptr<Node> MetaOpNode) { - if (MetaOpNode->type() != "Quantizer") { - Log::warn(" Cannot get the scaling factor on Node of type {}", MetaOpNode->type()); + if (MetaOpNode->type() != "Scaling" && MetaOpNode->type() != "Quantizer") { + Log::warn("Cannot get the scaling factor on Node of type {}", MetaOpNode->type()); return 0; } @@ -102,7 +102,7 @@ double getScalingFactor(std::shared_ptr<Node> MetaOpNode) std::shared_ptr<Node> mulNode = getSubNode(metaOp->getMicroGraph(), "Mul"); if (!mulNode) { - Log::warn(" Invalid PTQ MetaOperator, no Mul found inside node of type {}", MetaOpNode->type()); + Log::warn("Invalid PTQ MetaOperator, no Mul found inside node of type {}", MetaOpNode->type()); return 0; } @@ -117,7 +117,7 @@ double getScalingFactor(std::shared_ptr<Node> MetaOpNode) void setClipRange(std::shared_ptr<Node> quantizerNode, double min, double max) { if (quantizerNode->type() != "Quantizer") { - Log::warn(" Cannot set the clipping range on Node of type {}", quantizerNode->type()); + Log::warn("Cannot set the clipping range on Node of type {}", quantizerNode->type()); return; } @@ -126,7 +126,7 @@ void setClipRange(std::shared_ptr<Node> quantizerNode, double min, double max) std::shared_ptr<Node> clipNode = getSubNode(metaOp->getMicroGraph(), "Clip"); if (!clipNode) { - Log::warn(" Invalid PTQ MetaOperator, no Clip found inside node of type {}", quantizerNode->type()); + Log::warn("Invalid PTQ MetaOperator, no Clip found inside node of type {}", quantizerNode->type()); return; } -- GitLab From a749505df0e1632345a5ddfb2fd6f38436ab9f83 Mon Sep 17 00:00:00 2001 From: Noam ZERAH <noam.zerah@cea.fr> Date: Tue, 21 Jan 2025 14:15:26 +0000 Subject: [PATCH 12/26] Fixing bug related to the lower result in resnet(switching the network to float64 solved it --- include/aidge/quantization_version.h | 2 +- src/PTQ/CLE.cpp | 1 + src/PTQ/PTQ.cpp | 22 
+--------------------- 3 files changed, 3 insertions(+), 22 deletions(-) diff --git a/include/aidge/quantization_version.h b/include/aidge/quantization_version.h index 37853e3..2e53dfc 100644 --- a/include/aidge/quantization_version.h +++ b/include/aidge/quantization_version.h @@ -6,6 +6,6 @@ static constexpr const int PROJECT_VERSION_MAJOR = 0; static constexpr const int PROJECT_VERSION_MINOR = 3; static constexpr const int PROJECT_VERSION_PATCH = 0; static constexpr const char * PROJECT_VERSION = "0.3.0"; -static constexpr const char * PROJECT_GIT_HASH = "03286c7"; +static constexpr const char * PROJECT_GIT_HASH = "01880af"; } #endif // VERSION_H diff --git a/src/PTQ/CLE.cpp b/src/PTQ/CLE.cpp index d0383eb..d47a2c2 100644 --- a/src/PTQ/CLE.cpp +++ b/src/PTQ/CLE.cpp @@ -14,6 +14,7 @@ #include "aidge/quantization/PTQ/PTQ.hpp" #include "aidge/graph/GraphView.hpp" + #include "aidge/scheduler/SequentialScheduler.hpp" #include "aidge/scheduler/Scheduler.hpp" #include "aidge/utils/Log.hpp" diff --git a/src/PTQ/PTQ.cpp b/src/PTQ/PTQ.cpp index fe2aef4..60326e8 100644 --- a/src/PTQ/PTQ.cpp +++ b/src/PTQ/PTQ.cpp @@ -363,7 +363,6 @@ static std::shared_ptr<Node> getFirstNode(std::shared_ptr<GraphView> graphView) void prepareNetwork(std::shared_ptr<GraphView> graphView) { removeFlatten(graphView); - sanitizeNodeNames(graphView); bool containsBatchNorm = false; @@ -972,30 +971,23 @@ void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_ if (isAffine(node)) { // Rescale the weight tensor - std::shared_ptr<Tensor> weightTensor = getWeightTensor(node); - //rescaleTensor(weightTensor, signedMax); insertScalingBelowProducer(node->getParent(1),signedMax,graphView); if (!noQuant) insertRoundBelowProducer(node->getParent(1),graphView); - //roundTensor(weightTensor); // Rescale the bias tensor - if (nodeHasBias(node)) { bool inputIsUnsigned = signMap[node->name()].first; double rescaling = inputIsUnsigned ? 
unsignedMax * signedMax : signedMax * signedMax; - - + std::shared_ptr<Tensor> biasTensor = getBiasTensor(node); - //rescaleTensor(biasTensor, rescaling); insertScalingBelowProducer(node->getParent(2),rescaling,graphView); if (!noQuant) insertRoundBelowProducer(node->getParent(2),graphView); - //roundTensor(biasTensor); } // Compensate the rescaling using the next Scaling node @@ -1133,18 +1125,12 @@ void performSingleShiftApproximation(std::shared_ptr<GraphView> graphView, bool double ratio = base / approx; - //std::shared_ptr<Tensor> weightTensor = getWeightTensor(node); - //rescaleTensor(weightTensor, ratio); - Log::warn("A\n"); insertScalingBelowProducer(node->getParent(1),ratio,graphView); if (!noQuant) insertRoundBelowProducer(node->getParent(1),graphView); if (nodeHasBias(node)) { - //std::shared_ptr<Tensor> biasTensor = getBiasTensor(node); - //rescaleTensor(biasTensor, ratio); - Log::warn("B\n"); insertScalingBelowProducer(node->getParent(2),ratio,graphView); if (!noQuant) @@ -1289,10 +1275,4 @@ void clearBiases(std::shared_ptr<GraphView> graphView) } } -void devPTQ(std::shared_ptr<GraphView> graphView) -{ - for (std::shared_ptr<Node> node : graphView->getNodes()) - fmt::println(" UUU : {}", node->name()); -} - } -- GitLab From 5ec65431e486d00adb4ca7ac432786a0b7467858 Mon Sep 17 00:00:00 2001 From: Noam ZERAH <noam.zerah@cea.fr> Date: Wed, 22 Jan 2025 10:27:01 +0000 Subject: [PATCH 13/26] Rebasing on dev --- include/aidge/quantization_version.h | 2 +- src/PTQ/CLE.cpp | 9 +-------- src/PTQ/PTQ.cpp | 22 +--------------------- 3 files changed, 3 insertions(+), 30 deletions(-) diff --git a/include/aidge/quantization_version.h b/include/aidge/quantization_version.h index 2e53dfc..5a7e98b 100644 --- a/include/aidge/quantization_version.h +++ b/include/aidge/quantization_version.h @@ -6,6 +6,6 @@ static constexpr const int PROJECT_VERSION_MAJOR = 0; static constexpr const int PROJECT_VERSION_MINOR = 3; static constexpr const int PROJECT_VERSION_PATCH = 0; static 
constexpr const char * PROJECT_VERSION = "0.3.0"; -static constexpr const char * PROJECT_GIT_HASH = "01880af"; +static constexpr const char * PROJECT_GIT_HASH = "a749505"; } #endif // VERSION_H diff --git a/src/PTQ/CLE.cpp b/src/PTQ/CLE.cpp index d47a2c2..52e4ec0 100644 --- a/src/PTQ/CLE.cpp +++ b/src/PTQ/CLE.cpp @@ -45,7 +45,7 @@ static void rescaleTensor(std::shared_ptr<Tensor> tensor, double scaling) mulOp.setDataType(tensor->dataType()); mulOp.setBackend(tensor->backend()); - std::shared_ptr<Aidge::Tensor> scalingTensor = std::make_shared<Aidge::Tensor>(Aidge::Array1D<float, 1> {scaling}); + std::shared_ptr<Aidge::Tensor> scalingTensor = std::make_shared<Aidge::Tensor>(Aidge::Array1D<double, 1> {scaling}); scalingTensor->setDataType(tensor->dataType()); scalingTensor->setBackend(tensor->backend()); @@ -131,11 +131,6 @@ void crossLayerEqualization(std::shared_ptr<GraphView> graphView, double targetD { std::shared_ptr<Node> n1 = affineNodeVector[i]; std::shared_ptr<Node> n2 = affineNodeVector[i+1]; - std::cout << "CLE\n"; - std::cout << "node name is: " << n1->name() << std::endl; - std::cout << "node name is: " << n2->name() << std::endl; - std::cout << "node parent name is: " << n1->name() << std::endl; - std::cout << "node parent name is: " << n2->name() << std::endl; std::shared_ptr<Aidge::Tensor> n1localTensor, n2localTensor; if(n1->getParent(1)->attributes()->hasAttr("isProducerScaling")) @@ -160,8 +155,6 @@ void crossLayerEqualization(std::shared_ptr<GraphView> graphView, double targetD double r1 = getTensorAbsoluteMax(n1localTensor); double r2 = getTensorAbsoluteMax(n2localTensor); - std::cout << "valeur: " << r1 <<std::endl; - std::cout << "valeur: " << r2 <<std::endl; double s1 = std::sqrt(r1 * r2) / r1; diff --git a/src/PTQ/PTQ.cpp b/src/PTQ/PTQ.cpp index 60326e8..108be02 100644 --- a/src/PTQ/PTQ.cpp +++ b/src/PTQ/PTQ.cpp @@ -26,11 +26,8 @@ #include "aidge/operator/ReLU.hpp" #include "aidge/operator/BatchNorm.hpp" #include "aidge/operator/Conv.hpp" - 
#include "aidge/operator/ArgMax.hpp" -#include "aidge/operator/Abs.hpp" #include "aidge/operator/Reshape.hpp" -#include "aidge/operator/Round.hpp" #include "aidge/recipes/Recipes.hpp" @@ -64,7 +61,7 @@ static int getInputIndex(std::shared_ptr<Node> node, std::shared_ptr<Node> paren void multiplyScalingFactor(std::shared_ptr<Aidge::Node> node,double coeff) { - if(node->type() == "Mul" && node->attributes()->hasAttr("isProducerScaling")) + if(node->type() == "Mul" && (node->attributes()->hasAttr("isProducerScaling") || node->attributes()->hasAttr("isScaling"))) { auto scalingFactorTensor = std::static_pointer_cast<OperatorTensor>(node->getOperator())->getInput(1); std::shared_ptr<Tensor> fallback; @@ -194,23 +191,6 @@ bool checkArchitecture(std::shared_ptr<GraphView> graphView) return true; } -void multiplyScalingFactor(std::shared_ptr<Aidge::Node> node,double coeff) -{ - if(node->type() == "Mul" && node->attributes()->hasAttr("isScaling")) - { - auto scalingFactorTensor = std::static_pointer_cast<OperatorTensor>(node->getOperator())->getInput(1); - std::shared_ptr<Tensor> fallback; - const Tensor& localTensor = scalingFactorTensor->refCastFrom(fallback, DataType::Float64, "cpu"); - double previousScalingFactor = localTensor.get<double>(0); - std::shared_ptr<Tensor> finalTensor = std::make_shared<Tensor>(Array1D<double, 1> {previousScalingFactor * coeff}); - node->input(1).first->getOperator()->setOutput(0, finalTensor); - } - else - { - Log::warn(" Cannot update the scaling factor on Node of type {} with no scaling tag", node->type()); - } -} - static void rescaleTensor(std::shared_ptr<Tensor> tensor, float scaling) { auto mulOp = Mul_Op(); -- GitLab From c374ce49cd3a60cab4521c1fb4b10abc8d1e6f43 Mon Sep 17 00:00:00 2001 From: Noam ZERAH <noam.zerah@cea.fr> Date: Wed, 22 Jan 2025 13:06:51 +0000 Subject: [PATCH 14/26] Correcting Log::warn into AIDGE_ASSERT to make the code safer --- include/aidge/quantization/PTQ/PTQ.hpp | 21 ++++- include/aidge/quantization_version.h 
| 2 +- src/PTQ/PTQ.cpp | 110 +++++++++---------------- 3 files changed, 61 insertions(+), 72 deletions(-) diff --git a/include/aidge/quantization/PTQ/PTQ.hpp b/include/aidge/quantization/PTQ/PTQ.hpp index 74a49c8..e1ef529 100644 --- a/include/aidge/quantization/PTQ/PTQ.hpp +++ b/include/aidge/quantization/PTQ/PTQ.hpp @@ -66,7 +66,26 @@ namespace Aidge { * @return The scheduled vector of nodes */ std::vector<std::shared_ptr<Node>> retrieveNodeVector(std::shared_ptr<GraphView> graphView, bool newSchedule = true, bool verbose = false); - bool insertScalingBelowProducer(std::shared_ptr<Node> node,double sf, std::shared_ptr<GraphView> graphView); + + /** + * @brief Inserts a scaling node below the given producer node in the graph view. + * If the node is already a producer scaling node, it accumulates the scaling factor by multiplyins its value directly. + * + * @param node A shared pointer to the producer node where the scaling node will be inserted (below). + * @param scalingFactor The scaling factor to apply. + * @param graphView A shared pointer to the graph view in which the nodes are located. + * @return True if the scaling node was successfully inserted or the scaling factor was accumulated; False otherwise. + */ + bool insertScalingBelowProducer(std::shared_ptr<Node> node, double scalingFactor, std::shared_ptr<GraphView> graphView); + + /** + * @brief Inserts a rounding node below the given producer (also below its ows producerScaling) node in the graph view. + * + * @param node A shared pointer to the producer node where the rounding node will be inserted. + * @param graphView A shared pointer to the graph view in which the nodes are located. + * @return True if the rounding node was successfully inserted; False otherwise. + */ + bool insertRoundBelowProducer(std::shared_ptr<Node> node, std::shared_ptr<GraphView> graphView); /** * @brief Determine whether an input GraphView can be quantized or not. 
diff --git a/include/aidge/quantization_version.h b/include/aidge/quantization_version.h index 5a7e98b..9b4e3de 100644 --- a/include/aidge/quantization_version.h +++ b/include/aidge/quantization_version.h @@ -6,6 +6,6 @@ static constexpr const int PROJECT_VERSION_MAJOR = 0; static constexpr const int PROJECT_VERSION_MINOR = 3; static constexpr const int PROJECT_VERSION_PATCH = 0; static constexpr const char * PROJECT_VERSION = "0.3.0"; -static constexpr const char * PROJECT_GIT_HASH = "a749505"; +static constexpr const char * PROJECT_GIT_HASH = "5ec6543"; } #endif // VERSION_H diff --git a/src/PTQ/PTQ.cpp b/src/PTQ/PTQ.cpp index 108be02..bda0ae1 100644 --- a/src/PTQ/PTQ.cpp +++ b/src/PTQ/PTQ.cpp @@ -61,57 +61,39 @@ static int getInputIndex(std::shared_ptr<Node> node, std::shared_ptr<Node> paren void multiplyScalingFactor(std::shared_ptr<Aidge::Node> node,double coeff) { - if(node->type() == "Mul" && (node->attributes()->hasAttr("isProducerScaling") || node->attributes()->hasAttr("isScaling"))) - { - auto scalingFactorTensor = std::static_pointer_cast<OperatorTensor>(node->getOperator())->getInput(1); - std::shared_ptr<Tensor> fallback; - const Tensor& localTensor = scalingFactorTensor->refCastFrom(fallback, DataType::Float64, "cpu"); - double previousScalingFactor = localTensor.get<double>(0); - std::shared_ptr<Tensor> finalTensor = std::make_shared<Tensor>(Array1D<double, 1> {previousScalingFactor * coeff}); - node->input(1).first->getOperator()->setOutput(0, finalTensor); - } - else - { - Log::warn(" Cannot update the scaling factor on Node of type {} with no scaling tag", node->type()); - } + AIDGE_ASSERT(node->type() == "Mul" && (node->attributes()->hasAttr("isProducerScaling") || node->attributes()->hasAttr("isScaling")), + "Cannot update the scaling factor on Node of type {} with no scaling tag",node->type()); + auto scalingFactorTensor = std::static_pointer_cast<OperatorTensor>(node->getOperator())->getInput(1); + std::shared_ptr<Tensor> fallback; + const 
Tensor& localTensor = scalingFactorTensor->refCastFrom(fallback, DataType::Float64, "cpu"); + double previousScalingFactor = localTensor.get<double>(0); + std::shared_ptr<Tensor> finalTensor = std::make_shared<Tensor>(Array1D<double, 1> {previousScalingFactor * coeff}); + node->input(1).first->getOperator()->setOutput(0, finalTensor); } bool insertRoundBelowProducer(std::shared_ptr<Node> node,std::shared_ptr<GraphView> graphView) { std::shared_ptr<Aidge::Node> roundNode = Round(node->name() + "_Round"); roundNode->getOperator()->setDataType(DataType::Float64); // getDataType(parentNode) roundNode->getOperator()->setBackend("cpu"); - - if (node->getChildren().size() > 0) - { - // SCALING NODE INSERTION + AIDGE_ASSERT(node->getChildren().size() > 0, "The producer node must have at least one child to insert a scaling node."); + std::vector<std::shared_ptr<Node>> nextNodes = node->getChildren(0); + std::vector<int> inputIndices(nextNodes.size()); + for (std::size_t i = 0; i < nextNodes.size(); i++) + inputIndices[i] = getInputIndex(nextNodes[i], node); - // We always have one output from Affine and Add nodes, but possibly multiple childs - std::vector<std::shared_ptr<Node>> nextNodes = node->getChildren(0); + for (std::shared_ptr<Node> nextNode : nextNodes) + node->removeChild(nextNode, 0); - // For each node in nextNodes store the connexion index - std::vector<int> inputIndices(nextNodes.size()); - for (std::size_t i = 0; i < nextNodes.size(); i++) - inputIndices[i] = getInputIndex(nextNodes[i], node); - - for (std::shared_ptr<Node> nextNode : nextNodes) - node->removeChild(nextNode, 0); - - node->addChild(roundNode, 0, 0); + node->addChild(roundNode, 0, 0); - for (std::size_t i = 0; i < nextNodes.size(); i++) - roundNode->addChild(nextNodes[i], 0, inputIndices[i]); - graphView->add(roundNode); - } - else - { - Log::warn("Unusual producer "); - node->addChild(roundNode, 0, 0); + for (std::size_t i = 0; i < nextNodes.size(); i++) + roundNode->addChild(nextNodes[i], 0, 
inputIndices[i]); graphView->add(roundNode); - } + roundNode->attributes()->addAttr("isProducerRounding",0.0); return true; } -bool insertScalingBelowProducer(std::shared_ptr<Node> node,double sf, std::shared_ptr<GraphView> graphView) +bool insertScalingBelowProducer(std::shared_ptr<Node> node,double scalingFactor, std::shared_ptr<GraphView> graphView) { if(node->attributes()->hasAttr("isProducerRounding")) { @@ -123,55 +105,39 @@ bool insertScalingBelowProducer(std::shared_ptr<Node> node,double sf, std::share { // We accumulate the multiples scaling factors by multiplying the SF of the ProducerScaling node // (adding new nodes each time would make the graph unusable) - multiplyScalingFactor(node,sf); + multiplyScalingFactor(node,scalingFactor); return true; } - if(node->type() != "Producer") - { - Log::warn(" Cannot apply a scaling factor on a node which is not a producer on a node of type {} whose name is {}", node->type(),node->name()); - return false; - } + AIDGE_ASSERT(node->type() == "Producer","Cannot apply a scaling factor on node of type: {} which is not a producer", node->type()); std::string scalingNodeName = makeUniqueName(node->name() + "_ProducerScaling", graphView); std::shared_ptr<Aidge::Node> scalingNode = Mul(scalingNodeName); scalingNode->attributes()->addAttr("isProducerScaling",0.0); - std::shared_ptr<Tensor> scalingFactorTensor = std::make_shared<Tensor>(Array1D<double, 1> {sf}); + std::shared_ptr<Tensor> scalingFactorTensor = std::make_shared<Tensor>(Array1D<double, 1> {scalingFactor}); std::shared_ptr<Node> scalingFactorProducer = addProducer(scalingNode, 1, {1}, "Factor"); scalingFactorProducer->getOperator()->setOutput(0, scalingFactorTensor); graphView->add(scalingFactorProducer); scalingNode->getOperator()->setDataType(DataType::Float64); // getDataType(parentNode) scalingNode->getOperator()->setBackend("cpu"); + AIDGE_ASSERT(node->getChildren().size() > 0, "The producer node must have at least one child to insert a scaling node."); + 
std::vector<std::shared_ptr<Node>> nextNodes = node->getChildren(0); - if (node->getChildren().size() > 0) - { - // SCALING NODE INSERTION + // For each node in nextNodes store the connexion index + std::vector<int> inputIndices(nextNodes.size()); + for (std::size_t i = 0; i < nextNodes.size(); i++) + inputIndices[i] = getInputIndex(nextNodes[i], node); - // We always have one output from Affine and Add nodes, but possibly multiple childs - std::vector<std::shared_ptr<Node>> nextNodes = node->getChildren(0); + for (std::shared_ptr<Node> nextNode : nextNodes) + node->removeChild(nextNode, 0); - // For each node in nextNodes store the connexion index - std::vector<int> inputIndices(nextNodes.size()); - for (std::size_t i = 0; i < nextNodes.size(); i++) - inputIndices[i] = getInputIndex(nextNodes[i], node); - - for (std::shared_ptr<Node> nextNode : nextNodes) - node->removeChild(nextNode, 0); + node->addChild(scalingNode, 0, 0); - node->addChild(scalingNode, 0, 0); + for (std::size_t i = 0; i < nextNodes.size(); i++) + scalingNode->addChild(nextNodes[i], 0, inputIndices[i]); - for (std::size_t i = 0; i < nextNodes.size(); i++) - scalingNode->addChild(nextNodes[i], 0, inputIndices[i]); - - graphView->add(scalingNode); - } - else - { - Log::warn("Unusual producer "); - node->addChild(scalingNode, 0, 0); - graphView->add(scalingNode); - } + graphView->add(scalingNode); return true; } @@ -1254,5 +1220,9 @@ void clearBiases(std::shared_ptr<GraphView> graphView) } } } - +void devPTQ(std::shared_ptr<GraphView> graphView) +{ + for (std::shared_ptr<Node> node : graphView->getNodes()) + Log::debug(" UUU : {}", node->name()); +} } -- GitLab From f255dcad5d34ec2f62eacd52301c7b9377ef4b29 Mon Sep 17 00:00:00 2001 From: Noam ZERAH <noam.zerah@cea.fr> Date: Wed, 22 Jan 2025 14:36:02 +0000 Subject: [PATCH 15/26] Changing the CLE to fit with the new method of ProducerScaling --- include/aidge/quantization_version.h | 2 +- src/PTQ/CLE.cpp | 38 ++++++++++------------------ 2 files 
changed, 14 insertions(+), 26 deletions(-) diff --git a/include/aidge/quantization_version.h b/include/aidge/quantization_version.h index 9b4e3de..eba0eab 100644 --- a/include/aidge/quantization_version.h +++ b/include/aidge/quantization_version.h @@ -6,6 +6,6 @@ static constexpr const int PROJECT_VERSION_MAJOR = 0; static constexpr const int PROJECT_VERSION_MINOR = 3; static constexpr const int PROJECT_VERSION_PATCH = 0; static constexpr const char * PROJECT_VERSION = "0.3.0"; -static constexpr const char * PROJECT_GIT_HASH = "5ec6543"; +static constexpr const char * PROJECT_GIT_HASH = "c374ce4"; } #endif // VERSION_H diff --git a/src/PTQ/CLE.cpp b/src/PTQ/CLE.cpp index 52e4ec0..eb5ca7a 100644 --- a/src/PTQ/CLE.cpp +++ b/src/PTQ/CLE.cpp @@ -94,6 +94,16 @@ static double getTensorAbsoluteMax(std::shared_ptr<Tensor> tensor) return flatTensor->get<double>(maxIndex); } +//Function used to extraxt the local tensor (from a ProducerScalingNode) +std::shared_ptr<Aidge::Tensor> getLocalTensor(std::shared_ptr<Node> node) { + if (node->getParent(1)->attributes()->hasAttr("isProducerScaling")) { + std::shared_ptr<Aidge::OperatorTensor> operatorTensor = std::static_pointer_cast<OperatorTensor>(node->getParent(1)->getOperator()); + operatorTensor->forward();// We need the forward pass to compute the scaled value of the Tensor + return operatorTensor->getOutput(0); + } else { + return getWeightTensor(node); + } +} void crossLayerEqualization(std::shared_ptr<GraphView> graphView, double targetDelta) { @@ -132,39 +142,17 @@ void crossLayerEqualization(std::shared_ptr<GraphView> graphView, double targetD std::shared_ptr<Node> n1 = affineNodeVector[i]; std::shared_ptr<Node> n2 = affineNodeVector[i+1]; - std::shared_ptr<Aidge::Tensor> n1localTensor, n2localTensor; - if(n1->getParent(1)->attributes()->hasAttr("isProducerScaling")) - { - std::static_pointer_cast<OperatorTensor>(n1->getParent(1)->getOperator())->getOutput(0)->print(); - n1localTensor = 
std::static_pointer_cast<OperatorTensor>(n1->getParent(1)->getOperator())->getOutput(0); - } - else - { - n1localTensor = getWeightTensor(n1); - } - - if(n2->getParent(1)->attributes()->hasAttr("isProducerScaling")) - { - n2localTensor = std::static_pointer_cast<OperatorTensor>(n2->getParent(1)->getOperator())->getOutput(0); - - } - else - { - n2localTensor = getWeightTensor(n2); - } - + std::shared_ptr<Aidge::Tensor> n1localTensor = getLocalTensor(n1); + std::shared_ptr<Aidge::Tensor> n2localTensor = getLocalTensor(n2); + double r1 = getTensorAbsoluteMax(n1localTensor); double r2 = getTensorAbsoluteMax(n2localTensor); - double s1 = std::sqrt(r1 * r2) / r1; double s2 = std::sqrt(r1 * r2) / r2; - //rescaleTensor(getWeightTensor(n1), s1); insertScalingBelowProducer(n1->getParent(1),s1,graphView); - //rescaleTensor(getWeightTensor(n2), s2); insertScalingBelowProducer(n2->getParent(1),s2,graphView); - //rescaleTensor(getBiasTensor(n1), s1); insertScalingBelowProducer(n1->getParent(2),s1,graphView); double rangeDelta = std::abs(r1 - r2); -- GitLab From f0f9e607707966ca796dfcb4636a1354dc5568c0 Mon Sep 17 00:00:00 2001 From: Noam ZERAH <noam.zerah@cea.fr> Date: Thu, 23 Jan 2025 11:37:34 +0000 Subject: [PATCH 16/26] Minor refactorization of PTQ.cpp (deleting de^recated functions ) --- src/PTQ/PTQ.cpp | 34 +--------------------------------- 1 file changed, 1 insertion(+), 33 deletions(-) diff --git a/src/PTQ/PTQ.cpp b/src/PTQ/PTQ.cpp index bda0ae1..28bd587 100644 --- a/src/PTQ/PTQ.cpp +++ b/src/PTQ/PTQ.cpp @@ -75,7 +75,7 @@ bool insertRoundBelowProducer(std::shared_ptr<Node> node,std::shared_ptr<GraphVi std::shared_ptr<Aidge::Node> roundNode = Round(node->name() + "_Round"); roundNode->getOperator()->setDataType(DataType::Float64); // getDataType(parentNode) roundNode->getOperator()->setBackend("cpu"); - AIDGE_ASSERT(node->getChildren().size() > 0, "The producer node must have at least one child to insert a scaling node."); + AIDGE_ASSERT(node->getChildren().size() > 0, 
"The producer node must have at least one child to insert a rounding node."); std::vector<std::shared_ptr<Node>> nextNodes = node->getChildren(0); std::vector<int> inputIndices(nextNodes.size()); for (std::size_t i = 0; i < nextNodes.size(); i++) @@ -157,38 +157,6 @@ bool checkArchitecture(std::shared_ptr<GraphView> graphView) return true; } -static void rescaleTensor(std::shared_ptr<Tensor> tensor, float scaling) -{ - auto mulOp = Mul_Op(); - mulOp.setDataType(tensor->dataType()); - mulOp.setBackend(tensor->backend()); - - std::shared_ptr<Aidge::Tensor> scalingTensor = std::make_shared<Aidge::Tensor>(Aidge::Array1D<float, 1> {scaling}); - scalingTensor->setDataType(tensor->dataType()); - scalingTensor->setBackend(tensor->backend()); - - mulOp.associateInput(0, tensor); - mulOp.associateInput(1, scalingTensor); - - mulOp.forward(); - - auto outTensor = mulOp.getOutput(0); - *tensor = *outTensor; -} - -static void roundTensor(std::shared_ptr<Tensor> tensor) -{ - auto roundOp = Round_Op(); - roundOp.setDataType(tensor->dataType()); - roundOp.setBackend(tensor->backend()); - - roundOp.associateInput(0, tensor); - roundOp.forward(); - - auto outTensor = roundOp.getOutput(0); - *tensor = *outTensor; -} - // TODO : make the retreival of argmax values backend independant (refCastFrom) static double getTensorAbsoluteMax(std::shared_ptr<Tensor> tensor) { -- GitLab From 487718d2508ffe898b253bab61ce583341401813 Mon Sep 17 00:00:00 2001 From: Noam ZERAH <noam.zerah@cea.fr> Date: Thu, 23 Jan 2025 12:08:34 +0000 Subject: [PATCH 17/26] Refactorizing the code to add the function InsertBetween to easily insert a node between 2 already connected --- include/aidge/quantization_version.h | 2 +- src/PTQ/PTQ.cpp | 79 +++++++++++----------------- 2 files changed, 33 insertions(+), 48 deletions(-) diff --git a/include/aidge/quantization_version.h b/include/aidge/quantization_version.h index eba0eab..909ab28 100644 --- a/include/aidge/quantization_version.h +++ 
b/include/aidge/quantization_version.h @@ -6,6 +6,6 @@ static constexpr const int PROJECT_VERSION_MAJOR = 0; static constexpr const int PROJECT_VERSION_MINOR = 3; static constexpr const int PROJECT_VERSION_PATCH = 0; static constexpr const char * PROJECT_VERSION = "0.3.0"; -static constexpr const char * PROJECT_GIT_HASH = "c374ce4"; +static constexpr const char * PROJECT_GIT_HASH = "f0f9e60"; } #endif // VERSION_H diff --git a/src/PTQ/PTQ.cpp b/src/PTQ/PTQ.cpp index 28bd587..92049a2 100644 --- a/src/PTQ/PTQ.cpp +++ b/src/PTQ/PTQ.cpp @@ -70,25 +70,42 @@ void multiplyScalingFactor(std::shared_ptr<Aidge::Node> node,double coeff) std::shared_ptr<Tensor> finalTensor = std::make_shared<Tensor>(Array1D<double, 1> {previousScalingFactor * coeff}); node->input(1).first->getOperator()->setOutput(0, finalTensor); } +/* Util function to insert a node below another one already connected */ +void insertNodeBetween(std::shared_ptr<Node> parent, + std::shared_ptr<Node> newNode, + std::shared_ptr<GraphView> graphView) +{ + // Checking the parents always have at least 1 children + AIDGE_ASSERT(parent->getChildren().size() > 0, "The parent node must have at least one child to insert a new node."); + + // Retrieve children connection indexes + std::vector<std::shared_ptr<Node>> nextNodes = parent->getChildren(0); + std::vector<int> inputIndices(nextNodes.size()); + for (std::size_t i = 0; i < nextNodes.size(); i++) { + inputIndices[i] = getInputIndex(nextNodes[i], parent); + } + + // Disconnect childs from parent + for (std::shared_ptr<Node> nextNode : nextNodes) { + parent->removeChild(nextNode, 0); + } + + // Insert the new node between the child and the parent + parent->addChild(newNode, 0, 0); + for (std::size_t i = 0; i < nextNodes.size(); i++) { + newNode->addChild(nextNodes[i], 0, inputIndices[i]); + } + + graphView->add(newNode); +} + bool insertRoundBelowProducer(std::shared_ptr<Node> node,std::shared_ptr<GraphView> graphView) { std::shared_ptr<Aidge::Node> roundNode = 
Round(node->name() + "_Round"); roundNode->getOperator()->setDataType(DataType::Float64); // getDataType(parentNode) roundNode->getOperator()->setBackend("cpu"); - AIDGE_ASSERT(node->getChildren().size() > 0, "The producer node must have at least one child to insert a rounding node."); - std::vector<std::shared_ptr<Node>> nextNodes = node->getChildren(0); - std::vector<int> inputIndices(nextNodes.size()); - for (std::size_t i = 0; i < nextNodes.size(); i++) - inputIndices[i] = getInputIndex(nextNodes[i], node); - - for (std::shared_ptr<Node> nextNode : nextNodes) - node->removeChild(nextNode, 0); - node->addChild(roundNode, 0, 0); - - for (std::size_t i = 0; i < nextNodes.size(); i++) - roundNode->addChild(nextNodes[i], 0, inputIndices[i]); - graphView->add(roundNode); + insertNodeBetween(node,roundNode,graphView); roundNode->attributes()->addAttr("isProducerRounding",0.0); return true; @@ -121,23 +138,9 @@ bool insertScalingBelowProducer(std::shared_ptr<Node> node,double scalingFactor, scalingNode->getOperator()->setDataType(DataType::Float64); // getDataType(parentNode) scalingNode->getOperator()->setBackend("cpu"); - AIDGE_ASSERT(node->getChildren().size() > 0, "The producer node must have at least one child to insert a scaling node."); - std::vector<std::shared_ptr<Node>> nextNodes = node->getChildren(0); - - // For each node in nextNodes store the connexion index - std::vector<int> inputIndices(nextNodes.size()); - for (std::size_t i = 0; i < nextNodes.size(); i++) - inputIndices[i] = getInputIndex(nextNodes[i], node); - - for (std::shared_ptr<Node> nextNode : nextNodes) - node->removeChild(nextNode, 0); - node->addChild(scalingNode, 0, 0); + insertNodeBetween(node, scalingNode, graphView); - for (std::size_t i = 0; i < nextNodes.size(); i++) - scalingNode->addChild(nextNodes[i], 0, inputIndices[i]); - - graphView->add(scalingNode); return true; } @@ -374,26 +377,8 @@ void insertScalingNodes(std::shared_ptr<GraphView> graphView) if 
(parentNode->getChildren().size() > 0) { - // SCALING NODE INSERTION - - // We always have one output from Affine and Add nodes, but possibly multiple childs - std::vector<std::shared_ptr<Node>> nextNodes = parentNode->getChildren(0); - - // For each node in nextNodes store the connexion index - std::vector<int> inputIndices(nextNodes.size()); - for (std::size_t i = 0; i < nextNodes.size(); i++) - inputIndices[i] = getInputIndex(nextNodes[i], parentNode); - - for (std::shared_ptr<Node> nextNode : nextNodes) - parentNode->removeChild(nextNode, 0); - - parentNode->addChild(scalingNode, 0, 0); - - for (std::size_t i = 0; i < nextNodes.size(); i++) - scalingNode->addChild(nextNodes[i], 0, inputIndices[i]); - + insertNodeBetween(parentNode,scalingNode,graphView); graphView->add(scalingFactorProducer); - graphView->add(scalingNode); } else { -- GitLab From 00f86582c8a9512175348b2c89449d2c2fff3104 Mon Sep 17 00:00:00 2001 From: Noam ZERAH <noam.zerah@cea.fr> Date: Tue, 28 Jan 2025 15:17:17 +0000 Subject: [PATCH 18/26] Adding the real quantization pipeline (allowing to fully cast a network as Int32 for now) --- src/PTQ/PTQ.cpp | 121 +++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 115 insertions(+), 6 deletions(-) diff --git a/src/PTQ/PTQ.cpp b/src/PTQ/PTQ.cpp index 92049a2..bab8465 100644 --- a/src/PTQ/PTQ.cpp +++ b/src/PTQ/PTQ.cpp @@ -21,6 +21,7 @@ #include "aidge/scheduler/Scheduler.hpp" #include "aidge/utils/Log.hpp" +#include "aidge/operator/BitShift.hpp" #include "aidge/operator/Producer.hpp" #include "aidge/operator/Mul.hpp" #include "aidge/operator/ReLU.hpp" @@ -28,6 +29,7 @@ #include "aidge/operator/Conv.hpp" #include "aidge/operator/ArgMax.hpp" #include "aidge/operator/Reshape.hpp" +#include "aidge/operator/Cast.hpp" #include "aidge/recipes/Recipes.hpp" @@ -58,6 +60,23 @@ static int getInputIndex(std::shared_ptr<Node> node, std::shared_ptr<Node> paren index++; return index; } +/*Insert a node inside a graph*/ +bool 
insertNode(std::shared_ptr<Aidge::Node> baseNode, std::shared_ptr<Aidge::Node> insertNode, std::shared_ptr<GraphView> graphView) +{ + std::vector<std::shared_ptr<Node>> nextNodes = baseNode->getChildren(0); + std::vector<int> inputIndices(nextNodes.size()); + for (std::size_t i = 0; i < nextNodes.size(); i++) + inputIndices[i] = getInputIndex(nextNodes[i], baseNode); + + for (std::shared_ptr<Node> nextNode : nextNodes) + baseNode->removeChild(nextNode, 0); + + baseNode->addChild(insertNode, 0, 0); + + for (std::size_t i = 0; i < nextNodes.size(); i++) + insertNode->addChild(nextNodes[i], 0, inputIndices[i]); + graphView->add(insertNode); +} void multiplyScalingFactor(std::shared_ptr<Aidge::Node> node,double coeff) { @@ -76,9 +95,12 @@ void insertNodeBetween(std::shared_ptr<Node> parent, std::shared_ptr<GraphView> graphView) { // Checking the parents always have at least 1 children - AIDGE_ASSERT(parent->getChildren().size() > 0, "The parent node must have at least one child to insert a new node."); - - // Retrieve children connection indexes + if(parent->getChildren().size() == 0) + { + parent->addChild(newNode, 0, 0); + graphView->add(newNode); + return; + } std::vector<std::shared_ptr<Node>> nextNodes = parent->getChildren(0); std::vector<int> inputIndices(nextNodes.size()); for (std::size_t i = 0; i < nextNodes.size(); i++) { @@ -99,6 +121,78 @@ void insertNodeBetween(std::shared_ptr<Node> parent, graphView->add(newNode); } +bool castQuantizedGraph(std::shared_ptr<GraphView> &graphView, Aidge::DataType targetType, bool singleShift) +{ + std::vector<std::shared_ptr<Node>> nodeVector; + SequentialScheduler scheduler(graphView); + scheduler.resetScheduling(); + scheduler.generateScheduling(); + nodeVector = scheduler.getStaticScheduling(); + for (std::shared_ptr<Node> node : nodeVector) + { + if (node->type() == "Round" && node->attributes()->hasAttr("isProducerRounding")) + { + std::shared_ptr<Aidge::Node> castNode = Cast(targetType,node->name() + "_Cast");/*!!*/ 
/*Change Name (it keeps the round inside)*/ + castNode->getOperator()->setDataType(targetType); /*!!*/ /*Set DataType on cast?*/ + castNode->getOperator()->setBackend("cpu"); + insertNodeBetween(node,castNode,graphView); + castNode->attributes()->addAttr("isProducerCasting",0.0); + node->getOperator()->setDataType(DataType::Float64); + + } + if(node->type() == "Quantizer") + { + if(singleShift) + { + double scalingFactor = getScalingFactor(node); + int shift = std::log2(scalingFactor); + BitShift_Op::BitShiftDirection direction = BitShift_Op::BitShiftDirection::left; + if(shift < 0 ) + { + direction = BitShift_Op::BitShiftDirection::right; + shift = -shift; + } + Log::warn("Valeur de décalage est : {}, orignale est {} for node {}",scalingFactor,(int)std::log2(scalingFactor),node->name()); + std::shared_ptr<Node> bitshiftNode = BitShift(direction,node->name()+"BitShiftQuantizer"); + std::shared_ptr<Tensor> bitshiftTensor = std::make_shared<Tensor>(Array1D<int, 1> {shift}); + std::shared_ptr<Node> bitshiftProducer = addProducer(bitshiftNode, 1, {1}, "ScalingFactor"); + bitshiftProducer->getOperator()->setOutput(0, bitshiftTensor); + + bitshiftNode->getOperator()->setDataType(targetType); // getDataType(parentNode) + bitshiftNode->getOperator()->setBackend("cpu"); + graphView->add(bitshiftProducer); + graphView->add(bitshiftNode); + graphView->replace({node}, {bitshiftProducer,bitshiftNode}); + graphView->updateInputsOutputs(); + + } + else + { + std::shared_ptr<Aidge::Node> castPreNode = Cast(DataType::Float64,node->name() + "_PreCast");/*!!*/ /*Change Name (it keeps the round inside)*/ + castPreNode->getOperator()->setBackend("cpu"); + insertNodeBetween(node->getParent(0),castPreNode,graphView); + castPreNode->attributes()->addAttr("isCasting",0.0); + castPreNode->getOperator()->setDataType(DataType::Float64); /*!!*/ /*Set DataType on cast?*/ + + std::shared_ptr<Aidge::Node> castPostNode = Cast(targetType,node->name() + "_PostCast");/*!!*/ /*Change Name (it keeps 
the round inside)*/ + castPostNode->getOperator()->setBackend("cpu"); + insertNodeBetween(node,castPostNode,graphView); + castPostNode->attributes()->addAttr("isCasting",0.0); + castPostNode->getOperator()->setDataType(targetType); /*!!*/ /*Set DataType on cast?*/ + } + } + if (node->type() != "Producer" && + node->type() != "Quantizer" && + !node->attributes()->hasAttr("isProducerRounding") && + !node->attributes()->hasAttr("isProducerScaling")) + { + Log::warn("Node set to int is of type: {} and name is {}",node->type(),node->name()); + node->getOperator()->setDataType(targetType); + } + } + //scheduler.forward(); + return true; +} bool insertRoundBelowProducer(std::shared_ptr<Node> node,std::shared_ptr<GraphView> graphView) { std::shared_ptr<Aidge::Node> roundNode = Round(node->name() + "_Round"); @@ -1015,6 +1109,8 @@ void performSingleShiftApproximation(std::shared_ptr<GraphView> graphView, bool if (isAffine(node) || (node->type() == "Mul" && node->attributes()->hasAttr("isCompensation"))) { std::shared_ptr<Node> scalingNode = (*node->getChildren().begin()); + if(scalingNode->attributes()->hasAttr("isCasting")) + scalingNode = (*node->getChildren().begin()); double base = getScalingFactor(scalingNode); @@ -1025,14 +1121,14 @@ void performSingleShiftApproximation(std::shared_ptr<GraphView> graphView, bool double ratio = base / approx; insertScalingBelowProducer(node->getParent(1),ratio,graphView); - if (!noQuant) + if (!noQuant && !node->getParent(1)->attributes()->hasAttr("isProducerRounding")) insertRoundBelowProducer(node->getParent(1),graphView); if (nodeHasBias(node)) { insertScalingBelowProducer(node->getParent(2),ratio,graphView); - if (!noQuant) + if (!noQuant && !node->getParent(1)->attributes()->hasAttr("isProducerRounding")) insertRoundBelowProducer(node->getParent(2),graphView); } } @@ -1077,7 +1173,8 @@ static void printRanges(std::shared_ptr<GraphView> graphView, std::map<std::stri Log::debug("{} range = 
{}",node->name(),valueRanges[node->name()]); } -void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits, std::vector<std::shared_ptr<Tensor>> inputDataSet, Clipping clippingMode, bool noQuant, bool optimizeSigns, bool singleShift, bool useCuda, bool verbose) +void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits, std::vector<std::shared_ptr<Tensor>> inputDataSet, + Clipping clippingMode, bool noQuant, bool optimizeSigns, bool singleShift, bool useCuda, bool verbose) { Log::info(" === QUANT PTQ 0.2.21 === "); @@ -1124,6 +1221,18 @@ void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits, Log::info(" Performing the Single-Shift approximation ..."); performSingleShiftApproximation(graphView, noQuant); } + if(true) /*!!*/ /*Give a name to CAST BOOLEAN*/ + { + AIDGE_ASSERT(!noQuant,"Cannot cast operators with the noQuant(Fake Quantization) flag set to true!") + Log::info("Starting to cast operators into the desired type ..."); + castQuantizedGraph(graphView,DataType::Int32,singleShift); + } + else + { + setupDataType(graphView, inputDataSet, initialDataType); + } + //Mandatory to handle all of the newly added connections! 
+ graphView->updateInputsOutputs(); if (verbose) printScalingFactors(graphView); -- GitLab From ebe0fb421524b5ee9b25db3ff7524709569845ed Mon Sep 17 00:00:00 2001 From: Noam ZERAH <noam.zerah@cea.fr> Date: Fri, 31 Jan 2025 13:54:11 +0000 Subject: [PATCH 19/26] Full int32 with bitshift pipeline --- include/aidge/quantization/PTQ/PTQ.hpp | 3 +- python_binding/pybind_PTQ.cpp | 8 +- src/PTQ/PTQ.cpp | 119 ++++++++++++++----------- 3 files changed, 76 insertions(+), 54 deletions(-) diff --git a/include/aidge/quantization/PTQ/PTQ.hpp b/include/aidge/quantization/PTQ/PTQ.hpp index e1ef529..3a35017 100644 --- a/include/aidge/quantization/PTQ/PTQ.hpp +++ b/include/aidge/quantization/PTQ/PTQ.hpp @@ -166,7 +166,8 @@ namespace Aidge { * @param singleShift Whether to convert the scaling factors into powers of two. If true the approximations are compensated using the previous nodes weights. * @param verbose Whether to print internal informations about the quantization process. */ - void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits, std::vector<std::shared_ptr<Tensor>> inputDataSet, Clipping clippingMode, bool applyRounding, bool optimizeSigns, bool singleShift, bool useCuda, bool verbose); + void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits, std::vector<std::shared_ptr<Tensor>> inputDataSet, + Clipping clippingMode, DataType targetType, bool noQuant, bool optimizeSigns, bool singleShift, bool useCuda,bool foldGraph ,bool verbose); /** * @brief Compute the weight ranges of every affine node. Provided for debugging purposes. 
diff --git a/python_binding/pybind_PTQ.cpp b/python_binding/pybind_PTQ.cpp index 61a3cb9..290d59d 100644 --- a/python_binding/pybind_PTQ.cpp +++ b/python_binding/pybind_PTQ.cpp @@ -13,7 +13,7 @@ #include <pybind11/stl.h> #include <string> - +#include "aidge/operator/PTQMetaOps.hpp" #include "aidge/quantization/PTQ/Clipping.hpp" #include "aidge/quantization/PTQ/CLE.hpp" #include "aidge/quantization/PTQ/PTQ.hpp" @@ -39,6 +39,8 @@ void init_PTQ(py::module &m) { :rtype: bool )mydelimiter"); + m.def("quantizer",&Quantizer,py::arg("sf"),py::arg("min"),py::arg("max"),py::arg("name")); + m.def("insert_scaling_nodes", &insertScalingNodes, py::arg("network"), R"mydelimiter( Insert a scaling node after each affine node of the GraphView. @@ -100,7 +102,9 @@ void init_PTQ(py::module &m) { :type verbose: bool )mydelimiter"); - m.def("quantize_network", &quantizeNetwork ,py::arg("network"), py::arg("nb_bits"), py::arg("input_dataset"), py::arg("clipping_mode") = Clipping::MAX , py::arg("no_quantization") = true, py::arg("optimize_signs") = false, py::arg("single_shift") = false, py::arg("use_cuda") = false, py::arg("verbose") = false, + m.def("quantize_network", &quantizeNetwork ,py::arg("network"), py::arg("nb_bits"), py::arg("input_dataset"), + py::arg("clipping_mode") = Clipping::MAX ,py::arg("target_type") = DataType::Float64 ,py::arg("no_quantization") = true, py::arg("optimize_signs") = false, + py::arg("single_shift") = false, py::arg("use_cuda") = false, py::arg("fold_graph") = true, py::arg("verbose") = false, R"mydelimiter( Main quantization routine. Performs every step of the quantization pipeline. :param network: The GraphView to be quantized. 
diff --git a/src/PTQ/PTQ.cpp b/src/PTQ/PTQ.cpp index bab8465..069a3d8 100644 --- a/src/PTQ/PTQ.cpp +++ b/src/PTQ/PTQ.cpp @@ -60,23 +60,6 @@ static int getInputIndex(std::shared_ptr<Node> node, std::shared_ptr<Node> paren index++; return index; } -/*Insert a node inside a graph*/ -bool insertNode(std::shared_ptr<Aidge::Node> baseNode, std::shared_ptr<Aidge::Node> insertNode, std::shared_ptr<GraphView> graphView) -{ - std::vector<std::shared_ptr<Node>> nextNodes = baseNode->getChildren(0); - std::vector<int> inputIndices(nextNodes.size()); - for (std::size_t i = 0; i < nextNodes.size(); i++) - inputIndices[i] = getInputIndex(nextNodes[i], baseNode); - - for (std::shared_ptr<Node> nextNode : nextNodes) - baseNode->removeChild(nextNode, 0); - - baseNode->addChild(insertNode, 0, 0); - - for (std::size_t i = 0; i < nextNodes.size(); i++) - insertNode->addChild(nextNodes[i], 0, inputIndices[i]); - graphView->add(insertNode); -} void multiplyScalingFactor(std::shared_ptr<Aidge::Node> node,double coeff) { @@ -85,6 +68,7 @@ void multiplyScalingFactor(std::shared_ptr<Aidge::Node> node,double coeff) auto scalingFactorTensor = std::static_pointer_cast<OperatorTensor>(node->getOperator())->getInput(1); std::shared_ptr<Tensor> fallback; const Tensor& localTensor = scalingFactorTensor->refCastFrom(fallback, DataType::Float64, "cpu"); + double previousScalingFactor = localTensor.get<double>(0); std::shared_ptr<Tensor> finalTensor = std::make_shared<Tensor>(Array1D<double, 1> {previousScalingFactor * coeff}); node->input(1).first->getOperator()->setOutput(0, finalTensor); @@ -121,26 +105,36 @@ void insertNodeBetween(std::shared_ptr<Node> parent, graphView->add(newNode); } -bool castQuantizedGraph(std::shared_ptr<GraphView> &graphView, Aidge::DataType targetType, bool singleShift) +void applyConstFold(std::shared_ptr<GraphView> &graphView) { - std::vector<std::shared_ptr<Node>> nodeVector; - SequentialScheduler scheduler(graphView); - scheduler.resetScheduling(); - 
scheduler.generateScheduling(); - nodeVector = scheduler.getStaticScheduling(); + for (const std::shared_ptr<Node> node : graphView->getNodes()) + { + if (node->type() == "Producer" ) + { + const auto& producer = std::static_pointer_cast<Producer_Op>(node->getOperator()); + producer->constant() = true; + } + } + constantFolding(graphView); +} + +bool castQuantizedGraph(std::shared_ptr<GraphView> &graphView, Aidge::DataType targetType, bool singleShift, bool MP/*Rename*/) +{ + //We need a deepcopy of the graphs nodes since we will replace some nodes + std::vector<std::shared_ptr<Node>> nodeVector(graphView->getNodes().begin(), graphView->getNodes().end()); + for (std::shared_ptr<Node> node : nodeVector) { if (node->type() == "Round" && node->attributes()->hasAttr("isProducerRounding")) { std::shared_ptr<Aidge::Node> castNode = Cast(targetType,node->name() + "_Cast");/*!!*/ /*Change Name (it keeps the round inside)*/ - castNode->getOperator()->setDataType(targetType); /*!!*/ /*Set DataType on cast?*/ - castNode->getOperator()->setBackend("cpu"); + castNode->getOperator()->setDataType(targetType); + castNode->getOperator()->setBackend(node->getOperator()->backend()); insertNodeBetween(node,castNode,graphView); castNode->attributes()->addAttr("isProducerCasting",0.0); node->getOperator()->setDataType(DataType::Float64); - } - if(node->type() == "Quantizer") + else if(node->type() == "Quantizer") { if(singleShift) { @@ -152,45 +146,51 @@ bool castQuantizedGraph(std::shared_ptr<GraphView> &graphView, Aidge::DataType t direction = BitShift_Op::BitShiftDirection::right; shift = -shift; } - Log::warn("Valeur de décalage est : {}, orignale est {} for node {}",scalingFactor,(int)std::log2(scalingFactor),node->name()); - std::shared_ptr<Node> bitshiftNode = BitShift(direction,node->name()+"BitShiftQuantizer"); + std::shared_ptr<Node> bitshiftNode = BitShift(direction,node->name()+"_BitShift_Quantizer"); std::shared_ptr<Tensor> bitshiftTensor = 
std::make_shared<Tensor>(Array1D<int, 1> {shift}); std::shared_ptr<Node> bitshiftProducer = addProducer(bitshiftNode, 1, {1}, "ScalingFactor"); bitshiftProducer->getOperator()->setOutput(0, bitshiftTensor); + bitshiftProducer->getOperator()->setDataType(DataType::Int32); - bitshiftNode->getOperator()->setDataType(targetType); // getDataType(parentNode) - bitshiftNode->getOperator()->setBackend("cpu"); + + bitshiftNode->getOperator()->setDataType(targetType); + bitshiftNode->getOperator()->setBackend(node->getOperator()->backend()); graphView->add(bitshiftProducer); graphView->add(bitshiftNode); graphView->replace({node}, {bitshiftProducer,bitshiftNode}); graphView->updateInputsOutputs(); } - else + else //If single shift is not enabled we keep using the Float Quantizer Metaoperator so we need to cast before and after each quantizer { - std::shared_ptr<Aidge::Node> castPreNode = Cast(DataType::Float64,node->name() + "_PreCast");/*!!*/ /*Change Name (it keeps the round inside)*/ - castPreNode->getOperator()->setBackend("cpu"); - insertNodeBetween(node->getParent(0),castPreNode,graphView); - castPreNode->attributes()->addAttr("isCasting",0.0); - castPreNode->getOperator()->setDataType(DataType::Float64); /*!!*/ /*Set DataType on cast?*/ + //we need this check since we dont want to add a second PRECAST + if(!node->getParent(0)->attributes()->hasAttr("isCasting")) + { + std::shared_ptr<Aidge::Node> castPreNode = Cast(DataType::Float64,node->name() + "_PreCast");/*!!*/ /*Change Name (it keeps the round inside)*/ + castPreNode->getOperator()->setBackend(node->getOperator()->backend()); + node->addParent(castPreNode,0); + castPreNode->attributes()->addAttr("isCasting",0.0); + castPreNode->getOperator()->setDataType(DataType::Float64); + } std::shared_ptr<Aidge::Node> castPostNode = Cast(targetType,node->name() + "_PostCast");/*!!*/ /*Change Name (it keeps the round inside)*/ - castPostNode->getOperator()->setBackend("cpu"); + 
castPostNode->getOperator()->setBackend(node->getOperator()->backend()); insertNodeBetween(node,castPostNode,graphView); castPostNode->attributes()->addAttr("isCasting",0.0); - castPostNode->getOperator()->setDataType(targetType); /*!!*/ /*Set DataType on cast?*/ + castPostNode->getOperator()->setDataType(targetType); } } - if (node->type() != "Producer" && - node->type() != "Quantizer" && - !node->attributes()->hasAttr("isProducerRounding") && + else if (node->type() != "Producer" && !node->attributes()->hasAttr("isProducerScaling")) - { - Log::warn("Node set to int is of type: {} and name is {}",node->type(),node->name()); + { node->getOperator()->setDataType(targetType); + if(isAffine(node)) + { + node->getOperator()->setDataType(DataType::Int32); + //node->getParent(2)->getOperator()->setDataType(DataType::Int32); + } } } - //scheduler.forward(); return true; } bool insertRoundBelowProducer(std::shared_ptr<Node> node,std::shared_ptr<GraphView> graphView) @@ -220,7 +220,7 @@ bool insertScalingBelowProducer(std::shared_ptr<Node> node,double scalingFactor, return true; } AIDGE_ASSERT(node->type() == "Producer","Cannot apply a scaling factor on node of type: {} which is not a producer", node->type()); - std::string scalingNodeName = makeUniqueName(node->name() + "_ProducerScaling", graphView); + std::string scalingNodeName = makeUniqueName(node->name() + "_Producer_Scaling", graphView); std::shared_ptr<Aidge::Node> scalingNode = Mul(scalingNodeName); scalingNode->attributes()->addAttr("isProducerScaling",0.0); @@ -1174,7 +1174,7 @@ static void printRanges(std::shared_ptr<GraphView> graphView, std::map<std::stri } void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits, std::vector<std::shared_ptr<Tensor>> inputDataSet, - Clipping clippingMode, bool noQuant, bool optimizeSigns, bool singleShift, bool useCuda, bool verbose) + Clipping clippingMode, DataType targetType,bool noQuant, bool optimizeSigns, bool singleShift, bool useCuda, bool 
foldGraph ,bool verbose) { Log::info(" === QUANT PTQ 0.2.21 === "); @@ -1221,17 +1221,36 @@ void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits, Log::info(" Performing the Single-Shift approximation ..."); performSingleShiftApproximation(graphView, noQuant); } - if(true) /*!!*/ /*Give a name to CAST BOOLEAN*/ + if(targetType != DataType::Float64) /*!!*/ /*Give a name to CAST BOOLEAN*/ { AIDGE_ASSERT(!noQuant,"Cannot cast operators with the noQuant(Fake Quantization) flag set to true!") Log::info("Starting to cast operators into the desired type ..."); - castQuantizedGraph(graphView,DataType::Int32,singleShift); + castQuantizedGraph(graphView,DataType::Int32,singleShift,true); + for (auto h :graphView->getNodes()) + { + if(h->name() == "resnetv15_conv0_fwd_1" || h->name() == "fc0_Gemm_1" ) + { + std::shared_ptr<Aidge::Node> castPreNode = Cast(DataType::Int32,h->name() + "_AJOUT"); + castPreNode->getOperator()->setBackend(h->getOperator()->backend()); + castPreNode->getOperator()->setDataType(DataType::Int32); + castPreNode->addChild(h,0,0); + graphView->add(castPreNode); + } + } } else { setupDataType(graphView, inputDataSet, initialDataType); } + //Mandatory to handle all of the newly added connections! 
+ // graphView->updateInputsOutputs(); + + if(true) + { + Log::info("Applying constant folding recipe to the graph ..."); + applyConstFold(graphView); + } graphView->updateInputsOutputs(); if (verbose) @@ -1240,8 +1259,6 @@ void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits, //Log::debug(" === SCALINGS (BEFORE CAST) ==="); //printScalingFactors(graphView); - setupDataType(graphView, inputDataSet, initialDataType); - if (useCuda) graphView->setBackend("cuda"); -- GitLab From 2be85f14c8c5aa73cd9e037a07ff8912404e1a24 Mon Sep 17 00:00:00 2001 From: Noam ZERAH <noam.zerah@cea.fr> Date: Wed, 5 Feb 2025 16:34:49 +0000 Subject: [PATCH 20/26] Adding Fully functional Cast to the desired type in the PTQ pipeline --- include/aidge/operator/PTQMetaOps.hpp | 14 ++++ src/PTQ/PTQ.cpp | 113 +++++++++----------------- src/operator/PTQMetaOps.cpp | 59 ++++++++++++-- 3 files changed, 106 insertions(+), 80 deletions(-) diff --git a/include/aidge/operator/PTQMetaOps.hpp b/include/aidge/operator/PTQMetaOps.hpp index 22fb71e..58571e0 100644 --- a/include/aidge/operator/PTQMetaOps.hpp +++ b/include/aidge/operator/PTQMetaOps.hpp @@ -37,6 +37,20 @@ namespace Aidge { /// @return A shared pointer to an instance of the meta-operator node. std::shared_ptr<Aidge::Node> Quantizer(double scalingFactor, double clipMin, double clipMax, const std::string& name); +/// @brief IntQuantizer acts as an extension of the Quantizer meta-operator, enabling seamless integration +/// into computation graphs with a data type other than Float while preserving floating-point precision. +/// +/// This operator modifies the provided Quantizer by inserting explicit casting operations before and after +/// the quantization process. It first casts the input to Float64, applies the quantization steps (Mul, Clip, Round), +/// and then casts the result back to the target data type. 
This ensures compatibility with integer-based computation graphs +/// while maintaining the precision of floating-point operations. +/// +/// @param oldQuantizer A shared pointer to the existing Quantizer node that will be adapted. +/// @param targetType The target data type to which the final output should be cast after the quantization process. +/// @param name The name of the meta-operator node created. +/// @return A shared pointer to a new instance of the modified meta-operator node. +std::shared_ptr<Node> IntQuantizer(std::shared_ptr<Node> oldQuantizer, DataType targetType, const std::string& name); + /// @brief Updates the scaling factor of a PTQ meta-operator node, allowing for dynamic adjustment of the scaling parameter. /// This function sets a new scaling factor for a specified meta-operator node, modifying the scalar applied in the [Mul] operation. /// The meta-operator node must be a PTQ-specific operator, such as a Quantizer or Scaling node. diff --git a/src/PTQ/PTQ.cpp b/src/PTQ/PTQ.cpp index 069a3d8..a9ac176 100644 --- a/src/PTQ/PTQ.cpp +++ b/src/PTQ/PTQ.cpp @@ -117,8 +117,8 @@ void applyConstFold(std::shared_ptr<GraphView> &graphView) } constantFolding(graphView); } - -bool castQuantizedGraph(std::shared_ptr<GraphView> &graphView, Aidge::DataType targetType, bool singleShift, bool MP/*Rename*/) +//Add a condition to insert Cast Node to cast User Input Data into the desired type +bool castQuantizedGraph(std::shared_ptr<GraphView> &graphView, Aidge::DataType targetType, bool singleShift) { //We need a deepcopy of the graphs nodes since we will replace some nodes std::vector<std::shared_ptr<Node>> nodeVector(graphView->getNodes().begin(), graphView->getNodes().end()); @@ -127,7 +127,7 @@ bool castQuantizedGraph(std::shared_ptr<GraphView> &graphView, Aidge::DataType t { if (node->type() == "Round" && node->attributes()->hasAttr("isProducerRounding")) { - std::shared_ptr<Aidge::Node> castNode = Cast(targetType,node->name() + "_Cast");/*!!*/ /*Change 
Name (it keeps the round inside)*/ + std::shared_ptr<Aidge::Node> castNode = Cast(targetType,node->name() + "_Cast"); castNode->getOperator()->setDataType(targetType); castNode->getOperator()->setBackend(node->getOperator()->backend()); insertNodeBetween(node,castNode,graphView); @@ -138,6 +138,7 @@ bool castQuantizedGraph(std::shared_ptr<GraphView> &graphView, Aidge::DataType t { if(singleShift) { + //If single shift is enabled we must replace each Quantizer by a bitShift double scalingFactor = getScalingFactor(node); int shift = std::log2(scalingFactor); BitShift_Op::BitShiftDirection direction = BitShift_Op::BitShiftDirection::left; @@ -150,8 +151,7 @@ bool castQuantizedGraph(std::shared_ptr<GraphView> &graphView, Aidge::DataType t std::shared_ptr<Tensor> bitshiftTensor = std::make_shared<Tensor>(Array1D<int, 1> {shift}); std::shared_ptr<Node> bitshiftProducer = addProducer(bitshiftNode, 1, {1}, "ScalingFactor"); bitshiftProducer->getOperator()->setOutput(0, bitshiftTensor); - bitshiftProducer->getOperator()->setDataType(DataType::Int32); - + bitshiftProducer->getOperator()->setDataType(targetType); bitshiftNode->getOperator()->setDataType(targetType); bitshiftNode->getOperator()->setBackend(node->getOperator()->backend()); @@ -161,34 +161,17 @@ bool castQuantizedGraph(std::shared_ptr<GraphView> &graphView, Aidge::DataType t graphView->updateInputsOutputs(); } - else //If single shift is not enabled we keep using the Float Quantizer Metaoperator so we need to cast before and after each quantizer + else //If single shift is not enabled we keep using the alternative Int Quantizer (which cast the data before and after the regular Quantizer Operations) { - //we need this check since we dont want to add a second PRECAST - if(!node->getParent(0)->attributes()->hasAttr("isCasting")) - { - std::shared_ptr<Aidge::Node> castPreNode = Cast(DataType::Float64,node->name() + "_PreCast");/*!!*/ /*Change Name (it keeps the round inside)*/ - 
castPreNode->getOperator()->setBackend(node->getOperator()->backend()); - node->addParent(castPreNode,0); - castPreNode->attributes()->addAttr("isCasting",0.0); - castPreNode->getOperator()->setDataType(DataType::Float64); - } - - std::shared_ptr<Aidge::Node> castPostNode = Cast(targetType,node->name() + "_PostCast");/*!!*/ /*Change Name (it keeps the round inside)*/ - castPostNode->getOperator()->setBackend(node->getOperator()->backend()); - insertNodeBetween(node,castPostNode,graphView); - castPostNode->attributes()->addAttr("isCasting",0.0); - castPostNode->getOperator()->setDataType(targetType); + std::shared_ptr<Node> newQuantizer = IntQuantizer(node,targetType,node->name()); + newQuantizer->getOperator()->setBackend(node->getOperator()->backend()); + graphView->replace({node},{newQuantizer}); } } else if (node->type() != "Producer" && !node->attributes()->hasAttr("isProducerScaling")) { node->getOperator()->setDataType(targetType); - if(isAffine(node)) - { - node->getOperator()->setDataType(DataType::Int32); - //node->getParent(2)->getOperator()->setDataType(DataType::Int32); - } } } return true; @@ -358,9 +341,9 @@ std::vector<std::shared_ptr<Node>> retrieveNodeVector(std::shared_ptr<GraphView> if (verbose) { - Log::info("NB OF NODES = {}", nodeVector.size()); + Log::notice("NB OF NODES = {}", nodeVector.size()); for (std::shared_ptr<Node> node : nodeVector) - Log::info("{} {}", node->type(), node->name()); + Log::notice("{} {}", node->type(), node->name()); } return nodeVector; @@ -419,8 +402,8 @@ void insertResidualNodes(std::shared_ptr<GraphView> graphView) if (parentIsForking) { // temporary verbose ... 
- Log::info(" ### found residual branch at index {}", i); - Log::info(" ### inserting multiplicative node ..."); + Log::notice(" ### found residual branch at index {}", i); + Log::notice(" ### inserting multiplicative node ..."); std::string residualNodeName = makeUniqueName(parentNode->name() + "_Res", graphView); std::shared_ptr<Node> residualNode = Mul(residualNodeName); @@ -476,7 +459,7 @@ void insertScalingNodes(std::shared_ptr<GraphView> graphView) } else { - // Log::info(" last node reached ! "); + // Log::notice(" last node reached ! "); parentNode->addChild(scalingNode, 0, 0); graphView->add(scalingFactorProducer); graphView->add(scalingNode); @@ -654,7 +637,7 @@ std::map<std::string, double> computeRanges(std::shared_ptr<GraphView> graphView for (std::shared_ptr<Tensor> sample : inputDataSet) { - //Log::info(" IT : {}", it++); + //Log::notice(" IT : {}", it++); // Inference ... @@ -797,7 +780,7 @@ void normalizeActivations(std::shared_ptr<GraphView> graphView, std::map<std::st double rescaling = mergingNodeScaling / maxScaling; std::shared_ptr<Node> scalingNode = getPreviousScalingNode(mergingNode); - //Log::info(" SCALING NODE : {} {}", scalingNode->type(), scalingNode->name()); + //Log::notice(" SCALING NODE : {} {}", scalingNode->type(), scalingNode->name()); multiplyScalingFactor(scalingNode,rescaling) ; } @@ -913,9 +896,9 @@ std::map<std::string, std::pair<bool, bool>> computeSignMap(std::shared_ptr<Grap if (verbose) { - Log::info(" === SIGN MAP === "); + Log::notice(" === SIGN MAP === "); for (std::shared_ptr<Node> node : nodeVector) - Log::info(" {}{} | {}", static_cast<int>(signMap[node->name()].first), static_cast<int>(signMap[node->name()].second), node->name()); + Log::notice(" {}{} | {}", static_cast<int>(signMap[node->name()].first), static_cast<int>(signMap[node->name()].second), node->name()); } // SANITY CHECK (TEMPORARY) @@ -1137,12 +1120,12 @@ void performSingleShiftApproximation(std::shared_ptr<GraphView> graphView, bool static void 
printScalingFactors(std::shared_ptr<GraphView> graphView) { - Log::info(" === SCALING FACTORS === "); + Log::notice(" === SCALING FACTORS === "); for (auto node : retrieveNodeVector(graphView)) if (node->attributes()->hasAttr("isScaling") || node->type() == "Quantizer") { double scalingFactor = getScalingFactor(node); - Log::info(" {:.6f} ({})", scalingFactor, node->name()); + Log::notice(" {:.6f} ({})", scalingFactor, node->name()); } } @@ -1176,7 +1159,7 @@ static void printRanges(std::shared_ptr<GraphView> graphView, std::map<std::stri void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits, std::vector<std::shared_ptr<Tensor>> inputDataSet, Clipping clippingMode, DataType targetType,bool noQuant, bool optimizeSigns, bool singleShift, bool useCuda, bool foldGraph ,bool verbose) { - Log::info(" === QUANT PTQ 0.2.21 === "); + Log::notice(" === QUANT PTQ 0.2.21 === "); graphView->setBackend("cpu"); @@ -1186,90 +1169,74 @@ void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits, if (!checkArchitecture(graphView)) return; - Log::info(" Preparing the network for the PTQ ... "); + Log::notice(" Preparing the network for the PTQ ... 
"); prepareNetwork(graphView); - Log::info(" Inserting the scaling nodes ..."); + Log::notice(" Inserting the scaling nodes ..."); insertScalingNodes(graphView); crossLayerEqualization(graphView); - Log::info(" Normalizing the parameters ..."); + Log::notice(" Normalizing the parameters ..."); normalizeParameters(graphView); - Log::info(" Computing the value ranges ..."); + Log::notice(" Computing the value ranges ..."); std::map<std::string, double> valueRanges = computeRanges(graphView, inputDataSet, true, useCuda); //Log:debug("=== RANGES (BEFORE ADJUST) ==="); //printRanges(graphView, valueRanges); - Log::info(" Optimizing the clipping values ..."); + Log::notice(" Optimizing the clipping values ..."); valueRanges = adjustRanges(clippingMode, valueRanges, nbBits, graphView, inputDataSet, useCuda, verbose); //Log:debug("=== RANGES (AFTER ADJUST) ==="); //printRanges(graphView, valueRanges); - Log::info(" Normalizing the activations ..."); + Log::notice(" Normalizing the activations ..."); normalizeActivations(graphView, valueRanges); - Log::info(" Quantizing the normalized network ..."); + Log::notice(" Quantizing the normalized network ..."); quantizeNormalizedNetwork(graphView, nbBits, noQuant, optimizeSigns, verbose); if (singleShift) { - Log::info( " Inserting the compensation nodes ..."); + Log::notice( " Inserting the compensation nodes ..."); insertCompensationNodes(graphView, nbBits); - Log::info(" Performing the Single-Shift approximation ..."); + Log::notice(" Performing the Single-Shift approximation ..."); performSingleShiftApproximation(graphView, noQuant); } - if(targetType != DataType::Float64) /*!!*/ /*Give a name to CAST BOOLEAN*/ + + if(targetType != DataType::Float64) //!! 
Adapt the condition { AIDGE_ASSERT(!noQuant,"Cannot cast operators with the noQuant(Fake Quantization) flag set to true!") - Log::info("Starting to cast operators into the desired type ..."); - castQuantizedGraph(graphView,DataType::Int32,singleShift,true); - for (auto h :graphView->getNodes()) - { - if(h->name() == "resnetv15_conv0_fwd_1" || h->name() == "fc0_Gemm_1" ) - { - std::shared_ptr<Aidge::Node> castPreNode = Cast(DataType::Int32,h->name() + "_AJOUT"); - castPreNode->getOperator()->setBackend(h->getOperator()->backend()); - castPreNode->getOperator()->setDataType(DataType::Int32); - castPreNode->addChild(h,0,0); - graphView->add(castPreNode); - } - } + Log::notice("Starting to cast operators into the desired type ..."); + castQuantizedGraph(graphView,DataType::Int32,singleShift); } else { setupDataType(graphView, inputDataSet, initialDataType); } - - //Mandatory to handle all of the newly added connections! - // graphView->updateInputsOutputs(); - if(true) + if(foldGraph) { - Log::info("Applying constant folding recipe to the graph ..."); + Log::notice("Applying constant folding recipe to the graph ..."); applyConstFold(graphView); } + //Mandatory to handle all of the newly added connections! 
graphView->updateInputsOutputs(); if (verbose) printScalingFactors(graphView); - //Log::debug(" === SCALINGS (BEFORE CAST) ==="); - //printScalingFactors(graphView); if (useCuda) graphView->setBackend("cuda"); - //Log::debug(" === SCALINGS (AFTER CAST) ==="); - //printScalingFactors(graphView); - - Log::info(" Reseting the scheduler ..."); + Log::notice(" Reseting the scheduler ..."); SequentialScheduler scheduler(graphView); scheduler.resetScheduling(); - Log::info(" Network is quantized !"); + Log::notice(" Network is quantized !"); + } std::map<std::string, double> getWeightRanges(std::shared_ptr<GraphView> graphView) diff --git a/src/operator/PTQMetaOps.cpp b/src/operator/PTQMetaOps.cpp index 105d4e8..f3ddbcf 100644 --- a/src/operator/PTQMetaOps.cpp +++ b/src/operator/PTQMetaOps.cpp @@ -19,6 +19,7 @@ #include "aidge/operator/Clip.hpp" #include "aidge/operator/Mul.hpp" #include "aidge/operator/Round.hpp" +#include "aidge/operator/Cast.hpp" #include "aidge/graph/Node.hpp" #include "aidge/graph/OpArgs.hpp" @@ -34,6 +35,15 @@ namespace Aidge { +static std::shared_ptr<Node> getSubNode(std::shared_ptr<GraphView> graphView, std::string nodeType) +{ + std::shared_ptr<Node> mulNode = nullptr; + for(std::shared_ptr<Node> node : graphView->getNodes()) + if (node->type() == nodeType) + mulNode = node; + + return mulNode; +} std::shared_ptr<Node> Quantizer(double scalingFactor, double clipMin, double clipMax, const std::string& name) { @@ -61,16 +71,51 @@ std::shared_ptr<Node> Quantizer(double scalingFactor, double clipMin, double cli return metaopNode; } -static std::shared_ptr<Node> getSubNode(std::shared_ptr<GraphView> graphView, std::string nodeType) +std::shared_ptr<Node> IntQuantizer(std::shared_ptr<Node> oldQuantizer, DataType targetType, const std::string& name) { - std::shared_ptr<Node> mulNode = nullptr; - for(std::shared_ptr<Node> node : graphView->getNodes()) - if (node->type() == nodeType) - mulNode = node; + double scalingFactor = 
getScalingFactor(oldQuantizer); - return mulNode; -} + std::shared_ptr<MetaOperator_Op> metaOp = std::static_pointer_cast<MetaOperator_Op> (oldQuantizer->getOperator()); + std::shared_ptr<Node> oldclipNode = getSubNode(metaOp->getMicroGraph(), "Clip"); + + if (!oldclipNode) { + Log::warn("Invalid PTQ MetaOperator, no Clip found inside node of type {}", oldQuantizer->type()); + return nullptr; + } + std::shared_ptr<Clip_Op> clipOp = std::static_pointer_cast<Clip_Op>(oldclipNode->getOperator()); + + std::shared_ptr<Node> castPreNode = Cast(DataType::Float64,((!name.empty()) ? name + "_PreCast" : "")); + std::shared_ptr<Node> mulNode = Mul((!name.empty()) ? name + "_MulIQuant" : ""); + std::shared_ptr<Node> roundNode = Round((!name.empty()) ? name + "_IRoundQuant" : ""); + std::shared_ptr<Node> clipNode = Clip((!name.empty()) ? name + "_IClipQuant" : "", clipOp->min(), clipOp->max()); + + std::shared_ptr<Node> castPostNode = Cast(targetType,((!name.empty()) ? name + "_PostCast" : "")); + + // connect the scaling factor producer + + castPreNode->getOperator()->setDataType(DataType::Float64); + mulNode->getOperator()->setDataType(DataType::Float64); + roundNode->getOperator()->setDataType(DataType::Float64); + clipNode->getOperator()->setDataType(DataType::Float64); + + castPostNode->getOperator()->setDataType(targetType); + + std::shared_ptr<Tensor> scalingFactorTensor = std::make_shared<Tensor>(Array1D<double, 1> {scalingFactor}); + std::shared_ptr<Node> scalingFactorProducer = addProducer<1>(mulNode, 1, {1}, "ScalingFactor"); + scalingFactorProducer->getOperator()->setOutput(0, scalingFactorTensor); + + // create the metaop graph + + std::shared_ptr<GraphView> graphView = Sequential({castPreNode, mulNode, roundNode, clipNode, castPostNode}); + std::shared_ptr<GraphView> connectedGraphView = getConnectedGraphView(mulNode); // XXX why not use the graphView ??? 
+ + // return the metaop + + std::shared_ptr<Node> metaopNode = MetaOperator("IntQuantizer", connectedGraphView, {}, name); // XXX alternative prototype + + return metaopNode; +} void updateScalingFactor(std::shared_ptr<Node> metaOpNode, double scalingFactor) -- GitLab From 9289d0c48b0683080b53a9551a7d01f031cbafc4 Mon Sep 17 00:00:00 2001 From: Noam ZERAH <noam.zerah@cea.fr> Date: Mon, 17 Feb 2025 09:22:53 +0000 Subject: [PATCH 21/26] Changing the way we decide if one apply real cast to the PTQ or not --- src/PTQ/PTQ.cpp | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/PTQ/PTQ.cpp b/src/PTQ/PTQ.cpp index a9ac176..d445624 100644 --- a/src/PTQ/PTQ.cpp +++ b/src/PTQ/PTQ.cpp @@ -1157,7 +1157,7 @@ static void printRanges(std::shared_ptr<GraphView> graphView, std::map<std::stri } void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits, std::vector<std::shared_ptr<Tensor>> inputDataSet, - Clipping clippingMode, DataType targetType,bool noQuant, bool optimizeSigns, bool singleShift, bool useCuda, bool foldGraph ,bool verbose) + Clipping clippingMode, DataType targetType,bool noQuant, bool optimizeSigns, bool singleShift, bool useCuda, bool foldGraph, bool verbose) { Log::notice(" === QUANT PTQ 0.2.21 === "); @@ -1195,7 +1195,7 @@ void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits, Log::notice(" Quantizing the normalized network ..."); quantizeNormalizedNetwork(graphView, nbBits, noQuant, optimizeSigns, verbose); - + if (singleShift) { Log::notice( " Inserting the compensation nodes ..."); @@ -1204,8 +1204,7 @@ void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits, Log::notice(" Performing the Single-Shift approximation ..."); performSingleShiftApproximation(graphView, noQuant); } - - if(targetType != DataType::Float64) //!! 
Adapt the condition + if(targetType != DataType::Float64 && targetType != DataType::Float32 && targetType != DataType::Float16) { AIDGE_ASSERT(!noQuant,"Cannot cast operators with the noQuant(Fake Quantization) flag set to true!") Log::notice("Starting to cast operators into the desired type ..."); @@ -1213,7 +1212,7 @@ void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits, } else { - setupDataType(graphView, inputDataSet, initialDataType); + setupDataType(graphView, inputDataSet, targetType); } if(foldGraph) -- GitLab From f891fc6051aa0d5e5be33084a47405ad7839458d Mon Sep 17 00:00:00 2001 From: Noam ZERAH <noam.zerah@cea.fr> Date: Mon, 17 Feb 2025 13:44:54 +0000 Subject: [PATCH 22/26] Adding BitShiftQuantizer MetaOperator that clip the value after the bitshift, during the inference with int32 (and ssa) --- include/aidge/operator/PTQMetaOps.hpp | 14 +++++++++ src/PTQ/PTQ.cpp | 33 +++++--------------- src/operator/PTQMetaOps.cpp | 45 +++++++++++++++++++++++++++ 3 files changed, 67 insertions(+), 25 deletions(-) diff --git a/include/aidge/operator/PTQMetaOps.hpp b/include/aidge/operator/PTQMetaOps.hpp index 58571e0..a65e4d5 100644 --- a/include/aidge/operator/PTQMetaOps.hpp +++ b/include/aidge/operator/PTQMetaOps.hpp @@ -51,6 +51,20 @@ std::shared_ptr<Aidge::Node> Quantizer(double scalingFactor, double clipMin, dou /// @return A shared pointer to a new instance of the modified meta-operator node. std::shared_ptr<Node> IntQuantizer(std::shared_ptr<Node> oldQuantizer, DataType targetType, const std::string& name); +/// @brief BitShiftQuantizer is the single-shift counterpart of the Quantizer meta-operator, enabling its integration +/// into computation graphs with a data type other than Float by replacing the scaling multiplication with a bit shift. +/// +/// This operator builds a new meta-operator from the provided Quantizer: a [BitShift] whose shift amount is the +/// base-2 logarithm of the Quantizer's scaling factor, truncated to an integer. 
A non-negative value gives a left shift, a negative one a right shift by its absolute value. +/// The [BitShift] is followed by a [Clip] that reuses the clipping range of the provided Quantizer. No Cast or Round +/// operation is inserted: the BitShift and Clip operators are set directly to the target data type. +/// +/// @param oldQuantizer A shared pointer to the existing Quantizer node that will be adapted. +/// @param targetType The data type set on the BitShift and Clip operators and on the shift-amount producer. +/// @param name The name of the meta-operator node created. +/// @return A shared pointer to a new instance of the modified meta-operator node, or nullptr if the provided node contains no Clip. +std::shared_ptr<Node> BitShiftQuantizer(std::shared_ptr<Node> oldQuantizer, DataType targetType, const std::string& name); + /// @brief Updates the scaling factor of a PTQ meta-operator node, allowing for dynamic adjustment of the scaling parameter. /// This function sets a new scaling factor for a specified meta-operator node, modifying the scalar applied in the [Mul] operation. /// The meta-operator node must be a PTQ-specific operator, such as a Quantizer or Scaling node. 
diff --git a/src/PTQ/PTQ.cpp b/src/PTQ/PTQ.cpp index d445624..f9a34ad 100644 --- a/src/PTQ/PTQ.cpp +++ b/src/PTQ/PTQ.cpp @@ -138,34 +138,16 @@ bool castQuantizedGraph(std::shared_ptr<GraphView> &graphView, Aidge::DataType t { if(singleShift) { - //If single shift is enabled we must replace each Quantizer by a bitShift - double scalingFactor = getScalingFactor(node); - int shift = std::log2(scalingFactor); - BitShift_Op::BitShiftDirection direction = BitShift_Op::BitShiftDirection::left; - if(shift < 0 ) - { - direction = BitShift_Op::BitShiftDirection::right; - shift = -shift; - } - std::shared_ptr<Node> bitshiftNode = BitShift(direction,node->name()+"_BitShift_Quantizer"); - std::shared_ptr<Tensor> bitshiftTensor = std::make_shared<Tensor>(Array1D<int, 1> {shift}); - std::shared_ptr<Node> bitshiftProducer = addProducer(bitshiftNode, 1, {1}, "ScalingFactor"); - bitshiftProducer->getOperator()->setOutput(0, bitshiftTensor); - bitshiftProducer->getOperator()->setDataType(targetType); - - bitshiftNode->getOperator()->setDataType(targetType); - bitshiftNode->getOperator()->setBackend(node->getOperator()->backend()); - graphView->add(bitshiftProducer); - graphView->add(bitshiftNode); - graphView->replace({node}, {bitshiftProducer,bitshiftNode}); - graphView->updateInputsOutputs(); + std::shared_ptr<Node> newBitShiftQuantizer = BitShiftQuantizer(node,targetType,node->name()+"_BitShift_Quantizer"); + newBitShiftQuantizer->getOperator()->setBackend(node->getOperator()->backend()); + graphView->replace({node},{newBitShiftQuantizer}); } else //If single shift is not enabled we keep using the alternative Int Quantizer (which cast the data before and after the regular Quantizer Operations) { - std::shared_ptr<Node> newQuantizer = IntQuantizer(node,targetType,node->name()); - newQuantizer->getOperator()->setBackend(node->getOperator()->backend()); - graphView->replace({node},{newQuantizer}); + std::shared_ptr<Node> newIntQuantizer = IntQuantizer(node,targetType,node->name()); + 
newIntQuantizer->getOperator()->setBackend(node->getOperator()->backend()); + graphView->replace({node},{newIntQuantizer}); } } else if (node->type() != "Producer" && @@ -1206,9 +1188,10 @@ void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits, } if(targetType != DataType::Float64 && targetType != DataType::Float32 && targetType != DataType::Float16) { - AIDGE_ASSERT(!noQuant,"Cannot cast operators with the noQuant(Fake Quantization) flag set to true!") + AIDGE_ASSERT(!noQuant,"Cannot cast operators with the noQuant (Fake Quantization) flag set to true!") Log::notice("Starting to cast operators into the desired type ..."); castQuantizedGraph(graphView,DataType::Int32,singleShift); + // Method to set all InputTensor as nullptr } else { diff --git a/src/operator/PTQMetaOps.cpp b/src/operator/PTQMetaOps.cpp index f3ddbcf..6b44366 100644 --- a/src/operator/PTQMetaOps.cpp +++ b/src/operator/PTQMetaOps.cpp @@ -20,6 +20,7 @@ #include "aidge/operator/Mul.hpp" #include "aidge/operator/Round.hpp" #include "aidge/operator/Cast.hpp" +#include "aidge/operator/BitShift.hpp" #include "aidge/graph/Node.hpp" #include "aidge/graph/OpArgs.hpp" @@ -70,7 +71,51 @@ std::shared_ptr<Node> Quantizer(double scalingFactor, double clipMin, double cli return metaopNode; } +std::shared_ptr<Node> BitShiftQuantizer(std::shared_ptr<Node> oldQuantizer, DataType targetType, const std::string& name) +{ + double scalingFactor = getScalingFactor(oldQuantizer); + + std::shared_ptr<MetaOperator_Op> metaOp = std::static_pointer_cast<MetaOperator_Op> (oldQuantizer->getOperator()); + std::shared_ptr<Node> oldclipNode = getSubNode(metaOp->getMicroGraph(), "Clip"); + + if (!oldclipNode) { + Log::warn("Invalid PTQ MetaOperator, no Clip found inside node of type {}", oldQuantizer->type()); + return nullptr; + } + + std::shared_ptr<Clip_Op> clipOp = std::static_pointer_cast<Clip_Op>(oldclipNode->getOperator()); + int shift = std::log2(scalingFactor); + BitShift_Op::BitShiftDirection 
direction = BitShift_Op::BitShiftDirection::left; + + if(shift < 0 ) + { + direction = BitShift_Op::BitShiftDirection::right; + shift = -shift; + } + std::shared_ptr<Node> bitShiftNode = BitShift(direction,(!name.empty()) ? name + "_MulIQuant" : ""); + std::shared_ptr<Node> clipNode = Clip((!name.empty()) ? name + "_IClipQuant" : "", clipOp->min(), clipOp->max()); + + std::shared_ptr<Tensor> bitshiftTensor = std::make_shared<Tensor>(Array1D<int, 1> {shift}); + std::shared_ptr<Node> bitshiftProducer = addProducer(bitShiftNode, 1, {1}, "ScalingFactor"); + bitshiftProducer->getOperator()->setOutput(0, bitshiftTensor); + bitshiftProducer->getOperator()->setDataType(targetType); + + // connect the scaling factor producer + + bitShiftNode->getOperator()->setDataType(targetType); + clipNode->getOperator()->setDataType(targetType); + + // create the metaop graph + + std::shared_ptr<GraphView> graphView = Sequential({bitShiftNode,clipNode}); + std::shared_ptr<GraphView> connectedGraphView = getConnectedGraphView(bitShiftNode); // XXX why not use the graphView ??? 
+ + // return the metaop + std::shared_ptr<Node> metaopNode = MetaOperator("BitShiftQuantizer", connectedGraphView, {}, name); // XXX alternative prototype + + return metaopNode; +} std::shared_ptr<Node> IntQuantizer(std::shared_ptr<Node> oldQuantizer, DataType targetType, const std::string& name) { double scalingFactor = getScalingFactor(oldQuantizer); -- GitLab From 35e30401735eea7109c45c1636ea2ee735c6d4c5 Mon Sep 17 00:00:00 2001 From: Noam ZERAH <noam.zerah@cea.fr> Date: Mon, 17 Feb 2025 15:53:42 +0000 Subject: [PATCH 23/26] updating gitignore --- .gitignore | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index ba5c593..c64cbb5 100644 --- a/.gitignore +++ b/.gitignore @@ -5,8 +5,10 @@ build*/ install*/ include/aidge/backend/quantization_version.h +include/aidge/quantization_version.h -# VSCode + +# VSCodes .vscode # Python -- GitLab From b627ea1fc1e85155a18f077c09d42207f22c4bd2 Mon Sep 17 00:00:00 2001 From: Noam ZERAH <noam.zerah@cea.fr> Date: Thu, 20 Feb 2025 10:54:31 +0000 Subject: [PATCH 24/26] Use cuda does not set the graphView at the end --- src/PTQ/PTQ.cpp | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/PTQ/PTQ.cpp b/src/PTQ/PTQ.cpp index f9a34ad..1c279e5 100644 --- a/src/PTQ/PTQ.cpp +++ b/src/PTQ/PTQ.cpp @@ -1191,7 +1191,6 @@ void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits, AIDGE_ASSERT(!noQuant,"Cannot cast operators with the noQuant (Fake Quantization) flag set to true!") Log::notice("Starting to cast operators into the desired type ..."); castQuantizedGraph(graphView,DataType::Int32,singleShift); - // Method to set all InputTensor as nullptr } else { @@ -1206,12 +1205,18 @@ void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits, //Mandatory to handle all of the newly added connections! 
graphView->updateInputsOutputs(); + //reset input nodes + /*for(Aidge::NodePtr input_node : graphView->inputNodes()) + { + std::static_pointer_cast<OperatorTensor>(input_node->getOperator())->resetInput() + }*/ + if (verbose) printScalingFactors(graphView); if (useCuda) - graphView->setBackend("cuda"); + //graphView->setBackend("cuda"); Log::notice(" Reseting the scheduler ..."); SequentialScheduler scheduler(graphView); -- GitLab From 280506d04656bdad56445813d9f9f02c295b8a57 Mon Sep 17 00:00:00 2001 From: Noam ZERAH <noam.zerah@cea.fr> Date: Thu, 20 Feb 2025 13:58:59 +0000 Subject: [PATCH 25/26] Adding quantization tag to producers of BitShift and Compensation nodes in the graph to ease their use in the export --- src/PTQ/PTQ.cpp | 1 + src/operator/PTQMetaOps.cpp | 4 +++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/src/PTQ/PTQ.cpp b/src/PTQ/PTQ.cpp index 1c279e5..c2bc0e2 100644 --- a/src/PTQ/PTQ.cpp +++ b/src/PTQ/PTQ.cpp @@ -1052,6 +1052,7 @@ static void insertCompensationNodes(std::shared_ptr<GraphView> graphView, std::u coeffProducer->getOperator()->setOutput(0, coeffTensor); coeffProducer->getOperator()->setDataType(DataType::Float64); + coeffProducer->attributes()->addAttr("quantization.ptq.CompensationCoeff",signedMax); coeffProducer->getOperator()->setBackend("cpu"); graphView->add(coeffProducer); // needed ? diff --git a/src/operator/PTQMetaOps.cpp b/src/operator/PTQMetaOps.cpp index 6b44366..fb73664 100644 --- a/src/operator/PTQMetaOps.cpp +++ b/src/operator/PTQMetaOps.cpp @@ -97,8 +97,10 @@ std::shared_ptr<Node> BitShiftQuantizer(std::shared_ptr<Node> oldQuantizer, Data std::shared_ptr<Node> clipNode = Clip((!name.empty()) ? 
name + "_IClipQuant" : "", clipOp->min(), clipOp->max()); std::shared_ptr<Tensor> bitshiftTensor = std::make_shared<Tensor>(Array1D<int, 1> {shift}); - std::shared_ptr<Node> bitshiftProducer = addProducer(bitShiftNode, 1, {1}, "ScalingFactor"); + std::shared_ptr<Node> bitshiftProducer = addProducer(bitShiftNode, 1, {1}, "ScalingFactor"); + bitshiftProducer->getOperator()->setOutput(0, bitshiftTensor); + bitshiftProducer->attributes()->addAttr("quantization.ptq.ShiftAmount",shift); bitshiftProducer->getOperator()->setDataType(targetType); // connect the scaling factor producer -- GitLab From 5a53de588bbadfa839ddf747384431d151c66308 Mon Sep 17 00:00:00 2001 From: Noam ZERAH <noam.zerah@cea.fr> Date: Mon, 24 Feb 2025 13:34:57 +0000 Subject: [PATCH 26/26] Adding the script ptq_ts.py to prevent regression in the PTQ pipeline --- scripts/PTQ/ptq_ts.py | 135 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 135 insertions(+) create mode 100644 scripts/PTQ/ptq_ts.py diff --git a/scripts/PTQ/ptq_ts.py b/scripts/PTQ/ptq_ts.py new file mode 100644 index 0000000..b836a7b --- /dev/null +++ b/scripts/PTQ/ptq_ts.py @@ -0,0 +1,135 @@ +import unittest +import re +import numpy as np +import gzip +import aidge_core +import aidge_onnx +import os +import copy +import aidge_backend_cpu +import aidge_quantization +import sys +import concurrent.futures + +aidge_core.Log.set_console_level(aidge_core.Level.Error) + +SIGMA = 0.05 # Tolerance + +def print_in_color(text, color_code): + print(f"\033[{color_code}m{text}\033[0m") + +def run_model_test(model_name, expected_values, use_multithreading, asset_path, model_path): + NB_SAMPLES = 1000 + NB_BITS = 4 + CLIPPING = aidge_quantization.Clipping.MSE + VERBOSE = False + + results = [] + + samples = np.load(gzip.GzipFile(asset_path + '/mnist_samples.npy.gz', "r")) + labels = np.load(gzip.GzipFile(asset_path + '/mnist_labels.npy.gz', "r")) + + def load_model(): + model = aidge_onnx.load_onnx(model_path + '/' + 
model_name + ".onnx", 
verbose=False) + aidge_core.remove_flatten(model) + model.set_datatype(aidge_core.dtype.float32) + model.set_backend("cpu") + return model + + aidge_model = load_model() + scheduler = aidge_core.SequentialScheduler(aidge_model) + + def propagate(model, scheduler, sample): + sample = np.reshape(sample, (1, 1, 28, 28)) + input_tensor = aidge_core.Tensor(sample) + scheduler.forward(True, [input_tensor]) + output_node = model.get_output_nodes().pop() + output_tensor = output_node.get_operator().get_output(0) + return np.array(output_tensor) + + def compute_accuracy(model, samples, labels): + acc = sum(labels[i] == np.argmax(propagate(model, scheduler, x)) for i, x in enumerate(samples)) + return acc / len(samples) + + base_accuracy = compute_accuracy(aidge_model, samples[:NB_SAMPLES], labels) + if abs(base_accuracy * 100 - expected_values[0]) >= SIGMA: + results.append(f"⌠[ERROR] Baseline accuracy mismatch for {model_name}: Expected {expected_values[0]}, got {base_accuracy * 100:.2f}") + else: + results.append(f"✅ Baseline accuracy for {model_name}: Expected {expected_values[0]}, got {base_accuracy * 100:.2f}") + + quant_model = load_model() + tensors = [aidge_core.Tensor(np.reshape(sample, (1, 1, 28, 28))) for sample in samples[:NB_SAMPLES]] + aidge_quantization.quantize_network(quant_model, NB_BITS, tensors, CLIPPING, aidge_core.dtype.float64, False, True, False, VERBOSE) + scheduler = aidge_core.SequentialScheduler(quant_model) + + scaling = 2**(NB_BITS - 1) - 1 + samples = samples * scaling + + quant_accuracy = compute_accuracy(quant_model, samples[:NB_SAMPLES], labels) + if abs(quant_accuracy * 100 - expected_values[1]) >= SIGMA: + results.append(f"⌠[ERROR] Quantized accuracy mismatch for {model_name}: Expected {expected_values[1]}, got {quant_accuracy * 100:.2f}") + else: + results.append(f"✅ Quantized accuracy for {model_name}: Expected {expected_values[1]}, got {quant_accuracy * 100:.2f}") + + # Quantification Single Shift + quant_model_ss = load_model() + 
aidge_quantization.quantize_network(quant_model_ss, NB_BITS, tensors, CLIPPING, aidge_core.dtype.float64, False, True, True, VERBOSE) + scheduler = aidge_core.SequentialScheduler(quant_model_ss) + quant_accuracy_ss = compute_accuracy(quant_model_ss, samples[:NB_SAMPLES], labels) + + if abs(quant_accuracy_ss * 100 - expected_values[2]) >= SIGMA: + results.append(f"⌠[ERROR] Quantized Single Shift Approximation accuracy mismatch for {model_name}: Expected {expected_values[2]}, got {quant_accuracy_ss * 100:.2f}") + else: + results.append(f"✅ Quantized Single Shift Approximation accuracy for {model_name}: Expected {expected_values[2]}, got {quant_accuracy_ss * 100:.2f}") + + return model_name, results + +def run_quantization_test(use_multithreading,model_path,asset_path): + EXPECTED_RESULTS = { + "MiniResNet": (95.4, 94.5, 94.7), + "ConvNet": (97.9, 97.7, 97.4), + "BranchNetV4": (93.8, 93.2, 93.7), + "TestNet": (95.5, 94.2, 94.2), + "MLP": (94.7, 94.2, 93.3) + } + + all_results = [] + + if use_multithreading: + with concurrent.futures.ProcessPoolExecutor() as executor: + futures = {executor.submit(run_model_test, model, values, use_multithreading,asset_path,model_path): model for model, values in EXPECTED_RESULTS.items()} + + for future in concurrent.futures.as_completed(futures): + model_name = futures[future] + try: + model_name, results = future.result() + all_results.append((model_name, results)) + except Exception as exc: + all_results.append((model_name, [f"⌠[ERROR] {model_name} test failed with exception: {exc}"])) + else: + for model, values in EXPECTED_RESULTS.items(): + try: + model_name, results = run_model_test(model, values, use_multithreading,asset_path,model_path) + all_results.append((model_name, results)) + except Exception as exc: + all_results.append((model, [f"⌠[ERROR] {model} test failed with exception: {exc}"])) + + os.system("clear") + for model_name, results in all_results: + print(f"Results for {model_name}:") + for result in results: + if 
"⌠[ERROR]" in result: + print_in_color(result, 31) + else: + print_in_color(result, 32) + print() + +if __name__ == "__main__": + import argparse + parser = argparse.ArgumentParser(description="Run quantization tests.") + parser.add_argument("-j", action="store_true", help="Enable multithreading") + parser.add_argument("--models_path", type=str, default="/data1/is156025/nz280189/sbx/Models", help="Path to models directory (default: /data)") + parser.add_argument("--asset_path", type=str, default="/data1/is156025/nz280189/sbx/assets", help="Path to assets directory (default: /data)") + args = parser.parse_args() + + run_quantization_test(use_multithreading=args.j,model_path = args.models_path, asset_path = args.asset_path) -- GitLab