From 87f5891aa200535fd5b8744defed0e1eb00bf73c Mon Sep 17 00:00:00 2001 From: bhalimi <benjamin.halimi@cea.fr> Date: Wed, 8 Jan 2025 10:32:06 +0000 Subject: [PATCH 01/44] improve tensor manipulation routines + enhance insertCompensationNodes --- src/PTQ/CLE.cpp | 73 +++++++++++++++++------ src/PTQ/PTQ.cpp | 137 ++++++++++++++++++++++++++------------------ src/QAT/QAT_LSQ.cpp | 9 +-- 3 files changed, 138 insertions(+), 81 deletions(-) diff --git a/src/PTQ/CLE.cpp b/src/PTQ/CLE.cpp index 2c81815..0fe9575 100644 --- a/src/PTQ/CLE.cpp +++ b/src/PTQ/CLE.cpp @@ -19,6 +19,12 @@ #include "aidge/utils/Log.hpp" #include "aidge/operator/OperatorTensor.hpp" +#include "aidge/operator/Mul.hpp" +#include "aidge/operator/ArgMax.hpp" +#include "aidge/operator/Abs.hpp" +#include "aidge/operator/Reshape.hpp" +#include "aidge/operator/Round.hpp" + namespace Aidge { @@ -34,27 +40,58 @@ static std::shared_ptr<Tensor> getBiasTensor(std::shared_ptr<Node> node) static void rescaleTensor(std::shared_ptr<Tensor> tensor, double scaling) { - // Get the tensor data pointer - double * castedTensor = static_cast<double *> (tensor->getImpl()->rawPtr()); - - // Rescale the tensor - for(std::size_t i = 0; i < tensor->size(); i++) - castedTensor[i] *= scaling; + auto mulOp = Mul_Op(); + mulOp.setDataType(tensor->dataType()); + mulOp.setBackend(tensor->backend()); + + std::shared_ptr<Aidge::Tensor> scalingTensor = std::make_shared<Aidge::Tensor>(Aidge::Array1D<float, 1> {scaling}); + scalingTensor->setDataType(tensor->dataType()); + scalingTensor->setBackend(tensor->backend()); + + mulOp.associateInput(0, tensor); + mulOp.associateInput(1, scalingTensor); + + mulOp.forward(); + + auto outTensor = mulOp.getOutput(0); + *tensor = *outTensor; + //tensor->copyCast(*outTensor); } -static double getTensorAbsoluteMax(std::shared_ptr <Tensor> tensor) +// TODO : make the retreival of argmax values backend independant (refCastFrom) +static double getTensorAbsoluteMax(std::shared_ptr<Tensor> tensor) { - // Get 
the tensor data pointer and edit it - double * castedTensor = static_cast<double*> (tensor->getImpl()->rawPtr()); - - // Get the tensor absolute max value - double maxValue = 0.0f; - for(std::size_t i = 0; i < tensor->size(); ++i) { - if(std::fabs(castedTensor[i]) > maxValue) { - maxValue = std::fabs(castedTensor[i]); - } - } - return maxValue; + // get the abs tensor + + std::shared_ptr<Tensor> absTensor = std::make_shared<Tensor>(tensor->abs()); + + // flatten the abs tensor + + std::int64_t nbElement = tensor->size(); + + auto reshapeOp = Reshape_Op({nbElement}); + reshapeOp.setDataType(tensor->dataType()); + reshapeOp.setBackend(tensor->backend()); + + reshapeOp.associateInput(0, absTensor); + reshapeOp.forward(); + std::shared_ptr<Tensor> flatTensor = reshapeOp.getOutput(0); + + // Get the argmax + + auto argmaxOp = ArgMax_Op(0, true, false); + argmaxOp.setDataType(tensor->dataType()); + argmaxOp.setBackend(tensor->backend()); + + argmaxOp.associateInput(0, flatTensor); + argmaxOp.forward(); + std::shared_ptr<Tensor> argmaxTensor = argmaxOp.getOutput(0); + + // Return the max + + int maxIndex = std::round(argmaxTensor->get<double>(0)); + + return flatTensor->get<double>(maxIndex); } void crossLayerEqualization(std::shared_ptr<GraphView> graphView, double targetDelta) diff --git a/src/PTQ/PTQ.cpp b/src/PTQ/PTQ.cpp index 0e26313..6e0b29e 100644 --- a/src/PTQ/PTQ.cpp +++ b/src/PTQ/PTQ.cpp @@ -28,6 +28,12 @@ #include "aidge/operator/BatchNorm.hpp" #include "aidge/operator/Conv.hpp" +#include "aidge/operator/ArgMax.hpp" +#include "aidge/operator/Abs.hpp" +#include "aidge/operator/Reshape.hpp" +#include "aidge/operator/Round.hpp" + + #include "aidge/recipes/Recipes.hpp" #include "aidge/recipes/QuantRecipes.hpp" @@ -66,51 +72,75 @@ bool checkArchitecture(std::shared_ptr<GraphView> graphView) return true; } -static void fillTensor(std::shared_ptr<Tensor> tensor, double value) +static void rescaleTensor(std::shared_ptr<Tensor> tensor, float scaling) { - // Get the 
tensor data pointer - double * castedTensor = static_cast <double *> (tensor->getImpl()->rawPtr()); + auto mulOp = Mul_Op(); + mulOp.setDataType(tensor->dataType()); + mulOp.setBackend(tensor->backend()); - // Fill the tensor - for(std::size_t i = 0; i < tensor->size(); i++) - castedTensor[i] = value; -} + std::shared_ptr<Aidge::Tensor> scalingTensor = std::make_shared<Aidge::Tensor>(Aidge::Array1D<float, 1> {scaling}); + scalingTensor->setDataType(tensor->dataType()); + scalingTensor->setBackend(tensor->backend()); -static void rescaleTensor(std::shared_ptr<Tensor> tensor, double scaling) -{ - // Get the tensor data pointer - double * castedTensor = static_cast <double *> (tensor->getImpl()->rawPtr()); + mulOp.associateInput(0, tensor); + mulOp.associateInput(1, scalingTensor); - // Rescale the tensor - for(std::size_t i = 0; i < tensor->size(); i++) - castedTensor[i] *= scaling; + mulOp.forward(); + + auto outTensor = mulOp.getOutput(0); + *tensor = *outTensor; } static void roundTensor(std::shared_ptr<Tensor> tensor) { - // Get the tensor data pointer - double * castedTensor = static_cast <double *> (tensor->getImpl()->rawPtr()); + auto roundOp = Round_Op(); + roundOp.setDataType(tensor->dataType()); + roundOp.setBackend(tensor->backend()); - // Rescale the tensor - for(std::size_t i = 0; i < tensor->size(); i++) - castedTensor[i] = std::nearbyint(castedTensor[i]);//Round + roundOp.associateInput(0, tensor); + roundOp.forward(); + + auto outTensor = roundOp.getOutput(0); + *tensor = *outTensor; } -static double getTensorAbsoluteMax(std::shared_ptr <Tensor> tensor) +// TODO : make the retreival of argmax values backend independant (refCastFrom) +static double getTensorAbsoluteMax(std::shared_ptr<Tensor> tensor) { - // Get the tensor data pointer and edit it - double * castedTensor = static_cast<double*>(tensor->getImpl()->rawPtr()); - - // Get the tensor absolute max value - double maxValue = 0.0f; - for(std::size_t i = 0; i < tensor->size(); ++i) { - 
if(std::fabs(castedTensor[i]) > maxValue) { - maxValue = std::fabs(castedTensor[i]); - } - } - return maxValue; + // get the abs tensor + + std::shared_ptr<Tensor> absTensor = std::make_shared<Tensor>(tensor->abs()); + + // flatten the abs tensor + + std::int64_t nbElement = tensor->size(); + + auto reshapeOp = Reshape_Op({nbElement}); + reshapeOp.setDataType(tensor->dataType()); + reshapeOp.setBackend(tensor->backend()); + + reshapeOp.associateInput(0, absTensor); + reshapeOp.forward(); + std::shared_ptr<Tensor> flatTensor = reshapeOp.getOutput(0); + + // Get the argmax + + auto argmaxOp = ArgMax_Op(0, true, false); + argmaxOp.setDataType(tensor->dataType()); + argmaxOp.setBackend(tensor->backend()); + + argmaxOp.associateInput(0, flatTensor); + argmaxOp.forward(); + std::shared_ptr<Tensor> argmaxTensor = argmaxOp.getOutput(0); + + // Return the max + + int maxIndex = std::round(argmaxTensor->get<double>(0)); + + return flatTensor->get<double>(maxIndex); } + // TODO : pass nodeVector by reference ... static std::vector<std::shared_ptr<Node>> removeMatchingNodes(std::vector<std::shared_ptr<Node>> nodeVector, std::string nodeType) { @@ -876,50 +906,42 @@ static void insertCompensationNodes(std::shared_ptr<GraphView> graphView, std::u for (std::shared_ptr<Node> node : nodeVector) { - // A merging node is always followed by a scaling node at this point ... + // A merging node is always followed by a Quantizer node at this point if (node->type() == "Quantizer") { + // check if the Quantizer is a residual one, and insert a compensation node if so ... 
+ bool prevNodeIsForking = ((node->getParent(0))->getChildren().size() > 1); bool prevNodeIsAffine = isAffine(node->getParent(0)); bool insertNode = prevNodeIsForking || !prevNodeIsAffine; if (insertNode) { - // create and insert the multplicative node + // create and insert the multplicative node before the Quantizer std::string mulNodeName = makeUniqueName(node->name() + "_Mul", graphView); std::shared_ptr<Node> mulNode = Mul(mulNodeName); - mulNode->getOperator()->setDataType(DataType::Float64); // getDataType(parentNode) mulNode->getOperator()->setBackend("cpu"); graphView->insertParent(node, mulNode, 0, 0, 0); - // create and insert the producer node - - std::shared_ptr<Tensor> inputTensor = std::static_pointer_cast<Tensor> (mulNode->getOperator()->getRawInput(0)); - std::shared_ptr<Tensor> coeffTensor = std::make_shared<Tensor>(); + // Add the coeff producer to the multiplier node - coeffTensor->setDataType(DataType::Float64); // getDataType(parentNode) - coeffTensor->setBackend("cpu"); + std::shared_ptr<Node> coeffProducer = addProducer(mulNode, 1, {1}, ""); + std::shared_ptr<Tensor> coeffTensor = std::make_shared<Tensor>(Array1D<double, 1> {signedMax}); + coeffProducer->getOperator()->setOutput(0, coeffTensor); - coeffTensor->resize(inputTensor->dims()); - fillTensor(coeffTensor, 1); + coeffProducer->getOperator()->setDataType(DataType::Float64); + coeffProducer->getOperator()->setBackend("cpu"); - std::shared_ptr<Node> producerNode = Producer(coeffTensor, makeUniqueName("coeff", graphView)); - producerNode->addChild(mulNode); - graphView->add(producerNode); + graphView->add(coeffProducer); // needed ? - // rescale the coeffs and edit scaling factor + // Adapt the scaling factor value accordingly - fillTensor(coeffTensor, signedMax); - - double currScalingFactor = getScalingFactor(node); // XXX bad naming ! + double currScalingFactor = getScalingFactor(node); updateScalingFactor(node, currScalingFactor / signedMax); - - // TODO : double check this !!! 
- //std::cout << getTensorAbsoluteMax(coeffTensor) << std::endl; } } } @@ -931,7 +953,8 @@ void performSingleShiftApproximation(std::shared_ptr<GraphView> graphView, bool for (std::shared_ptr<Node> node : nodeVector) { - // Use A meatoperator of type Scaling of MulCompensation instead + // TODO : use Compensation nodes instead of Mul nodes + if (isAffine(node) || (node->type() == "Mul")) { std::shared_ptr<Node> scalingNode = (*node->getChildren().begin()); @@ -940,7 +963,7 @@ void performSingleShiftApproximation(std::shared_ptr<GraphView> graphView, bool double approx = std::pow(2, std::ceil(std::log2(base))); - updateScalingFactor(scalingNode,approx); + updateScalingFactor(scalingNode, approx); double ratio = base / approx; @@ -954,7 +977,7 @@ void performSingleShiftApproximation(std::shared_ptr<GraphView> graphView, bool std::shared_ptr<Tensor> biasTensor = getBiasTensor(node); rescaleTensor(biasTensor, ratio); if (!noQuant) - roundTensor(biasTensor); + roundTensor(biasTensor); } } } @@ -1058,8 +1081,8 @@ void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits, if (useCuda) graphView->setBackend("cuda"); - //std::cout << " === SCALINGS (AFTER CAST) ===" << std::endl; - //printScalingFactors(graphView); + std::cout << " === SCALINGS (AFTER CAST) ===" << std::endl; + printScalingFactors(graphView); Log::info(" Reseting the scheduler ..."); SequentialScheduler scheduler(graphView); diff --git a/src/QAT/QAT_LSQ.cpp b/src/QAT/QAT_LSQ.cpp index 9b51e84..a09dbb2 100644 --- a/src/QAT/QAT_LSQ.cpp +++ b/src/QAT/QAT_LSQ.cpp @@ -89,19 +89,16 @@ void QuantLSQ::insertQuantizers(std::shared_ptr<GraphView> graphView, size_t nbB static float getTensorAbsMean(std::shared_ptr<Tensor> tensor) { auto backend = tensor->backend(); + if (backend == "cuda") tensor->setBackend("cpu"); - float acc = 0; - float* castedTensor = static_cast<float *> (tensor->getImpl()->rawPtr()); - for(std::size_t i = 0; i < tensor->size(); i++) - acc += std::abs(castedTensor[i]); - acc 
/= static_cast<float> (tensor->size()); + float value = (*tensor).abs().mean().get<float>(0); if (backend == "cuda") tensor->setBackend("cuda"); - return acc; + return value; } static std::map<std::string, float> collectInputStats(std::shared_ptr<GraphView> graphView, std::shared_ptr<Tensor> calibrationData, bool useCuda) -- GitLab From 261345f10db68b69077bef647fd645196c18baf3 Mon Sep 17 00:00:00 2001 From: bhalimi <benjamin.halimi@cea.fr> Date: Wed, 8 Jan 2025 10:37:27 +0000 Subject: [PATCH 02/44] comment verbose --- src/PTQ/PTQ.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/PTQ/PTQ.cpp b/src/PTQ/PTQ.cpp index 6e0b29e..7f750f0 100644 --- a/src/PTQ/PTQ.cpp +++ b/src/PTQ/PTQ.cpp @@ -1081,8 +1081,8 @@ void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits, if (useCuda) graphView->setBackend("cuda"); - std::cout << " === SCALINGS (AFTER CAST) ===" << std::endl; - printScalingFactors(graphView); + //std::cout << " === SCALINGS (AFTER CAST) ===" << std::endl; + //printScalingFactors(graphView); Log::info(" Reseting the scheduler ..."); SequentialScheduler scheduler(graphView); -- GitLab From 227a9c7e575656ffc7094c0b4e66a42c931d54ee Mon Sep 17 00:00:00 2001 From: bhalimi <benjamin.halimi@cea.fr> Date: Wed, 8 Jan 2025 16:27:21 +0000 Subject: [PATCH 03/44] minor change --- src/PTQ/PTQ.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/PTQ/PTQ.cpp b/src/PTQ/PTQ.cpp index 7f750f0..3677ae0 100644 --- a/src/PTQ/PTQ.cpp +++ b/src/PTQ/PTQ.cpp @@ -215,6 +215,8 @@ void prepareNetwork(std::shared_ptr<GraphView> graphView) { removeFlatten(graphView); + sanitizeNodeNames(graphView); + bool containsBatchNorm = false; std::vector<std::shared_ptr<Node>> nodeVector = retrieveNodeVector(graphView); @@ -1078,6 +1080,7 @@ void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits, //printScalingFactors(graphView); setupDataType(graphView, inputDataSet, initialDataType); + if (useCuda) 
graphView->setBackend("cuda"); -- GitLab From 9998b41f2a26ef738e1fbb829540b6c36dd2a0d3 Mon Sep 17 00:00:00 2001 From: bhalimi <benjamin.halimi@cea.fr> Date: Mon, 13 Jan 2025 13:01:34 +0000 Subject: [PATCH 04/44] rework the LSQ code --- include/aidge/quantization/QAT/QAT_LSQ.hpp | 18 +- python_binding/pybind_QAT_LSQ.cpp | 5 +- src/QAT/QAT_LSQ.cpp | 204 +++++++-------------- 3 files changed, 77 insertions(+), 150 deletions(-) diff --git a/include/aidge/quantization/QAT/QAT_LSQ.hpp b/include/aidge/quantization/QAT/QAT_LSQ.hpp index 4970be0..d7d03ca 100644 --- a/include/aidge/quantization/QAT/QAT_LSQ.hpp +++ b/include/aidge/quantization/QAT/QAT_LSQ.hpp @@ -20,22 +20,14 @@ namespace Aidge { namespace QuantLSQ { /** - * @brief Insert the LSQ quantizer nodes in a given GraphView - * @param graphView The GraphView containing the graph to quantize. + * @brief Given a GraphView with parameters properly initialized, insert + * the LSQ quantizer nodes, and setup the adjustment their step-sizes. + * @param graphView The GraphView containing the network to quantize. * @param nbBits Number of quantization bits. - * @param span Fixed output span of the quantizers. */ -void insertQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits, float step_size); +void setupQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits); -/** - * @brief Given a GraphView with parameters properly initialized and some calibration data, - * insert the LSQ quantizer nodes, and adjust their step-sizes. - * @param graphView The GraphView containing the graph to quantize. - * @param nbBits Number of quantization bits. - * @param calibrationData Calibration data used to adjust the spans. - * @param scale Multiplicative constant applied to the spans. 
- */ -void insertAndInitQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits, std::shared_ptr<Tensor> calibrationData); +void devLSQ(std::shared_ptr<Tensor> tensor); } } diff --git a/python_binding/pybind_QAT_LSQ.cpp b/python_binding/pybind_QAT_LSQ.cpp index 206985e..0b9fcc2 100644 --- a/python_binding/pybind_QAT_LSQ.cpp +++ b/python_binding/pybind_QAT_LSQ.cpp @@ -23,8 +23,9 @@ void init_QAT_LSQ(py::module &m) { auto mQuantLSQ = m.def_submodule("lsq"); - mQuantLSQ.def("insert_quantizers", &QuantLSQ::insertQuantizers, py::arg("network"), py::arg("nb_bits"), py::arg("step_size")); + mQuantLSQ.def("setup_quantizers", &QuantLSQ::setupQuantizers, py::arg("network"), py::arg("nb_bits")); + + mQuantLSQ.def("dev_lsq", &QuantLSQ::devLSQ, py::arg("tensor")); - mQuantLSQ.def("insert_and_init_quantizers", &QuantLSQ::insertAndInitQuantizers, py::arg("network"), py::arg("nb_bits"), py::arg("calibration_data")); } } // namespace Aidge diff --git a/src/QAT/QAT_LSQ.cpp b/src/QAT/QAT_LSQ.cpp index a09dbb2..04f2027 100644 --- a/src/QAT/QAT_LSQ.cpp +++ b/src/QAT/QAT_LSQ.cpp @@ -23,7 +23,42 @@ namespace Aidge { -void QuantLSQ::insertQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits, float stepSize) +static float getTensorAbsMean(std::shared_ptr<Tensor> tensor) +{ + auto valueTensor = (*tensor).abs().mean(); + std::shared_ptr<Tensor> fallback; + const Tensor& localTensor = valueTensor.refCastFrom(fallback, DataType::Float32, "cpu"); + return localTensor.get<float>(0); +} + +// INIT THE STEP SIZE OF A QUANTIZER NODE + +static bool initStepSize(std::shared_ptr<Node> quantizer) +{ + const auto quantizerOp = std::static_pointer_cast<LSQ_Op>(quantizer->getOperator()); + + float inputAbsMean = getTensorAbsMean(quantizerOp->getInput(0)); + + float stepSize = 2.0f * (inputAbsMean / std::sqrt(quantizerOp->range().second)); + + auto stepSizeTensor = std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}})); + + // XXX Manage backend here ? 
+ stepSizeTensor->setBackend(quantizerOp->getInput(0)->backend()); + stepSizeTensor->setDataType(quantizerOp->getInput(0)->dataType()); + + auto stepSizeProducer = quantizer->getParent(1); + + stepSizeProducer->getOperator()->setOutput(0, stepSizeTensor); + + std::cout << " [ INIT STEP SIZE = " << stepSize << " ] " << std::endl; + + return false; +} + +// INPUT QUANTIZERS INSERTION + +static void setupInputQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits) { const auto matches = SinglePassGraphMatching(graphView).match("(Conv2D#|FC#)"); @@ -34,177 +69,76 @@ void QuantLSQ::insertQuantizers(std::shared_ptr<GraphView> graphView, size_t nbB std::pair<int, int> signedRange = {-std::pow(2, nbBits - 1), std::pow(2, nbBits - 1) - 1}; std::pair<int, int> unsignedRange = {0, std::pow(2, nbBits) - 1}; - // INPUT QUANTIZERS INSERTION + // Create the input quantizer node - // TODO : double check this, and use createUniqueName() - auto inputQuantizerName = makeUniqueName(linearNode->name() + "_lsq_i", graphView); - auto inputQuantizerNode = LSQ(signedRange, inputQuantizerName); + auto quantizerName = makeUniqueName(linearNode->name() + "_lsq_i", graphView); + auto quantizerNode = LSQ(signedRange, quantizerName); - // Set the step size + // Init the step-size using the node call stack - auto inputStepSizeOp = inputQuantizerNode->getParent(1)->getOperator(); - auto inputStepSizeTensor = std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}})); - inputStepSizeOp->setOutput(0, inputStepSizeTensor); + quantizerNode->addBeforeForward([quantizerNode](){ return initStepSize(quantizerNode); }); // Absorb the ReLU when possible ... - // XXX is this safe ??? - bool nodeHasParent = static_cast<bool> (linearNode->getParents()[0]); - // bool nodeHasParent = (linearNode->getParents().size() != 0); + bool nodeHasParent = static_cast<bool> (linearNode->getParents()[0]); // XXX is this safe ? 
if (nodeHasParent) { auto parentNode = linearNode->getParents()[0]; if (parentNode->type() == "ReLU") { - auto inputQuantizerOp = std::static_pointer_cast<LSQ_Op> (inputQuantizerNode->getOperator()); - inputQuantizerOp->range() = unsignedRange; + auto quantizerOp = std::static_pointer_cast<LSQ_Op> (quantizerNode->getOperator()); + quantizerOp->range() = unsignedRange; graphView->replace({parentNode}, {}); } } - // We need to handle the case where the linear node is the first one ... + // Insert the quantizer in the graphView ... + // (We need to handle the case where the linear node is the first one) if (nodeHasParent) { - graphView->insertParent(linearNode, inputQuantizerNode, 0, 0, 0); + graphView->insertParent(linearNode, quantizerNode, 0, 0, 0); } else { - inputQuantizerNode->addChild(graphView); - graphView->add(inputQuantizerNode); + quantizerNode->addChild(graphView); + graphView->add(quantizerNode); } - - // PARAM QUANTIZERS INSERTION - - // TODO : double check this, and use createUniqueName() - auto paramQuantizerName = makeUniqueName(linearNode->name() + "_lsq_p", graphView); - auto paramQuantizerNode = LSQ(signedRange, paramQuantizerName); - graphView->insertParent(linearNode, paramQuantizerNode, 1, 0, 0); - - // Set the step size - - auto paramStepSizeOp = paramQuantizerNode->getParent(1)->getOperator(); - auto paramStepSizeTensor = std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}})); - paramStepSizeOp->setOutput(0, paramStepSizeTensor); } - } -static float getTensorAbsMean(std::shared_ptr<Tensor> tensor) -{ - auto backend = tensor->backend(); - - if (backend == "cuda") - tensor->setBackend("cpu"); - - float value = (*tensor).abs().mean().get<float>(0); - - if (backend == "cuda") - tensor->setBackend("cuda"); - - return value; -} +// PARAM QUANTIZERS INSERTION -static std::map<std::string, float> collectInputStats(std::shared_ptr<GraphView> graphView, std::shared_ptr<Tensor> calibrationData, bool useCuda) +static void 
setupParamQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits) { - // Propagate the calibration tensor + const auto matches = SinglePassGraphMatching(graphView).match("(Conv2D#|FC#)"); - SequentialScheduler scheduler(graphView); - scheduler.resetScheduling(); - scheduler.forward(true, {calibrationData}); + std::pair<int, int> signedRange = {-std::pow(2, nbBits - 1), std::pow(2, nbBits - 1) - 1}; - // Store the input tensor statistics + for (const auto& match : matches) + { + auto linearNode = match.graph->rootNode(); - if (useCuda) - graphView->setBackend("cpu"); + // TODO : double check this, and use createUniqueName() + auto quantizerName = makeUniqueName(linearNode->name() + "_lsq_p", graphView); + auto quantizerNode = LSQ(signedRange, quantizerName); - std::map<std::string, float> inputStats; - for (auto node : graphView->getNodes()) - { - if (node->type() == "FC" || node->type() == "Conv2D") // TODO: use graph matching !!! - { - const auto op = std::static_pointer_cast<LSQ_Op>(node->getOperator()); - float inputAbsMean = getTensorAbsMean(op->getInput(0)); - inputStats.insert(std::make_pair(node->name(), inputAbsMean)); - fmt::println("{} -> {}", node->name(), inputAbsMean); - } - } + // Init the step-size using the node call stack - if (useCuda) - graphView->setBackend("cuda"); + quantizerNode->addBeforeForward([quantizerNode](){ return initStepSize(quantizerNode); }); - return inputStats; -} + // Insert the quantizer in the graphView -static std::map<std::string, float> collectParamStats(std::shared_ptr<GraphView> graphView, bool useCuda) -{ - if (useCuda) - graphView->setBackend("cpu"); - - std::map<std::string, float> paramStats; - for (auto node : graphView->getNodes()) - { - if (node->type() == "FC" || node->type() == "Conv2D") // TODO: use graph matching !!! 
- { - const auto op = std::static_pointer_cast<LSQ_Op>(node->getOperator()); - float paramAbsMean = getTensorAbsMean(op->getInput(1)); - paramStats.insert(std::make_pair(node->name(), paramAbsMean)); - fmt::println("{} -> {}", node->name(), paramAbsMean); - } + graphView->insertParent(linearNode, quantizerNode, 1, 0, 0); } - - if (useCuda) - graphView->setBackend("cuda"); - - return paramStats; } -static void adjustQuantizersStepSizes(std::shared_ptr<GraphView> graphView, std::map<std::string, float> inputStats, std::map<std::string, float> paramStats) +void QuantLSQ::setupQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits) { - const auto matches = SinglePassGraphMatching(graphView).match("(Conv2D#|FC#)"); - - for (const auto& match : matches) - { - auto linearNode = match.graph->rootNode(); - - // INPUT QUANTIZERS STEP-SIZES - - auto inputQuantNode = linearNode->getParent(0); - auto inputQuantOp = std::static_pointer_cast<LSQ_Op>(inputQuantNode->getOperator()); - - float absMean = inputStats[linearNode->name()]; - float stepSize = 2.0f * (absMean / std::sqrt(inputQuantOp->range().second)); - - auto inputStepSizeOp = inputQuantNode->getParent(1)->getOperator(); - // XXX inputStepSizeOp->setOutput(0, std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}}))); - auto inputStepSizeTensor = std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}})); - inputStepSizeOp->setOutput(0, inputStepSizeTensor); - - // PARAM QUANTIZERS STEP-SIZES - - auto paramQuantNode = linearNode->getParent(1); - auto paramQuantOp = std::static_pointer_cast<LSQ_Op>(paramQuantNode->getOperator()); - - absMean = paramStats[linearNode->name()]; - stepSize = 2.0f * (absMean / std::sqrt(paramQuantOp->range().second)); - - auto paramStepSizeOp = paramQuantNode->getParent(1)->getOperator(); - // XXX paramStepSizeOp->setOutput(0, std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}}))); - auto paramStepSizeTensor = std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}})); - 
paramStepSizeOp->setOutput(0, paramStepSizeTensor); - } + setupInputQuantizers(graphView, nbBits); + setupParamQuantizers(graphView, nbBits); } -void QuantLSQ::insertAndInitQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits, std::shared_ptr<Tensor> calibrationData) +void QuantLSQ::devLSQ(std::shared_ptr<Tensor> tensor) { - bool useCuda = (calibrationData->backend() == "cuda"); - - // Collect the tensor statisics - auto inputStats = collectInputStats(graphView, calibrationData, useCuda); - - auto paramStats = collectParamStats(graphView, useCuda); - - // Insert the quantizers - insertQuantizers(graphView, nbBits, 1.0); - - // Adjust the quantizers step-sizes - adjustQuantizersStepSizes(graphView, inputStats, paramStats); + float mean = (tensor->mean()).get<float> (0); + std::cout << " MEAN = " << mean << std::endl; } } \ No newline at end of file -- GitLab From 4f1169676c6d3845d35416a4e3f0e3e98e7d9700 Mon Sep 17 00:00:00 2001 From: Noam ZERAH <noam.zerah@cea.fr> Date: Wed, 8 Jan 2025 16:07:59 +0000 Subject: [PATCH 05/44] Adding the isScaling tag in the PTQ pipeline in order to replace the previous and now deprecated Scaling Metaoperator --- aidge_quantization/_version.py | 4 + include/aidge/quantization/PTQ/PTQMetaOps.hpp | 14 +-- include/aidge/quantization_version.h | 6 +- python_binding/pybind_PTQ.cpp | 9 ++ src/PTQ/Clipping.cpp | 2 +- src/PTQ/PTQ.cpp | 88 ++++++++++++------- src/PTQ/PTQMetaOps.cpp | 39 ++++---- 7 files changed, 101 insertions(+), 61 deletions(-) create mode 100644 aidge_quantization/_version.py diff --git a/aidge_quantization/_version.py b/aidge_quantization/_version.py new file mode 100644 index 0000000..d4ec20e --- /dev/null +++ b/aidge_quantization/_version.py @@ -0,0 +1,4 @@ +# file generated by setuptools_scm +# don't change, don't track in version control +__version__ = version = '0.2.1.dev60+g8044e79.d20250106' +__version_tuple__ = version_tuple = (0, 2, 1, 'dev60', 'g8044e79.d20250106') diff --git 
a/include/aidge/quantization/PTQ/PTQMetaOps.hpp b/include/aidge/quantization/PTQ/PTQMetaOps.hpp index 62fac87..a8028c6 100644 --- a/include/aidge/quantization/PTQ/PTQMetaOps.hpp +++ b/include/aidge/quantization/PTQ/PTQMetaOps.hpp @@ -37,13 +37,13 @@ namespace Aidge { /// @return A shared pointer to an instance of the meta-operator node. std::shared_ptr<Aidge::Node> Quantizer(double scalingFactor, double clipMin, double clipMax, const std::string& name); -/// @brief The purpose of Scaling is to encapsulate the Mul operator and tag it as a PTQ node rather than a regular Mul operator. -/// Therefore, this meta-operator consists solely of a [Mul] operation. -/// -/// @param scalingFactor The scaling factor to apply to the input (a scalar to multiply the input with). -/// @param name The name of the meta-operator node created. -/// @return A shared pointer to an instance of the scaling node. -std::shared_ptr<Aidge::Node> Scaling(double scalingFactor, const std::string& name = ""); +/// @brief Updates the scaling factor of a "Mul" node in a graph if the node is marked as a scaling node. +/// This function multiplies the existing scaling factor by a given coefficient. It verifies that the node is of the correct type ("Mul") +/// and has the `isScaling` attribute. If these conditions are not met, a warning is logged. +/// @param node A shared pointer to an `Aidge::Node` object representing the node to modify. +/// @param coeff A double representing the multiplication coefficient to apply to the scaling factor. +void multiplyScalingFactor(std::shared_ptr<Aidge::Node> node, double coeff); + /// @brief Updates the scaling factor of a PTQ meta-operator node, allowing for dynamic adjustment of the scaling parameter. /// This function sets a new scaling factor for a specified meta-operator node, modifying the scalar applied in the [Mul] operation. 
diff --git a/include/aidge/quantization_version.h b/include/aidge/quantization_version.h index 546263a..f14a045 100644 --- a/include/aidge/quantization_version.h +++ b/include/aidge/quantization_version.h @@ -3,9 +3,9 @@ namespace Aidge { static constexpr const int PROJECT_VERSION_MAJOR = 0; -static constexpr const int PROJECT_VERSION_MINOR = 2; +static constexpr const int PROJECT_VERSION_MINOR = 3; static constexpr const int PROJECT_VERSION_PATCH = 0; -static constexpr const char * PROJECT_VERSION = "0.2.0"; -static constexpr const char * PROJECT_GIT_HASH = "f50c860"; +static constexpr const char * PROJECT_VERSION = "0.3.0"; +static constexpr const char * PROJECT_GIT_HASH = "8c89214"; } #endif // VERSION_H diff --git a/python_binding/pybind_PTQ.cpp b/python_binding/pybind_PTQ.cpp index b5193bd..7f7c57d 100644 --- a/python_binding/pybind_PTQ.cpp +++ b/python_binding/pybind_PTQ.cpp @@ -17,6 +17,7 @@ #include "aidge/quantization/PTQ/Clipping.hpp" #include "aidge/quantization/PTQ/CLE.hpp" #include "aidge/quantization/PTQ/PTQ.hpp" +#include "aidge/quantization/PTQ/PTQMetaOps.hpp" #include "aidge/graph/GraphView.hpp" @@ -48,6 +49,14 @@ void init_PTQ(py::module &m) { :type network: :py:class:`aidge_core.GraphView` )mydelimiter"); + m.def( "multiply_scaling_factor",&multiplyScalingFactor,py::arg("node"), py::arg("coeff") + R"mydelimiter( + Updates the scaling factor of a "Mul" node in a graph if the node is marked as a scaling node. This function multiplies the existing scaling factor by a given coefficient. + :param node: A node representing the node to modify. + :param coeff: A floating value representing the multiplication coefficient to apply to the scaling factor. + )mydelimiter" + ); + m.def("normalize_parameters", &normalizeParameters, py::arg("network"), R"mydelimiter( Normalize the parameters of each parametrized node, so that they fit in the [-1:1] range. 
diff --git a/src/PTQ/Clipping.cpp b/src/PTQ/Clipping.cpp index 57ad7a8..1901e38 100644 --- a/src/PTQ/Clipping.cpp +++ b/src/PTQ/Clipping.cpp @@ -222,7 +222,7 @@ std::map<std::string, double> adjustRanges(Clipping clippingMode, std::map<std:: for (std::shared_ptr<Node> node : graphView->getNodes()) { - if (node->type() == "Scaling") + if (node->attributes()->hasAttr("isScaling")) { std::vector<int> histogram = histograms[node->name()]; diff --git a/src/PTQ/PTQ.cpp b/src/PTQ/PTQ.cpp index 3677ae0..2d431f6 100644 --- a/src/PTQ/PTQ.cpp +++ b/src/PTQ/PTQ.cpp @@ -264,12 +264,19 @@ void insertResidualNodes(std::shared_ptr<GraphView> graphView) Log::info(" ### inserting multiplicative node ..."); std::string residualNodeName = makeUniqueName(parentNode->name() + "_Res", graphView); - std::shared_ptr<Node> residualNode = Scaling(1.0, residualNodeName); + std::shared_ptr<Node> residualNode = Mul(residualNodeName); + residualNode->attributes()->addAttr("isScaling", 0.0); + + //Adding the SF as a producer of the node + std::shared_ptr<Tensor> scalingFactorTensor = std::make_shared<Tensor>(Array1D<double, 1> {1.0}); + std::shared_ptr<Node> scalingFactorProducer = addProducer(residualNode, 1, {1}, "ScalingFactor"); + scalingFactorProducer->getOperator()->setOutput(0, scalingFactorTensor); - residualNode->getOperator()->setDataType(DataType::Float64); //getDataType(parentNode) + residualNode->getOperator()->setDataType(DataType::Float64); // getDataType(parentNode) residualNode->getOperator()->setBackend("cpu"); graphView->insertParent(node, residualNode, i, 0, 0); + graphView->add(scalingFactorProducer); } } } @@ -295,7 +302,16 @@ void insertScalingNodes(std::shared_ptr<GraphView> graphView) if (isAffine(parentNode) || isMerging(parentNode)) { std::string scalingNodeName = makeUniqueName(parentNode->name() + "_Scaling", graphView); - std::shared_ptr<Node> scalingNode = Scaling(1.0, scalingNodeName); + //std::shared_ptr<Node> scalingNode = Scaling(1.0, scalingNodeName); + + 
//Adding Mul operator with tag "isScaling" + std::shared_ptr<Aidge::Node> scalingNode = Mul(scalingNodeName); + scalingNode->attributes()->addAttr("isScaling",0.0); + + //Adding the SF as a producer of the node + std::shared_ptr<Tensor> scalingFactorTensor = std::make_shared<Tensor>(Array1D<double, 1> {1.0}); + std::shared_ptr<Node> scalingFactorProducer = addProducer(scalingNode, 1, {1}, "ScalingFactor"); + scalingFactorProducer->getOperator()->setOutput(0, scalingFactorTensor); scalingNode->getOperator()->setDataType(DataType::Float64); // getDataType(parentNode) scalingNode->getOperator()->setBackend("cpu"); @@ -320,12 +336,14 @@ void insertScalingNodes(std::shared_ptr<GraphView> graphView) for (std::size_t i = 0; i < nextNodes.size(); i++) scalingNode->addChild(nextNodes[i], 0, inputIndices[i]); + graphView->add(scalingFactorProducer); graphView->add(scalingNode); } else { // Log::info(" last node reached ! "); parentNode->addChild(scalingNode, 0, 0); + graphView->add(scalingFactorProducer); graphView->add(scalingNode); } } @@ -335,7 +353,7 @@ void insertScalingNodes(std::shared_ptr<GraphView> graphView) static std::shared_ptr<Node> getPreviousScalingNode(std::shared_ptr<Node> mergingNode) { std::shared_ptr<Node> currNode = mergingNode; - while(currNode->type() != "Scaling") + while(!currNode->attributes()->hasAttr("isScaling")) { if (currNode->getParents().size() == 0) { @@ -378,7 +396,7 @@ void normalizeParameters(std::shared_ptr<GraphView> graphView) for (std::shared_ptr<Node> node : nodeVector) { // Scaling nodes still have a ratio of 1, so they are seamless ... 
- if (node->type() == "ReLU" || node->type() == "Scaling" || isSeamless(node)) + if (node->type() == "ReLU" || node->attributes()->hasAttr("isScaling") || isSeamless(node)) { if (node != firstNode) { @@ -439,8 +457,9 @@ void normalizeParameters(std::shared_ptr<GraphView> graphView) std::shared_ptr<Node> scalingNode = getPreviousScalingNode(mergingNode); - double currScalingFactor = getScalingFactor(scalingNode); - updateScalingFactor(scalingNode, currScalingFactor / rescaling); + //double currScalingFactor = getScalingFactor(scalingNode); + //updateScalingFactor(scalingNode, currScalingFactor / rescaling); + multiplyScalingFactor(scalingNode,1/rescaling); accumulatedRatios[mergingNode->name()] /= rescaling; // optional ... } @@ -465,7 +484,7 @@ std::map<std::string, double> computeRanges(std::shared_ptr<GraphView> graphView std::set<std::shared_ptr<Node>> nodeSet = graphView->getNodes(); for (std::shared_ptr<Node> node : nodeSet) { - if ((scalingNodesOnly && (node->type() == "Scaling")) || (!scalingNodesOnly && (node->type() != "Producer"))) + if ((scalingNodesOnly && (node->attributes()->hasAttr("isScaling"))) || (!scalingNodesOnly && (node->type() != "Producer"))) { std::shared_ptr<Operator> nodeOperator = node->getOperator(); std::shared_ptr<Tensor> valueTensor = std::static_pointer_cast<Tensor> (nodeOperator->getRawOutput(0)); @@ -487,7 +506,7 @@ std::map<std::string, double> computeRanges(std::shared_ptr<GraphView> graphView // std::shared_ptr<Node> inputNode = getFirstNode(graphView); for (std::shared_ptr<Node> node : nodeSet) - if ((scalingNodesOnly && (node->type() == "Scaling")) || (!scalingNodesOnly && (node->type() != "Producer"))) + if ((scalingNodesOnly && (node->attributes()->hasAttr("isScaling"))) || (!scalingNodesOnly && (node->type() != "Producer"))) valueRanges.insert(std::make_pair(node->name(), 0)); if (useCuda) @@ -514,7 +533,7 @@ std::map<std::string, double> computeRanges(std::shared_ptr<GraphView> graphView std::map<std::string, double> 
sampleRanges; for (std::shared_ptr<Node> node : nodeSet) { - if ((scalingNodesOnly && (node->type() == "Scaling")) || (!scalingNodesOnly && (node->type() != "Producer"))) + if ((scalingNodesOnly && (node->attributes()->hasAttr("isScaling"))) || (!scalingNodesOnly && (node->type() != "Producer"))) { std::shared_ptr<Operator> nodeOperator = node->getOperator(); std::shared_ptr<Tensor> valueTensor = std::static_pointer_cast<Tensor> (nodeOperator->getRawOutput(0)); @@ -536,7 +555,7 @@ std::map<std::string, double> computeRanges(std::shared_ptr<GraphView> graphView for (std::shared_ptr<Node> node : nodeSet) { - if ((scalingNodesOnly && (node->type() == "Scaling")) || (!scalingNodesOnly && (node->type() != "Producer"))) + if ((scalingNodesOnly && (node->attributes()->hasAttr("isScaling"))) || (!scalingNodesOnly && (node->type() != "Producer"))) { std::string nodeName = node->name(); if (sampleRanges[nodeName] > valueRanges[nodeName]) @@ -589,7 +608,7 @@ void normalizeActivations(std::shared_ptr<GraphView> graphView, std::map<std::st // Here prevNode is either a 'Affine' or a 'Merging' // => do not split the cases, just handle the bias ... - if (node->type() == "Scaling") + if (node->attributes()->hasAttr("isScaling")) { // retrieve the previous scaling factor ... std::shared_ptr<Node> prevNode = node->getParent(0); @@ -598,8 +617,9 @@ void normalizeActivations(std::shared_ptr<GraphView> graphView, std::map<std::st // ValueRanges must contains all the scaling nodes !!! 
double scalingFactor = valueRanges[node->name()]; - double currScalingFactor = getScalingFactor(node); - updateScalingFactor(node, currScalingFactor / (scalingFactor / prevScalingFactor)); + //double currScalingFactor = getScalingFactor(node); + //updateScalingFactor(node, currScalingFactor / (scalingFactor / prevScalingFactor)); + multiplyScalingFactor(node,1/(scalingFactor / prevScalingFactor)); scalingFactors[node->name()] = scalingFactor; @@ -642,8 +662,9 @@ void normalizeActivations(std::shared_ptr<GraphView> graphView, std::map<std::st std::shared_ptr<Node> scalingNode = getPreviousScalingNode(mergingNode); //Log::info(" SCALING NODE : {} {}", scalingNode->type(), scalingNode->name()); - double currScalingFactor = getScalingFactor(scalingNode); - updateScalingFactor(scalingNode, currScalingFactor * rescaling); + //double currScalingFactor = getScalingFactor(scalingNode); + //updateScalingFactor(scalingNode, currScalingFactor * rescaling); + multiplyScalingFactor(scalingNode,rescaling) ; } } } @@ -679,7 +700,7 @@ std::map<std::string, std::pair<bool, bool>> computeSignMap(std::shared_ptr<Grap signMap[node->name()].second = false; } - if (node->type() == "Scaling") + if (node->attributes()->hasAttr("isScaling")) { signMap[node->name()].second = false; @@ -726,7 +747,7 @@ std::map<std::string, std::pair<bool, bool>> computeSignMap(std::shared_ptr<Grap // Arbitration : Signed type wins ! for(std::shared_ptr<Node> parent : parentNodes) { - while (parent->type() != "Scaling") + while (!parent->attributes()->hasAttr("isScaling")) { signMap[parent->name()] = std::make_pair(false, false); // We are on a branch so nodes always have 1 parent ... @@ -842,8 +863,9 @@ void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_ std::shared_ptr<Node> scalingNode = *(node->getChildren().begin()); // Assert if scalingNode is a Scaling ... 
- double currScalingFactor = getScalingFactor(scalingNode); - updateScalingFactor(scalingNode, currScalingFactor * rescaling); + // double currScalingFactor = getScalingFactor(scalingNode); + // updateScalingFactor(scalingNode, currScalingFactor * rescaling); + multiplyScalingFactor(scalingNode,rescaling) ; } if (isMerging(node)) @@ -858,23 +880,27 @@ void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_ std::shared_ptr<Node> scalingNode = *(node->getChildren().begin()); // Assert if scalingNode is a Scaling ... - double currScalingFactor = getScalingFactor(scalingNode); // XXX bad naming - updateScalingFactor(scalingNode, currScalingFactor * rescaling); + // double currScalingFactor = getScalingFactor(scalingNode); // XXX bad naming + // updateScalingFactor(scalingNode, currScalingFactor * rescaling); + multiplyScalingFactor(scalingNode,rescaling) ; } // Handle the Scaling Nodes ... - if (node->type() == "Scaling") + if (node->attributes()->hasAttr("isScaling")) { if (!noQuant) { // Replace the Scaling Node by Quantizer + auto scalingFactorTensor = std::static_pointer_cast<OperatorTensor>(node->getOperator())->getInput(1); + std::shared_ptr<Tensor> fallback; + const Tensor& localTensor = scalingFactorTensor->refCastFrom(fallback, DataType::Float64, "cpu"); + double old_sf = localTensor.get<double>(0);//!\\ - std::shared_ptr<Node> quantizerNode = Quantizer(getScalingFactor(node), -(signedMax + 1), signedMax, node->name()); + std::shared_ptr<Node> quantizerNode = Quantizer(old_sf, -(signedMax + 1), signedMax, node->name()); quantizerNode->getOperator()->setDataType(DataType::Float64); // getDataType(parentNode) quantizerNode->getOperator()->setBackend("cpu"); - - graphView->replace({node}, {quantizerNode}); + graphView->replace({node,node->getParent(1)}, {quantizerNode}); if (optimizeSigns) { @@ -888,6 +914,7 @@ void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_ double currScalingFactor = 
getScalingFactor(quantizerNode); updateScalingFactor(quantizerNode, currScalingFactor * rescaling); + if(outputIsUnsigned) { @@ -965,7 +992,7 @@ void performSingleShiftApproximation(std::shared_ptr<GraphView> graphView, bool double approx = std::pow(2, std::ceil(std::log2(base))); - updateScalingFactor(scalingNode, approx); + updateScalingFactor(scalingNode,approx); double ratio = base / approx; @@ -989,7 +1016,7 @@ static void printScalingFactors(std::shared_ptr<GraphView> graphView) { Log::info(" === SCALING FACTORS === "); for (auto node : retrieveNodeVector(graphView)) - if (node->type() == "Scaling" || node->type() == "Quantizer") + if (node->attributes()->hasAttr("isScaling") || node->type() == "Quantizer") { double scalingFactor = getScalingFactor(node); Log::info(" {:.6f} ({})", scalingFactor, node->name()); @@ -1019,8 +1046,8 @@ static void printRanges(std::shared_ptr<GraphView> graphView, std::map<std::stri auto scheduling = scheduler.getStaticScheduling(); for (auto node : scheduling) - if (node->type() == "Scaling") - fmt::println("{} range = {}", node->name(), valueRanges[node->name()]); + if (node->attributes()->hasAttr("isScaling")) + std::cout << node->name() << " range = " << valueRanges[node->name()] << std::endl; } void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits, std::vector<std::shared_ptr<Tensor>> inputDataSet, Clipping clippingMode, bool noQuant, bool optimizeSigns, bool singleShift, bool useCuda, bool verbose) @@ -1042,7 +1069,6 @@ void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits, insertScalingNodes(graphView); crossLayerEqualization(graphView); - Log::info(" Normalizing the parameters ..."); normalizeParameters(graphView); diff --git a/src/PTQ/PTQMetaOps.cpp b/src/PTQ/PTQMetaOps.cpp index 527d853..4c17f9b 100644 --- a/src/PTQ/PTQMetaOps.cpp +++ b/src/PTQ/PTQMetaOps.cpp @@ -61,23 +61,6 @@ std::shared_ptr<Node> Quantizer(double scalingFactor, double clipMin, double cli return 
metaopNode; } -std::shared_ptr<Node> Scaling(double scalingFactor, const std::string& name) -{ - std::shared_ptr<Tensor> scalingFactorTensor = std::make_shared<Tensor>(Array1D<double, 1> {scalingFactor}); - - std::shared_ptr<Node> mulNode = Mul((!name.empty()) ? name + "_Scaling" : ""); - - std::shared_ptr<Node> scalingFactorProducer = addProducer<1>(mulNode, 1, {1}, "ScalingFactor"); - scalingFactorProducer->getOperator()->setOutput(0, scalingFactorTensor); - - std::shared_ptr<GraphView> graphView = Sequential({mulNode}); - std::shared_ptr<GraphView> connectedGraphView = getConnectedGraphView(mulNode); - - NodePtr metaopNode = MetaOperator("Scaling", connectedGraphView, {}, name); - - return metaopNode; -} - static std::shared_ptr<Node> getSubNode(std::shared_ptr<GraphView> graphView, std::string nodeType) { std::shared_ptr<Node> mulNode = nullptr; @@ -88,9 +71,27 @@ static std::shared_ptr<Node> getSubNode(std::shared_ptr<GraphView> graphView, st return mulNode; } +void multiplyScalingFactor(std::shared_ptr<Aidge::Node> node,double coeff) +{ + if(node->type() == "Mul" && node->attributes()->hasAttr("isScaling")) + { + auto scalingFactorTensor = std::static_pointer_cast<OperatorTensor>(node->getOperator())->getInput(1); + std::shared_ptr<Tensor> fallback; + const Tensor& localTensor = scalingFactorTensor->refCastFrom(fallback, DataType::Float64, "cpu"); + double previousScalingFactor = localTensor.get<double>(0); + std::shared_ptr<Tensor> finalTensor = std::make_shared<Tensor>(Array1D<double, 1> {previousScalingFactor * coeff}); + node->input(1).first->getOperator()->setOutput(0, finalTensor); + } + else + { + Log::warn(" Cannot update the scaling factor on Node of type {} with no scaling tag", node->type()); + } +} + + void updateScalingFactor(std::shared_ptr<Node> metaOpNode, double scalingFactor) { - if(metaOpNode->type() != "Scaling" && metaOpNode->type() != "Quantizer") + if(metaOpNode->type() != "Quantizer") Log::warn(" Cannot update the scaling factor on 
Node of type {}", metaOpNode->type()); std::shared_ptr<Tensor> scalingFactorTensor = std::make_shared<Tensor>(Array1D<double, 1> {scalingFactor}); @@ -107,7 +108,7 @@ void updateScalingFactor(std::shared_ptr<Node> metaOpNode, double scalingFactor) double getScalingFactor(std::shared_ptr<Node> MetaOpNode) { - if (MetaOpNode->type() != "Scaling" && MetaOpNode->type() != "Quantizer") { + if (MetaOpNode->type() != "Quantizer") { Log::warn(" Cannot get the scaling factor on Node of type {}", MetaOpNode->type()); return 0; } -- GitLab From a98dbceaad16441d7449022992f3885332e7aaf4 Mon Sep 17 00:00:00 2001 From: Noam ZERAH <noam.zerah@cea.fr> Date: Mon, 13 Jan 2025 15:43:30 +0000 Subject: [PATCH 06/44] Refactoring Scaling Metaop deletions by removing old getScalingFactor and updateScalingFactor; Adding clear tag isCompensation for Mul used as compensations nodes --- .../PTQ => operator}/PTQMetaOps.hpp | 8 ---- include/aidge/quantization/PTQ/PTQ.hpp | 8 ++++ include/aidge/quantization_version.h | 2 +- python_binding/pybind_PTQ.cpp | 2 +- src/PTQ/PTQ.cpp | 37 +++++++++++-------- src/{PTQ => operator}/PTQMetaOps.cpp | 18 +-------- 6 files changed, 33 insertions(+), 42 deletions(-) rename include/aidge/{quantization/PTQ => operator}/PTQMetaOps.hpp (86%) rename src/{PTQ => operator}/PTQMetaOps.cpp (84%) diff --git a/include/aidge/quantization/PTQ/PTQMetaOps.hpp b/include/aidge/operator/PTQMetaOps.hpp similarity index 86% rename from include/aidge/quantization/PTQ/PTQMetaOps.hpp rename to include/aidge/operator/PTQMetaOps.hpp index a8028c6..22fb71e 100644 --- a/include/aidge/quantization/PTQ/PTQMetaOps.hpp +++ b/include/aidge/operator/PTQMetaOps.hpp @@ -37,14 +37,6 @@ namespace Aidge { /// @return A shared pointer to an instance of the meta-operator node. std::shared_ptr<Aidge::Node> Quantizer(double scalingFactor, double clipMin, double clipMax, const std::string& name); -/// @brief Updates the scaling factor of a "Mul" node in a graph if the node is marked as a scaling node. 
-/// This function multiplies the existing scaling factor by a given coefficient. It verifies that the node is of the correct type ("Mul") -/// and has the `isScaling` attribute. If these conditions are not met, a warning is logged. -/// @param node A shared pointer to an `Aidge::Node` object representing the node to modify. -/// @param coeff A double representing the multiplication coefficient to apply to the scaling factor. -void multiplyScalingFactor(std::shared_ptr<Aidge::Node> node, double coeff); - - /// @brief Updates the scaling factor of a PTQ meta-operator node, allowing for dynamic adjustment of the scaling parameter. /// This function sets a new scaling factor for a specified meta-operator node, modifying the scalar applied in the [Mul] operation. /// The meta-operator node must be a PTQ-specific operator, such as a Quantizer or Scaling node. diff --git a/include/aidge/quantization/PTQ/PTQ.hpp b/include/aidge/quantization/PTQ/PTQ.hpp index d2b8b7f..e7cbddd 100644 --- a/include/aidge/quantization/PTQ/PTQ.hpp +++ b/include/aidge/quantization/PTQ/PTQ.hpp @@ -74,6 +74,14 @@ namespace Aidge { */ bool checkArchitecture(std::shared_ptr<GraphView> graphView); + /** + * @brief This function multiplies the existing scaling factor by a given coefficient. It verifies that the node is of the correct type ("Mul") + * and has the `isScaling` attribute. If these conditions are not met, a warning is logged. + * @param node A shared pointer to an `Aidge::Node` object representing the node to modify. + * @param coeff A double representing the multiplication coefficient to apply to the scaling factor. 
+ */ + void multiplyScalingFactor(std::shared_ptr<Aidge::Node> node, double coeff); + void prepareNetwork(std::shared_ptr<GraphView> graphView); diff --git a/include/aidge/quantization_version.h b/include/aidge/quantization_version.h index f14a045..740621a 100644 --- a/include/aidge/quantization_version.h +++ b/include/aidge/quantization_version.h @@ -6,6 +6,6 @@ static constexpr const int PROJECT_VERSION_MAJOR = 0; static constexpr const int PROJECT_VERSION_MINOR = 3; static constexpr const int PROJECT_VERSION_PATCH = 0; static constexpr const char * PROJECT_VERSION = "0.3.0"; -static constexpr const char * PROJECT_GIT_HASH = "8c89214"; +static constexpr const char * PROJECT_GIT_HASH = "b4af1ce"; } #endif // VERSION_H diff --git a/python_binding/pybind_PTQ.cpp b/python_binding/pybind_PTQ.cpp index 7f7c57d..2c25dc6 100644 --- a/python_binding/pybind_PTQ.cpp +++ b/python_binding/pybind_PTQ.cpp @@ -49,7 +49,7 @@ void init_PTQ(py::module &m) { :type network: :py:class:`aidge_core.GraphView` )mydelimiter"); - m.def( "multiply_scaling_factor",&multiplyScalingFactor,py::arg("node"), py::arg("coeff") + m.def( "multiply_scaling_factor",&multiplyScalingFactor,py::arg("node"), py::arg("coeff"), R"mydelimiter( Updates the scaling factor of a "Mul" node in a graph if the node is marked as a scaling node. This function multiplies the existing scaling factor by a given coefficient. :param node: A node representing the node to modify. 
diff --git a/src/PTQ/PTQ.cpp b/src/PTQ/PTQ.cpp index 2d431f6..23d9f01 100644 --- a/src/PTQ/PTQ.cpp +++ b/src/PTQ/PTQ.cpp @@ -12,7 +12,7 @@ #include "aidge/quantization/PTQ/CLE.hpp" #include "aidge/quantization/PTQ/Clipping.hpp" #include "aidge/quantization/PTQ/PTQ.hpp" -#include "aidge/quantization/PTQ/PTQMetaOps.hpp" +#include "aidge/operator/PTQMetaOps.hpp" #include "aidge/data/Tensor.hpp" @@ -72,6 +72,23 @@ bool checkArchitecture(std::shared_ptr<GraphView> graphView) return true; } +void multiplyScalingFactor(std::shared_ptr<Aidge::Node> node,double coeff) +{ + if(node->type() == "Mul" && node->attributes()->hasAttr("isScaling")) + { + auto scalingFactorTensor = std::static_pointer_cast<OperatorTensor>(node->getOperator())->getInput(1); + std::shared_ptr<Tensor> fallback; + const Tensor& localTensor = scalingFactorTensor->refCastFrom(fallback, DataType::Float64, "cpu"); + double previousScalingFactor = localTensor.get<double>(0); + std::shared_ptr<Tensor> finalTensor = std::make_shared<Tensor>(Array1D<double, 1> {previousScalingFactor * coeff}); + node->input(1).first->getOperator()->setOutput(0, finalTensor); + } + else + { + Log::warn(" Cannot update the scaling factor on Node of type {} with no scaling tag", node->type()); + } +} + static void rescaleTensor(std::shared_ptr<Tensor> tensor, float scaling) { auto mulOp = Mul_Op(); @@ -457,8 +474,6 @@ void normalizeParameters(std::shared_ptr<GraphView> graphView) std::shared_ptr<Node> scalingNode = getPreviousScalingNode(mergingNode); - //double currScalingFactor = getScalingFactor(scalingNode); - //updateScalingFactor(scalingNode, currScalingFactor / rescaling); multiplyScalingFactor(scalingNode,1/rescaling); accumulatedRatios[mergingNode->name()] /= rescaling; // optional ... @@ -617,8 +632,6 @@ void normalizeActivations(std::shared_ptr<GraphView> graphView, std::map<std::st // ValueRanges must contains all the scaling nodes !!! 
double scalingFactor = valueRanges[node->name()]; - //double currScalingFactor = getScalingFactor(node); - //updateScalingFactor(node, currScalingFactor / (scalingFactor / prevScalingFactor)); multiplyScalingFactor(node,1/(scalingFactor / prevScalingFactor)); scalingFactors[node->name()] = scalingFactor; @@ -661,9 +674,7 @@ void normalizeActivations(std::shared_ptr<GraphView> graphView, std::map<std::st std::shared_ptr<Node> scalingNode = getPreviousScalingNode(mergingNode); //Log::info(" SCALING NODE : {} {}", scalingNode->type(), scalingNode->name()); - - //double currScalingFactor = getScalingFactor(scalingNode); - //updateScalingFactor(scalingNode, currScalingFactor * rescaling); + multiplyScalingFactor(scalingNode,rescaling) ; } } @@ -863,8 +874,6 @@ void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_ std::shared_ptr<Node> scalingNode = *(node->getChildren().begin()); // Assert if scalingNode is a Scaling ... - // double currScalingFactor = getScalingFactor(scalingNode); - // updateScalingFactor(scalingNode, currScalingFactor * rescaling); multiplyScalingFactor(scalingNode,rescaling) ; } @@ -880,8 +889,6 @@ void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_ std::shared_ptr<Node> scalingNode = *(node->getChildren().begin()); // Assert if scalingNode is a Scaling ... 
- // double currScalingFactor = getScalingFactor(scalingNode); // XXX bad naming - // updateScalingFactor(scalingNode, currScalingFactor * rescaling); multiplyScalingFactor(scalingNode,rescaling) ; } @@ -951,6 +958,8 @@ static void insertCompensationNodes(std::shared_ptr<GraphView> graphView, std::u std::string mulNodeName = makeUniqueName(node->name() + "_Mul", graphView); std::shared_ptr<Node> mulNode = Mul(mulNodeName); + + mulNode->attributes()->addAttr("isCompensation",0.0); mulNode->getOperator()->setDataType(DataType::Float64); // getDataType(parentNode) mulNode->getOperator()->setBackend("cpu"); @@ -982,9 +991,7 @@ void performSingleShiftApproximation(std::shared_ptr<GraphView> graphView, bool for (std::shared_ptr<Node> node : nodeVector) { - // TODO : use Compensation nodes instead of Mul nodes - - if (isAffine(node) || (node->type() == "Mul")) + if (isAffine(node) || (node->type() == "Mul" && node->attributes()->hasAttr("isCompensation"))) { std::shared_ptr<Node> scalingNode = (*node->getChildren().begin()); diff --git a/src/PTQ/PTQMetaOps.cpp b/src/operator/PTQMetaOps.cpp similarity index 84% rename from src/PTQ/PTQMetaOps.cpp rename to src/operator/PTQMetaOps.cpp index 4c17f9b..facfed2 100644 --- a/src/PTQ/PTQMetaOps.cpp +++ b/src/operator/PTQMetaOps.cpp @@ -9,7 +9,7 @@ * ********************************************************************************/ -#include "aidge/quantization/PTQ/PTQMetaOps.hpp" +#include "aidge/operator/PTQMetaOps.hpp" #include <array> #include <memory> @@ -71,22 +71,6 @@ static std::shared_ptr<Node> getSubNode(std::shared_ptr<GraphView> graphView, st return mulNode; } -void multiplyScalingFactor(std::shared_ptr<Aidge::Node> node,double coeff) -{ - if(node->type() == "Mul" && node->attributes()->hasAttr("isScaling")) - { - auto scalingFactorTensor = std::static_pointer_cast<OperatorTensor>(node->getOperator())->getInput(1); - std::shared_ptr<Tensor> fallback; - const Tensor& localTensor = 
scalingFactorTensor->refCastFrom(fallback, DataType::Float64, "cpu"); - double previousScalingFactor = localTensor.get<double>(0); - std::shared_ptr<Tensor> finalTensor = std::make_shared<Tensor>(Array1D<double, 1> {previousScalingFactor * coeff}); - node->input(1).first->getOperator()->setOutput(0, finalTensor); - } - else - { - Log::warn(" Cannot update the scaling factor on Node of type {} with no scaling tag", node->type()); - } -} void updateScalingFactor(std::shared_ptr<Node> metaOpNode, double scalingFactor) -- GitLab From 48427337c51e5e257d2794d304af0bd5b777529b Mon Sep 17 00:00:00 2001 From: Noam ZERAH <noam.zerah@cea.fr> Date: Mon, 13 Jan 2025 15:56:11 +0000 Subject: [PATCH 07/44] Changing include in python bindings --- python_binding/pybind_PTQ.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/python_binding/pybind_PTQ.cpp b/python_binding/pybind_PTQ.cpp index 2c25dc6..61a3cb9 100644 --- a/python_binding/pybind_PTQ.cpp +++ b/python_binding/pybind_PTQ.cpp @@ -17,8 +17,6 @@ #include "aidge/quantization/PTQ/Clipping.hpp" #include "aidge/quantization/PTQ/CLE.hpp" #include "aidge/quantization/PTQ/PTQ.hpp" -#include "aidge/quantization/PTQ/PTQMetaOps.hpp" - #include "aidge/graph/GraphView.hpp" namespace py = pybind11; -- GitLab From 496491774df40049dcb9e11640514ba0de7956e2 Mon Sep 17 00:00:00 2001 From: Noam ZERAH <noam.zerah@cea.fr> Date: Wed, 15 Jan 2025 11:05:21 +0000 Subject: [PATCH 08/44] rebasing with dev --- include/aidge/quantization_version.h | 2 +- src/PTQ/PTQ.cpp | 48 ++++++++++++---------------- 2 files changed, 22 insertions(+), 28 deletions(-) diff --git a/include/aidge/quantization_version.h b/include/aidge/quantization_version.h index 740621a..d773aa8 100644 --- a/include/aidge/quantization_version.h +++ b/include/aidge/quantization_version.h @@ -6,6 +6,6 @@ static constexpr const int PROJECT_VERSION_MAJOR = 0; static constexpr const int PROJECT_VERSION_MINOR = 3; static constexpr const int PROJECT_VERSION_PATCH = 0; static constexpr const 
char * PROJECT_VERSION = "0.3.0"; -static constexpr const char * PROJECT_GIT_HASH = "b4af1ce"; +static constexpr const char * PROJECT_GIT_HASH = "94747bf"; } #endif // VERSION_H diff --git a/src/PTQ/PTQ.cpp b/src/PTQ/PTQ.cpp index 23d9f01..9dee442 100644 --- a/src/PTQ/PTQ.cpp +++ b/src/PTQ/PTQ.cpp @@ -283,6 +283,7 @@ void insertResidualNodes(std::shared_ptr<GraphView> graphView) std::string residualNodeName = makeUniqueName(parentNode->name() + "_Res", graphView); std::shared_ptr<Node> residualNode = Mul(residualNodeName); residualNode->attributes()->addAttr("isScaling", 0.0); + residualNode->attributes()->addAttr("isResidual", 0.0); //Adding the SF as a producer of the node std::shared_ptr<Tensor> scalingFactorTensor = std::make_shared<Tensor>(Array1D<double, 1> {1.0}); @@ -944,43 +945,36 @@ static void insertCompensationNodes(std::shared_ptr<GraphView> graphView, std::u { // A merging node is always followed by a Quantizer node at this point - if (node->type() == "Quantizer") + if (node->type() == "Quantizer" && node->attributes()->hasAttr("isResidual")) { // check if the Quantizer is a residual one, and insert a compensation node if so ... 
+ // create and insert the multplicative node before the Quantizer - bool prevNodeIsForking = ((node->getParent(0))->getChildren().size() > 1); - bool prevNodeIsAffine = isAffine(node->getParent(0)); - bool insertNode = prevNodeIsForking || !prevNodeIsAffine; - - if (insertNode) - { - // create and insert the multplicative node before the Quantizer - - std::string mulNodeName = makeUniqueName(node->name() + "_Mul", graphView); - std::shared_ptr<Node> mulNode = Mul(mulNodeName); - - mulNode->attributes()->addAttr("isCompensation",0.0); - mulNode->getOperator()->setDataType(DataType::Float64); // getDataType(parentNode) - mulNode->getOperator()->setBackend("cpu"); + std::string mulNodeName = makeUniqueName(node->name() + "_Mul", graphView); + std::shared_ptr<Node> mulNode = Mul(mulNodeName); + + mulNode->attributes()->addAttr("isCompensation",0.0); + mulNode->getOperator()->setDataType(DataType::Float64); // getDataType(parentNode) + mulNode->getOperator()->setBackend("cpu"); - graphView->insertParent(node, mulNode, 0, 0, 0); + graphView->insertParent(node, mulNode, 0, 0, 0); - // Add the coeff producer to the multiplier node + // Add the coeff producer to the multiplier node - std::shared_ptr<Node> coeffProducer = addProducer(mulNode, 1, {1}, ""); - std::shared_ptr<Tensor> coeffTensor = std::make_shared<Tensor>(Array1D<double, 1> {signedMax}); - coeffProducer->getOperator()->setOutput(0, coeffTensor); + std::shared_ptr<Node> coeffProducer = addProducer(mulNode, 1, {1}, ""); + std::shared_ptr<Tensor> coeffTensor = std::make_shared<Tensor>(Array1D<double, 1> {signedMax}); + coeffProducer->getOperator()->setOutput(0, coeffTensor); - coeffProducer->getOperator()->setDataType(DataType::Float64); - coeffProducer->getOperator()->setBackend("cpu"); + coeffProducer->getOperator()->setDataType(DataType::Float64); + coeffProducer->getOperator()->setBackend("cpu"); - graphView->add(coeffProducer); // needed ? + graphView->add(coeffProducer); // needed ? 
- // Adapt the scaling factor value accordingly + // Adapt the scaling factor value accordingly - double currScalingFactor = getScalingFactor(node); - updateScalingFactor(node, currScalingFactor / signedMax); - } + double currScalingFactor = getScalingFactor(node); + updateScalingFactor(node, currScalingFactor / signedMax); + } } } -- GitLab From c9adaf08fdbbddd76a76e60d5811c2cc77660138 Mon Sep 17 00:00:00 2001 From: Noam ZERAH <noam.zerah@cea.fr> Date: Mon, 20 Jan 2025 15:53:11 +0000 Subject: [PATCH 09/44] Fixing isResidual bug in SSA when using tag; replacement of std::cout with Log::debug --- include/aidge/quantization_version.h | 2 +- src/PTQ/PTQ.cpp | 14 +++++++------- src/QAT/QAT_LSQ.cpp | 5 ++--- 3 files changed, 10 insertions(+), 11 deletions(-) diff --git a/include/aidge/quantization_version.h b/include/aidge/quantization_version.h index d773aa8..429e4bd 100644 --- a/include/aidge/quantization_version.h +++ b/include/aidge/quantization_version.h @@ -6,6 +6,6 @@ static constexpr const int PROJECT_VERSION_MAJOR = 0; static constexpr const int PROJECT_VERSION_MINOR = 3; static constexpr const int PROJECT_VERSION_PATCH = 0; static constexpr const char * PROJECT_VERSION = "0.3.0"; -static constexpr const char * PROJECT_GIT_HASH = "94747bf"; +static constexpr const char * PROJECT_GIT_HASH = "e464870"; } #endif // VERSION_H diff --git a/src/PTQ/PTQ.cpp b/src/PTQ/PTQ.cpp index 9dee442..a81b2b7 100644 --- a/src/PTQ/PTQ.cpp +++ b/src/PTQ/PTQ.cpp @@ -14,7 +14,6 @@ #include "aidge/quantization/PTQ/PTQ.hpp" #include "aidge/operator/PTQMetaOps.hpp" - #include "aidge/data/Tensor.hpp" #include "aidge/graph/GraphView.hpp" #include "aidge/graph/Node.hpp" @@ -945,8 +944,9 @@ static void insertCompensationNodes(std::shared_ptr<GraphView> graphView, std::u { // A merging node is always followed by a Quantizer node at this point - if (node->type() == "Quantizer" && node->attributes()->hasAttr("isResidual")) + if (node->type() == "Quantizer" && 
(node->attributes()->hasAttr("isResidual") || !isAffine(node->getParent(0)))) { + // check if the Quantizer is a residual one, and insert a compensation node if so ... // create and insert the multplicative node before the Quantizer @@ -1048,7 +1048,7 @@ static void printRanges(std::shared_ptr<GraphView> graphView, std::map<std::stri auto scheduling = scheduler.getStaticScheduling(); for (auto node : scheduling) if (node->attributes()->hasAttr("isScaling")) - std::cout << node->name() << " range = " << valueRanges[node->name()] << std::endl; + Log::debug("{} range = {}",node->name(),valueRanges[node->name()]); } void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits, std::vector<std::shared_ptr<Tensor>> inputDataSet, Clipping clippingMode, bool noQuant, bool optimizeSigns, bool singleShift, bool useCuda, bool verbose) @@ -1076,13 +1076,13 @@ void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits, Log::info(" Computing the value ranges ..."); std::map<std::string, double> valueRanges = computeRanges(graphView, inputDataSet, true, useCuda); - //std::cout << " === RANGES (BEFORE ADJUST) ===" << std::endl; + //Log:debug("=== RANGES (BEFORE ADJUST) ==="); //printRanges(graphView, valueRanges); Log::info(" Optimizing the clipping values ..."); valueRanges = adjustRanges(clippingMode, valueRanges, nbBits, graphView, inputDataSet, useCuda, verbose); - //std::cout << " === RANGES (AFTER ADJUST) ===" << std::endl; + //Log:debug("=== RANGES (AFTER ADJUST) ==="); //printRanges(graphView, valueRanges); Log::info(" Normalizing the activations ..."); @@ -1103,7 +1103,7 @@ void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits, if (verbose) printScalingFactors(graphView); - //std::cout << " === SCALINGS (BEFORE CAST) ===" << std::endl; + //Log::debug(" === SCALINGS (BEFORE CAST) ==="); //printScalingFactors(graphView); setupDataType(graphView, inputDataSet, initialDataType); @@ -1111,7 +1111,7 @@ void 
quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits, if (useCuda) graphView->setBackend("cuda"); - //std::cout << " === SCALINGS (AFTER CAST) ===" << std::endl; + //Log::debug(" === SCALINGS (AFTER CAST) ==="); //printScalingFactors(graphView); Log::info(" Reseting the scheduler ..."); diff --git a/src/QAT/QAT_LSQ.cpp b/src/QAT/QAT_LSQ.cpp index 04f2027..8a42770 100644 --- a/src/QAT/QAT_LSQ.cpp +++ b/src/QAT/QAT_LSQ.cpp @@ -13,7 +13,6 @@ #include "aidge/operator/LSQ.hpp" #include "aidge/operator/ReLU.hpp" - #include "aidge/data/Tensor.hpp" #include "aidge/graph/GraphView.hpp" #include "aidge/scheduler/SequentialScheduler.hpp" @@ -51,7 +50,7 @@ static bool initStepSize(std::shared_ptr<Node> quantizer) stepSizeProducer->getOperator()->setOutput(0, stepSizeTensor); - std::cout << " [ INIT STEP SIZE = " << stepSize << " ] " << std::endl; + Log::debug("[ INIT STEP SIZE = {} ]",stepSize); return false; } @@ -138,7 +137,7 @@ void QuantLSQ::setupQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBi void QuantLSQ::devLSQ(std::shared_ptr<Tensor> tensor) { float mean = (tensor->mean()).get<float> (0); - std::cout << " MEAN = " << mean << std::endl; + Log::debug("MEAN = {}",mean); } } \ No newline at end of file -- GitLab From f1eb07af4e073ace093647ae7d80e4481d2eb9aa Mon Sep 17 00:00:00 2001 From: Noam ZERAH <noam.zerah@cea.fr> Date: Fri, 17 Jan 2025 15:29:47 +0000 Subject: [PATCH 10/44] Starting Work on adding Scaling Nodes (Tagged Mul) below Producers --- aidge_quantization/_version.py | 2 +- include/aidge/quantization/PTQ/PTQ.hpp | 1 + src/PTQ/CLE.cpp | 43 +++++- src/PTQ/PTQ.cpp | 178 +++++++++++++++++++++---- 4 files changed, 193 insertions(+), 31 deletions(-) diff --git a/aidge_quantization/_version.py b/aidge_quantization/_version.py index d4ec20e..2d34d35 100644 --- a/aidge_quantization/_version.py +++ b/aidge_quantization/_version.py @@ -1,4 +1,4 @@ # file generated by setuptools_scm # don't change, don't track in version control 
__version__ = version = '0.2.1.dev60+g8044e79.d20250106' -__version_tuple__ = version_tuple = (0, 2, 1, 'dev60', 'g8044e79.d20250106') +__version_tuple__ = version_tuple = (0, 2, 1, 'dev60', 'g8044e79.d20250106') \ No newline at end of file diff --git a/include/aidge/quantization/PTQ/PTQ.hpp b/include/aidge/quantization/PTQ/PTQ.hpp index e7cbddd..74a49c8 100644 --- a/include/aidge/quantization/PTQ/PTQ.hpp +++ b/include/aidge/quantization/PTQ/PTQ.hpp @@ -66,6 +66,7 @@ namespace Aidge { * @return The scheduled vector of nodes */ std::vector<std::shared_ptr<Node>> retrieveNodeVector(std::shared_ptr<GraphView> graphView, bool newSchedule = true, bool verbose = false); + bool insertScalingBelowProducer(std::shared_ptr<Node> node,double sf, std::shared_ptr<GraphView> graphView); /** * @brief Determine whether an input GraphView can be quantized or not. diff --git a/src/PTQ/CLE.cpp b/src/PTQ/CLE.cpp index 0fe9575..d0383eb 100644 --- a/src/PTQ/CLE.cpp +++ b/src/PTQ/CLE.cpp @@ -130,17 +130,48 @@ void crossLayerEqualization(std::shared_ptr<GraphView> graphView, double targetD { std::shared_ptr<Node> n1 = affineNodeVector[i]; std::shared_ptr<Node> n2 = affineNodeVector[i+1]; + std::cout << "CLE\n"; + std::cout << "node name is: " << n1->name() << std::endl; + std::cout << "node name is: " << n2->name() << std::endl; + std::cout << "node parent name is: " << n1->name() << std::endl; + std::cout << "node parent name is: " << n2->name() << std::endl; + + std::shared_ptr<Aidge::Tensor> n1localTensor, n2localTensor; + if(n1->getParent(1)->attributes()->hasAttr("isProducerScaling")) + { + std::static_pointer_cast<OperatorTensor>(n1->getParent(1)->getOperator())->getOutput(0)->print(); + n1localTensor = std::static_pointer_cast<OperatorTensor>(n1->getParent(1)->getOperator())->getOutput(0); + } + else + { + n1localTensor = getWeightTensor(n1); + } + + if(n2->getParent(1)->attributes()->hasAttr("isProducerScaling")) + { + n2localTensor = 
std::static_pointer_cast<OperatorTensor>(n2->getParent(1)->getOperator())->getOutput(0); + + } + else + { + n2localTensor = getWeightTensor(n2); + } + + double r1 = getTensorAbsoluteMax(n1localTensor); + double r2 = getTensorAbsoluteMax(n2localTensor); + std::cout << "valeur: " << r1 <<std::endl; + std::cout << "valeur: " << r2 <<std::endl; - double r1 = getTensorAbsoluteMax(getWeightTensor(n1)); - double r2 = getTensorAbsoluteMax(getWeightTensor(n2)); double s1 = std::sqrt(r1 * r2) / r1; double s2 = std::sqrt(r1 * r2) / r2; - rescaleTensor(getWeightTensor(n1), s1); - rescaleTensor(getWeightTensor(n2), s2); - - rescaleTensor(getBiasTensor(n1), s1); + //rescaleTensor(getWeightTensor(n1), s1); + insertScalingBelowProducer(n1->getParent(1),s1,graphView); + //rescaleTensor(getWeightTensor(n2), s2); + insertScalingBelowProducer(n2->getParent(1),s2,graphView); + //rescaleTensor(getBiasTensor(n1), s1); + insertScalingBelowProducer(n1->getParent(2),s1,graphView); double rangeDelta = std::abs(r1 - r2); if (rangeDelta > maxRangeDelta) diff --git a/src/PTQ/PTQ.cpp b/src/PTQ/PTQ.cpp index a81b2b7..25e5f20 100644 --- a/src/PTQ/PTQ.cpp +++ b/src/PTQ/PTQ.cpp @@ -54,6 +54,120 @@ bool isMerging(std::shared_ptr<Node> node) { return (mergingNodeTypes.find(node->type()) != mergingNodeTypes.end()); } +static int getInputIndex(std::shared_ptr<Node> node, std::shared_ptr<Node> parentNode) +{ + int index = 0; + while (node->getParent(index) != parentNode) + index++; + return index; +} + +void multiplyScalingFactor(std::shared_ptr<Aidge::Node> node,double coeff) +{ + if(node->type() == "Mul" && node->attributes()->hasAttr("isProducerScaling")) + { + auto scalingFactorTensor = std::static_pointer_cast<OperatorTensor>(node->getOperator())->getInput(1); + std::shared_ptr<Tensor> fallback; + const Tensor& localTensor = scalingFactorTensor->refCastFrom(fallback, DataType::Float64, "cpu"); + double previousScalingFactor = localTensor.get<double>(0); + std::shared_ptr<Tensor> finalTensor = 
std::make_shared<Tensor>(Array1D<double, 1> {previousScalingFactor * coeff}); + node->input(1).first->getOperator()->setOutput(0, finalTensor); + } + else + { + Log::warn(" Cannot update the scaling factor on Node of type {} with no scaling tag", node->type()); + } +} +bool insertRoundBelowProducer(std::shared_ptr<Node> node,std::shared_ptr<GraphView> graphView) +{ + std::shared_ptr<Aidge::Node> roundNode = Round(node->name() + "_Round"); + roundNode->getOperator()->setDataType(DataType::Float64); // getDataType(parentNode) + roundNode->getOperator()->setBackend("cpu"); + + if (node->getChildren().size() > 0) + { + // SCALING NODE INSERTION + + // We always have one output from Affine and Add nodes, but possibly multiple childs + std::vector<std::shared_ptr<Node>> nextNodes = node->getChildren(0); + + // For each node in nextNodes store the connexion index + std::vector<int> inputIndices(nextNodes.size()); + for (std::size_t i = 0; i < nextNodes.size(); i++) + inputIndices[i] = getInputIndex(nextNodes[i], node); + + for (std::shared_ptr<Node> nextNode : nextNodes) + node->removeChild(nextNode, 0); + + node->addChild(roundNode, 0, 0); + + for (std::size_t i = 0; i < nextNodes.size(); i++) + roundNode->addChild(nextNodes[i], 0, inputIndices[i]); + graphView->add(roundNode); + } + else + { + Log::warn("Unusual producer "); + node->addChild(roundNode, 0, 0); + graphView->add(roundNode); + } + return true; +} +bool insertScalingBelowProducer(std::shared_ptr<Node> node,double sf, std::shared_ptr<GraphView> graphView) +{ + if(node->attributes()->hasAttr("isProducerScaling")) + { + multiplyScalingFactor(node,sf); + return true; + } + if(node->type() != "Producer") + { + Log::warn(" Cannot apply a scaling factor on a node which is not a producer", node->type()); + return false; + } + std::string scalingNodeName = makeUniqueName(node->name() + "_ProducerScaling", graphView); + + std::shared_ptr<Aidge::Node> scalingNode = Mul(scalingNodeName); + 
scalingNode->attributes()->addAttr("isProducerScaling",0.0); + + std::shared_ptr<Tensor> scalingFactorTensor = std::make_shared<Tensor>(Array1D<double, 1> {sf}); + std::shared_ptr<Node> scalingFactorProducer = addProducer(scalingNode, 1, {1}, "Factor"); + scalingFactorProducer->getOperator()->setOutput(0, scalingFactorTensor); + graphView->add(scalingFactorProducer); + + scalingNode->getOperator()->setDataType(DataType::Float64); // getDataType(parentNode) + scalingNode->getOperator()->setBackend("cpu"); + + if (node->getChildren().size() > 0) + { + // SCALING NODE INSERTION + + // We always have one output from Affine and Add nodes, but possibly multiple childs + std::vector<std::shared_ptr<Node>> nextNodes = node->getChildren(0); + + // For each node in nextNodes store the connexion index + std::vector<int> inputIndices(nextNodes.size()); + for (std::size_t i = 0; i < nextNodes.size(); i++) + inputIndices[i] = getInputIndex(nextNodes[i], node); + + for (std::shared_ptr<Node> nextNode : nextNodes) + node->removeChild(nextNode, 0); + + node->addChild(scalingNode, 0, 0); + + for (std::size_t i = 0; i < nextNodes.size(); i++) + scalingNode->addChild(nextNodes[i], 0, inputIndices[i]); + + graphView->add(scalingNode); + } + else + { + Log::warn("Unusual producer "); + node->addChild(scalingNode, 0, 0); + graphView->add(scalingNode); + } + return true; +} bool checkArchitecture(std::shared_ptr<GraphView> graphView) { @@ -167,6 +281,15 @@ static std::vector<std::shared_ptr<Node>> removeMatchingNodes(std::vector<std::s return remainingNodes; } +static std::vector<std::shared_ptr<Node>> removeProdScalingNodes(std::vector<std::shared_ptr<Node>> nodeVector) +{ + std::vector<std::shared_ptr<Node>> remainingNodes; + for (std::shared_ptr<Node> node : nodeVector) + if (!node->attributes()->hasAttr("isProducerScaling")) + remainingNodes.push_back(node); + + return remainingNodes; +} static void fixScheduling(std::vector<std::shared_ptr<Node>>& nodeVector) { @@ -211,6 +334,7 @@ 
std::vector<std::shared_ptr<Node>> retrieveNodeVector(std::shared_ptr<GraphView> fixScheduling(nodeVector); nodeVector = removeMatchingNodes(nodeVector, "Producer"); + nodeVector = removeProdScalingNodes(nodeVector); if (verbose) { @@ -300,13 +424,6 @@ void insertResidualNodes(std::shared_ptr<GraphView> graphView) } } -static int getInputIndex(std::shared_ptr<Node> node, std::shared_ptr<Node> parentNode) -{ - int index = 0; - while (node->getParent(index) != parentNode) - index++; - return index; -} void insertScalingNodes(std::shared_ptr<GraphView> graphView) { @@ -429,7 +546,8 @@ void normalizeParameters(std::shared_ptr<GraphView> graphView) std::shared_ptr<Tensor> weightTensor = getWeightTensor(node); double scaling = getTensorAbsoluteMax(weightTensor); double ratio = 1.0 / scaling; - rescaleTensor(weightTensor, ratio); + //rescaleTensor(weightTensor, ratio); + insertScalingBelowProducer(node->getParent(1),ratio,graphView); // Accumulate the ratio if (node == firstNode) @@ -447,7 +565,8 @@ void normalizeParameters(std::shared_ptr<GraphView> graphView) if (nodeHasBias(node)) { std::shared_ptr<Tensor> biasTensor = getBiasTensor(node); - rescaleTensor(biasTensor, accumulatedRatios[node->name()] ); + //rescaleTensor(biasTensor, accumulatedRatios[node->name()] ); + insertScalingBelowProducer(node->getParent(2),accumulatedRatios[node->name()],graphView); } } @@ -606,7 +725,7 @@ void normalizeActivations(std::shared_ptr<GraphView> graphView, std::map<std::st for (std::shared_ptr<Node> node : nodeVector) { // Seamless scaling factor propagation ... - + if (isAffine(node) || isSeamless(node) || node->type() == "ReLU") { if (node == firstNode) @@ -620,11 +739,13 @@ void normalizeActivations(std::shared_ptr<GraphView> graphView, std::map<std::st } } + // Here prevNode is either a 'Affine' or a 'Merging' // => do not split the cases, just handle the bias ... if (node->attributes()->hasAttr("isScaling")) { + // retrieve the previous scaling factor ... 
std::shared_ptr<Node> prevNode = node->getParent(0); double prevScalingFactor = scalingFactors[prevNode->name()]; @@ -640,11 +761,13 @@ void normalizeActivations(std::shared_ptr<GraphView> graphView, std::map<std::st if (isAffine(prevNode)) { + bool prevNodeHasBias = nodeHasBias(prevNode); if (prevNodeHasBias) - { + { std::shared_ptr<Tensor> biasTensor = getBiasTensor(prevNode); - rescaleTensor(biasTensor, 1.0 / prevScalingFactor); + //rescaleTensor(biasTensor, 1.0 / prevScalingFactor); + insertScalingBelowProducer(prevNode->getParent(2),1.0 / prevScalingFactor,graphView); } } } @@ -842,10 +965,12 @@ void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_ // Rescale the weight tensor std::shared_ptr<Tensor> weightTensor = getWeightTensor(node); - rescaleTensor(weightTensor, signedMax); + //rescaleTensor(weightTensor, signedMax); + insertScalingBelowProducer(node->getParent(1),signedMax,graphView); if (!noQuant) - roundTensor(weightTensor); + insertRoundBelowProducer(node->getParent(1),graphView); + //roundTensor(weightTensor); // Rescale the bias tensor @@ -856,10 +981,12 @@ void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_ std::shared_ptr<Tensor> biasTensor = getBiasTensor(node); - rescaleTensor(biasTensor, rescaling); + //rescaleTensor(biasTensor, rescaling); + insertScalingBelowProducer(node->getParent(2),rescaling,graphView); if (!noQuant) - roundTensor(biasTensor); + insertRoundBelowProducer(node->getParent(2),graphView); + //roundTensor(biasTensor); } // Compensate the rescaling using the next Scaling node @@ -997,17 +1124,20 @@ void performSingleShiftApproximation(std::shared_ptr<GraphView> graphView, bool double ratio = base / approx; - std::shared_ptr<Tensor> weightTensor = getWeightTensor(node); - rescaleTensor(weightTensor, ratio); + //std::shared_ptr<Tensor> weightTensor = getWeightTensor(node); + //rescaleTensor(weightTensor, ratio); + 
insertScalingBelowProducer(node->getParent(1),ratio,graphView); if (!noQuant) - roundTensor(weightTensor); + insertRoundBelowProducer(node->getParent(1),graphView); if (nodeHasBias(node)) { - std::shared_ptr<Tensor> biasTensor = getBiasTensor(node); - rescaleTensor(biasTensor, ratio); + //std::shared_ptr<Tensor> biasTensor = getBiasTensor(node); + //rescaleTensor(biasTensor, ratio); + insertScalingBelowProducer(node->getParent(2),ratio,graphView); + if (!noQuant) - roundTensor(biasTensor); + insertRoundBelowProducer(node->getParent(2),graphView); } } } @@ -1084,7 +1214,6 @@ void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits, //Log:debug("=== RANGES (AFTER ADJUST) ==="); //printRanges(graphView, valueRanges); - Log::info(" Normalizing the activations ..."); normalizeActivations(graphView, valueRanges); @@ -1143,7 +1272,8 @@ void clearBiases(std::shared_ptr<GraphView> graphView) for (std::shared_ptr<Node> node : graphView->getNodes()) { if (node->type() == "FC" || node->type() == "Conv2D") { std::shared_ptr<Tensor> biasTensor = std::static_pointer_cast<OperatorTensor>(node->getOperator())->getInput(2); - rescaleTensor(biasTensor, 0); + //rescaleTensor(biasTensor, 0); + insertScalingBelowProducer(node->getParent(2),0,graphView); } } } -- GitLab From cf51e87cbaf34ea4372a2cfdf64ce9d32b3bfc28 Mon Sep 17 00:00:00 2001 From: Noam ZERAH <noam.zerah@cea.fr> Date: Mon, 20 Jan 2025 14:22:55 +0000 Subject: [PATCH 11/44] Correction the Single Shift Approximation error with the new method for updating weight and bias --- include/aidge/quantization_version.h | 2 +- src/PTQ/PTQ.cpp | 13 ++++++++++++- src/operator/PTQMetaOps.cpp | 16 ++++++++-------- 3 files changed, 21 insertions(+), 10 deletions(-) diff --git a/include/aidge/quantization_version.h b/include/aidge/quantization_version.h index 429e4bd..37853e3 100644 --- a/include/aidge/quantization_version.h +++ b/include/aidge/quantization_version.h @@ -6,6 +6,6 @@ static constexpr const int 
PROJECT_VERSION_MAJOR = 0; static constexpr const int PROJECT_VERSION_MINOR = 3; static constexpr const int PROJECT_VERSION_PATCH = 0; static constexpr const char * PROJECT_VERSION = "0.3.0"; -static constexpr const char * PROJECT_GIT_HASH = "e464870"; +static constexpr const char * PROJECT_GIT_HASH = "03286c7"; } #endif // VERSION_H diff --git a/src/PTQ/PTQ.cpp b/src/PTQ/PTQ.cpp index 25e5f20..fe2aef4 100644 --- a/src/PTQ/PTQ.cpp +++ b/src/PTQ/PTQ.cpp @@ -111,18 +111,27 @@ bool insertRoundBelowProducer(std::shared_ptr<Node> node,std::shared_ptr<GraphVi node->addChild(roundNode, 0, 0); graphView->add(roundNode); } + roundNode->attributes()->addAttr("isProducerRounding",0.0); return true; } bool insertScalingBelowProducer(std::shared_ptr<Node> node,double sf, std::shared_ptr<GraphView> graphView) { + if(node->attributes()->hasAttr("isProducerRounding")) + { + //In this case we 'bump' the node to the one above him (an actual ProducerScaling) + // because the round node is not usable (only used when SSA is enabled) + node = node->getParent(0); + } if(node->attributes()->hasAttr("isProducerScaling")) { + // We accumulate the multiples scaling factors by multiplying the SF of the ProducerScaling node + // (adding new nodes each time would make the graph unusable) multiplyScalingFactor(node,sf); return true; } if(node->type() != "Producer") { - Log::warn(" Cannot apply a scaling factor on a node which is not a producer", node->type()); + Log::warn(" Cannot apply a scaling factor on a node which is not a producer on a node of type {} whose name is {}", node->type(),node->name()); return false; } std::string scalingNodeName = makeUniqueName(node->name() + "_ProducerScaling", graphView); @@ -1126,6 +1135,7 @@ void performSingleShiftApproximation(std::shared_ptr<GraphView> graphView, bool //std::shared_ptr<Tensor> weightTensor = getWeightTensor(node); //rescaleTensor(weightTensor, ratio); + Log::warn("A\n"); insertScalingBelowProducer(node->getParent(1),ratio,graphView); if 
(!noQuant) insertRoundBelowProducer(node->getParent(1),graphView); @@ -1134,6 +1144,7 @@ void performSingleShiftApproximation(std::shared_ptr<GraphView> graphView, bool { //std::shared_ptr<Tensor> biasTensor = getBiasTensor(node); //rescaleTensor(biasTensor, ratio); + Log::warn("B\n"); insertScalingBelowProducer(node->getParent(2),ratio,graphView); if (!noQuant) diff --git a/src/operator/PTQMetaOps.cpp b/src/operator/PTQMetaOps.cpp index facfed2..105d4e8 100644 --- a/src/operator/PTQMetaOps.cpp +++ b/src/operator/PTQMetaOps.cpp @@ -75,8 +75,8 @@ static std::shared_ptr<Node> getSubNode(std::shared_ptr<GraphView> graphView, st void updateScalingFactor(std::shared_ptr<Node> metaOpNode, double scalingFactor) { - if(metaOpNode->type() != "Quantizer") - Log::warn(" Cannot update the scaling factor on Node of type {}", metaOpNode->type()); + if(metaOpNode->type() != "Scaling" && metaOpNode->type() != "Quantizer") + Log::warn("Cannot update the scaling factor on Node of type {}", metaOpNode->type()); std::shared_ptr<Tensor> scalingFactorTensor = std::make_shared<Tensor>(Array1D<double, 1> {scalingFactor}); @@ -85,15 +85,15 @@ void updateScalingFactor(std::shared_ptr<Node> metaOpNode, double scalingFactor) std::shared_ptr<Node> mulNode = getSubNode(metaOp->getMicroGraph(), "Mul"); if (!mulNode) - Log::warn(" Invalid PTQ MetaOperator, no Mul node found inside ! "); + Log::warn("Invalid PTQ MetaOperator, no Mul node found inside ! 
"); mulNode->input(1).first->getOperator()->setOutput(0, scalingFactorTensor); } double getScalingFactor(std::shared_ptr<Node> MetaOpNode) { - if (MetaOpNode->type() != "Quantizer") { - Log::warn(" Cannot get the scaling factor on Node of type {}", MetaOpNode->type()); + if (MetaOpNode->type() != "Scaling" && MetaOpNode->type() != "Quantizer") { + Log::warn("Cannot get the scaling factor on Node of type {}", MetaOpNode->type()); return 0; } @@ -102,7 +102,7 @@ double getScalingFactor(std::shared_ptr<Node> MetaOpNode) std::shared_ptr<Node> mulNode = getSubNode(metaOp->getMicroGraph(), "Mul"); if (!mulNode) { - Log::warn(" Invalid PTQ MetaOperator, no Mul found inside node of type {}", MetaOpNode->type()); + Log::warn("Invalid PTQ MetaOperator, no Mul found inside node of type {}", MetaOpNode->type()); return 0; } @@ -117,7 +117,7 @@ double getScalingFactor(std::shared_ptr<Node> MetaOpNode) void setClipRange(std::shared_ptr<Node> quantizerNode, double min, double max) { if (quantizerNode->type() != "Quantizer") { - Log::warn(" Cannot set the clipping range on Node of type {}", quantizerNode->type()); + Log::warn("Cannot set the clipping range on Node of type {}", quantizerNode->type()); return; } @@ -126,7 +126,7 @@ void setClipRange(std::shared_ptr<Node> quantizerNode, double min, double max) std::shared_ptr<Node> clipNode = getSubNode(metaOp->getMicroGraph(), "Clip"); if (!clipNode) { - Log::warn(" Invalid PTQ MetaOperator, no Clip found inside node of type {}", quantizerNode->type()); + Log::warn("Invalid PTQ MetaOperator, no Clip found inside node of type {}", quantizerNode->type()); return; } -- GitLab From a749505df0e1632345a5ddfb2fd6f38436ab9f83 Mon Sep 17 00:00:00 2001 From: Noam ZERAH <noam.zerah@cea.fr> Date: Tue, 21 Jan 2025 14:15:26 +0000 Subject: [PATCH 12/44] Fixing bug related to the lower result in resnet(switching the network to float64 solved it --- include/aidge/quantization_version.h | 2 +- src/PTQ/CLE.cpp | 1 + src/PTQ/PTQ.cpp | 22 
+--------------------- 3 files changed, 3 insertions(+), 22 deletions(-) diff --git a/include/aidge/quantization_version.h b/include/aidge/quantization_version.h index 37853e3..2e53dfc 100644 --- a/include/aidge/quantization_version.h +++ b/include/aidge/quantization_version.h @@ -6,6 +6,6 @@ static constexpr const int PROJECT_VERSION_MAJOR = 0; static constexpr const int PROJECT_VERSION_MINOR = 3; static constexpr const int PROJECT_VERSION_PATCH = 0; static constexpr const char * PROJECT_VERSION = "0.3.0"; -static constexpr const char * PROJECT_GIT_HASH = "03286c7"; +static constexpr const char * PROJECT_GIT_HASH = "01880af"; } #endif // VERSION_H diff --git a/src/PTQ/CLE.cpp b/src/PTQ/CLE.cpp index d0383eb..d47a2c2 100644 --- a/src/PTQ/CLE.cpp +++ b/src/PTQ/CLE.cpp @@ -14,6 +14,7 @@ #include "aidge/quantization/PTQ/PTQ.hpp" #include "aidge/graph/GraphView.hpp" + #include "aidge/scheduler/SequentialScheduler.hpp" #include "aidge/scheduler/Scheduler.hpp" #include "aidge/utils/Log.hpp" diff --git a/src/PTQ/PTQ.cpp b/src/PTQ/PTQ.cpp index fe2aef4..60326e8 100644 --- a/src/PTQ/PTQ.cpp +++ b/src/PTQ/PTQ.cpp @@ -363,7 +363,6 @@ static std::shared_ptr<Node> getFirstNode(std::shared_ptr<GraphView> graphView) void prepareNetwork(std::shared_ptr<GraphView> graphView) { removeFlatten(graphView); - sanitizeNodeNames(graphView); bool containsBatchNorm = false; @@ -972,30 +971,23 @@ void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_ if (isAffine(node)) { // Rescale the weight tensor - std::shared_ptr<Tensor> weightTensor = getWeightTensor(node); - //rescaleTensor(weightTensor, signedMax); insertScalingBelowProducer(node->getParent(1),signedMax,graphView); if (!noQuant) insertRoundBelowProducer(node->getParent(1),graphView); - //roundTensor(weightTensor); // Rescale the bias tensor - if (nodeHasBias(node)) { bool inputIsUnsigned = signMap[node->name()].first; double rescaling = inputIsUnsigned ? 
unsignedMax * signedMax : signedMax * signedMax; - - + std::shared_ptr<Tensor> biasTensor = getBiasTensor(node); - //rescaleTensor(biasTensor, rescaling); insertScalingBelowProducer(node->getParent(2),rescaling,graphView); if (!noQuant) insertRoundBelowProducer(node->getParent(2),graphView); - //roundTensor(biasTensor); } // Compensate the rescaling using the next Scaling node @@ -1133,18 +1125,12 @@ void performSingleShiftApproximation(std::shared_ptr<GraphView> graphView, bool double ratio = base / approx; - //std::shared_ptr<Tensor> weightTensor = getWeightTensor(node); - //rescaleTensor(weightTensor, ratio); - Log::warn("A\n"); insertScalingBelowProducer(node->getParent(1),ratio,graphView); if (!noQuant) insertRoundBelowProducer(node->getParent(1),graphView); if (nodeHasBias(node)) { - //std::shared_ptr<Tensor> biasTensor = getBiasTensor(node); - //rescaleTensor(biasTensor, ratio); - Log::warn("B\n"); insertScalingBelowProducer(node->getParent(2),ratio,graphView); if (!noQuant) @@ -1289,10 +1275,4 @@ void clearBiases(std::shared_ptr<GraphView> graphView) } } -void devPTQ(std::shared_ptr<GraphView> graphView) -{ - for (std::shared_ptr<Node> node : graphView->getNodes()) - fmt::println(" UUU : {}", node->name()); -} - } -- GitLab From 5ec65431e486d00adb4ca7ac432786a0b7467858 Mon Sep 17 00:00:00 2001 From: Noam ZERAH <noam.zerah@cea.fr> Date: Wed, 22 Jan 2025 10:27:01 +0000 Subject: [PATCH 13/44] Rebasing on dev --- include/aidge/quantization_version.h | 2 +- src/PTQ/CLE.cpp | 9 +-------- src/PTQ/PTQ.cpp | 22 +--------------------- 3 files changed, 3 insertions(+), 30 deletions(-) diff --git a/include/aidge/quantization_version.h b/include/aidge/quantization_version.h index 2e53dfc..5a7e98b 100644 --- a/include/aidge/quantization_version.h +++ b/include/aidge/quantization_version.h @@ -6,6 +6,6 @@ static constexpr const int PROJECT_VERSION_MAJOR = 0; static constexpr const int PROJECT_VERSION_MINOR = 3; static constexpr const int PROJECT_VERSION_PATCH = 0; static 
constexpr const char * PROJECT_VERSION = "0.3.0"; -static constexpr const char * PROJECT_GIT_HASH = "01880af"; +static constexpr const char * PROJECT_GIT_HASH = "a749505"; } #endif // VERSION_H diff --git a/src/PTQ/CLE.cpp b/src/PTQ/CLE.cpp index d47a2c2..52e4ec0 100644 --- a/src/PTQ/CLE.cpp +++ b/src/PTQ/CLE.cpp @@ -45,7 +45,7 @@ static void rescaleTensor(std::shared_ptr<Tensor> tensor, double scaling) mulOp.setDataType(tensor->dataType()); mulOp.setBackend(tensor->backend()); - std::shared_ptr<Aidge::Tensor> scalingTensor = std::make_shared<Aidge::Tensor>(Aidge::Array1D<float, 1> {scaling}); + std::shared_ptr<Aidge::Tensor> scalingTensor = std::make_shared<Aidge::Tensor>(Aidge::Array1D<double, 1> {scaling}); scalingTensor->setDataType(tensor->dataType()); scalingTensor->setBackend(tensor->backend()); @@ -131,11 +131,6 @@ void crossLayerEqualization(std::shared_ptr<GraphView> graphView, double targetD { std::shared_ptr<Node> n1 = affineNodeVector[i]; std::shared_ptr<Node> n2 = affineNodeVector[i+1]; - std::cout << "CLE\n"; - std::cout << "node name is: " << n1->name() << std::endl; - std::cout << "node name is: " << n2->name() << std::endl; - std::cout << "node parent name is: " << n1->name() << std::endl; - std::cout << "node parent name is: " << n2->name() << std::endl; std::shared_ptr<Aidge::Tensor> n1localTensor, n2localTensor; if(n1->getParent(1)->attributes()->hasAttr("isProducerScaling")) @@ -160,8 +155,6 @@ void crossLayerEqualization(std::shared_ptr<GraphView> graphView, double targetD double r1 = getTensorAbsoluteMax(n1localTensor); double r2 = getTensorAbsoluteMax(n2localTensor); - std::cout << "valeur: " << r1 <<std::endl; - std::cout << "valeur: " << r2 <<std::endl; double s1 = std::sqrt(r1 * r2) / r1; diff --git a/src/PTQ/PTQ.cpp b/src/PTQ/PTQ.cpp index 60326e8..108be02 100644 --- a/src/PTQ/PTQ.cpp +++ b/src/PTQ/PTQ.cpp @@ -26,11 +26,8 @@ #include "aidge/operator/ReLU.hpp" #include "aidge/operator/BatchNorm.hpp" #include "aidge/operator/Conv.hpp" - 
#include "aidge/operator/ArgMax.hpp" -#include "aidge/operator/Abs.hpp" #include "aidge/operator/Reshape.hpp" -#include "aidge/operator/Round.hpp" #include "aidge/recipes/Recipes.hpp" @@ -64,7 +61,7 @@ static int getInputIndex(std::shared_ptr<Node> node, std::shared_ptr<Node> paren void multiplyScalingFactor(std::shared_ptr<Aidge::Node> node,double coeff) { - if(node->type() == "Mul" && node->attributes()->hasAttr("isProducerScaling")) + if(node->type() == "Mul" && (node->attributes()->hasAttr("isProducerScaling") || node->attributes()->hasAttr("isScaling"))) { auto scalingFactorTensor = std::static_pointer_cast<OperatorTensor>(node->getOperator())->getInput(1); std::shared_ptr<Tensor> fallback; @@ -194,23 +191,6 @@ bool checkArchitecture(std::shared_ptr<GraphView> graphView) return true; } -void multiplyScalingFactor(std::shared_ptr<Aidge::Node> node,double coeff) -{ - if(node->type() == "Mul" && node->attributes()->hasAttr("isScaling")) - { - auto scalingFactorTensor = std::static_pointer_cast<OperatorTensor>(node->getOperator())->getInput(1); - std::shared_ptr<Tensor> fallback; - const Tensor& localTensor = scalingFactorTensor->refCastFrom(fallback, DataType::Float64, "cpu"); - double previousScalingFactor = localTensor.get<double>(0); - std::shared_ptr<Tensor> finalTensor = std::make_shared<Tensor>(Array1D<double, 1> {previousScalingFactor * coeff}); - node->input(1).first->getOperator()->setOutput(0, finalTensor); - } - else - { - Log::warn(" Cannot update the scaling factor on Node of type {} with no scaling tag", node->type()); - } -} - static void rescaleTensor(std::shared_ptr<Tensor> tensor, float scaling) { auto mulOp = Mul_Op(); -- GitLab From c374ce49cd3a60cab4521c1fb4b10abc8d1e6f43 Mon Sep 17 00:00:00 2001 From: Noam ZERAH <noam.zerah@cea.fr> Date: Wed, 22 Jan 2025 13:06:51 +0000 Subject: [PATCH 14/44] Correcting Log::warn into AIDGE_ASSERT to make the code safer --- include/aidge/quantization/PTQ/PTQ.hpp | 21 ++++- include/aidge/quantization_version.h 
| 2 +- src/PTQ/PTQ.cpp | 110 +++++++++---------------- 3 files changed, 61 insertions(+), 72 deletions(-) diff --git a/include/aidge/quantization/PTQ/PTQ.hpp b/include/aidge/quantization/PTQ/PTQ.hpp index 74a49c8..e1ef529 100644 --- a/include/aidge/quantization/PTQ/PTQ.hpp +++ b/include/aidge/quantization/PTQ/PTQ.hpp @@ -66,7 +66,26 @@ namespace Aidge { * @return The scheduled vector of nodes */ std::vector<std::shared_ptr<Node>> retrieveNodeVector(std::shared_ptr<GraphView> graphView, bool newSchedule = true, bool verbose = false); - bool insertScalingBelowProducer(std::shared_ptr<Node> node,double sf, std::shared_ptr<GraphView> graphView); + + /** + * @brief Inserts a scaling node below the given producer node in the graph view. + * If the node is already a producer scaling node, it accumulates the scaling factor by multiplyins its value directly. + * + * @param node A shared pointer to the producer node where the scaling node will be inserted (below). + * @param scalingFactor The scaling factor to apply. + * @param graphView A shared pointer to the graph view in which the nodes are located. + * @return True if the scaling node was successfully inserted or the scaling factor was accumulated; False otherwise. + */ + bool insertScalingBelowProducer(std::shared_ptr<Node> node, double scalingFactor, std::shared_ptr<GraphView> graphView); + + /** + * @brief Inserts a rounding node below the given producer (also below its ows producerScaling) node in the graph view. + * + * @param node A shared pointer to the producer node where the rounding node will be inserted. + * @param graphView A shared pointer to the graph view in which the nodes are located. + * @return True if the rounding node was successfully inserted; False otherwise. + */ + bool insertRoundBelowProducer(std::shared_ptr<Node> node, std::shared_ptr<GraphView> graphView); /** * @brief Determine whether an input GraphView can be quantized or not. 
diff --git a/include/aidge/quantization_version.h b/include/aidge/quantization_version.h index 5a7e98b..9b4e3de 100644 --- a/include/aidge/quantization_version.h +++ b/include/aidge/quantization_version.h @@ -6,6 +6,6 @@ static constexpr const int PROJECT_VERSION_MAJOR = 0; static constexpr const int PROJECT_VERSION_MINOR = 3; static constexpr const int PROJECT_VERSION_PATCH = 0; static constexpr const char * PROJECT_VERSION = "0.3.0"; -static constexpr const char * PROJECT_GIT_HASH = "a749505"; +static constexpr const char * PROJECT_GIT_HASH = "5ec6543"; } #endif // VERSION_H diff --git a/src/PTQ/PTQ.cpp b/src/PTQ/PTQ.cpp index 108be02..bda0ae1 100644 --- a/src/PTQ/PTQ.cpp +++ b/src/PTQ/PTQ.cpp @@ -61,57 +61,39 @@ static int getInputIndex(std::shared_ptr<Node> node, std::shared_ptr<Node> paren void multiplyScalingFactor(std::shared_ptr<Aidge::Node> node,double coeff) { - if(node->type() == "Mul" && (node->attributes()->hasAttr("isProducerScaling") || node->attributes()->hasAttr("isScaling"))) - { - auto scalingFactorTensor = std::static_pointer_cast<OperatorTensor>(node->getOperator())->getInput(1); - std::shared_ptr<Tensor> fallback; - const Tensor& localTensor = scalingFactorTensor->refCastFrom(fallback, DataType::Float64, "cpu"); - double previousScalingFactor = localTensor.get<double>(0); - std::shared_ptr<Tensor> finalTensor = std::make_shared<Tensor>(Array1D<double, 1> {previousScalingFactor * coeff}); - node->input(1).first->getOperator()->setOutput(0, finalTensor); - } - else - { - Log::warn(" Cannot update the scaling factor on Node of type {} with no scaling tag", node->type()); - } + AIDGE_ASSERT(node->type() == "Mul" && (node->attributes()->hasAttr("isProducerScaling") || node->attributes()->hasAttr("isScaling")), + "Cannot update the scaling factor on Node of type {} with no scaling tag",node->type()); + auto scalingFactorTensor = std::static_pointer_cast<OperatorTensor>(node->getOperator())->getInput(1); + std::shared_ptr<Tensor> fallback; + const 
Tensor& localTensor = scalingFactorTensor->refCastFrom(fallback, DataType::Float64, "cpu"); + double previousScalingFactor = localTensor.get<double>(0); + std::shared_ptr<Tensor> finalTensor = std::make_shared<Tensor>(Array1D<double, 1> {previousScalingFactor * coeff}); + node->input(1).first->getOperator()->setOutput(0, finalTensor); } bool insertRoundBelowProducer(std::shared_ptr<Node> node,std::shared_ptr<GraphView> graphView) { std::shared_ptr<Aidge::Node> roundNode = Round(node->name() + "_Round"); roundNode->getOperator()->setDataType(DataType::Float64); // getDataType(parentNode) roundNode->getOperator()->setBackend("cpu"); - - if (node->getChildren().size() > 0) - { - // SCALING NODE INSERTION + AIDGE_ASSERT(node->getChildren().size() > 0, "The producer node must have at least one child to insert a scaling node."); + std::vector<std::shared_ptr<Node>> nextNodes = node->getChildren(0); + std::vector<int> inputIndices(nextNodes.size()); + for (std::size_t i = 0; i < nextNodes.size(); i++) + inputIndices[i] = getInputIndex(nextNodes[i], node); - // We always have one output from Affine and Add nodes, but possibly multiple childs - std::vector<std::shared_ptr<Node>> nextNodes = node->getChildren(0); + for (std::shared_ptr<Node> nextNode : nextNodes) + node->removeChild(nextNode, 0); - // For each node in nextNodes store the connexion index - std::vector<int> inputIndices(nextNodes.size()); - for (std::size_t i = 0; i < nextNodes.size(); i++) - inputIndices[i] = getInputIndex(nextNodes[i], node); - - for (std::shared_ptr<Node> nextNode : nextNodes) - node->removeChild(nextNode, 0); - - node->addChild(roundNode, 0, 0); + node->addChild(roundNode, 0, 0); - for (std::size_t i = 0; i < nextNodes.size(); i++) - roundNode->addChild(nextNodes[i], 0, inputIndices[i]); - graphView->add(roundNode); - } - else - { - Log::warn("Unusual producer "); - node->addChild(roundNode, 0, 0); + for (std::size_t i = 0; i < nextNodes.size(); i++) + roundNode->addChild(nextNodes[i], 0, 
inputIndices[i]); graphView->add(roundNode); - } + roundNode->attributes()->addAttr("isProducerRounding",0.0); return true; } -bool insertScalingBelowProducer(std::shared_ptr<Node> node,double sf, std::shared_ptr<GraphView> graphView) +bool insertScalingBelowProducer(std::shared_ptr<Node> node,double scalingFactor, std::shared_ptr<GraphView> graphView) { if(node->attributes()->hasAttr("isProducerRounding")) { @@ -123,55 +105,39 @@ bool insertScalingBelowProducer(std::shared_ptr<Node> node,double sf, std::share { // We accumulate the multiples scaling factors by multiplying the SF of the ProducerScaling node // (adding new nodes each time would make the graph unusable) - multiplyScalingFactor(node,sf); + multiplyScalingFactor(node,scalingFactor); return true; } - if(node->type() != "Producer") - { - Log::warn(" Cannot apply a scaling factor on a node which is not a producer on a node of type {} whose name is {}", node->type(),node->name()); - return false; - } + AIDGE_ASSERT(node->type() == "Producer","Cannot apply a scaling factor on node of type: {} which is not a producer", node->type()); std::string scalingNodeName = makeUniqueName(node->name() + "_ProducerScaling", graphView); std::shared_ptr<Aidge::Node> scalingNode = Mul(scalingNodeName); scalingNode->attributes()->addAttr("isProducerScaling",0.0); - std::shared_ptr<Tensor> scalingFactorTensor = std::make_shared<Tensor>(Array1D<double, 1> {sf}); + std::shared_ptr<Tensor> scalingFactorTensor = std::make_shared<Tensor>(Array1D<double, 1> {scalingFactor}); std::shared_ptr<Node> scalingFactorProducer = addProducer(scalingNode, 1, {1}, "Factor"); scalingFactorProducer->getOperator()->setOutput(0, scalingFactorTensor); graphView->add(scalingFactorProducer); scalingNode->getOperator()->setDataType(DataType::Float64); // getDataType(parentNode) scalingNode->getOperator()->setBackend("cpu"); + AIDGE_ASSERT(node->getChildren().size() > 0, "The producer node must have at least one child to insert a scaling node."); + 
std::vector<std::shared_ptr<Node>> nextNodes = node->getChildren(0); - if (node->getChildren().size() > 0) - { - // SCALING NODE INSERTION + // For each node in nextNodes store the connexion index + std::vector<int> inputIndices(nextNodes.size()); + for (std::size_t i = 0; i < nextNodes.size(); i++) + inputIndices[i] = getInputIndex(nextNodes[i], node); - // We always have one output from Affine and Add nodes, but possibly multiple childs - std::vector<std::shared_ptr<Node>> nextNodes = node->getChildren(0); + for (std::shared_ptr<Node> nextNode : nextNodes) + node->removeChild(nextNode, 0); - // For each node in nextNodes store the connexion index - std::vector<int> inputIndices(nextNodes.size()); - for (std::size_t i = 0; i < nextNodes.size(); i++) - inputIndices[i] = getInputIndex(nextNodes[i], node); - - for (std::shared_ptr<Node> nextNode : nextNodes) - node->removeChild(nextNode, 0); + node->addChild(scalingNode, 0, 0); - node->addChild(scalingNode, 0, 0); + for (std::size_t i = 0; i < nextNodes.size(); i++) + scalingNode->addChild(nextNodes[i], 0, inputIndices[i]); - for (std::size_t i = 0; i < nextNodes.size(); i++) - scalingNode->addChild(nextNodes[i], 0, inputIndices[i]); - - graphView->add(scalingNode); - } - else - { - Log::warn("Unusual producer "); - node->addChild(scalingNode, 0, 0); - graphView->add(scalingNode); - } + graphView->add(scalingNode); return true; } @@ -1254,5 +1220,9 @@ void clearBiases(std::shared_ptr<GraphView> graphView) } } } - +void devPTQ(std::shared_ptr<GraphView> graphView) +{ + for (std::shared_ptr<Node> node : graphView->getNodes()) + Log::debug(" UUU : {}", node->name()); +} } -- GitLab From f255dcad5d34ec2f62eacd52301c7b9377ef4b29 Mon Sep 17 00:00:00 2001 From: Noam ZERAH <noam.zerah@cea.fr> Date: Wed, 22 Jan 2025 14:36:02 +0000 Subject: [PATCH 15/44] Changing the CLE to fit with the new method of ProducerScaling --- include/aidge/quantization_version.h | 2 +- src/PTQ/CLE.cpp | 38 ++++++++++------------------ 2 files 
changed, 14 insertions(+), 26 deletions(-) diff --git a/include/aidge/quantization_version.h b/include/aidge/quantization_version.h index 9b4e3de..eba0eab 100644 --- a/include/aidge/quantization_version.h +++ b/include/aidge/quantization_version.h @@ -6,6 +6,6 @@ static constexpr const int PROJECT_VERSION_MAJOR = 0; static constexpr const int PROJECT_VERSION_MINOR = 3; static constexpr const int PROJECT_VERSION_PATCH = 0; static constexpr const char * PROJECT_VERSION = "0.3.0"; -static constexpr const char * PROJECT_GIT_HASH = "5ec6543"; +static constexpr const char * PROJECT_GIT_HASH = "c374ce4"; } #endif // VERSION_H diff --git a/src/PTQ/CLE.cpp b/src/PTQ/CLE.cpp index 52e4ec0..eb5ca7a 100644 --- a/src/PTQ/CLE.cpp +++ b/src/PTQ/CLE.cpp @@ -94,6 +94,16 @@ static double getTensorAbsoluteMax(std::shared_ptr<Tensor> tensor) return flatTensor->get<double>(maxIndex); } +//Function used to extraxt the local tensor (from a ProducerScalingNode) +std::shared_ptr<Aidge::Tensor> getLocalTensor(std::shared_ptr<Node> node) { + if (node->getParent(1)->attributes()->hasAttr("isProducerScaling")) { + std::shared_ptr<Aidge::OperatorTensor> operatorTensor = std::static_pointer_cast<OperatorTensor>(node->getParent(1)->getOperator()); + operatorTensor->forward();// We need the forward pass to compute the scaled value of the Tensor + return operatorTensor->getOutput(0); + } else { + return getWeightTensor(node); + } +} void crossLayerEqualization(std::shared_ptr<GraphView> graphView, double targetDelta) { @@ -132,39 +142,17 @@ void crossLayerEqualization(std::shared_ptr<GraphView> graphView, double targetD std::shared_ptr<Node> n1 = affineNodeVector[i]; std::shared_ptr<Node> n2 = affineNodeVector[i+1]; - std::shared_ptr<Aidge::Tensor> n1localTensor, n2localTensor; - if(n1->getParent(1)->attributes()->hasAttr("isProducerScaling")) - { - std::static_pointer_cast<OperatorTensor>(n1->getParent(1)->getOperator())->getOutput(0)->print(); - n1localTensor = 
std::static_pointer_cast<OperatorTensor>(n1->getParent(1)->getOperator())->getOutput(0); - } - else - { - n1localTensor = getWeightTensor(n1); - } - - if(n2->getParent(1)->attributes()->hasAttr("isProducerScaling")) - { - n2localTensor = std::static_pointer_cast<OperatorTensor>(n2->getParent(1)->getOperator())->getOutput(0); - - } - else - { - n2localTensor = getWeightTensor(n2); - } - + std::shared_ptr<Aidge::Tensor> n1localTensor = getLocalTensor(n1); + std::shared_ptr<Aidge::Tensor> n2localTensor = getLocalTensor(n2); + double r1 = getTensorAbsoluteMax(n1localTensor); double r2 = getTensorAbsoluteMax(n2localTensor); - double s1 = std::sqrt(r1 * r2) / r1; double s2 = std::sqrt(r1 * r2) / r2; - //rescaleTensor(getWeightTensor(n1), s1); insertScalingBelowProducer(n1->getParent(1),s1,graphView); - //rescaleTensor(getWeightTensor(n2), s2); insertScalingBelowProducer(n2->getParent(1),s2,graphView); - //rescaleTensor(getBiasTensor(n1), s1); insertScalingBelowProducer(n1->getParent(2),s1,graphView); double rangeDelta = std::abs(r1 - r2); -- GitLab From 58753e8843dc3d328bca8471df72fc51a3855349 Mon Sep 17 00:00:00 2001 From: LOPEZ MAPE Lucas <lucas.lopezmape@cea.fr> Date: Wed, 29 Jan 2025 14:09:27 +0000 Subject: [PATCH 16/44] initial commit, first 'working version' with all debug artifacts present, TODO --- include/aidge/recipes/ONNXRecipes.hpp | 31 +++ python_binding/pybind_Quantization.cpp | 2 + python_binding/recipes/pybind_ONNXRecipes.cpp | 28 ++ setup.py | 2 +- src/recipes/ONNXRecipes.cpp | 248 ++++++++++++++++++ 5 files changed, 310 insertions(+), 1 deletion(-) create mode 100644 include/aidge/recipes/ONNXRecipes.hpp create mode 100644 python_binding/recipes/pybind_ONNXRecipes.cpp create mode 100644 src/recipes/ONNXRecipes.cpp diff --git a/include/aidge/recipes/ONNXRecipes.hpp b/include/aidge/recipes/ONNXRecipes.hpp new file mode 100644 index 0000000..d5d9281 --- /dev/null +++ b/include/aidge/recipes/ONNXRecipes.hpp @@ -0,0 +1,31 @@ 
+/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. + * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#ifndef AIDGE_QUANTIZATION_RECIPES_ONNXRECIPES_H_ +#define AIDGE_QUANTIZATION_RECIPES_ONNXRECIPES_H_ + +#include <memory> + +#include "aidge/graph/GraphView.hpp" +#include "aidge/graph/Matching.hpp" +#include "aidge/operator/MetaOperator.hpp" + +namespace Aidge { + /** + * @brief Prepare a Aidge model for ONNX export: regroup aidge nodes into quantizelinear,dequantizelinear or qlinearconv operators. + * @param graphView The GraphView to process. + * @param qop if true indicates inclusion of metaoperator qlinearconv, if false only quantizelinear and dequantizelinear will be created + */ +void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool qop); + +} + +#endif //AIDGE_QUANTIZATION_RECIPES_ONNXRECIPES_H_ diff --git a/python_binding/pybind_Quantization.cpp b/python_binding/pybind_Quantization.cpp index 7ac344d..b91b0b8 100644 --- a/python_binding/pybind_Quantization.cpp +++ b/python_binding/pybind_Quantization.cpp @@ -34,6 +34,7 @@ void init_PTQ(py::module &m); void init_QAT_FixedQ(py::module &m); void init_QAT_LSQ(py::module &m); void init_QuantRecipes(py::module &m); +void init_ONNXRecipes(py::module &m); void init_QuantizationVersionInfo(py::module &m); @@ -48,6 +49,7 @@ PYBIND11_MODULE(aidge_quantization, m) init_QAT_FixedQ(m); init_QAT_LSQ(m); init_QuantRecipes(m); + init_ONNXRecipes(m); init_QuantizationVersionInfo(m); } diff --git a/python_binding/recipes/pybind_ONNXRecipes.cpp b/python_binding/recipes/pybind_ONNXRecipes.cpp new file mode 100644 index 0000000..0e5c8e9 --- /dev/null +++ 
b/python_binding/recipes/pybind_ONNXRecipes.cpp @@ -0,0 +1,28 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. + * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#include <pybind11/pybind11.h> +#include <pybind11/stl.h> + +#include "aidge/recipes/ONNXRecipes.hpp" +#include "aidge/graph/GraphView.hpp" + +namespace py = pybind11; + +namespace Aidge { + +void init_ONNXRecipes(py::module &m) { + + m.def("quantize_matching_to_export", &quantizeMatchingtoExport, py::arg("graphView"), py::arg("qop")=true); + +} + +} // namespace Aidge diff --git a/setup.py b/setup.py index 8774d01..45ec292 100644 --- a/setup.py +++ b/setup.py @@ -74,7 +74,7 @@ class AidgePkgBuild(build_ext): str(cwd), f"-DTEST={test_onoff}", f"-DCMAKE_INSTALL_PREFIX:PATH={install_path}", - f"-DCMAKE_BUILD_TYPE={compile_type}", + "-DCMAKE_BUILD_TYPE=Debug", #f"-DCMAKE_BUILD_TYPE={compile_type}", "-DPYBIND=ON", f"-DPYBIND_INSTALL_PREFIX:PATH={pybind_install_prefix}", "-DCMAKE_EXPORT_COMPILE_COMMANDS=ON", diff --git a/src/recipes/ONNXRecipes.cpp b/src/recipes/ONNXRecipes.cpp new file mode 100644 index 0000000..f067ea8 --- /dev/null +++ b/src/recipes/ONNXRecipes.cpp @@ -0,0 +1,248 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. 
+ * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#include <memory> + +#include "aidge/recipes/ONNXRecipes.hpp" +#include "aidge/graph/Matching.hpp" +#include "aidge/operator/MetaOperator.hpp" +#include "aidge/graph/GraphView.hpp" +#include "aidge/graph/Node.hpp" + +#include "aidge/operator/Mul.hpp" +#include "aidge/operator/Cast.hpp" +#include "aidge/operator/Producer.hpp" +#include "aidge/operator/MetaOperator.hpp" + +#include "aidge/recipes/Recipes.hpp" + +namespace Aidge { + +void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool qop){ + const auto quantizeMatches = SinglePassGraphMatching(graphView).match("Mul#0->Round?;Mul#0<-Producer#0;Mul#0<1-Producer#1"); + //verify if there are matches AIDGE ASSERT + int nbfusions = 0; + + Log::info("Init"); + //QuantizeLinear Creation + for (const auto& match : quantizeMatches) { + Log::info("Init 1 loop"); + std::shared_ptr<Node> quantMulProd = nullptr; + std::shared_ptr<Node> quantMulOp = nullptr; + for (const auto& node: match.graph->getNodes()){ + if(node->type() == "Mul"){ + quantMulOp = node; + Log::info("got mul"); + break; + } + } + + if(quantMulOp == nullptr){//make and AIDGEASSERT + Log::info("MulOp : nullptr error"); + } + + if (!(quantMulOp->attributes()->hasAttr("isScaling")) && !(quantMulOp->attributes()->hasAttr("isProducerScaling"))){ + Log::info("match skipped"); + continue; + } + match.graph->save("/data1/is156025/ll277219/task_quantif_export/export/aidge_models/catched_graph"); + + auto quantizeLinearSubGraph = *SinglePassGraphMatching(match.graph->clone()).match("Mul#0->Round?").begin(); + + auto quantMetaOp = MetaOperator("QuantizeLinear", quantizeLinearSubGraph.graph); + Log::info("upper midle 1 loop"); + + //creation and addition of dequantizes linear + Log::info("upper middle 1 loop"); + + quantMulProd = quantMulOp->getParent(1)->clone();//ediit + const auto& opTensorqwerty = 
quantMulProd->getOperator();//error here? + + Log::info("middle middle 1 loop"); + const auto opTensor = std::static_pointer_cast<OperatorTensor>(opTensorqwerty); + // const auto opTensor = std::static_pointer_cast<OperatorTensor>(quantMulOp->getParent(0)->getOperator()); + Log::info("encapsulate static_pointer_cast 1 loop"); + const auto quantizeSF = opTensor->getOutput(0); + + auto castNode = Cast(quantizeSF->dataType()); + auto mulNode = Mul(); + Log::info("middle 1 loop"); + + const auto tempTensor = Tensor(Array1D<float, 1>{1}); + //Dequantize Scaling factor is the inverse of quatize scaling factor + const Tensor dequantizeSF = tempTensor / *quantizeSF; + + auto mulProd = Producer(std::make_shared<Tensor>(dequantizeSF)); + castNode->addChild(mulNode,0,0); + mulProd->addChild(mulNode,0,1); + + auto dequantMetaOp = MetaOperator("DequantizeLinear", getConnectedGraphView(mulNode)); + + quantMetaOp->addChild(dequantMetaOp,0,0); + //Graphview of quantize->dequantize is made to be able to replace without it possibly changing the inputs + // auto metaOpGraph = std::make_shared<GraphView>(getConnectedGraphView(dequantMetaOp)); + auto metaOpGraph = getConnectedGraphView(dequantMetaOp);//?????? 
+ Log::info("end 1 loop"); + + const auto mulRoundgraph = *SinglePassGraphMatching(match.graph).match("Mul#0->Round?").begin(); + + match.graph->replace(mulRoundgraph.graph, metaOpGraph); + nbfusions++; + } + Log::info("fusioned {} out of {} possible matches", nbfusions,quantizeMatches.size()); + + graphView->save("/data1/is156025/ll277219/task_quantif_export/export/aidge_models/middlegraph"); + + if(qop){ + const auto wholeQlinearMatches = SinglePassGraphMatching(graphView).match( + //Query is subject to change as quantization operators change + //Query needs to be modified so it takes into account conv2d->quantizelinear, need an OR operand in query--------------- + "Conv2D#0<1-DequantizeLinear#0<-QuantizeLinear#0<1-Producer#0;" + "Conv2D#0<2-DequantizeLinear#1?<-QuantizeLinear#1?<-Producer#1?;" + "Conv2D#0<2-DequantizeLinear#1?<-QuantizeLinear#1?<1-Producer#2?;" + "Conv2D#0->Quantizer" + ); + // CHECK for matches--------------------- + Log::info("found : {} ",wholeQlinearMatches.size()); + for (const auto match : wholeQlinearMatches) { + + for (const auto& node: match.graph->getNodes()){ + //Search the convolution node and look for bias presence + if((node->type() == "Conv2D") && (node->getParents().size() > 2)){ + Log::info("start calc"); + //bias and bias scaling factor have to be modified so it corresponds to ONNX's bias scaling factor formula: biasSF = inputSF * weightSF + + const auto inputSFTensor = std::make_shared<Tensor>(Array1D<double, 1> {1});//TEMP: placeholder while quantizer node is not present at the input of convolution node + //TODO better getter + Log::info("inpusf calc"); + + match.graph->save("/data1/is156025/ll277219/task_quantif_export/export/aidge_models/qlineargraph"); + Log::info("middle middle middle calc"); + + const auto weightSFTensor = std::static_pointer_cast<OperatorTensor>(node->getParent(1)->getParent(0)->getParent(1)->getOperator())->getOutput(0); + Log::info("dims: {}",weightSFTensor->dims()); + 
inputSFTensor->setDataType(weightSFTensor->dataType()); + + Log::info("middle middle calc"); + + const auto& biasProd = node->getParent(2)->getParent(0)->getParent(0); + const auto& biasSFProd = node->getParent(2)->getParent(0)->getParent(1); + Log::info("middle calc"); + const auto biasTensor = std::static_pointer_cast<OperatorTensor>(biasProd->getOperator())->getOutput(0); + const auto biasSFTensor = std::static_pointer_cast<OperatorTensor>(biasSFProd->getOperator())->getOutput(0); + Log::info("middle down calc"); + + const Tensor newBiasSFTensor = *inputSFTensor* *weightSFTensor; + const Tensor newBiasTensor = (*biasSFTensor* *biasTensor)/newBiasSFTensor; + Log::info("down up"); + + biasProd->getOperator()->setOutput(0,std::make_shared<Tensor>(newBiasTensor)); + biasSFProd->getOperator()->setOutput(0,std::make_shared<Tensor>(newBiasSFTensor)); + + // const auto newBiasSFProd = Producer(std::make_shared<Tensor>(newBiasSFTensor),(!biasSFProd->name().empty()) ? biasSFProd->name(): ""); + // const auto newBiasProd = Producer(std::make_shared<Tensor>(newBiasTensor),(!biasProd->name().empty()) ? 
biasProd->name(): ""); + + // //replace with sets is not encouraged but because of these nodes being producers(0 input and only 1 output), input order should not be an issue + // match.graph->save("/data1/is156025/ll277219/task_quantif_export/export/aidge_models/beforeReplace"); + + // match.graph->replace(std::set<NodePtr>{biasProd},std::set<NodePtr>{newBiasProd}); + // match.graph->replace(std::set<NodePtr>{biasSFProd},std::set<NodePtr>{newBiasSFProd}); + match.graph->save("/data1/is156025/ll277219/task_quantif_export/export/aidge_models/afterSetReplace"); + + Log::info("end calc"); + break;//only one conv per match + } + Log::info("loop"); + } + Log::info("qlinearrr"); + // auto qlinearMatchs = SinglePassGraphMatching(match.graph->clone()).match("Conv2D#0<1-DequantizeLinear#0; Conv2D#0<2-DequantizeLinear#1?; Conv2D#0->Quantizer"); + + // Log::info("matches: {}", qlinearMatchs.size()); + // auto onlyMatch = *qlinearMatchs.begin(); + // auto nodes = onlyMatch.graph->getOrderedInputs(); + + // Log::info("nodes: {}", nodes.size()); + // onlyMatch.graph->save("/data1/is156025/ll277219/task_quantif_export/export/aidge_models/qlinearmatches"); + // //-------------- + // //Aidge export function to ONNX only accepts an especific order of inputs + // // const std::vector<std::pair<Aidge::NodePtr, Aidge::IOIndex_t>> newOrder = {nodes[0], + // // nodes[5], + // // nodes[6], + // // nodes[1], + // // nodes[2], + // // nodes[3], + // // nodes[4]}; + + // // const auto newGraph = onlyMatch.graph->clone(); + // // newGraph->setOrderedInputs(newOrder); + // //*--------------- + + // auto qlinearMetaOp = MetaOperator("QlinearConv", onlyMatch.graph->clone()); + // auto metaOpGraph = std::make_shared<GraphView>(); + // metaOpGraph->add(qlinearMetaOp, false); + // metaOpGraph->save("/data1/is156025/ll277219/task_quantif_export/export/aidge_models/metaopgraph"); + + // Log::info("final step", nodes.size()); + // 
match.graph->save("/data1/is156025/ll277219/task_quantif_export/export/aidge_models/preQlinear"); + + // const bool qlinearReplaced = GraphView::replace(onlyMatch.graph, metaOpGraph); + + // if (!qlinearReplaced) Log::info("Not replaced"); + //------------------------------------- + auto qlinearMatchs = SinglePassGraphMatching(match.graph).match("Conv2D#0<1-DequantizeLinear#0; Conv2D#0<2-DequantizeLinear#1?; Conv2D#0->Quantizer"); + + Log::info("matches: {}", qlinearMatchs.size()); + auto onlyMatch = *qlinearMatchs.begin(); + auto orderedIn = onlyMatch.graph->getOrderedInputs(); + const std::vector<std::pair<Aidge::NodePtr, Aidge::IOIndex_t>> newOrder = {orderedIn[0], + orderedIn[3], + orderedIn[4], + orderedIn[1], + orderedIn[2]}; + onlyMatch.graph->setOrderedInputs(newOrder); + auto qlinearMetaOp = MetaOperator("QLinearConv", onlyMatch.graph->clone()); + auto metaOpGraph = std::make_shared<GraphView>(); + metaOpGraph->add(qlinearMetaOp, false); + + const bool qlinearReplaced = GraphView::replace(onlyMatch.graph, metaOpGraph); + + if (!qlinearReplaced) Log::info("Not replaced"); + + + //----------------------------- + + // match.graph->save("/data1/is156025/ll277219/task_quantif_export/export/aidge_models/couldQlinear"); + // Aidge::fuseToMetaOps(match.graph,"Conv2D#0<1-DequantizeLinear#0; Conv2D#0<2-DequantizeLinear#1?; Conv2D#0->Quantizer","QLinearConv"); + + // for(const auto& graphnode: graphView->getNodes()){ + // if(graphnode->type() == "QLinearConv"){ + // auto micrograph = std::static_pointer_cast<MetaOperator_Op>(graphnode->getOperator())->getMicroGraph(); + // micrograph->save("/data1/is156025/ll277219/task_quantif_export/export/aidge_models/oldOrderedMicrograph"); + + // auto orderedIn = micrograph->getOrderedInputs(); + // const std::vector<std::pair<Aidge::NodePtr, Aidge::IOIndex_t>> newOrder = {orderedIn[0], + // orderedIn[3], + // orderedIn[4], + // orderedIn[1], + // orderedIn[2]}; + // micrograph->setOrderedInputs(newOrder); + // 
micrograph->save("/data1/is156025/ll277219/task_quantif_export/export/aidge_models/newOrderedMicrograph"); + + + // } + // } + + match.graph->save("/data1/is156025/ll277219/task_quantif_export/export/aidge_models/maybeQlinear"); + } + } + + } +} \ No newline at end of file -- GitLab From 4831bec7264e025946f771ec01ff7e35b7a7117f Mon Sep 17 00:00:00 2001 From: LOPEZ MAPE Lucas <lucas.lopezmape@cea.fr> Date: Wed, 29 Jan 2025 14:36:57 +0000 Subject: [PATCH 17/44] removed commented test code, debug save models,simplified one operation --- src/recipes/ONNXRecipes.cpp | 88 +++---------------------------------- 1 file changed, 7 insertions(+), 81 deletions(-) diff --git a/src/recipes/ONNXRecipes.cpp b/src/recipes/ONNXRecipes.cpp index f067ea8..f9e20b4 100644 --- a/src/recipes/ONNXRecipes.cpp +++ b/src/recipes/ONNXRecipes.cpp @@ -53,7 +53,6 @@ void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool qop){ Log::info("match skipped"); continue; } - match.graph->save("/data1/is156025/ll277219/task_quantif_export/export/aidge_models/catched_graph"); auto quantizeLinearSubGraph = *SinglePassGraphMatching(match.graph->clone()).match("Mul#0->Round?").begin(); @@ -63,12 +62,7 @@ void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool qop){ //creation and addition of dequantizes linear Log::info("upper middle 1 loop"); - quantMulProd = quantMulOp->getParent(1)->clone();//ediit - const auto& opTensorqwerty = quantMulProd->getOperator();//error here? 
- - Log::info("middle middle 1 loop"); - const auto opTensor = std::static_pointer_cast<OperatorTensor>(opTensorqwerty); - // const auto opTensor = std::static_pointer_cast<OperatorTensor>(quantMulOp->getParent(0)->getOperator()); + const auto opTensor = std::static_pointer_cast<OperatorTensor>(quantMulOp->getParent(1)->clone()->getOperator()); Log::info("encapsulate static_pointer_cast 1 loop"); const auto quantizeSF = opTensor->getOutput(0); @@ -89,7 +83,7 @@ void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool qop){ quantMetaOp->addChild(dequantMetaOp,0,0); //Graphview of quantize->dequantize is made to be able to replace without it possibly changing the inputs // auto metaOpGraph = std::make_shared<GraphView>(getConnectedGraphView(dequantMetaOp)); - auto metaOpGraph = getConnectedGraphView(dequantMetaOp);//?????? + auto metaOpGraph = getConnectedGraphView(dequantMetaOp); Log::info("end 1 loop"); const auto mulRoundgraph = *SinglePassGraphMatching(match.graph).match("Mul#0->Round?").begin(); @@ -119,12 +113,12 @@ void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool qop){ if((node->type() == "Conv2D") && (node->getParents().size() > 2)){ Log::info("start calc"); //bias and bias scaling factor have to be modified so it corresponds to ONNX's bias scaling factor formula: biasSF = inputSF * weightSF - - const auto inputSFTensor = std::make_shared<Tensor>(Array1D<double, 1> {1});//TEMP: placeholder while quantizer node is not present at the input of convolution node - //TODO better getter + + //TEMP: placeholder while quantizer node is not present at the input of convolution node + const auto inputSFTensor = std::make_shared<Tensor>(Array1D<double, 1> {1}); + Log::info("inpusf calc"); - match.graph->save("/data1/is156025/ll277219/task_quantif_export/export/aidge_models/qlineargraph"); Log::info("middle middle middle calc"); const auto weightSFTensor = 
std::static_pointer_cast<OperatorTensor>(node->getParent(1)->getParent(0)->getParent(1)->getOperator())->getOutput(0); @@ -147,56 +141,13 @@ void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool qop){ biasProd->getOperator()->setOutput(0,std::make_shared<Tensor>(newBiasTensor)); biasSFProd->getOperator()->setOutput(0,std::make_shared<Tensor>(newBiasSFTensor)); - // const auto newBiasSFProd = Producer(std::make_shared<Tensor>(newBiasSFTensor),(!biasSFProd->name().empty()) ? biasSFProd->name(): ""); - // const auto newBiasProd = Producer(std::make_shared<Tensor>(newBiasTensor),(!biasProd->name().empty()) ? biasProd->name(): ""); - - // //replace with sets is not encouraged but because of these nodes being producers(0 input and only 1 output), input order should not be an issue - // match.graph->save("/data1/is156025/ll277219/task_quantif_export/export/aidge_models/beforeReplace"); - - // match.graph->replace(std::set<NodePtr>{biasProd},std::set<NodePtr>{newBiasProd}); - // match.graph->replace(std::set<NodePtr>{biasSFProd},std::set<NodePtr>{newBiasSFProd}); - match.graph->save("/data1/is156025/ll277219/task_quantif_export/export/aidge_models/afterSetReplace"); - Log::info("end calc"); break;//only one conv per match } Log::info("loop"); } Log::info("qlinearrr"); - // auto qlinearMatchs = SinglePassGraphMatching(match.graph->clone()).match("Conv2D#0<1-DequantizeLinear#0; Conv2D#0<2-DequantizeLinear#1?; Conv2D#0->Quantizer"); - - // Log::info("matches: {}", qlinearMatchs.size()); - // auto onlyMatch = *qlinearMatchs.begin(); - // auto nodes = onlyMatch.graph->getOrderedInputs(); - - // Log::info("nodes: {}", nodes.size()); - // onlyMatch.graph->save("/data1/is156025/ll277219/task_quantif_export/export/aidge_models/qlinearmatches"); - // //-------------- - // //Aidge export function to ONNX only accepts an especific order of inputs - // // const std::vector<std::pair<Aidge::NodePtr, Aidge::IOIndex_t>> newOrder = {nodes[0], - // // nodes[5], - // // 
nodes[6], - // // nodes[1], - // // nodes[2], - // // nodes[3], - // // nodes[4]}; - - // // const auto newGraph = onlyMatch.graph->clone(); - // // newGraph->setOrderedInputs(newOrder); - // //*--------------- - - // auto qlinearMetaOp = MetaOperator("QlinearConv", onlyMatch.graph->clone()); - // auto metaOpGraph = std::make_shared<GraphView>(); - // metaOpGraph->add(qlinearMetaOp, false); - // metaOpGraph->save("/data1/is156025/ll277219/task_quantif_export/export/aidge_models/metaopgraph"); - - // Log::info("final step", nodes.size()); - // match.graph->save("/data1/is156025/ll277219/task_quantif_export/export/aidge_models/preQlinear"); - - // const bool qlinearReplaced = GraphView::replace(onlyMatch.graph, metaOpGraph); - - // if (!qlinearReplaced) Log::info("Not replaced"); - //------------------------------------- + auto qlinearMatchs = SinglePassGraphMatching(match.graph).match("Conv2D#0<1-DequantizeLinear#0; Conv2D#0<2-DequantizeLinear#1?; Conv2D#0->Quantizer"); Log::info("matches: {}", qlinearMatchs.size()); @@ -216,31 +167,6 @@ void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool qop){ if (!qlinearReplaced) Log::info("Not replaced"); - - //----------------------------- - - // match.graph->save("/data1/is156025/ll277219/task_quantif_export/export/aidge_models/couldQlinear"); - // Aidge::fuseToMetaOps(match.graph,"Conv2D#0<1-DequantizeLinear#0; Conv2D#0<2-DequantizeLinear#1?; Conv2D#0->Quantizer","QLinearConv"); - - // for(const auto& graphnode: graphView->getNodes()){ - // if(graphnode->type() == "QLinearConv"){ - // auto micrograph = std::static_pointer_cast<MetaOperator_Op>(graphnode->getOperator())->getMicroGraph(); - // micrograph->save("/data1/is156025/ll277219/task_quantif_export/export/aidge_models/oldOrderedMicrograph"); - - // auto orderedIn = micrograph->getOrderedInputs(); - // const std::vector<std::pair<Aidge::NodePtr, Aidge::IOIndex_t>> newOrder = {orderedIn[0], - // orderedIn[3], - // orderedIn[4], - // orderedIn[1], - // 
orderedIn[2]}; - // micrograph->setOrderedInputs(newOrder); - // micrograph->save("/data1/is156025/ll277219/task_quantif_export/export/aidge_models/newOrderedMicrograph"); - - - // } - // } - - match.graph->save("/data1/is156025/ll277219/task_quantif_export/export/aidge_models/maybeQlinear"); } } -- GitLab From f2f0e2daed63ce50a4bed5dad4d6acb6fc1f7d57 Mon Sep 17 00:00:00 2001 From: LOPEZ MAPE Lucas <lucas.lopezmape@cea.fr> Date: Thu, 30 Jan 2025 09:22:07 +0000 Subject: [PATCH 18/44] better graph matching queries, put dequantizelinear's producer out of the metaoperator --- src/recipes/ONNXRecipes.cpp | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/src/recipes/ONNXRecipes.cpp b/src/recipes/ONNXRecipes.cpp index f9e20b4..dc15084 100644 --- a/src/recipes/ONNXRecipes.cpp +++ b/src/recipes/ONNXRecipes.cpp @@ -76,9 +76,12 @@ void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool qop){ auto mulProd = Producer(std::make_shared<Tensor>(dequantizeSF)); castNode->addChild(mulNode,0,0); - mulProd->addChild(mulNode,0,1); + + // mulProd->addChild(mulNode,0,1);TEMP auto dequantMetaOp = MetaOperator("DequantizeLinear", getConnectedGraphView(mulNode)); + + mulProd->addChild(dequantMetaOp,0,1);//TEST quantMetaOp->addChild(dequantMetaOp,0,0); //Graphview of quantize->dequantize is made to be able to replace without it possibly changing the inputs @@ -98,11 +101,12 @@ void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool qop){ if(qop){ const auto wholeQlinearMatches = SinglePassGraphMatching(graphView).match( //Query is subject to change as quantization operators change - //Query needs to be modified so it takes into account conv2d->quantizelinear, need an OR operand in query--------------- "Conv2D#0<1-DequantizeLinear#0<-QuantizeLinear#0<1-Producer#0;" - "Conv2D#0<2-DequantizeLinear#1?<-QuantizeLinear#1?<-Producer#1?;" - "Conv2D#0<2-DequantizeLinear#1?<-QuantizeLinear#1?<1-Producer#2?;" - 
"Conv2D#0->Quantizer" + "Conv2D#0<1-DequantizeLinear#0<1-Producer#3;" + "Conv2D#0<2-(DequantizeLinear#1<-QuantizeLinear#1<-Producer#1)?;" + "Conv2D#0<2-(DequantizeLinear#1<-QuantizeLinear#1<1-Producer#2)?;" + "Conv2D#0<2-(DequantizeLinear#1<1-Producer#4)?;" + "Conv2D#0->(Quantizer|QuantizeLinear#2)" ); // CHECK for matches--------------------- Log::info("found : {} ",wholeQlinearMatches.size()); @@ -148,16 +152,19 @@ void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool qop){ } Log::info("qlinearrr"); - auto qlinearMatchs = SinglePassGraphMatching(match.graph).match("Conv2D#0<1-DequantizeLinear#0; Conv2D#0<2-DequantizeLinear#1?; Conv2D#0->Quantizer"); + auto qlinearMatchs = SinglePassGraphMatching(match.graph).match("Conv2D#0<1-DequantizeLinear#0<1-Producer#0;" + "Conv2D#0<2-(DequantizeLinear#1<1-Producer#1)?;" + "Conv2D#0->(Quantizer|QuantizeLinear)"); + match.graph->save("/data1/is156025/ll277219/task_quantif_export/export/aidge_models/dequantizerTest"); Log::info("matches: {}", qlinearMatchs.size()); auto onlyMatch = *qlinearMatchs.begin(); auto orderedIn = onlyMatch.graph->getOrderedInputs(); const std::vector<std::pair<Aidge::NodePtr, Aidge::IOIndex_t>> newOrder = {orderedIn[0], - orderedIn[3], - orderedIn[4], - orderedIn[1], - orderedIn[2]}; + orderedIn[3], + orderedIn[4], + orderedIn[1], + orderedIn[2]}; onlyMatch.graph->setOrderedInputs(newOrder); auto qlinearMetaOp = MetaOperator("QLinearConv", onlyMatch.graph->clone()); auto metaOpGraph = std::make_shared<GraphView>(); -- GitLab From cf767678ade636e53bb3bc1332b4ecf2d3e12d4e Mon Sep 17 00:00:00 2001 From: Maxence Naud <maxence.naud@cea.fr> Date: Sat, 1 Feb 2025 13:07:29 +0000 Subject: [PATCH 19/44] Update .gitlab-ci.yml --- .gitlab-ci.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 4256774..3c68d74 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -21,9 +21,9 @@ include: - '.gitlab/ci/ubuntu_python.gitlab-ci.yml' - 
'.gitlab/ci/release/cibuildwheel_ubuntu.gitlab-ci.yml' - # - '.gitlab/ci/windows_cpp.gitlab-ci.yml' + - '.gitlab/ci/windows_cpp.gitlab-ci.yml' - # - '.gitlab/ci/windows_python.gitlab-ci.yml' + - '.gitlab/ci/windows_python.gitlab-ci.yml' # - '.gitlab/ci/release/cibuildwheel_windows.gitlab-ci.yml' -- GitLab From bdf9541cd176571070dd2a58406b77655fd7e987 Mon Sep 17 00:00:00 2001 From: Maxence Naud <maxence.naud@cea.fr> Date: Sat, 1 Feb 2025 13:23:53 +0000 Subject: [PATCH 20/44] Update .gitlab-ci.yml --- .gitlab-ci.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 3c68d74..6b36832 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -21,9 +21,10 @@ include: - '.gitlab/ci/ubuntu_python.gitlab-ci.yml' - '.gitlab/ci/release/cibuildwheel_ubuntu.gitlab-ci.yml' - - '.gitlab/ci/windows_cpp.gitlab-ci.yml' + # Cannot find successful job on aidge_backend_cuda yet + # - '.gitlab/ci/windows_cpp.gitlab-ci.yml' - - '.gitlab/ci/windows_python.gitlab-ci.yml' + # - '.gitlab/ci/windows_python.gitlab-ci.yml' # - '.gitlab/ci/release/cibuildwheel_windows.gitlab-ci.yml' -- GitLab From 5f85b2e29e88ae2d9e4e7228e4777a89cd7d73fe Mon Sep 17 00:00:00 2001 From: LOPEZ MAPE Lucas <lucas.lopezmape@cea.fr> Date: Tue, 4 Feb 2025 15:45:46 +0000 Subject: [PATCH 21/44] Change to not reorder, quantizer without dangling inputs expected, added quantizelinear, dequantize and qlinearconv support, small fixes, added dequantize op at the end of the graph --- src/operator/PTQMetaOps.cpp | 8 +- src/recipes/ONNXRecipes.cpp | 191 +++++++++++++++++++++++++++++------- 2 files changed, 163 insertions(+), 36 deletions(-) diff --git a/src/operator/PTQMetaOps.cpp b/src/operator/PTQMetaOps.cpp index 105d4e8..174ad12 100644 --- a/src/operator/PTQMetaOps.cpp +++ b/src/operator/PTQMetaOps.cpp @@ -52,13 +52,19 @@ std::shared_ptr<Node> Quantizer(double scalingFactor, double clipMin, double cli // create the metaop graph std::shared_ptr<GraphView> graphView = 
Sequential({mulNode, roundNode, clipNode}); + + //Producers added to clip to not have dangling inputs + + std::shared_ptr<Node> clipMinProd = addProducer<1>(clipNode, 1, {1}, (!clipNode->name().empty()) ? name + "_Min" : ""); + std::shared_ptr<Node> clipMaxProd = addProducer<1>(clipNode, 2, {1}, (!clipNode->name().empty()) ? name + "_Max" : ""); + std::shared_ptr<GraphView> connectedGraphView = getConnectedGraphView(mulNode); // XXX why not use the graphView ??? // return the metaop std::shared_ptr<Node> metaopNode = MetaOperator("Quantizer", connectedGraphView, {}, name); // XXX alternative prototype - return metaopNode; + return metaopNode; } static std::shared_ptr<Node> getSubNode(std::shared_ptr<GraphView> graphView, std::string nodeType) diff --git a/src/recipes/ONNXRecipes.cpp b/src/recipes/ONNXRecipes.cpp index dc15084..cad82b1 100644 --- a/src/recipes/ONNXRecipes.cpp +++ b/src/recipes/ONNXRecipes.cpp @@ -13,7 +13,6 @@ #include "aidge/recipes/ONNXRecipes.hpp" #include "aidge/graph/Matching.hpp" -#include "aidge/operator/MetaOperator.hpp" #include "aidge/graph/GraphView.hpp" #include "aidge/graph/Node.hpp" @@ -21,17 +20,49 @@ #include "aidge/operator/Cast.hpp" #include "aidge/operator/Producer.hpp" #include "aidge/operator/MetaOperator.hpp" - +#include "aidge/utils/ErrorHandling.hpp" #include "aidge/recipes/Recipes.hpp" + namespace Aidge { +void verify(std::shared_ptr<GraphView> graphView, bool Forward = false){ + if (Forward){ + // graphView->setBackend("cpu"); + // graphView->forwardDims(std::vector<std::vector<Aidge::DimSize_t>>({{1,3,224,224},{1},{1}}),true); + graphView->compile("cpu",Aidge::DataType::Float32, (Aidge::DeviceIdx_t)0U, std::vector<std::vector<Aidge::DimSize_t>>({{1,3,224,224}})); + } + else{ + for (std::shared_ptr<Node> nodePtr : graphView->getNodes()) { + for (IOIndex_t i = 0; i < nodePtr->nbInputs(); ++i) { + std::pair<std::shared_ptr<Node>, IOIndex_t> inputI = nodePtr->input(i); + if (inputI.first) { + // Check that associated Data are 
properly connected... + AIDGE_ASSERT(nodePtr->getOperator()->getRawInput(i) == inputI.first->getOperator()->getRawOutput(inputI.second), + "Input#{} for node {} ({}) is not properly connected to output#{} of node {} ({}): Data or Tensor mismatch!", + i, nodePtr->name(), nodePtr->type(), inputI.second, inputI.first->name(), inputI.first->type()); + } else if (nodePtr->inputCategory(i) != InputCategory::OptionalData && nodePtr->inputCategory(i) != InputCategory::OptionalParam) { + // Input is missing + AIDGE_ASSERT(nodePtr->getOperator()->getRawInput(i), + "Missing input#{} for node {} ({})", i, nodePtr->name(), nodePtr->type()); + AIDGE_ASSERT(!std::static_pointer_cast<Tensor>(nodePtr->getOperator()->getRawInput(i))->undefined(), + "Undefined input#{} for node {} ({})", i, nodePtr->name(), nodePtr->type()); + } + } + } + } +} + + void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool qop){ const auto quantizeMatches = SinglePassGraphMatching(graphView).match("Mul#0->Round?;Mul#0<-Producer#0;Mul#0<1-Producer#1"); //verify if there are matches AIDGE ASSERT + if(quantizeMatches.size()<1) Log::warn("no matches found to convert to Quantize/Dequantize operators"); + int nbfusions = 0; - Log::info("Init"); + + verify(graphView,true);//---------------------- //QuantizeLinear Creation for (const auto& match : quantizeMatches) { Log::info("Init 1 loop"); @@ -39,37 +70,44 @@ void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool qop){ std::shared_ptr<Node> quantMulOp = nullptr; for (const auto& node: match.graph->getNodes()){ if(node->type() == "Mul"){ + verify(graphView,true);//---------------------- quantMulOp = node; Log::info("got mul"); + verify(graphView,true);//---------------------- + break; } } - if(quantMulOp == nullptr){//make and AIDGEASSERT - Log::info("MulOp : nullptr error"); - } + AIDGE_ASSERT(quantMulOp != nullptr,"Unexpected error, Mul operator, root of QuantizeLinear, not found"); if 
(!(quantMulOp->attributes()->hasAttr("isScaling")) && !(quantMulOp->attributes()->hasAttr("isProducerScaling"))){ - Log::info("match skipped"); + //Mul operator does not have the 'isScaling or 'isProducerScaling' tag so it is not product of quantization, match skipped + Log::info("mul operator {} skipped, not part of quantization process",quantMulOp->name()); continue; } - auto quantizeLinearSubGraph = *SinglePassGraphMatching(match.graph->clone()).match("Mul#0->Round?").begin(); + verify(graphView,true);//---------------------- + + auto quantizeLinearSubGraph = *SinglePassGraphMatching(match.graph).match("Mul#0->Round?").begin(); auto quantMetaOp = MetaOperator("QuantizeLinear", quantizeLinearSubGraph.graph); Log::info("upper midle 1 loop"); + verify(graphView,true);//---------------------- //creation and addition of dequantizes linear - Log::info("upper middle 1 loop"); + Log::info("middle middle 1 loop"); const auto opTensor = std::static_pointer_cast<OperatorTensor>(quantMulOp->getParent(1)->clone()->getOperator()); + Log::info("encapsulate static_pointer_cast 1 loop"); const auto quantizeSF = opTensor->getOutput(0); auto castNode = Cast(quantizeSF->dataType()); auto mulNode = Mul(); Log::info("middle 1 loop"); - + verify(graphView,true);//---------------------- + const auto tempTensor = Tensor(Array1D<float, 1>{1}); //Dequantize Scaling factor is the inverse of quatize scaling factor const Tensor dequantizeSF = tempTensor / *quantizeSF; @@ -77,38 +115,82 @@ void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool qop){ auto mulProd = Producer(std::make_shared<Tensor>(dequantizeSF)); castNode->addChild(mulNode,0,0); - // mulProd->addChild(mulNode,0,1);TEMP - auto dequantMetaOp = MetaOperator("DequantizeLinear", getConnectedGraphView(mulNode)); - mulProd->addChild(dequantMetaOp,0,1);//TEST + mulProd->addChild(dequantMetaOp,0,1); quantMetaOp->addChild(dequantMetaOp,0,0); - //Graphview of quantize->dequantize is made to be able to replace without 
it possibly changing the inputs - // auto metaOpGraph = std::make_shared<GraphView>(getConnectedGraphView(dequantMetaOp)); + auto metaOpGraph = getConnectedGraphView(dequantMetaOp); Log::info("end 1 loop"); - const auto mulRoundgraph = *SinglePassGraphMatching(match.graph).match("Mul#0->Round?").begin(); - - match.graph->replace(mulRoundgraph.graph, metaOpGraph); + verify(graphView,true);//---------------------- + + graphView->save("/data1/is156025/ll277219/task_quantif_export/export/aidge_models/PREgraph"); + graphView->replace(quantizeLinearSubGraph.graph, metaOpGraph); + graphView->save("/data1/is156025/ll277219/task_quantif_export/export/aidge_models/POSTgraph"); + + Log::info("END afterReplace"); + + verify(graphView,true);//---------------------- + nbfusions++; } - Log::info("fusioned {} out of {} possible matches", nbfusions,quantizeMatches.size()); + Log::info("{} QuantizeLinear and DequantizeLinear added", nbfusions); + nbfusions = 0; graphView->save("/data1/is156025/ll277219/task_quantif_export/export/aidge_models/middlegraph"); + const auto nodeList = graphView->getNodes(); + for(const auto& node: nodeList){ + if(node == nullptr){ + Log::info("as suspected"); + } + Log::info("LOOP start {}",node->name()); + + if(node->type() == "Quantizer"){ + const auto metaNode = std::static_pointer_cast<MetaOperator_Op>(node->getOperator()); + const auto quantizeMicro = metaNode->getMicroGraph(); + quantizeMicro->save("/data1/is156025/ll277219/task_quantif_export/export/aidge_models/microQuantize"); + + fuseToMetaOps(quantizeMicro,"Clip#0<-Round?<-Mul; Clip#0<1-Producer#0; Clip#0<2-Producer#1","QuantizeLinear"); + // auto metaOpGraph = std::make_shared<GraphView>(); + // metaOpGraph->add(node, false); + quantizeMicro->save("/data1/is156025/ll277219/task_quantif_export/export/aidge_models/AFTERmicroQuantize"); + + auto tempGraph = std::make_shared<GraphView>(); + tempGraph->add(node, false); + graphView->replace(tempGraph, quantizeMicro); + 
Log::info("nodes==========="); + } + Log::info("endif"); + + } + graphView->save("/data1/is156025/ll277219/task_quantif_export/export/aidge_models/quantizerExpand"); + + if(qop){ + // const auto wholeQlinearMatches = SinglePassGraphMatching(graphView).match( + // //Query is subject to change as quantization operators change + // "Conv2D#0<1-DequantizeLinear#0<-QuantizeLinear#0<1-Producer#0;" + // "Conv2D#0<1-DequantizeLinear#0<1-Producer#3;" + // "Conv2D#0<2-(DequantizeLinear#1<-QuantizeLinear#1<-Producer#1)?;" + // "Conv2D#0<2-(DequantizeLinear#1<-QuantizeLinear#1<1-Producer#2)?;" + // "Conv2D#0<2-(DequantizeLinear#1<1-Producer#4)?;" + // "Conv2D#0->(Quantizer|QuantizeLinear#2)" + // ); const auto wholeQlinearMatches = SinglePassGraphMatching(graphView).match( //Query is subject to change as quantization operators change "Conv2D#0<1-DequantizeLinear#0<-QuantizeLinear#0<1-Producer#0;" - "Conv2D#0<1-DequantizeLinear#0<1-Producer#3;" + "Conv2D#0<1-DequantizeLinear#0;"//there was a producer here "Conv2D#0<2-(DequantizeLinear#1<-QuantizeLinear#1<-Producer#1)?;" "Conv2D#0<2-(DequantizeLinear#1<-QuantizeLinear#1<1-Producer#2)?;" - "Conv2D#0<2-(DequantizeLinear#1<1-Producer#4)?;" + "Conv2D#0<2-DequantizeLinear#1?;"//there was a producer here "Conv2D#0->(Quantizer|QuantizeLinear#2)" ); - // CHECK for matches--------------------- + + if(wholeQlinearMatches.size()<1) Log::warn("No QlinearConv matches found"); + Log::info("found : {} ",wholeQlinearMatches.size()); for (const auto match : wholeQlinearMatches) { @@ -145,37 +227,76 @@ void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool qop){ biasProd->getOperator()->setOutput(0,std::make_shared<Tensor>(newBiasTensor)); biasSFProd->getOperator()->setOutput(0,std::make_shared<Tensor>(newBiasSFTensor)); - Log::info("end calc"); + Log::info("Bias and Bias Scaling factor values changed to ONNX standard"); break;//only one conv per match } Log::info("loop"); } Log::info("qlinearrr"); - auto qlinearMatchs = 
SinglePassGraphMatching(match.graph).match("Conv2D#0<1-DequantizeLinear#0<1-Producer#0;" - "Conv2D#0<2-(DequantizeLinear#1<1-Producer#1)?;" + auto qlinearMatchs = SinglePassGraphMatching(match.graph).match("Conv2D#0<1-DequantizeLinear#0;"//there was a producer here + "Conv2D#0<2-DequantizeLinear#1?;"//there was a producer here "Conv2D#0->(Quantizer|QuantizeLinear)"); match.graph->save("/data1/is156025/ll277219/task_quantif_export/export/aidge_models/dequantizerTest"); Log::info("matches: {}", qlinearMatchs.size()); auto onlyMatch = *qlinearMatchs.begin(); - auto orderedIn = onlyMatch.graph->getOrderedInputs(); - const std::vector<std::pair<Aidge::NodePtr, Aidge::IOIndex_t>> newOrder = {orderedIn[0], - orderedIn[3], - orderedIn[4], - orderedIn[1], - orderedIn[2]}; - onlyMatch.graph->setOrderedInputs(newOrder); + auto qlinearMetaOp = MetaOperator("QLinearConv", onlyMatch.graph->clone()); auto metaOpGraph = std::make_shared<GraphView>(); metaOpGraph->add(qlinearMetaOp, false); - const bool qlinearReplaced = GraphView::replace(onlyMatch.graph, metaOpGraph); + const bool qlinearReplaced = graphView->replace(onlyMatch.graph, metaOpGraph); - if (!qlinearReplaced) Log::info("Not replaced"); + AIDGE_ASSERT(qlinearReplaced,"Unexpected error, couldn't replace subgraph with QlinearConv operator") + nbfusions++; } + Log::info("{} QlinearConvs added", nbfusions); } + verify(graphView,true);//---------------------- + Log::info("after qop verify"); + + for (const auto& node : graphView->outputNodes()){ + Log::info("start loop"); + + int idxInput = 0; + if(node->type() == "QLinearConv" ){ + idxInput = 4; + if(node->nbInputs() > 5){//Possible simplification of condition + idxInput = 5; + } + } + else if (node->type() == "QuantizeLinear"){ + idxInput = 1; + } + Log::info("middle"); + + if(idxInput == 0) continue; + Log::info("aft continue"); + + const auto quantizeSF = std::static_pointer_cast<OperatorTensor>(node->getParent(idxInput)->getOperator())->getOutput(0); + 
Log::info("quantize dimensions {}",quantizeSF->dims()); + + const auto castNode = Cast(quantizeSF->dataType()); + const auto mulNode = Mul(); + + castNode->addChild(mulNode,0,0); + + auto dequantMetaOp = MetaOperator("DequantizeLinear", getConnectedGraphView(mulNode)); + + std::static_pointer_cast<MetaOperator_Op>(dequantMetaOp->getOperator())->getMicroGraph()->save("/data1/is156025/ll277219/task_quantif_export/export/aidge_models/DequantEST"); + + graphView->addChild(dequantMetaOp,node,0,0); + + const auto tempTensor = Tensor(Array1D<float, 1>{1}); + const Tensor dequantizeSF = tempTensor / *quantizeSF; + auto mulProd = Producer(std::make_shared<Tensor>(dequantizeSF)); + mulProd->addChild(dequantMetaOp,0,1); + graphView->add(mulProd); + } + verify(graphView,true);//---------------------- + Log::info("after last verify"); + } - } } \ No newline at end of file -- GitLab From 60aa9ea018d0df4e9a9bf77f5fc41f9c3c9c36e9 Mon Sep 17 00:00:00 2001 From: LOPEZ MAPE Lucas <lucas.lopezmape@cea.fr> Date: Wed, 5 Feb 2025 12:56:46 +0000 Subject: [PATCH 22/44] add and subs added --- src/operator/PTQMetaOps.cpp | 4 +-- src/recipes/ONNXRecipes.cpp | 71 ++++++++++++++++++++++++++++++------- 2 files changed, 60 insertions(+), 15 deletions(-) diff --git a/src/operator/PTQMetaOps.cpp b/src/operator/PTQMetaOps.cpp index 174ad12..4682842 100644 --- a/src/operator/PTQMetaOps.cpp +++ b/src/operator/PTQMetaOps.cpp @@ -55,8 +55,8 @@ std::shared_ptr<Node> Quantizer(double scalingFactor, double clipMin, double cli //Producers added to clip to not have dangling inputs - std::shared_ptr<Node> clipMinProd = addProducer<1>(clipNode, 1, {1}, (!clipNode->name().empty()) ? name + "_Min" : ""); - std::shared_ptr<Node> clipMaxProd = addProducer<1>(clipNode, 2, {1}, (!clipNode->name().empty()) ? name + "_Max" : ""); + std::shared_ptr<Node> clipMinProd = addProducer<1>(clipNode, 1, {1}, (!clipNode->name().empty()) ? 
clipNode->name() + "_Min" : ""); + std::shared_ptr<Node> clipMaxProd = addProducer<1>(clipNode, 2, {1}, (!clipNode->name().empty()) ? clipNode->name() + "_Max" : ""); std::shared_ptr<GraphView> connectedGraphView = getConnectedGraphView(mulNode); // XXX why not use the graphView ??? diff --git a/src/recipes/ONNXRecipes.cpp b/src/recipes/ONNXRecipes.cpp index cad82b1..a844667 100644 --- a/src/recipes/ONNXRecipes.cpp +++ b/src/recipes/ONNXRecipes.cpp @@ -17,6 +17,8 @@ #include "aidge/graph/Node.hpp" #include "aidge/operator/Mul.hpp" +#include "aidge/operator/Add.hpp" +#include "aidge/operator/Sub.hpp" #include "aidge/operator/Cast.hpp" #include "aidge/operator/Producer.hpp" #include "aidge/operator/MetaOperator.hpp" @@ -63,7 +65,8 @@ void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool qop){ Log::info("Init"); verify(graphView,true);//---------------------- - //QuantizeLinear Creation + //QuantizeLinear Creation from Mul->Round? + //Each quantizeLinear will have an additional Add node(additioning 0) and a DequantizeLinear to conform with quantized ONNX models for (const auto& match : quantizeMatches) { Log::info("Init 1 loop"); std::shared_ptr<Node> quantMulProd = nullptr; @@ -91,7 +94,17 @@ void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool qop){ auto quantizeLinearSubGraph = *SinglePassGraphMatching(match.graph).match("Mul#0->Round?").begin(); - auto quantMetaOp = MetaOperator("QuantizeLinear", quantizeLinearSubGraph.graph); + auto addNode = Add(); + const auto qlinearGraph = quantizeLinearSubGraph.graph->clone(); + + qlinearGraph->addChild(addNode); + + auto quantMetaOp = MetaOperator("QuantizeLinear", qlinearGraph); + + std::shared_ptr<Node> addNodeProd = Producer(std::make_shared<Tensor>(Array1D<int, 1>{0})); + + addNodeProd->addChild(quantMetaOp,0,2); + Log::info("upper midle 1 loop"); verify(graphView,true);//---------------------- @@ -103,25 +116,41 @@ void quantizeMatchingtoExport(std::shared_ptr<GraphView> 
graphView, bool qop){ Log::info("encapsulate static_pointer_cast 1 loop"); const auto quantizeSF = opTensor->getOutput(0); + auto subNode = Sub(); auto castNode = Cast(quantizeSF->dataType()); auto mulNode = Mul(); Log::info("middle 1 loop"); verify(graphView,true);//---------------------- const auto tempTensor = Tensor(Array1D<float, 1>{1}); - //Dequantize Scaling factor is the inverse of quatize scaling factor + //Dequantize Scaling factor is the inverse of quantize scaling factor const Tensor dequantizeSF = tempTensor / *quantizeSF; auto mulProd = Producer(std::make_shared<Tensor>(dequantizeSF)); + auto subProd = Producer(std::make_shared<Tensor>(Array1D<int, 1>{0})); + + subNode->addChild(castNode,0,0); castNode->addChild(mulNode,0,0); + auto dequantGraph = getConnectedGraphView(mulNode); - auto dequantMetaOp = MetaOperator("DequantizeLinear", getConnectedGraphView(mulNode)); + auto dequantOrdInputs = dequantGraph->getOrderedInputs(); + dequantGraph->save("/data1/is156025/ll277219/task_quantif_export/export/aidge_models/dequantBEFOREREORDER"); + + Log::info("ipt size {}",dequantOrdInputs.size()); + const std::vector<std::pair<Aidge::NodePtr, Aidge::IOIndex_t>> newDequantOrder = {dequantOrdInputs[0], + dequantOrdInputs[2], + dequantOrdInputs[1]}; + dequantGraph->setOrderedInputs(newDequantOrder); + dequantGraph->save("/data1/is156025/ll277219/task_quantif_export/export/aidge_models/dequantGraph"); + + auto dequantMetaOp = MetaOperator("DequantizeLinear", dequantGraph); mulProd->addChild(dequantMetaOp,0,1); - + subProd->addChild(dequantMetaOp,0,2); quantMetaOp->addChild(dequantMetaOp,0,0); - + auto metaOpGraph = getConnectedGraphView(dequantMetaOp); + metaOpGraph->save("/data1/is156025/ll277219/task_quantif_export/export/aidge_models/metaGraphconnected"); Log::info("end 1 loop"); verify(graphView,true);//---------------------- @@ -146,16 +175,35 @@ void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool qop){ if(node == nullptr){ Log::info("as 
suspected"); } - Log::info("LOOP start {}",node->name()); if(node->type() == "Quantizer"){ const auto metaNode = std::static_pointer_cast<MetaOperator_Op>(node->getOperator()); const auto quantizeMicro = metaNode->getMicroGraph(); quantizeMicro->save("/data1/is156025/ll277219/task_quantif_export/export/aidge_models/microQuantize"); - fuseToMetaOps(quantizeMicro,"Clip#0<-Round?<-Mul; Clip#0<1-Producer#0; Clip#0<2-Producer#1","QuantizeLinear"); - // auto metaOpGraph = std::make_shared<GraphView>(); - // metaOpGraph->add(node, false); + const auto addNode = Add(); + + for(const auto quantNode : quantizeMicro->getNodes()){ + if(quantNode->type() == "Clip"){ + const auto oldParent = quantNode->getParent(0); + + oldParent->addChild(addNode,0,0); + addNode->addChild(quantNode,0,0); + + std::shared_ptr<Node> addNodeProd = Producer(std::make_shared<Tensor>(Array1D<int, 1>{0})); + addNodeProd->addChild(addNode,0,1); + + quantizeMicro->add(addNode); + quantizeMicro->add(addNodeProd); + + quantizeMicro->save("/data1/is156025/ll277219/task_quantif_export/export/aidge_models/BEFOREmicroQuantize"); + break; + } + } + quantizeMicro->save("/data1/is156025/ll277219/task_quantif_export/export/aidge_models/BEFOREmicroQuantize"); + + fuseToMetaOps(quantizeMicro,"Clip#0<-Add<-Round?<-Mul; Clip#0<1-Producer#0; Clip#0<2-Producer#1","QuantizeLinear"); + quantizeMicro->save("/data1/is156025/ll277219/task_quantif_export/export/aidge_models/AFTERmicroQuantize"); auto tempGraph = std::make_shared<GraphView>(); @@ -163,12 +211,9 @@ void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool qop){ graphView->replace(tempGraph, quantizeMicro); Log::info("nodes==========="); } - Log::info("endif"); - } graphView->save("/data1/is156025/ll277219/task_quantif_export/export/aidge_models/quantizerExpand"); - if(qop){ // const auto wholeQlinearMatches = SinglePassGraphMatching(graphView).match( // //Query is subject to change as quantization operators change -- GitLab From 
8ed815ecd03e8b6ba9b41044f4eb17af02cad35b Mon Sep 17 00:00:00 2001 From: LOPEZ MAPE Lucas <lucas.lopezmape@cea.fr> Date: Wed, 5 Feb 2025 13:14:45 +0000 Subject: [PATCH 23/44] dequantizer sub and producer added, qdq aidge complete, removed debug saves --- src/recipes/ONNXRecipes.cpp | 29 ++++++++++++++++++----------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/src/recipes/ONNXRecipes.cpp b/src/recipes/ONNXRecipes.cpp index a844667..3c06650 100644 --- a/src/recipes/ONNXRecipes.cpp +++ b/src/recipes/ONNXRecipes.cpp @@ -215,15 +215,6 @@ void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool qop){ graphView->save("/data1/is156025/ll277219/task_quantif_export/export/aidge_models/quantizerExpand"); if(qop){ - // const auto wholeQlinearMatches = SinglePassGraphMatching(graphView).match( - // //Query is subject to change as quantization operators change - // "Conv2D#0<1-DequantizeLinear#0<-QuantizeLinear#0<1-Producer#0;" - // "Conv2D#0<1-DequantizeLinear#0<1-Producer#3;" - // "Conv2D#0<2-(DequantizeLinear#1<-QuantizeLinear#1<-Producer#1)?;" - // "Conv2D#0<2-(DequantizeLinear#1<-QuantizeLinear#1<1-Producer#2)?;" - // "Conv2D#0<2-(DequantizeLinear#1<1-Producer#4)?;" - // "Conv2D#0->(Quantizer|QuantizeLinear#2)" - // ); const auto wholeQlinearMatches = SinglePassGraphMatching(graphView).match( //Query is subject to change as quantization operators change "Conv2D#0<1-DequantizeLinear#0<-QuantizeLinear#0<1-Producer#0;" @@ -238,7 +229,6 @@ void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool qop){ Log::info("found : {} ",wholeQlinearMatches.size()); for (const auto match : wholeQlinearMatches) { - for (const auto& node: match.graph->getNodes()){ //Search the convolution node and look for bias presence if((node->type() == "Conv2D") && (node->getParents().size() > 2)){ @@ -287,6 +277,7 @@ void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool qop){ Log::info("matches: {}", qlinearMatchs.size()); auto 
onlyMatch = *qlinearMatchs.begin(); + onlyMatch.graph->save("/data1/is156025/ll277219/task_quantif_export/export/aidge_models/onlymatch"); auto qlinearMetaOp = MetaOperator("QLinearConv", onlyMatch.graph->clone()); auto metaOpGraph = std::make_shared<GraphView>(); metaOpGraph->add(qlinearMetaOp, false); @@ -323,12 +314,22 @@ void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool qop){ const auto quantizeSF = std::static_pointer_cast<OperatorTensor>(node->getParent(idxInput)->getOperator())->getOutput(0); Log::info("quantize dimensions {}",quantizeSF->dims()); + const auto subNode = Sub(); const auto castNode = Cast(quantizeSF->dataType()); const auto mulNode = Mul(); + subNode->addChild(castNode,0,0); castNode->addChild(mulNode,0,0); - auto dequantMetaOp = MetaOperator("DequantizeLinear", getConnectedGraphView(mulNode)); + auto dequantGraph = getConnectedGraphView(mulNode); + + auto dequantOrdInputs = dequantGraph->getOrderedInputs(); + const std::vector<std::pair<Aidge::NodePtr, Aidge::IOIndex_t>> newDequantOrder = {dequantOrdInputs[0], + dequantOrdInputs[2], + dequantOrdInputs[1]}; + dequantGraph->setOrderedInputs(newDequantOrder); + + auto dequantMetaOp = MetaOperator("DequantizeLinear", dequantGraph); std::static_pointer_cast<MetaOperator_Op>(dequantMetaOp->getOperator())->getMicroGraph()->save("/data1/is156025/ll277219/task_quantif_export/export/aidge_models/DequantEST"); @@ -336,9 +337,15 @@ void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool qop){ const auto tempTensor = Tensor(Array1D<float, 1>{1}); const Tensor dequantizeSF = tempTensor / *quantizeSF; + + auto subProd = Producer(std::make_shared<Tensor>(Array1D<int, 1>{0})); auto mulProd = Producer(std::make_shared<Tensor>(dequantizeSF)); + mulProd->addChild(dequantMetaOp,0,1); + subProd->addChild(dequantMetaOp,0,2); + graphView->add(mulProd); + graphView->add(subProd); } verify(graphView,true);//---------------------- Log::info("after last verify"); -- GitLab From 
83ff26fed2fe5925616431e0361dd5fe2b1093a5 Mon Sep 17 00:00:00 2001 From: LOPEZ MAPE Lucas <lucas.lopezmape@cea.fr> Date: Wed, 5 Feb 2025 14:06:43 +0000 Subject: [PATCH 24/44] minor fix, simplification and qop imports --- src/recipes/ONNXRecipes.cpp | 38 ++++++++++++++++++++----------------- 1 file changed, 21 insertions(+), 17 deletions(-) diff --git a/src/recipes/ONNXRecipes.cpp b/src/recipes/ONNXRecipes.cpp index 3c06650..5ab4c99 100644 --- a/src/recipes/ONNXRecipes.cpp +++ b/src/recipes/ONNXRecipes.cpp @@ -134,14 +134,12 @@ void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool qop){ auto dequantGraph = getConnectedGraphView(mulNode); auto dequantOrdInputs = dequantGraph->getOrderedInputs(); - dequantGraph->save("/data1/is156025/ll277219/task_quantif_export/export/aidge_models/dequantBEFOREREORDER"); Log::info("ipt size {}",dequantOrdInputs.size()); const std::vector<std::pair<Aidge::NodePtr, Aidge::IOIndex_t>> newDequantOrder = {dequantOrdInputs[0], dequantOrdInputs[2], dequantOrdInputs[1]}; dequantGraph->setOrderedInputs(newDequantOrder); - dequantGraph->save("/data1/is156025/ll277219/task_quantif_export/export/aidge_models/dequantGraph"); auto dequantMetaOp = MetaOperator("DequantizeLinear", dequantGraph); @@ -150,14 +148,11 @@ void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool qop){ quantMetaOp->addChild(dequantMetaOp,0,0); auto metaOpGraph = getConnectedGraphView(dequantMetaOp); - metaOpGraph->save("/data1/is156025/ll277219/task_quantif_export/export/aidge_models/metaGraphconnected"); Log::info("end 1 loop"); verify(graphView,true);//---------------------- - graphView->save("/data1/is156025/ll277219/task_quantif_export/export/aidge_models/PREgraph"); graphView->replace(quantizeLinearSubGraph.graph, metaOpGraph); - graphView->save("/data1/is156025/ll277219/task_quantif_export/export/aidge_models/POSTgraph"); Log::info("END afterReplace"); @@ -179,8 +174,6 @@ void quantizeMatchingtoExport(std::shared_ptr<GraphView> 
graphView, bool qop){ if(node->type() == "Quantizer"){ const auto metaNode = std::static_pointer_cast<MetaOperator_Op>(node->getOperator()); const auto quantizeMicro = metaNode->getMicroGraph(); - quantizeMicro->save("/data1/is156025/ll277219/task_quantif_export/export/aidge_models/microQuantize"); - const auto addNode = Add(); for(const auto quantNode : quantizeMicro->getNodes()){ @@ -195,17 +188,12 @@ void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool qop){ quantizeMicro->add(addNode); quantizeMicro->add(addNodeProd); - - quantizeMicro->save("/data1/is156025/ll277219/task_quantif_export/export/aidge_models/BEFOREmicroQuantize"); break; } } - quantizeMicro->save("/data1/is156025/ll277219/task_quantif_export/export/aidge_models/BEFOREmicroQuantize"); fuseToMetaOps(quantizeMicro,"Clip#0<-Add<-Round?<-Mul; Clip#0<1-Producer#0; Clip#0<2-Producer#1","QuantizeLinear"); - quantizeMicro->save("/data1/is156025/ll277219/task_quantif_export/export/aidge_models/AFTERmicroQuantize"); - auto tempGraph = std::make_shared<GraphView>(); tempGraph->add(node, false); graphView->replace(tempGraph, quantizeMicro); @@ -278,12 +266,29 @@ void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool qop){ auto onlyMatch = *qlinearMatchs.begin(); onlyMatch.graph->save("/data1/is156025/ll277219/task_quantif_export/export/aidge_models/onlymatch"); + + const auto qlinearconvGraph = onlyMatch.graph->clone(); + + auto qConvOrdInputs = onlyMatch.graph->getOrderedInputs(); + const std::vector<std::pair<Aidge::NodePtr, Aidge::IOIndex_t>> newQConvOrder = {qConvOrdInputs[0],//x input + qConvOrdInputs[1],//w + qConvOrdInputs[2],//w scale + qConvOrdInputs[3],//w zero point + qConvOrdInputs[7],//y scale + qConvOrdInputs[8],//y zero point + qConvOrdInputs[4],//b + qConvOrdInputs[5],//b scale + qConvOrdInputs[6],};//b zero point + onlyMatch.graph->setOrderedInputs(newQConvOrder); + auto qlinearMetaOp = MetaOperator("QLinearConv", onlyMatch.graph->clone()); + auto 
metaOpGraph = std::make_shared<GraphView>(); metaOpGraph->add(qlinearMetaOp, false); const bool qlinearReplaced = graphView->replace(onlyMatch.graph, metaOpGraph); + AIDGE_ASSERT(qlinearReplaced,"Unexpected error, couldn't replace subgraph with QlinearConv operator") nbfusions++; @@ -296,19 +301,18 @@ void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool qop){ for (const auto& node : graphView->outputNodes()){ Log::info("start loop"); - int idxInput = 0; + int idxInput; if(node->type() == "QLinearConv" ){ idxInput = 4; - if(node->nbInputs() > 5){//Possible simplification of condition - idxInput = 5; - } } else if (node->type() == "QuantizeLinear"){ idxInput = 1; } + else{ + continue; + } Log::info("middle"); - if(idxInput == 0) continue; Log::info("aft continue"); const auto quantizeSF = std::static_pointer_cast<OperatorTensor>(node->getParent(idxInput)->getOperator())->getOutput(0); -- GitLab From dffbd06cbd9b56aadfb0964e26cee9081cd3c1dd Mon Sep 17 00:00:00 2001 From: LOPEZ MAPE Lucas <lucas.lopezmape@cea.fr> Date: Wed, 5 Feb 2025 14:22:57 +0000 Subject: [PATCH 25/44] removed debug function and verifications --- src/recipes/ONNXRecipes.cpp | 43 ------------------------------------- 1 file changed, 43 deletions(-) diff --git a/src/recipes/ONNXRecipes.cpp b/src/recipes/ONNXRecipes.cpp index 5ab4c99..d71a941 100644 --- a/src/recipes/ONNXRecipes.cpp +++ b/src/recipes/ONNXRecipes.cpp @@ -28,34 +28,6 @@ namespace Aidge { -void verify(std::shared_ptr<GraphView> graphView, bool Forward = false){ - if (Forward){ - // graphView->setBackend("cpu"); - // graphView->forwardDims(std::vector<std::vector<Aidge::DimSize_t>>({{1,3,224,224},{1},{1}}),true); - graphView->compile("cpu",Aidge::DataType::Float32, (Aidge::DeviceIdx_t)0U, std::vector<std::vector<Aidge::DimSize_t>>({{1,3,224,224}})); - } - else{ - for (std::shared_ptr<Node> nodePtr : graphView->getNodes()) { - for (IOIndex_t i = 0; i < nodePtr->nbInputs(); ++i) { - std::pair<std::shared_ptr<Node>, 
IOIndex_t> inputI = nodePtr->input(i); - if (inputI.first) { - // Check that associated Data are properly connected... - AIDGE_ASSERT(nodePtr->getOperator()->getRawInput(i) == inputI.first->getOperator()->getRawOutput(inputI.second), - "Input#{} for node {} ({}) is not properly connected to output#{} of node {} ({}): Data or Tensor mismatch!", - i, nodePtr->name(), nodePtr->type(), inputI.second, inputI.first->name(), inputI.first->type()); - } else if (nodePtr->inputCategory(i) != InputCategory::OptionalData && nodePtr->inputCategory(i) != InputCategory::OptionalParam) { - // Input is missing - AIDGE_ASSERT(nodePtr->getOperator()->getRawInput(i), - "Missing input#{} for node {} ({})", i, nodePtr->name(), nodePtr->type()); - AIDGE_ASSERT(!std::static_pointer_cast<Tensor>(nodePtr->getOperator()->getRawInput(i))->undefined(), - "Undefined input#{} for node {} ({})", i, nodePtr->name(), nodePtr->type()); - } - } - } - } -} - - void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool qop){ const auto quantizeMatches = SinglePassGraphMatching(graphView).match("Mul#0->Round?;Mul#0<-Producer#0;Mul#0<1-Producer#1"); //verify if there are matches AIDGE ASSERT @@ -64,7 +36,6 @@ void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool qop){ int nbfusions = 0; Log::info("Init"); - verify(graphView,true);//---------------------- //QuantizeLinear Creation from Mul->Round? 
//Each quantizeLinear will have an additional Add node(additioning 0) and a DequantizeLinear to conform with quantized ONNX models for (const auto& match : quantizeMatches) { @@ -73,10 +44,8 @@ void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool qop){ std::shared_ptr<Node> quantMulOp = nullptr; for (const auto& node: match.graph->getNodes()){ if(node->type() == "Mul"){ - verify(graphView,true);//---------------------- quantMulOp = node; Log::info("got mul"); - verify(graphView,true);//---------------------- break; } @@ -90,8 +59,6 @@ void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool qop){ continue; } - verify(graphView,true);//---------------------- - auto quantizeLinearSubGraph = *SinglePassGraphMatching(match.graph).match("Mul#0->Round?").begin(); auto addNode = Add(); @@ -106,7 +73,6 @@ void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool qop){ addNodeProd->addChild(quantMetaOp,0,2); Log::info("upper midle 1 loop"); - verify(graphView,true);//---------------------- //creation and addition of dequantizes linear Log::info("middle middle 1 loop"); @@ -120,7 +86,6 @@ void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool qop){ auto castNode = Cast(quantizeSF->dataType()); auto mulNode = Mul(); Log::info("middle 1 loop"); - verify(graphView,true);//---------------------- const auto tempTensor = Tensor(Array1D<float, 1>{1}); //Dequantize Scaling factor is the inverse of quantize scaling factor @@ -150,14 +115,10 @@ void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool qop){ auto metaOpGraph = getConnectedGraphView(dequantMetaOp); Log::info("end 1 loop"); - verify(graphView,true);//---------------------- - graphView->replace(quantizeLinearSubGraph.graph, metaOpGraph); Log::info("END afterReplace"); - verify(graphView,true);//---------------------- - nbfusions++; } Log::info("{} QuantizeLinear and DequantizeLinear added", nbfusions); @@ -295,7 +256,6 @@ void 
quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool qop){ } Log::info("{} QlinearConvs added", nbfusions); } - verify(graphView,true);//---------------------- Log::info("after qop verify"); for (const auto& node : graphView->outputNodes()){ @@ -313,8 +273,6 @@ void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool qop){ } Log::info("middle"); - Log::info("aft continue"); - const auto quantizeSF = std::static_pointer_cast<OperatorTensor>(node->getParent(idxInput)->getOperator())->getOutput(0); Log::info("quantize dimensions {}",quantizeSF->dims()); @@ -351,7 +309,6 @@ void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool qop){ graphView->add(mulProd); graphView->add(subProd); } - verify(graphView,true);//---------------------- Log::info("after last verify"); } -- GitLab From 4dde310960c886507733a4550aaddefbbe705065 Mon Sep 17 00:00:00 2001 From: LOPEZ MAPE Lucas <lucas.lopezmape@cea.fr> Date: Wed, 5 Feb 2025 15:26:48 +0000 Subject: [PATCH 26/44] Temporary add producer datatype added, for onnx inferences purposes --- src/recipes/ONNXRecipes.cpp | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/src/recipes/ONNXRecipes.cpp b/src/recipes/ONNXRecipes.cpp index d71a941..c02e89b 100644 --- a/src/recipes/ONNXRecipes.cpp +++ b/src/recipes/ONNXRecipes.cpp @@ -68,8 +68,8 @@ void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool qop){ auto quantMetaOp = MetaOperator("QuantizeLinear", qlinearGraph); - std::shared_ptr<Node> addNodeProd = Producer(std::make_shared<Tensor>(Array1D<int, 1>{0})); - + std::shared_ptr<Node> addNodeProd = Producer(std::make_shared<Tensor>(Array1D<int8_t, 1>{0})); + Log::info("------------datatype {}",std::static_pointer_cast<OperatorTensor>(addNodeProd->getOperator())->getOutput(0)->dataType()); addNodeProd->addChild(quantMetaOp,0,2); Log::info("upper midle 1 loop"); @@ -161,7 +161,6 @@ void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, 
bool qop){ Log::info("nodes==========="); } } - graphView->save("/data1/is156025/ll277219/task_quantif_export/export/aidge_models/quantizerExpand"); if(qop){ const auto wholeQlinearMatches = SinglePassGraphMatching(graphView).match( @@ -222,12 +221,9 @@ void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool qop){ "Conv2D#0<2-DequantizeLinear#1?;"//there was a producer here "Conv2D#0->(Quantizer|QuantizeLinear)"); - match.graph->save("/data1/is156025/ll277219/task_quantif_export/export/aidge_models/dequantizerTest"); Log::info("matches: {}", qlinearMatchs.size()); auto onlyMatch = *qlinearMatchs.begin(); - onlyMatch.graph->save("/data1/is156025/ll277219/task_quantif_export/export/aidge_models/onlymatch"); - const auto qlinearconvGraph = onlyMatch.graph->clone(); auto qConvOrdInputs = onlyMatch.graph->getOrderedInputs(); @@ -293,8 +289,6 @@ void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool qop){ auto dequantMetaOp = MetaOperator("DequantizeLinear", dequantGraph); - std::static_pointer_cast<MetaOperator_Op>(dequantMetaOp->getOperator())->getMicroGraph()->save("/data1/is156025/ll277219/task_quantif_export/export/aidge_models/DequantEST"); - graphView->addChild(dequantMetaOp,node,0,0); const auto tempTensor = Tensor(Array1D<float, 1>{1}); -- GitLab From 81a5d1bd40e46b1a5ee967dc2087788e9ac6d48f Mon Sep 17 00:00:00 2001 From: LOPEZ MAPE Lucas <lucas.lopezmape@cea.fr> Date: Thu, 6 Feb 2025 13:47:07 +0000 Subject: [PATCH 27/44] initial quantizeLinear --- src/recipes/ONNXRecipes.cpp | 48 +++++++++++++++++++++++++++++-------- 1 file changed, 38 insertions(+), 10 deletions(-) diff --git a/src/recipes/ONNXRecipes.cpp b/src/recipes/ONNXRecipes.cpp index c02e89b..80525be 100644 --- a/src/recipes/ONNXRecipes.cpp +++ b/src/recipes/ONNXRecipes.cpp @@ -19,6 +19,7 @@ #include "aidge/operator/Mul.hpp" #include "aidge/operator/Add.hpp" #include "aidge/operator/Sub.hpp" +#include "aidge/operator/Round.hpp" #include "aidge/operator/Cast.hpp" 
#include "aidge/operator/Producer.hpp" #include "aidge/operator/MetaOperator.hpp" @@ -68,8 +69,8 @@ void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool qop){ auto quantMetaOp = MetaOperator("QuantizeLinear", qlinearGraph); + //TODO : define datatype of producer tensors std::shared_ptr<Node> addNodeProd = Producer(std::make_shared<Tensor>(Array1D<int8_t, 1>{0})); - Log::info("------------datatype {}",std::static_pointer_cast<OperatorTensor>(addNodeProd->getOperator())->getOutput(0)->dataType()); addNodeProd->addChild(quantMetaOp,0,2); Log::info("upper midle 1 loop"); @@ -254,19 +255,46 @@ void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool qop){ } Log::info("after qop verify"); + for (const auto& node : graphView->inputNodes()){ + int idxInput; + if(node->type() == "QLinearConv") idxInput = 1; + else if (node->type() == "DequantizeLinear") idxInput = 1; + else continue; + + const auto dequantizeSF = std::static_pointer_cast<OperatorTensor>(node->getParent(idxInput)->getOperator())->getOutput(0); + Log::info("quantize dimensions {}",dequantizeSF->dims()); + + const std::shared_ptr<Node> mulNode = Mul(); + const std::shared_ptr<Node> roundNode = Round(); + const std::shared_ptr<Node> addNode = Add(); + + const auto addProd = Producer(std::make_shared<Tensor>(Array1D<int, 1>{0})); + const auto mulProd = Producer(std::make_shared<Tensor>(Array1D<int, 1>{1})); + + addProd->addChild(addNode,0,1); + mulProd->addChild(mulNode,0,1); + + const std::shared_ptr<GraphView> qlGraph = Sequential({mulNode, roundNode, addNode}); + + const std::shared_ptr<GraphView> connectedGraphView = getConnectedGraphView(mulNode); + + const std::shared_ptr<Node> metaopNode = MetaOperator("QuantizeLinear", connectedGraphView); + + const auto tempTensor = Tensor(Array1D<float, 1>{1}); + const Tensor quantizeSF = tempTensor / *dequantizeSF; + + graphView->add(mulProd); + graphView->add(addProd); + } + for (const auto& node : graphView->outputNodes()){ 
Log::info("start loop"); int idxInput; - if(node->type() == "QLinearConv" ){ - idxInput = 4; - } - else if (node->type() == "QuantizeLinear"){ - idxInput = 1; - } - else{ - continue; - } + if(node->type() == "QLinearConv") idxInput = 4; + else if (node->type() == "QuantizeLinear") idxInput = 1; + else continue; + Log::info("middle"); const auto quantizeSF = std::static_pointer_cast<OperatorTensor>(node->getParent(idxInput)->getOperator())->getOutput(0); -- GitLab From 4fa7c1f24347d4fba119974e5dd5b1d07af50ea7 Mon Sep 17 00:00:00 2001 From: LOPEZ MAPE Lucas <lucas.lopezmape@cea.fr> Date: Thu, 6 Feb 2025 14:30:34 +0000 Subject: [PATCH 28/44] input quantize fix --- src/recipes/ONNXRecipes.cpp | 26 +++++++++++--------------- 1 file changed, 11 insertions(+), 15 deletions(-) diff --git a/src/recipes/ONNXRecipes.cpp b/src/recipes/ONNXRecipes.cpp index 80525be..180e83a 100644 --- a/src/recipes/ONNXRecipes.cpp +++ b/src/recipes/ONNXRecipes.cpp @@ -261,31 +261,27 @@ void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool qop){ else if (node->type() == "DequantizeLinear") idxInput = 1; else continue; - const auto dequantizeSF = std::static_pointer_cast<OperatorTensor>(node->getParent(idxInput)->getOperator())->getOutput(0); - Log::info("quantize dimensions {}",dequantizeSF->dims()); - const std::shared_ptr<Node> mulNode = Mul(); const std::shared_ptr<Node> roundNode = Round(); const std::shared_ptr<Node> addNode = Add(); - const auto addProd = Producer(std::make_shared<Tensor>(Array1D<int, 1>{0})); - const auto mulProd = Producer(std::make_shared<Tensor>(Array1D<int, 1>{1})); + const std::shared_ptr<GraphView> qlGraph = Sequential({mulNode, roundNode, addNode}); - addProd->addChild(addNode,0,1); - mulProd->addChild(mulNode,0,1); + qlGraph->save("/data1/is156025/ll277219/task_quantif_export/export/aidge_models/conGraph"); - const std::shared_ptr<GraphView> qlGraph = Sequential({mulNode, roundNode, addNode}); + const std::shared_ptr<Node> quantizeLinearNode = 
MetaOperator("QuantizeLinear", qlGraph); - const std::shared_ptr<GraphView> connectedGraphView = getConnectedGraphView(mulNode); + const auto addProd = Producer(std::make_shared<Tensor>(Array1D<int, 1>{0})); + const auto mulProd = Producer(std::make_shared<Tensor>(Array1D<int, 1>{1}));//Placeholder, until quantization in input is done in aidge - const std::shared_ptr<Node> metaopNode = MetaOperator("QuantizeLinear", connectedGraphView); + mulProd->addChild(quantizeLinearNode,0,1); + addProd->addChild(quantizeLinearNode,0,2); - const auto tempTensor = Tensor(Array1D<float, 1>{1}); - const Tensor quantizeSF = tempTensor / *dequantizeSF; + const auto quantizeLinearGraph = getConnectedGraphView(quantizeLinearNode); - graphView->add(mulProd); - graphView->add(addProd); + graphView->addChild(quantizeLinearGraph,std::pair<NodePtr, IOIndex_t>(quantizeLinearNode, IOIndex_t(0)),std::pair<NodePtr, IOIndex_t>(node, IOIndex_t(0))); } + graphView->save("/data1/is156025/ll277219/task_quantif_export/export/aidge_models/after1quant"); for (const auto& node : graphView->outputNodes()){ Log::info("start loop"); @@ -317,7 +313,7 @@ void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool qop){ auto dequantMetaOp = MetaOperator("DequantizeLinear", dequantGraph); - graphView->addChild(dequantMetaOp,node,0,0); + graphView->addChild(dequantMetaOp,node,0,0);//?????? 
const auto tempTensor = Tensor(Array1D<float, 1>{1}); const Tensor dequantizeSF = tempTensor / *quantizeSF; -- GitLab From da09c18d148351421c8acef724569c857735d35e Mon Sep 17 00:00:00 2001 From: LOPEZ MAPE Lucas <lucas.lopezmape@cea.fr> Date: Fri, 7 Feb 2025 08:52:10 +0000 Subject: [PATCH 29/44] qtype corrections for quantizeLinear --- src/recipes/ONNXRecipes.cpp | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/src/recipes/ONNXRecipes.cpp b/src/recipes/ONNXRecipes.cpp index 180e83a..8ffed74 100644 --- a/src/recipes/ONNXRecipes.cpp +++ b/src/recipes/ONNXRecipes.cpp @@ -93,7 +93,7 @@ void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool qop){ const Tensor dequantizeSF = tempTensor / *quantizeSF; auto mulProd = Producer(std::make_shared<Tensor>(dequantizeSF)); - auto subProd = Producer(std::make_shared<Tensor>(Array1D<int, 1>{0})); + auto subProd = Producer(std::make_shared<Tensor>(Array1D<float, 1>{0})); subNode->addChild(castNode,0,0); castNode->addChild(mulNode,0,0); @@ -145,7 +145,7 @@ void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool qop){ oldParent->addChild(addNode,0,0); addNode->addChild(quantNode,0,0); - std::shared_ptr<Node> addNodeProd = Producer(std::make_shared<Tensor>(Array1D<int, 1>{0})); + std::shared_ptr<Node> addNodeProd = Producer(std::make_shared<Tensor>(Array1D<int8_t, 1>{0})); addNodeProd->addChild(addNode,0,1); quantizeMicro->add(addNode); @@ -249,18 +249,12 @@ void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool qop){ AIDGE_ASSERT(qlinearReplaced,"Unexpected error, couldn't replace subgraph with QlinearConv operator") nbfusions++; - } Log::info("{} QlinearConvs added", nbfusions); } Log::info("after qop verify"); for (const auto& node : graphView->inputNodes()){ - int idxInput; - if(node->type() == "QLinearConv") idxInput = 1; - else if (node->type() == "DequantizeLinear") idxInput = 1; - else continue; - const std::shared_ptr<Node> mulNode = 
Mul(); const std::shared_ptr<Node> roundNode = Round(); const std::shared_ptr<Node> addNode = Add(); @@ -271,8 +265,8 @@ void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool qop){ const std::shared_ptr<Node> quantizeLinearNode = MetaOperator("QuantizeLinear", qlGraph); - const auto addProd = Producer(std::make_shared<Tensor>(Array1D<int, 1>{0})); - const auto mulProd = Producer(std::make_shared<Tensor>(Array1D<int, 1>{1}));//Placeholder, until quantization in input is done in aidge + const auto addProd = Producer(std::make_shared<Tensor>(Array1D<int8_t, 1>{0})); + const auto mulProd = Producer(std::make_shared<Tensor>(Array1D<float, 1>{1.0}));//Placeholder, until quantization in input is done in aidge mulProd->addChild(quantizeLinearNode,0,1); addProd->addChild(quantizeLinearNode,0,2); @@ -318,7 +312,7 @@ void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool qop){ const auto tempTensor = Tensor(Array1D<float, 1>{1}); const Tensor dequantizeSF = tempTensor / *quantizeSF; - auto subProd = Producer(std::make_shared<Tensor>(Array1D<int, 1>{0})); + auto subProd = Producer(std::make_shared<Tensor>(Array1D<float, 1>{0})); auto mulProd = Producer(std::make_shared<Tensor>(dequantizeSF)); mulProd->addChild(dequantMetaOp,0,1); @@ -329,5 +323,4 @@ void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool qop){ } Log::info("after last verify"); } - } \ No newline at end of file -- GitLab From 283e973efacdcf8f58150d950b538affe4574ca3 Mon Sep 17 00:00:00 2001 From: LOPEZ MAPE Lucas <lucas.lopezmape@cea.fr> Date: Fri, 7 Feb 2025 09:03:04 +0000 Subject: [PATCH 30/44] first quantizelinear added at the beginning of algorithmn --- src/recipes/ONNXRecipes.cpp | 50 ++++++++++++++++++++----------------- 1 file changed, 27 insertions(+), 23 deletions(-) diff --git a/src/recipes/ONNXRecipes.cpp b/src/recipes/ONNXRecipes.cpp index 8ffed74..607959f 100644 --- a/src/recipes/ONNXRecipes.cpp +++ b/src/recipes/ONNXRecipes.cpp @@ -30,6 +30,31 
@@ namespace Aidge { void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool qop){ + //Add quantizeLinear Metaop at the beginning of the graph + //according to aidge's quantification, the first input is not quantized so Sf of 1 and Zp of 0 is performed + //Operator is added to conform with ONNX's quantize models form + for (const auto& node : graphView->inputNodes()){ + const std::shared_ptr<Node> mulNode = Mul(); + const std::shared_ptr<Node> roundNode = Round(); + const std::shared_ptr<Node> addNode = Add(); + + const std::shared_ptr<GraphView> qlGraph = Sequential({mulNode, roundNode, addNode}); + + qlGraph->save("/data1/is156025/ll277219/task_quantif_export/export/aidge_models/conGraph"); + + const std::shared_ptr<Node> quantizeLinearNode = MetaOperator("QuantizeLinear", qlGraph); + + const auto addProd = Producer(std::make_shared<Tensor>(Array1D<int8_t, 1>{0})); + const auto mulProd = Producer(std::make_shared<Tensor>(Array1D<float, 1>{1.0}));//Placeholder, until quantization in input is done in aidge + + mulProd->addChild(quantizeLinearNode,0,1); + addProd->addChild(quantizeLinearNode,0,2); + + const auto quantizeLinearGraph = getConnectedGraphView(quantizeLinearNode); + + graphView->addChild(quantizeLinearGraph,std::pair<NodePtr, IOIndex_t>(quantizeLinearNode, IOIndex_t(0)),std::pair<NodePtr, IOIndex_t>(node, IOIndex_t(0))); + } + const auto quantizeMatches = SinglePassGraphMatching(graphView).match("Mul#0->Round?;Mul#0<-Producer#0;Mul#0<1-Producer#1"); //verify if there are matches AIDGE ASSERT if(quantizeMatches.size()<1) Log::warn("no matches found to convert to Quantize/Dequantize operators"); @@ -93,7 +118,7 @@ void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool qop){ const Tensor dequantizeSF = tempTensor / *quantizeSF; auto mulProd = Producer(std::make_shared<Tensor>(dequantizeSF)); - auto subProd = Producer(std::make_shared<Tensor>(Array1D<float, 1>{0})); + auto subProd = 
Producer(std::make_shared<Tensor>(Array1D<int8_t, 1>{0})); subNode->addChild(castNode,0,0); castNode->addChild(mulNode,0,0); @@ -254,27 +279,6 @@ void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool qop){ } Log::info("after qop verify"); - for (const auto& node : graphView->inputNodes()){ - const std::shared_ptr<Node> mulNode = Mul(); - const std::shared_ptr<Node> roundNode = Round(); - const std::shared_ptr<Node> addNode = Add(); - - const std::shared_ptr<GraphView> qlGraph = Sequential({mulNode, roundNode, addNode}); - - qlGraph->save("/data1/is156025/ll277219/task_quantif_export/export/aidge_models/conGraph"); - - const std::shared_ptr<Node> quantizeLinearNode = MetaOperator("QuantizeLinear", qlGraph); - - const auto addProd = Producer(std::make_shared<Tensor>(Array1D<int8_t, 1>{0})); - const auto mulProd = Producer(std::make_shared<Tensor>(Array1D<float, 1>{1.0}));//Placeholder, until quantization in input is done in aidge - - mulProd->addChild(quantizeLinearNode,0,1); - addProd->addChild(quantizeLinearNode,0,2); - - const auto quantizeLinearGraph = getConnectedGraphView(quantizeLinearNode); - - graphView->addChild(quantizeLinearGraph,std::pair<NodePtr, IOIndex_t>(quantizeLinearNode, IOIndex_t(0)),std::pair<NodePtr, IOIndex_t>(node, IOIndex_t(0))); - } graphView->save("/data1/is156025/ll277219/task_quantif_export/export/aidge_models/after1quant"); for (const auto& node : graphView->outputNodes()){ @@ -312,7 +316,7 @@ void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool qop){ const auto tempTensor = Tensor(Array1D<float, 1>{1}); const Tensor dequantizeSF = tempTensor / *quantizeSF; - auto subProd = Producer(std::make_shared<Tensor>(Array1D<float, 1>{0})); + auto subProd = Producer(std::make_shared<Tensor>(Array1D<int8_t, 1>{0})); auto mulProd = Producer(std::make_shared<Tensor>(dequantizeSF)); mulProd->addChild(dequantMetaOp,0,1); -- GitLab From 547c8ff7eb6720c8302a43d01e9f8ea6d52041c4 Mon Sep 17 00:00:00 2001 From: LOPEZ 
MAPE Lucas <lucas.lopezmape@cea.fr> Date: Fri, 7 Feb 2025 09:59:45 +0000 Subject: [PATCH 31/44] added functions for quantizelinear and dequantizelinear creation --- include/aidge/recipes/ONNXRecipes.hpp | 3 +- src/recipes/ONNXRecipes.cpp | 130 +++++++++++--------------- 2 files changed, 54 insertions(+), 79 deletions(-) diff --git a/include/aidge/recipes/ONNXRecipes.hpp b/include/aidge/recipes/ONNXRecipes.hpp index d5d9281..1622288 100644 --- a/include/aidge/recipes/ONNXRecipes.hpp +++ b/include/aidge/recipes/ONNXRecipes.hpp @@ -25,7 +25,8 @@ namespace Aidge { * @param qop if true indicates inclusion of metaoperator qlinearconv, if false only quantizelinear and dequantizelinear will be created */ void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool qop); - +std::shared_ptr<Node> createQuantizeLinearNode(float scalingFactor = 1.0, int8_t zeroPoint = 0); +std::shared_ptr<Node> createDequantizeLinearNode(Tensor descalingFactor, int8_t zeroPoint,Aidge::DataType castDtype); } #endif //AIDGE_QUANTIZATION_RECIPES_ONNXRECIPES_H_ diff --git a/src/recipes/ONNXRecipes.cpp b/src/recipes/ONNXRecipes.cpp index 607959f..bf479e8 100644 --- a/src/recipes/ONNXRecipes.cpp +++ b/src/recipes/ONNXRecipes.cpp @@ -28,31 +28,60 @@ namespace Aidge { +std::shared_ptr<Node> createQuantizeLinearNode(float scalingFactor, int8_t zeroPoint){ + //returns the Graphview of a QuantizeLinear metaop with it's producers + const std::shared_ptr<Node> mulNode = Mul(); + const std::shared_ptr<Node> roundNode = Round(); + const std::shared_ptr<Node> addNode = Add(); -void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool qop){ - //Add quantizeLinear Metaop at the beginning of the graph - //according to aidge's quantification, the first input is not quantized so Sf of 1 and Zp of 0 is performed - //Operator is added to conform with ONNX's quantize models form - for (const auto& node : graphView->inputNodes()){ - const std::shared_ptr<Node> mulNode = Mul(); - const 
std::shared_ptr<Node> roundNode = Round(); - const std::shared_ptr<Node> addNode = Add(); + const std::shared_ptr<GraphView> qlGraph = Sequential({mulNode, roundNode, addNode}); - const std::shared_ptr<GraphView> qlGraph = Sequential({mulNode, roundNode, addNode}); + const std::shared_ptr<Node> quantizeMetaOp = MetaOperator("QuantizeLinear", qlGraph); - qlGraph->save("/data1/is156025/ll277219/task_quantif_export/export/aidge_models/conGraph"); + const std::shared_ptr<Node> addProd = Producer(std::make_shared<Tensor>(Array1D<int8_t, 1>{zeroPoint})); + const std::shared_ptr<Node> mulProd = Producer(std::make_shared<Tensor>(Array1D<float, 1>{scalingFactor})); - const std::shared_ptr<Node> quantizeLinearNode = MetaOperator("QuantizeLinear", qlGraph); + mulProd->addChild(quantizeMetaOp,0,1); + addProd->addChild(quantizeMetaOp,0,2); - const auto addProd = Producer(std::make_shared<Tensor>(Array1D<int8_t, 1>{0})); - const auto mulProd = Producer(std::make_shared<Tensor>(Array1D<float, 1>{1.0}));//Placeholder, until quantization in input is done in aidge + return quantizeMetaOp; +} - mulProd->addChild(quantizeLinearNode,0,1); - addProd->addChild(quantizeLinearNode,0,2); +std::shared_ptr<Node> createDequantizeLinearNode(Tensor descalingFactor, int8_t zeroPoint,Aidge::DataType castDtype){ + //returns the Graphview of a QuantizeLinear metaop with it's producers + const std::shared_ptr<Node> subNode = Sub(); + const std::shared_ptr<Node> castNode = Cast(castDtype); + const std::shared_ptr<Node> mulNode = Mul(); - const auto quantizeLinearGraph = getConnectedGraphView(quantizeLinearNode); + const std::shared_ptr<GraphView> dequantGraph = Sequential({subNode, castNode, mulNode}); + auto dequantOrdInputs = dequantGraph->getOrderedInputs(); + const std::vector<std::pair<Aidge::NodePtr, Aidge::IOIndex_t>> newDequantOrder = {dequantOrdInputs[0],//input + dequantOrdInputs[2],//scaling factor + dequantOrdInputs[1]};//zero point + dequantGraph->setOrderedInputs(newDequantOrder); + + 
const std::shared_ptr<Node> dequantMetaOp = MetaOperator("DequantizeLinear", dequantGraph); + + const std::shared_ptr<Node> subProd = Producer(std::make_shared<Tensor>(Array1D<int8_t, 1>{0})); + const std::shared_ptr<Node> mulProd = Producer(std::make_shared<Tensor>(descalingFactor));//TODO: dtype dynamic? + + mulProd->addChild(dequantMetaOp,0,1); + subProd->addChild(dequantMetaOp,0,2); + + return dequantMetaOp; +} + +void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool qop){ + //Add quantizeLinear Metaop at the beginning of the graph + //according to aidge's quantification, the first input is not quantized so Sf of 1 and Zp of 0 is performed + //Operator is added to conform with ONNX's quantize models form + for (const auto& node : graphView->inputNodes()){ + const std::shared_ptr<Node> quantizeLinearNode = createQuantizeLinearNode(); + const auto quantizeLinearGraph = getConnectedGraphView(quantizeLinearNode); + graphView->addChild(quantizeLinearGraph,std::pair<NodePtr, IOIndex_t>(quantizeLinearNode, IOIndex_t(0)),std::pair<NodePtr, IOIndex_t>(node, IOIndex_t(0))); + } const auto quantizeMatches = SinglePassGraphMatching(graphView).match("Mul#0->Round?;Mul#0<-Producer#0;Mul#0<1-Producer#1"); @@ -60,7 +89,6 @@ void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool qop){ if(quantizeMatches.size()<1) Log::warn("no matches found to convert to Quantize/Dequantize operators"); int nbfusions = 0; - Log::info("Init"); //QuantizeLinear Creation from Mul->Round? 
//Each quantizeLinear will have an additional Add node(additioning 0) and a DequantizeLinear to conform with quantized ONNX models @@ -98,44 +126,14 @@ void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool qop){ std::shared_ptr<Node> addNodeProd = Producer(std::make_shared<Tensor>(Array1D<int8_t, 1>{0})); addNodeProd->addChild(quantMetaOp,0,2); - Log::info("upper midle 1 loop"); - - //creation and addition of dequantizes linear - Log::info("middle middle 1 loop"); - - const auto opTensor = std::static_pointer_cast<OperatorTensor>(quantMulOp->getParent(1)->clone()->getOperator()); - Log::info("encapsulate static_pointer_cast 1 loop"); - const auto quantizeSF = opTensor->getOutput(0); - - auto subNode = Sub(); - auto castNode = Cast(quantizeSF->dataType()); - auto mulNode = Mul(); - Log::info("middle 1 loop"); - + const auto quantizeSF = std::static_pointer_cast<OperatorTensor>(quantMulOp->getParent(1)->clone()->getOperator())->getOutput(0); const auto tempTensor = Tensor(Array1D<float, 1>{1}); //Dequantize Scaling factor is the inverse of quantize scaling factor const Tensor dequantizeSF = tempTensor / *quantizeSF; - auto mulProd = Producer(std::make_shared<Tensor>(dequantizeSF)); - auto subProd = Producer(std::make_shared<Tensor>(Array1D<int8_t, 1>{0})); - - subNode->addChild(castNode,0,0); - castNode->addChild(mulNode,0,0); - auto dequantGraph = getConnectedGraphView(mulNode); - - auto dequantOrdInputs = dequantGraph->getOrderedInputs(); - - Log::info("ipt size {}",dequantOrdInputs.size()); - const std::vector<std::pair<Aidge::NodePtr, Aidge::IOIndex_t>> newDequantOrder = {dequantOrdInputs[0], - dequantOrdInputs[2], - dequantOrdInputs[1]}; - dequantGraph->setOrderedInputs(newDequantOrder); - - auto dequantMetaOp = MetaOperator("DequantizeLinear", dequantGraph); - - mulProd->addChild(dequantMetaOp,0,1); - subProd->addChild(dequantMetaOp,0,2); + const auto dequantMetaOp = createDequantizeLinearNode(dequantizeSF,0,quantizeSF->dataType()); + 
quantMetaOp->addChild(dequantMetaOp,0,0); auto metaOpGraph = getConnectedGraphView(dequantMetaOp); @@ -154,9 +152,6 @@ void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool qop){ const auto nodeList = graphView->getNodes(); for(const auto& node: nodeList){ - if(node == nullptr){ - Log::info("as suspected"); - } if(node->type() == "Quantizer"){ const auto metaNode = std::static_pointer_cast<MetaOperator_Op>(node->getOperator()); @@ -294,37 +289,16 @@ void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool qop){ const auto quantizeSF = std::static_pointer_cast<OperatorTensor>(node->getParent(idxInput)->getOperator())->getOutput(0); Log::info("quantize dimensions {}",quantizeSF->dims()); - const auto subNode = Sub(); - const auto castNode = Cast(quantizeSF->dataType()); - const auto mulNode = Mul(); - - subNode->addChild(castNode,0,0); - castNode->addChild(mulNode,0,0); - - auto dequantGraph = getConnectedGraphView(mulNode); - - auto dequantOrdInputs = dequantGraph->getOrderedInputs(); - const std::vector<std::pair<Aidge::NodePtr, Aidge::IOIndex_t>> newDequantOrder = {dequantOrdInputs[0], - dequantOrdInputs[2], - dequantOrdInputs[1]}; - dequantGraph->setOrderedInputs(newDequantOrder); - - auto dequantMetaOp = MetaOperator("DequantizeLinear", dequantGraph); - - graphView->addChild(dequantMetaOp,node,0,0);//?????? 
- const auto tempTensor = Tensor(Array1D<float, 1>{1}); const Tensor dequantizeSF = tempTensor / *quantizeSF; - auto subProd = Producer(std::make_shared<Tensor>(Array1D<int8_t, 1>{0})); - auto mulProd = Producer(std::make_shared<Tensor>(dequantizeSF)); + const auto dequantMetaOp = createDequantizeLinearNode(dequantizeSF,0,quantizeSF->dataType()); + const auto dequantGraph = getConnectedGraphView(dequantMetaOp); - mulProd->addChild(dequantMetaOp,0,1); - subProd->addChild(dequantMetaOp,0,2); + graphView->addChild(dequantGraph,std::pair<NodePtr, IOIndex_t>(node, IOIndex_t(0)),std::pair<NodePtr, IOIndex_t>(dequantMetaOp, IOIndex_t(0))); - graphView->add(mulProd); - graphView->add(subProd); } Log::info("after last verify"); } + } \ No newline at end of file -- GitLab From 3eb8f13a46b60fe067b12b6f0120d4bd49f12e35 Mon Sep 17 00:00:00 2001 From: LOPEZ MAPE Lucas <lucas.lopezmape@cea.fr> Date: Fri, 7 Feb 2025 10:38:03 +0000 Subject: [PATCH 32/44] added corresponding input dequantize linear --- src/recipes/ONNXRecipes.cpp | 79 +++++++++++++++++++++++-------------- 1 file changed, 50 insertions(+), 29 deletions(-) diff --git a/src/recipes/ONNXRecipes.cpp b/src/recipes/ONNXRecipes.cpp index bf479e8..f92b0f6 100644 --- a/src/recipes/ONNXRecipes.cpp +++ b/src/recipes/ONNXRecipes.cpp @@ -85,11 +85,9 @@ void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool qop){ } const auto quantizeMatches = SinglePassGraphMatching(graphView).match("Mul#0->Round?;Mul#0<-Producer#0;Mul#0<1-Producer#1"); - //verify if there are matches AIDGE ASSERT - if(quantizeMatches.size()<1) Log::warn("no matches found to convert to Quantize/Dequantize operators"); + if(quantizeMatches.size()<1) Log::warn("no matches found to convert to Quantize/Dequantize operators"); int nbfusions = 0; - //QuantizeLinear Creation from Mul->Round? 
//Each quantizeLinear will have an additional Add node(additioning 0) and a DequantizeLinear to conform with quantized ONNX models for (const auto& match : quantizeMatches) { @@ -127,6 +125,7 @@ void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool qop){ addNodeProd->addChild(quantMetaOp,0,2); Log::info("encapsulate static_pointer_cast 1 loop"); + const auto quantizeSF = std::static_pointer_cast<OperatorTensor>(quantMulOp->getParent(1)->clone()->getOperator())->getOutput(0); const auto tempTensor = Tensor(Array1D<float, 1>{1}); //Dequantize Scaling factor is the inverse of quantize scaling factor @@ -150,6 +149,7 @@ void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool qop){ graphView->save("/data1/is156025/ll277219/task_quantif_export/export/aidge_models/middlegraph"); + //Modify quantizer so it posseses zero point and conforms with expected metaop in output const auto nodeList = graphView->getNodes(); for(const auto& node: nodeList){ @@ -179,28 +179,45 @@ void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool qop){ auto tempGraph = std::make_shared<GraphView>(); tempGraph->add(node, false); graphView->replace(tempGraph, quantizeMicro); - Log::info("nodes==========="); } } - if(qop){ - const auto wholeQlinearMatches = SinglePassGraphMatching(graphView).match( - //Query is subject to change as quantization operators change - "Conv2D#0<1-DequantizeLinear#0<-QuantizeLinear#0<1-Producer#0;" - "Conv2D#0<1-DequantizeLinear#0;"//there was a producer here - "Conv2D#0<2-(DequantizeLinear#1<-QuantizeLinear#1<-Producer#1)?;" - "Conv2D#0<2-(DequantizeLinear#1<-QuantizeLinear#1<1-Producer#2)?;" - "Conv2D#0<2-DequantizeLinear#1?;"//there was a producer here - "Conv2D#0->(Quantizer|QuantizeLinear#2)" - ); - - if(wholeQlinearMatches.size()<1) Log::warn("No QlinearConv matches found"); - - Log::info("found : {} ",wholeQlinearMatches.size()); - for (const auto match : wholeQlinearMatches) { - for (const auto& node: 
match.graph->getNodes()){ - //Search the convolution node and look for bias presence - if((node->type() == "Conv2D") && (node->getParents().size() > 2)){ + const auto wholeQlinearMatches = SinglePassGraphMatching(graphView).match( + //Query is subject to change as quantization operators change + "Conv2D#0<1-DequantizeLinear#0<-QuantizeLinear#0<1-Producer#0;" + "Conv2D#0<1-DequantizeLinear#0;"//there was a producer here + "Conv2D#0<2-(DequantizeLinear#1<-QuantizeLinear#1<-Producer#1)?;" + "Conv2D#0<2-(DequantizeLinear#1<-QuantizeLinear#1<1-Producer#2)?;" + "Conv2D#0<2-DequantizeLinear#1?;"//there was a producer here + "Conv2D#0->(Quantizer|QuantizeLinear#2)" + ); + + // if(wholeQlinearMatches.size()<1) Log::warn("No QlinearConv matches found"); + + Log::info("found : {} ",wholeQlinearMatches.size()); + for (const auto match : wholeQlinearMatches) { + for (const auto& node: match.graph->getNodes()){ + //Search the convolution node and look for bias presence + if(node->type() == "Conv2D"){ + + if (node->getParent(0)->type() == "QlinearConv" || node->getParent(0)->type() == "QuantizeLinear"){ + const auto quantizeNode = node->getParent(0); + + int idxInput = 1; + if(quantizeNode->type() == "QLinearConv") idxInput = 4; + + const auto quantizeSF = std::static_pointer_cast<OperatorTensor>(quantizeNode->getParent(idxInput)->getOperator())->getOutput(0); + Log::info("quantize dimensions {}",quantizeSF->dims()); + + const auto dequantMetaOp = createDequantizeLinearNode(quantizeSF->clone(),0,quantizeSF->dataType()); + const auto dequantGraph = getConnectedGraphView(dequantMetaOp); + + graphView->add(dequantGraph); + quantizeNode->addChild(dequantMetaOp,0,0); + dequantMetaOp->addChild(node,0,0); + } + + if (qop && node->getParents().size() > 2){ Log::info("start calc"); //bias and bias scaling factor have to be modified so it corresponds to ONNX's bias scaling factor formula: biasSF = inputSF * weightSF @@ -234,13 +251,15 @@ void 
quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool qop){ Log::info("Bias and Bias Scaling factor values changed to ONNX standard"); break;//only one conv per match } - Log::info("loop"); + break; } - Log::info("qlinearrr"); - + Log::info("loop"); + } + Log::info("qlinearrr"); + if (qop){ auto qlinearMatchs = SinglePassGraphMatching(match.graph).match("Conv2D#0<1-DequantizeLinear#0;"//there was a producer here - "Conv2D#0<2-DequantizeLinear#1?;"//there was a producer here - "Conv2D#0->(Quantizer|QuantizeLinear)"); + "Conv2D#0<2-DequantizeLinear#1?;"//there was a producer here + "Conv2D#0->(Quantizer|QuantizeLinear)"); Log::info("matches: {}", qlinearMatchs.size()); auto onlyMatch = *qlinearMatchs.begin(); @@ -269,9 +288,11 @@ void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool qop){ AIDGE_ASSERT(qlinearReplaced,"Unexpected error, couldn't replace subgraph with QlinearConv operator") nbfusions++; - } - Log::info("{} QlinearConvs added", nbfusions); + + } + } + Log::info("{} QlinearConvs added", nbfusions); Log::info("after qop verify"); graphView->save("/data1/is156025/ll277219/task_quantif_export/export/aidge_models/after1quant"); -- GitLab From a54bfb8c01953eda60f776e64d6e5ba7c70bdbf8 Mon Sep 17 00:00:00 2001 From: LOPEZ MAPE Lucas <lucas.lopezmape@cea.fr> Date: Fri, 7 Feb 2025 15:55:14 +0000 Subject: [PATCH 33/44] reordering of nodes, small fixes, TODO: metaop dynamic attribute --- src/recipes/ONNXRecipes.cpp | 49 +++++++++++++++++++++++-------------- 1 file changed, 31 insertions(+), 18 deletions(-) diff --git a/src/recipes/ONNXRecipes.cpp b/src/recipes/ONNXRecipes.cpp index f92b0f6..6860a98 100644 --- a/src/recipes/ONNXRecipes.cpp +++ b/src/recipes/ONNXRecipes.cpp @@ -192,7 +192,7 @@ void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool qop){ "Conv2D#0->(Quantizer|QuantizeLinear#2)" ); - // if(wholeQlinearMatches.size()<1) Log::warn("No QlinearConv matches found"); + if(wholeQlinearMatches.size()<1) 
Log::warn("No quantized convolutions found"); Log::info("found : {} ",wholeQlinearMatches.size()); for (const auto match : wholeQlinearMatches) { @@ -212,10 +212,13 @@ void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool qop){ const auto dequantMetaOp = createDequantizeLinearNode(quantizeSF->clone(),0,quantizeSF->dataType()); const auto dequantGraph = getConnectedGraphView(dequantMetaOp); - graphView->add(dequantGraph); + quantizeNode->addChild(dequantMetaOp,0,0); dequantMetaOp->addChild(node,0,0); + graphView->add(dequantGraph); + match.graph->add(dequantGraph); } + graphView->save("/data1/is156025/ll277219/task_quantif_export/export/aidge_models/between_ifs"); if (qop && node->getParents().size() > 2){ Log::info("start calc"); @@ -256,27 +259,34 @@ void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool qop){ Log::info("loop"); } Log::info("qlinearrr"); + graphView->save("/data1/is156025/ll277219/task_quantif_export/export/aidge_models/beforeQOP"); + if (qop){ - auto qlinearMatchs = SinglePassGraphMatching(match.graph).match("Conv2D#0<1-DequantizeLinear#0;"//there was a producer here - "Conv2D#0<2-DequantizeLinear#1?;"//there was a producer here + auto qlinearMatchs = SinglePassGraphMatching(match.graph).match("Conv2D#0<-DequantizeLinear#0;" + "Conv2D#0<1-DequantizeLinear#1;" + "Conv2D#0<2-DequantizeLinear#2?;" "Conv2D#0->(Quantizer|QuantizeLinear)"); Log::info("matches: {}", qlinearMatchs.size()); auto onlyMatch = *qlinearMatchs.begin(); - const auto qlinearconvGraph = onlyMatch.graph->clone(); + const auto qlinearconvGraph = onlyMatch.graph->clone();//deletable?? 
+ onlyMatch.graph->save("/data1/is156025/ll277219/task_quantif_export/export/aidge_models/oldorder"); auto qConvOrdInputs = onlyMatch.graph->getOrderedInputs(); const std::vector<std::pair<Aidge::NodePtr, Aidge::IOIndex_t>> newQConvOrder = {qConvOrdInputs[0],//x input - qConvOrdInputs[1],//w - qConvOrdInputs[2],//w scale - qConvOrdInputs[3],//w zero point - qConvOrdInputs[7],//y scale - qConvOrdInputs[8],//y zero point - qConvOrdInputs[4],//b - qConvOrdInputs[5],//b scale - qConvOrdInputs[6],};//b zero point + qConvOrdInputs[1],//x scale + qConvOrdInputs[2],//x zero point + qConvOrdInputs[3],//w + qConvOrdInputs[4],//w scale + qConvOrdInputs[5],//w zero point + qConvOrdInputs[9],//y scale + qConvOrdInputs[10],//y zero point + qConvOrdInputs[6],//b + qConvOrdInputs[7],//b scale + qConvOrdInputs[8],};//b zero point onlyMatch.graph->setOrderedInputs(newQConvOrder); + onlyMatch.graph->save("/data1/is156025/ll277219/task_quantif_export/export/aidge_models/neworder"); auto qlinearMetaOp = MetaOperator("QLinearConv", onlyMatch.graph->clone()); @@ -285,13 +295,16 @@ void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool qop){ const bool qlinearReplaced = graphView->replace(onlyMatch.graph, metaOpGraph); - + Log::info("nb of parnets {}",qConvOrdInputs[8].first->getParents().size()); + // qConvOrdInputs[8].first->getParent(0)->attributes()->addAttr("outputDtype",DataType::Int32); + auto test = qConvOrdInputs[8].first->getParent(0); + auto testest = std::static_pointer_cast<Aidge::Operator>(test->getOperator())->attributes(); + auto testarrt = testest; + testarrt->addAttr("outputDtype",DataType::Int32); AIDGE_ASSERT(qlinearReplaced,"Unexpected error, couldn't replace subgraph with QlinearConv operator") nbfusions++; - - } - - } + } + } Log::info("{} QlinearConvs added", nbfusions); Log::info("after qop verify"); -- GitLab From 3086958cb70c478c614d06cfe6d16666727644cb Mon Sep 17 00:00:00 2001 From: LOPEZ MAPE Lucas <lucas.lopezmape@cea.fr> Date: Mon, 10 Feb 
2025 13:16:58 +0000 Subject: [PATCH 34/44] bias dynamic attribute added, cleaning --- src/recipes/ONNXRecipes.cpp | 132 ++++++++++++++++++++---------------- 1 file changed, 75 insertions(+), 57 deletions(-) diff --git a/src/recipes/ONNXRecipes.cpp b/src/recipes/ONNXRecipes.cpp index 6860a98..ed5c959 100644 --- a/src/recipes/ONNXRecipes.cpp +++ b/src/recipes/ONNXRecipes.cpp @@ -38,8 +38,8 @@ std::shared_ptr<Node> createQuantizeLinearNode(float scalingFactor, int8_t zeroP const std::shared_ptr<Node> quantizeMetaOp = MetaOperator("QuantizeLinear", qlGraph); - const std::shared_ptr<Node> addProd = Producer(std::make_shared<Tensor>(Array1D<int8_t, 1>{zeroPoint})); - const std::shared_ptr<Node> mulProd = Producer(std::make_shared<Tensor>(Array1D<float, 1>{scalingFactor})); + const std::shared_ptr<Node> addProd = Producer(std::make_shared<Tensor>(Array1D<int8_t, 1>{zeroPoint}),"",true); + const std::shared_ptr<Node> mulProd = Producer(std::make_shared<Tensor>(Array1D<float, 1>{scalingFactor}),"",true); mulProd->addChild(quantizeMetaOp,0,1); addProd->addChild(quantizeMetaOp,0,2); @@ -55,16 +55,16 @@ std::shared_ptr<Node> createDequantizeLinearNode(Tensor descalingFactor, int8_t const std::shared_ptr<GraphView> dequantGraph = Sequential({subNode, castNode, mulNode}); - auto dequantOrdInputs = dequantGraph->getOrderedInputs(); - const std::vector<std::pair<Aidge::NodePtr, Aidge::IOIndex_t>> newDequantOrder = {dequantOrdInputs[0],//input + std::vector<std::pair<std::shared_ptr<Node>, Aidge::IOIndex_t>> dequantOrdInputs = dequantGraph->getOrderedInputs(); + const std::vector<std::pair<std::shared_ptr<Node>, Aidge::IOIndex_t>> newDequantOrder = {dequantOrdInputs[0],//input dequantOrdInputs[2],//scaling factor dequantOrdInputs[1]};//zero point dequantGraph->setOrderedInputs(newDequantOrder); const std::shared_ptr<Node> dequantMetaOp = MetaOperator("DequantizeLinear", dequantGraph); - const std::shared_ptr<Node> subProd = Producer(std::make_shared<Tensor>(Array1D<int8_t, 1>{0})); 
- const std::shared_ptr<Node> mulProd = Producer(std::make_shared<Tensor>(descalingFactor));//TODO: dtype dynamic? + const std::shared_ptr<Node> subProd = Producer(std::make_shared<Tensor>(Array1D<int8_t, 1>{0}),"",true); + const std::shared_ptr<Node> mulProd = Producer(std::make_shared<Tensor>(descalingFactor),"",true);//TODO: dtype dynamic? mulProd->addChild(dequantMetaOp,0,1); subProd->addChild(dequantMetaOp,0,2); @@ -78,13 +78,13 @@ void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool qop){ //Operator is added to conform with ONNX's quantize models form for (const auto& node : graphView->inputNodes()){ const std::shared_ptr<Node> quantizeLinearNode = createQuantizeLinearNode(); - const auto quantizeLinearGraph = getConnectedGraphView(quantizeLinearNode); + const std::shared_ptr<Aidge::GraphView> quantizeLinearGraph = getConnectedGraphView(quantizeLinearNode); graphView->addChild(quantizeLinearGraph,std::pair<NodePtr, IOIndex_t>(quantizeLinearNode, IOIndex_t(0)),std::pair<NodePtr, IOIndex_t>(node, IOIndex_t(0))); } - const auto quantizeMatches = SinglePassGraphMatching(graphView).match("Mul#0->Round?;Mul#0<-Producer#0;Mul#0<1-Producer#1"); + const std::set<Aidge::SinglePassGraphMatching::MatchingResult> quantizeMatches = SinglePassGraphMatching(graphView).match("Mul#0->Round?;Mul#0<-Producer#0;Mul#0<1-Producer#1"); if(quantizeMatches.size()<1) Log::warn("no matches found to convert to Quantize/Dequantize operators"); int nbfusions = 0; @@ -111,31 +111,32 @@ void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool qop){ continue; } - auto quantizeLinearSubGraph = *SinglePassGraphMatching(match.graph).match("Mul#0->Round?").begin(); + Aidge::SinglePassGraphMatching::MatchingResult quantizeLinearSubGraph = *SinglePassGraphMatching(match.graph).match("Mul#0->Round?").begin(); - auto addNode = Add(); - const auto qlinearGraph = quantizeLinearSubGraph.graph->clone(); + const std::shared_ptr<Node> addNode = Add(); + const 
std::shared_ptr<Aidge::GraphView> qlinearGraph = quantizeLinearSubGraph.graph->clone(); qlinearGraph->addChild(addNode); - auto quantMetaOp = MetaOperator("QuantizeLinear", qlinearGraph); + const std::shared_ptr<Aidge::Node> quantMetaOp = MetaOperator("QuantizeLinear", qlinearGraph); //TODO : define datatype of producer tensors - std::shared_ptr<Node> addNodeProd = Producer(std::make_shared<Tensor>(Array1D<int8_t, 1>{0})); + const std::shared_ptr<Node> addNodeProd = Producer(std::make_shared<Tensor>(Array1D<int8_t, 1>{0}),"",true); addNodeProd->addChild(quantMetaOp,0,2); Log::info("encapsulate static_pointer_cast 1 loop"); - const auto quantizeSF = std::static_pointer_cast<OperatorTensor>(quantMulOp->getParent(1)->clone()->getOperator())->getOutput(0); - const auto tempTensor = Tensor(Array1D<float, 1>{1}); + const std::shared_ptr<Aidge::Tensor> quantizeSF = std::static_pointer_cast<OperatorTensor>(quantMulOp->getParent(1)->clone()->getOperator())->getOutput(0); + const Tensor tempTensor = Tensor(Array1D<float, 1>{1}); + //Dequantize Scaling factor is the inverse of quantize scaling factor const Tensor dequantizeSF = tempTensor / *quantizeSF; - const auto dequantMetaOp = createDequantizeLinearNode(dequantizeSF,0,quantizeSF->dataType()); + const std::shared_ptr<Node> dequantMetaOp = createDequantizeLinearNode(dequantizeSF,0,quantizeSF->dataType()); quantMetaOp->addChild(dequantMetaOp,0,0); - auto metaOpGraph = getConnectedGraphView(dequantMetaOp); + std::shared_ptr<Aidge::GraphView> metaOpGraph = getConnectedGraphView(dequantMetaOp); Log::info("end 1 loop"); graphView->replace(quantizeLinearSubGraph.graph, metaOpGraph); @@ -150,22 +151,22 @@ void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool qop){ graphView->save("/data1/is156025/ll277219/task_quantif_export/export/aidge_models/middlegraph"); //Modify quantizer so it posseses zero point and conforms with expected metaop in output - const auto nodeList = graphView->getNodes(); + const 
std::set<std::shared_ptr<Node>> nodeList = graphView->getNodes(); for(const auto& node: nodeList){ if(node->type() == "Quantizer"){ - const auto metaNode = std::static_pointer_cast<MetaOperator_Op>(node->getOperator()); - const auto quantizeMicro = metaNode->getMicroGraph(); - const auto addNode = Add(); + const std::shared_ptr<Aidge::MetaOperator_Op> metaNode = std::static_pointer_cast<MetaOperator_Op>(node->getOperator()); + const std::shared_ptr<Aidge::GraphView> quantizeMicro = metaNode->getMicroGraph(); + const std::shared_ptr<Node> addNode = Add(); for(const auto quantNode : quantizeMicro->getNodes()){ if(quantNode->type() == "Clip"){ - const auto oldParent = quantNode->getParent(0); + const std::shared_ptr<Node> oldParent = quantNode->getParent(0); oldParent->addChild(addNode,0,0); addNode->addChild(quantNode,0,0); - std::shared_ptr<Node> addNodeProd = Producer(std::make_shared<Tensor>(Array1D<int8_t, 1>{0})); + std::shared_ptr<Node> addNodeProd = Producer(std::make_shared<Tensor>(Array1D<int8_t, 1>{0}),"",true); addNodeProd->addChild(addNode,0,1); quantizeMicro->add(addNode); @@ -176,41 +177,47 @@ void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool qop){ fuseToMetaOps(quantizeMicro,"Clip#0<-Add<-Round?<-Mul; Clip#0<1-Producer#0; Clip#0<2-Producer#1","QuantizeLinear"); - auto tempGraph = std::make_shared<GraphView>(); + std::shared_ptr<Aidge::GraphView> tempGraph = std::make_shared<GraphView>(); tempGraph->add(node, false); graphView->replace(tempGraph, quantizeMicro); } } - const auto wholeQlinearMatches = SinglePassGraphMatching(graphView).match( + const std::set<Aidge::SinglePassGraphMatching::MatchingResult> wholeQlinearMatches = SinglePassGraphMatching(graphView).match( //Query is subject to change as quantization operators change "Conv2D#0<1-DequantizeLinear#0<-QuantizeLinear#0<1-Producer#0;" "Conv2D#0<1-DequantizeLinear#0;"//there was a producer here "Conv2D#0<2-(DequantizeLinear#1<-QuantizeLinear#1<-Producer#1)?;" 
"Conv2D#0<2-(DequantizeLinear#1<-QuantizeLinear#1<1-Producer#2)?;" "Conv2D#0<2-DequantizeLinear#1?;"//there was a producer here - "Conv2D#0->(Quantizer|QuantizeLinear#2)" + "Conv2D#0->QuantizeLinear#2" ); if(wholeQlinearMatches.size()<1) Log::warn("No quantized convolutions found"); Log::info("found : {} ",wholeQlinearMatches.size()); for (const auto match : wholeQlinearMatches) { + + + for (const auto& node: match.graph->getNodes()){ //Search the convolution node and look for bias presence + const bool hasBias = false; + if(node->getParents().size() > 2) const bool hasBias = true; + if(node->type() == "Conv2D"){ if (node->getParent(0)->type() == "QlinearConv" || node->getParent(0)->type() == "QuantizeLinear"){ - const auto quantizeNode = node->getParent(0); + const std::shared_ptr<Node> quantizeNode = node->getParent(0); int idxInput = 1; if(quantizeNode->type() == "QLinearConv") idxInput = 4; - const auto quantizeSF = std::static_pointer_cast<OperatorTensor>(quantizeNode->getParent(idxInput)->getOperator())->getOutput(0); + const std::shared_ptr<Aidge::Tensor> quantizeSF = std::static_pointer_cast<OperatorTensor>(quantizeNode->getParent(idxInput)->getOperator())->getOutput(0); Log::info("quantize dimensions {}",quantizeSF->dims()); - const auto dequantMetaOp = createDequantizeLinearNode(quantizeSF->clone(),0,quantizeSF->dataType()); - const auto dequantGraph = getConnectedGraphView(dequantMetaOp); + const std::shared_ptr<Node> dequantMetaOp = createDequantizeLinearNode(quantizeSF->clone(),0,quantizeSF->dataType()); + const std::shared_ptr<Aidge::GraphView> dequantGraph = getConnectedGraphView(dequantMetaOp); quantizeNode->addChild(dequantMetaOp,0,0); @@ -220,28 +227,28 @@ void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool qop){ } graphView->save("/data1/is156025/ll277219/task_quantif_export/export/aidge_models/between_ifs"); - if (qop && node->getParents().size() > 2){ + if (qop && hasBias){ Log::info("start calc"); //bias and bias scaling 
factor have to be modified so it corresponds to ONNX's bias scaling factor formula: biasSF = inputSF * weightSF //TEMP: placeholder while quantizer node is not present at the input of convolution node - const auto inputSFTensor = std::make_shared<Tensor>(Array1D<double, 1> {1}); + const std::shared_ptr<Aidge::Tensor> inputSFTensor = std::make_shared<Tensor>(Array1D<double, 1> {1}); Log::info("inpusf calc"); Log::info("middle middle middle calc"); - const auto weightSFTensor = std::static_pointer_cast<OperatorTensor>(node->getParent(1)->getParent(0)->getParent(1)->getOperator())->getOutput(0); + const std::shared_ptr<Aidge::Tensor> weightSFTensor = std::static_pointer_cast<OperatorTensor>(node->getParent(1)->getParent(0)->getParent(1)->getOperator())->getOutput(0); Log::info("dims: {}",weightSFTensor->dims()); inputSFTensor->setDataType(weightSFTensor->dataType()); Log::info("middle middle calc"); - const auto& biasProd = node->getParent(2)->getParent(0)->getParent(0); - const auto& biasSFProd = node->getParent(2)->getParent(0)->getParent(1); + const std::shared_ptr<Node>& biasProd = node->getParent(2)->getParent(0)->getParent(0); + const std::shared_ptr<Node>& biasSFProd = node->getParent(2)->getParent(0)->getParent(1); Log::info("middle calc"); - const auto biasTensor = std::static_pointer_cast<OperatorTensor>(biasProd->getOperator())->getOutput(0); - const auto biasSFTensor = std::static_pointer_cast<OperatorTensor>(biasSFProd->getOperator())->getOutput(0); + const std::shared_ptr<Aidge::Tensor> biasTensor = std::static_pointer_cast<OperatorTensor>(biasProd->getOperator())->getOutput(0); + const std::shared_ptr<Aidge::Tensor> biasSFTensor = std::static_pointer_cast<OperatorTensor>(biasSFProd->getOperator())->getOutput(0); Log::info("middle down calc"); const Tensor newBiasSFTensor = *inputSFTensor* *weightSFTensor; @@ -262,19 +269,19 @@ void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool qop){ 
graphView->save("/data1/is156025/ll277219/task_quantif_export/export/aidge_models/beforeQOP"); if (qop){ - auto qlinearMatchs = SinglePassGraphMatching(match.graph).match("Conv2D#0<-DequantizeLinear#0;" + const std::set<Aidge::SinglePassGraphMatching::MatchingResult> qlinearMatchs = SinglePassGraphMatching(match.graph).match("Conv2D#0<-DequantizeLinear#0;" "Conv2D#0<1-DequantizeLinear#1;" "Conv2D#0<2-DequantizeLinear#2?;" - "Conv2D#0->(Quantizer|QuantizeLinear)"); + "Conv2D#0->QuantizeLinear"); Log::info("matches: {}", qlinearMatchs.size()); - auto onlyMatch = *qlinearMatchs.begin(); + Aidge::SinglePassGraphMatching::MatchingResult onlyMatch = *qlinearMatchs.begin(); - const auto qlinearconvGraph = onlyMatch.graph->clone();//deletable?? + const std::shared_ptr<Aidge::GraphView> qlinearconvGraph = onlyMatch.graph->clone();//deletable?? onlyMatch.graph->save("/data1/is156025/ll277219/task_quantif_export/export/aidge_models/oldorder"); - auto qConvOrdInputs = onlyMatch.graph->getOrderedInputs(); - const std::vector<std::pair<Aidge::NodePtr, Aidge::IOIndex_t>> newQConvOrder = {qConvOrdInputs[0],//x input + const std::vector<std::pair<std::shared_ptr<Node>, Aidge::IOIndex_t>> qConvOrdInputs = onlyMatch.graph->getOrderedInputs(); + const std::vector<std::pair<std::shared_ptr<Node>, Aidge::IOIndex_t>> newQConvOrder = {qConvOrdInputs[0],//x input qConvOrdInputs[1],//x scale qConvOrdInputs[2],//x zero point qConvOrdInputs[3],//w @@ -288,25 +295,31 @@ void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool qop){ onlyMatch.graph->setOrderedInputs(newQConvOrder); onlyMatch.graph->save("/data1/is156025/ll277219/task_quantif_export/export/aidge_models/neworder"); - auto qlinearMetaOp = MetaOperator("QLinearConv", onlyMatch.graph->clone()); + const std::shared_ptr<Node> qlinearMetaOp = MetaOperator("QLinearConv", onlyMatch.graph->clone()); - auto metaOpGraph = std::make_shared<GraphView>(); + const std::shared_ptr<Aidge::GraphView> metaOpGraph = 
std::make_shared<GraphView>(); metaOpGraph->add(qlinearMetaOp, false); const bool qlinearReplaced = graphView->replace(onlyMatch.graph, metaOpGraph); - Log::info("nb of parnets {}",qConvOrdInputs[8].first->getParents().size()); - // qConvOrdInputs[8].first->getParent(0)->attributes()->addAttr("outputDtype",DataType::Int32); - auto test = qConvOrdInputs[8].first->getParent(0); - auto testest = std::static_pointer_cast<Aidge::Operator>(test->getOperator())->attributes(); - auto testarrt = testest; - testarrt->addAttr("outputDtype",DataType::Int32); AIDGE_ASSERT(qlinearReplaced,"Unexpected error, couldn't replace subgraph with QlinearConv operator") + + Log::info("nb of parnets {}",qConvOrdInputs[8].first->getParents().size()); + + std::shared_ptr<Node> first_step = qlinearMetaOp->getParent(8); + if (first_step == nullptr) Log::info("error"); + else Log::info("parent type: {}", first_step->type()); + + std::shared_ptr<Aidge::DynamicAttributes> second_step = first_step->attributes(); + second_step->addAttr("outputDtype",6);//6 corresponds to int32 in ONNX's Datatype enum + + // std::static_pointer_cast<DynamicAttributes>(second_step)->addAttr("outputDtype",6);//6 corresponds to int32 in ONNX's Datatype enum + std::static_pointer_cast<OperatorTensor>(first_step->getParent(2)->getOperator())->getOutput(0)->setDataType(DataType::Int32); + nbfusions++; } } - Log::info("{} QlinearConvs added", nbfusions); - Log::info("after qop verify"); + Log::info("{} QlinearConvs added", nbfusions); graphView->save("/data1/is156025/ll277219/task_quantif_export/export/aidge_models/after1quant"); @@ -320,18 +333,23 @@ void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool qop){ Log::info("middle"); - const auto quantizeSF = std::static_pointer_cast<OperatorTensor>(node->getParent(idxInput)->getOperator())->getOutput(0); + const std::shared_ptr<Aidge::Tensor> quantizeSF = std::static_pointer_cast<OperatorTensor>(node->getParent(idxInput)->getOperator())->getOutput(0); 
Log::info("quantize dimensions {}",quantizeSF->dims()); - const auto tempTensor = Tensor(Array1D<float, 1>{1}); + const Tensor tempTensor = Tensor(Array1D<float, 1>{1}); const Tensor dequantizeSF = tempTensor / *quantizeSF; - const auto dequantMetaOp = createDequantizeLinearNode(dequantizeSF,0,quantizeSF->dataType()); - const auto dequantGraph = getConnectedGraphView(dequantMetaOp); + const std::shared_ptr<Node> dequantMetaOp = createDequantizeLinearNode(dequantizeSF,0,quantizeSF->dataType()); + const std::shared_ptr<Aidge::GraphView> dequantGraph = getConnectedGraphView(dequantMetaOp); graphView->addChild(dequantGraph,std::pair<NodePtr, IOIndex_t>(node, IOIndex_t(0)),std::pair<NodePtr, IOIndex_t>(dequantMetaOp, IOIndex_t(0))); } + graphView->save("/data1/is156025/ll277219/task_quantif_export/export/aidge_models/ENDRECIPE"); + constantFolding(graphView); + graphView->save("/data1/is156025/ll277219/task_quantif_export/export/aidge_models/folded"); + + Log::info("after last verify"); } -- GitLab From 116021ffb0296590b9abee1bd0710fd25205fb45 Mon Sep 17 00:00:00 2001 From: LOPEZ MAPE Lucas <lucas.lopezmape@cea.fr> Date: Mon, 10 Feb 2025 13:34:46 +0000 Subject: [PATCH 35/44] const --- src/recipes/ONNXRecipes.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/recipes/ONNXRecipes.cpp b/src/recipes/ONNXRecipes.cpp index ed5c959..c2c1093 100644 --- a/src/recipes/ONNXRecipes.cpp +++ b/src/recipes/ONNXRecipes.cpp @@ -55,7 +55,7 @@ std::shared_ptr<Node> createDequantizeLinearNode(Tensor descalingFactor, int8_t const std::shared_ptr<GraphView> dequantGraph = Sequential({subNode, castNode, mulNode}); - std::vector<std::pair<std::shared_ptr<Node>, Aidge::IOIndex_t>> dequantOrdInputs = dequantGraph->getOrderedInputs(); + const std::vector<std::pair<std::shared_ptr<Node>, Aidge::IOIndex_t>> dequantOrdInputs = dequantGraph->getOrderedInputs(); const std::vector<std::pair<std::shared_ptr<Node>, Aidge::IOIndex_t>> newDequantOrder = 
{dequantOrdInputs[0],//input dequantOrdInputs[2],//scaling factor dequantOrdInputs[1]};//zero point @@ -136,7 +136,7 @@ void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool qop){ quantMetaOp->addChild(dequantMetaOp,0,0); - std::shared_ptr<Aidge::GraphView> metaOpGraph = getConnectedGraphView(dequantMetaOp); + const std::shared_ptr<Aidge::GraphView> metaOpGraph = getConnectedGraphView(dequantMetaOp); Log::info("end 1 loop"); graphView->replace(quantizeLinearSubGraph.graph, metaOpGraph); @@ -166,7 +166,7 @@ void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool qop){ oldParent->addChild(addNode,0,0); addNode->addChild(quantNode,0,0); - std::shared_ptr<Node> addNodeProd = Producer(std::make_shared<Tensor>(Array1D<int8_t, 1>{0}),"",true); + const std::shared_ptr<Node> addNodeProd = Producer(std::make_shared<Tensor>(Array1D<int8_t, 1>{0}),"",true); addNodeProd->addChild(addNode,0,1); quantizeMicro->add(addNode); @@ -313,7 +313,7 @@ void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool qop){ std::shared_ptr<Aidge::DynamicAttributes> second_step = first_step->attributes(); second_step->addAttr("outputDtype",6);//6 corresponds to int32 in ONNX's Datatype enum - // std::static_pointer_cast<DynamicAttributes>(second_step)->addAttr("outputDtype",6);//6 corresponds to int32 in ONNX's Datatype enum + //6 corresponds to int32 in ONNX's Datatype enum std::static_pointer_cast<OperatorTensor>(first_step->getParent(2)->getOperator())->getOutput(0)->setDataType(DataType::Int32); nbfusions++; -- GitLab From 2bc27b133fd5c7071b39bb34222831f9f570d2fa Mon Sep 17 00:00:00 2001 From: LOPEZ MAPE Lucas <lucas.lopezmape@cea.fr> Date: Tue, 11 Feb 2025 15:20:33 +0000 Subject: [PATCH 36/44] Node names, constant folding of quantize operators with only producers, minor fixes --- aidge_quantization/__init__.py | 1 + aidge_quantization/freezeProducers.py | 38 ++++++ include/aidge/recipes/ONNXRecipes.hpp | 4 +- src/recipes/ONNXRecipes.cpp | 161 
+++++++++++++++----------- 4 files changed, 137 insertions(+), 67 deletions(-) create mode 100644 aidge_quantization/freezeProducers.py diff --git a/aidge_quantization/__init__.py b/aidge_quantization/__init__.py index b00fae1..c321e46 100644 --- a/aidge_quantization/__init__.py +++ b/aidge_quantization/__init__.py @@ -1 +1,2 @@ from aidge_quantization.aidge_quantization import * # import so generated by PyBind +from .freezeProducers import * \ No newline at end of file diff --git a/aidge_quantization/freezeProducers.py b/aidge_quantization/freezeProducers.py new file mode 100644 index 0000000..a6ee484 --- /dev/null +++ b/aidge_quantization/freezeProducers.py @@ -0,0 +1,38 @@ +import aidge_core +import aidge_onnx + +def freeze_weights(graphview: aidge_core.GraphView, all_producers: bool = False): + """freeze the weights and bias of Convolution and fully connected nodes. Usage primarly lies so constant folding may be used in those parts of the graph + + :param graphview: model to freeze the weights in + :type graphview: py:class:`aidge_core.GraphView` + :param all_producers: defaults to False, if true, freezes all producers that are part of the wieght input and bias input of the conv or FC + :type all_producers: bool + """ + def freeze_all(node): + for inpt in node.get_parents(): + if inpt is None: + break + elif inpt.type()!= "Producer": + freeze_all(inpt) + else: + inpt.get_operator().attr.set_attr("constant",True) + + #Possible way to have a registry of looked at nodes to prevent unecessary iterations + for node in graphview.get_nodes(): + #Search for Convolution and Fully connected nodes + if node.type() in ["FC","Conv1D", "Conv2D", "Conv3D","ConvDepthWise1D", "ConvDepthWise2D", "ConvDepthWise3D"]: + #iterate over it's weights and if present, bias + for inputs_id in range(node.get_nb_inputs() - 1): + parent_node = node.get_parent(inputs_id + 1) + + #get parent until getting the producer, if directly connected no iteration will be performed + #loop present to 
also be able to freeze producers so that they can get constant folded + if all_producers: + freeze_all(parent_node) + else: + while(parent_node.type() != "Producer"): + parent_node = parent_node.get_parent(0) + if parent_node is None: + raise ValueError ("error, producer not found") + parent_node.get_operator().attr.set_attr("constant",True) diff --git a/include/aidge/recipes/ONNXRecipes.hpp b/include/aidge/recipes/ONNXRecipes.hpp index 1622288..68b5123 100644 --- a/include/aidge/recipes/ONNXRecipes.hpp +++ b/include/aidge/recipes/ONNXRecipes.hpp @@ -25,8 +25,8 @@ namespace Aidge { * @param qop if true indicates inclusion of metaoperator qlinearconv, if false only quantizelinear and dequantizelinear will be created */ void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool qop); -std::shared_ptr<Node> createQuantizeLinearNode(float scalingFactor = 1.0, int8_t zeroPoint = 0); -std::shared_ptr<Node> createDequantizeLinearNode(Tensor descalingFactor, int8_t zeroPoint,Aidge::DataType castDtype); +std::shared_ptr<Node> createQuantizeLinearNode(float scalingFactor = 1.0, int8_t zeroPoint = 0,const std::string basename = ""); +std::shared_ptr<Node> createDequantizeLinearNode(Tensor descalingFactor, int8_t zeroPoint,Aidge::DataType castDtype,const std::string basename = ""); } #endif //AIDGE_QUANTIZATION_RECIPES_ONNXRECIPES_H_ diff --git a/src/recipes/ONNXRecipes.cpp b/src/recipes/ONNXRecipes.cpp index c2c1093..f028cfc 100644 --- a/src/recipes/ONNXRecipes.cpp +++ b/src/recipes/ONNXRecipes.cpp @@ -28,18 +28,22 @@ namespace Aidge { -std::shared_ptr<Node> createQuantizeLinearNode(float scalingFactor, int8_t zeroPoint){ +std::shared_ptr<Node> createQuantizeLinearNode(float scalingFactor, int8_t zeroPoint,const std::string basename){ //returns the Graphview of a QuantizeLinear metaop with it's producers - const std::shared_ptr<Node> mulNode = Mul(); - const std::shared_ptr<Node> roundNode = Round(); - const std::shared_ptr<Node> addNode = Add(); + const 
std::shared_ptr<Node> mulNode = Mul(basename == "" ? "" : basename + "_MulQuant"); + const std::shared_ptr<Node> roundNode = Round(basename == "" ? "" : basename + "_RoundQuant"); + const std::shared_ptr<Node> addNode = Add(basename == "" ? "" : basename + "_AddQuant"); const std::shared_ptr<GraphView> qlGraph = Sequential({mulNode, roundNode, addNode}); + + const std::shared_ptr<Node> quantizeMetaOp = MetaOperator("QuantizeLinear", qlGraph,{},basename == "" ? "" : basename + "_QuantLinear"); - const std::shared_ptr<Node> quantizeMetaOp = MetaOperator("QuantizeLinear", qlGraph); + mulNode->getOperator()->setDataType(DataType::Float32); + roundNode->getOperator()->setDataType(DataType::Float32); + addNode->getOperator()->setDataType(DataType::Int8); - const std::shared_ptr<Node> addProd = Producer(std::make_shared<Tensor>(Array1D<int8_t, 1>{zeroPoint}),"",true); - const std::shared_ptr<Node> mulProd = Producer(std::make_shared<Tensor>(Array1D<float, 1>{scalingFactor}),"",true); + const std::shared_ptr<Node> addProd = Producer(std::make_shared<Tensor>(Array1D<int8_t, 1>{zeroPoint}),basename == "" ? "" : basename + "_AddZeroPoint",true); + const std::shared_ptr<Node> mulProd = Producer(std::make_shared<Tensor>(Array1D<float, 1>{scalingFactor}),basename == "" ? 
"" : basename + "_MulScale",true); mulProd->addChild(quantizeMetaOp,0,1); addProd->addChild(quantizeMetaOp,0,2); @@ -47,24 +51,28 @@ std::shared_ptr<Node> createQuantizeLinearNode(float scalingFactor, int8_t zeroP return quantizeMetaOp; } -std::shared_ptr<Node> createDequantizeLinearNode(Tensor descalingFactor, int8_t zeroPoint,Aidge::DataType castDtype){ +std::shared_ptr<Node> createDequantizeLinearNode(Tensor descalingFactor, int8_t zeroPoint,DataType castDtype, const std::string basename){ //returns the Graphview of a QuantizeLinear metaop with it's producers - const std::shared_ptr<Node> subNode = Sub(); - const std::shared_ptr<Node> castNode = Cast(castDtype); - const std::shared_ptr<Node> mulNode = Mul(); + const std::shared_ptr<Node> subNode = Sub(basename == "" ? "" : basename + "_SubDequant"); + const std::shared_ptr<Node> castNode = Cast(castDtype,basename == "" ? "" : basename + "_CastDequant"); + const std::shared_ptr<Node> mulNode = Mul(basename == "" ? "" : basename + "_MulDequant"); const std::shared_ptr<GraphView> dequantGraph = Sequential({subNode, castNode, mulNode}); - const std::vector<std::pair<std::shared_ptr<Node>, Aidge::IOIndex_t>> dequantOrdInputs = dequantGraph->getOrderedInputs(); - const std::vector<std::pair<std::shared_ptr<Node>, Aidge::IOIndex_t>> newDequantOrder = {dequantOrdInputs[0],//input + const std::vector<std::pair<std::shared_ptr<Node>, IOIndex_t>> dequantOrdInputs = dequantGraph->getOrderedInputs(); + const std::vector<std::pair<std::shared_ptr<Node>, IOIndex_t>> newDequantOrder = {dequantOrdInputs[0],//input dequantOrdInputs[2],//scaling factor dequantOrdInputs[1]};//zero point dequantGraph->setOrderedInputs(newDequantOrder); - const std::shared_ptr<Node> dequantMetaOp = MetaOperator("DequantizeLinear", dequantGraph); + const std::shared_ptr<Node> dequantMetaOp = MetaOperator("DequantizeLinear", dequantGraph,{},basename == "" ? 
"" : basename + "_DequantLinear"); + + subNode->getOperator()->setDataType(DataType::Int8); + castNode->getOperator()->setDataType(DataType::Float32); + mulNode->getOperator()->setDataType(DataType::Float32); - const std::shared_ptr<Node> subProd = Producer(std::make_shared<Tensor>(Array1D<int8_t, 1>{0}),"",true); - const std::shared_ptr<Node> mulProd = Producer(std::make_shared<Tensor>(descalingFactor),"",true);//TODO: dtype dynamic? + const std::shared_ptr<Node> subProd = Producer(std::make_shared<Tensor>(Array1D<int8_t, 1>{0}),basename == "" ? "" : basename + "_SubZeroPoint",true); + const std::shared_ptr<Node> mulProd = Producer(std::make_shared<Tensor>(descalingFactor),basename == "" ? "" : basename + "_MulScale",true); mulProd->addChild(dequantMetaOp,0,1); subProd->addChild(dequantMetaOp,0,2); @@ -76,15 +84,20 @@ void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool qop){ //Add quantizeLinear Metaop at the beginning of the graph //according to aidge's quantification, the first input is not quantized so Sf of 1 and Zp of 0 is performed //Operator is added to conform with ONNX's quantize models form + int inptIdx = 0; for (const auto& node : graphView->inputNodes()){ - const std::shared_ptr<Node> quantizeLinearNode = createQuantizeLinearNode(); - const std::shared_ptr<Aidge::GraphView> quantizeLinearGraph = getConnectedGraphView(quantizeLinearNode); - + + const std::shared_ptr<Node> quantizeLinearNode = createQuantizeLinearNode(1.0,0,"in"+std::to_string(inptIdx)); + const std::shared_ptr<GraphView> quantizeLinearGraph = getConnectedGraphView(quantizeLinearNode); + + graphView->add(quantizeLinearNode); + quantizeLinearGraph->add(node); + //a better function may be used graphView->addChild(quantizeLinearGraph,std::pair<NodePtr, IOIndex_t>(quantizeLinearNode, IOIndex_t(0)),std::pair<NodePtr, IOIndex_t>(node, IOIndex_t(0))); } - const std::set<Aidge::SinglePassGraphMatching::MatchingResult> quantizeMatches = 
SinglePassGraphMatching(graphView).match("Mul#0->Round?;Mul#0<-Producer#0;Mul#0<1-Producer#1"); + const std::set<SinglePassGraphMatching::MatchingResult> quantizeMatches = SinglePassGraphMatching(graphView).match("Mul#0->Round?;Mul#0<-Producer#0;Mul#0<1-Producer#1"); if(quantizeMatches.size()<1) Log::warn("no matches found to convert to Quantize/Dequantize operators"); int nbfusions = 0; @@ -92,13 +105,12 @@ void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool qop){ //Each quantizeLinear will have an additional Add node(additioning 0) and a DequantizeLinear to conform with quantized ONNX models for (const auto& match : quantizeMatches) { Log::info("Init 1 loop"); - std::shared_ptr<Node> quantMulProd = nullptr; + // std::shared_ptr<Node> quantMulProd = nullptr; std::shared_ptr<Node> quantMulOp = nullptr; for (const auto& node: match.graph->getNodes()){ if(node->type() == "Mul"){ quantMulOp = node; Log::info("got mul"); - break; } } @@ -110,33 +122,35 @@ void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool qop){ Log::info("mul operator {} skipped, not part of quantization process",quantMulOp->name()); continue; } + const std::string mulQuantName = quantMulOp->name(); + SinglePassGraphMatching::MatchingResult quantizeLinearSubGraph = *SinglePassGraphMatching(match.graph).match("Mul#0->Round?").begin(); - Aidge::SinglePassGraphMatching::MatchingResult quantizeLinearSubGraph = *SinglePassGraphMatching(match.graph).match("Mul#0->Round?").begin(); + const std::shared_ptr<Node> addNode = Add(mulQuantName == "" ? 
"" : mulQuantName + "_Add"); + addNode->getOperator()->setDataType(DataType::Float32); - const std::shared_ptr<Node> addNode = Add(); - const std::shared_ptr<Aidge::GraphView> qlinearGraph = quantizeLinearSubGraph.graph->clone(); + const std::shared_ptr<GraphView> qlinearGraph = quantizeLinearSubGraph.graph->clone(); qlinearGraph->addChild(addNode); - const std::shared_ptr<Aidge::Node> quantMetaOp = MetaOperator("QuantizeLinear", qlinearGraph); + const std::shared_ptr<Node> quantMetaOp = MetaOperator("QuantizeLinear", qlinearGraph, {}, mulQuantName == "" ? "" : mulQuantName + "_QuantLinear"); //TODO : define datatype of producer tensors - const std::shared_ptr<Node> addNodeProd = Producer(std::make_shared<Tensor>(Array1D<int8_t, 1>{0}),"",true); + const std::shared_ptr<Node> addNodeProd = Producer(std::make_shared<Tensor>(Array1D<int8_t, 1>{0}),mulQuantName == "" ? "" : mulQuantName + "_ZeroPoint",true); addNodeProd->addChild(quantMetaOp,0,2); Log::info("encapsulate static_pointer_cast 1 loop"); - const std::shared_ptr<Aidge::Tensor> quantizeSF = std::static_pointer_cast<OperatorTensor>(quantMulOp->getParent(1)->clone()->getOperator())->getOutput(0); + const std::shared_ptr<Tensor> quantizeSF = std::static_pointer_cast<OperatorTensor>(quantMulOp->getParent(1)->clone()->getOperator())->getOutput(0); const Tensor tempTensor = Tensor(Array1D<float, 1>{1}); //Dequantize Scaling factor is the inverse of quantize scaling factor const Tensor dequantizeSF = tempTensor / *quantizeSF; - const std::shared_ptr<Node> dequantMetaOp = createDequantizeLinearNode(dequantizeSF,0,quantizeSF->dataType()); + const std::shared_ptr<Node> dequantMetaOp = createDequantizeLinearNode(dequantizeSF,0,quantizeSF->dataType(),mulQuantName); quantMetaOp->addChild(dequantMetaOp,0,0); - const std::shared_ptr<Aidge::GraphView> metaOpGraph = getConnectedGraphView(dequantMetaOp); + const std::shared_ptr<GraphView> metaOpGraph = getConnectedGraphView(dequantMetaOp); Log::info("end 1 loop"); 
graphView->replace(quantizeLinearSubGraph.graph, metaOpGraph); @@ -155,9 +169,11 @@ void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool qop){ for(const auto& node: nodeList){ if(node->type() == "Quantizer"){ - const std::shared_ptr<Aidge::MetaOperator_Op> metaNode = std::static_pointer_cast<MetaOperator_Op>(node->getOperator()); - const std::shared_ptr<Aidge::GraphView> quantizeMicro = metaNode->getMicroGraph(); - const std::shared_ptr<Node> addNode = Add(); + const std::string quantizerName = node->name(); + const std::shared_ptr<MetaOperator_Op> metaNode = std::static_pointer_cast<MetaOperator_Op>(node->getOperator()); + const std::shared_ptr<GraphView> quantizeMicro = metaNode->getMicroGraph(); + const std::shared_ptr<Node> addNode = Add(quantizerName == "" ? "" : quantizerName + "_Add"); + addNode->getOperator()->setDataType(DataType::Float32); for(const auto quantNode : quantizeMicro->getNodes()){ if(quantNode->type() == "Clip"){ @@ -173,23 +189,23 @@ void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool qop){ quantizeMicro->add(addNodeProd); break; } - } + } fuseToMetaOps(quantizeMicro,"Clip#0<-Add<-Round?<-Mul; Clip#0<1-Producer#0; Clip#0<2-Producer#1","QuantizeLinear"); - std::shared_ptr<Aidge::GraphView> tempGraph = std::make_shared<GraphView>(); + std::shared_ptr<GraphView> tempGraph = std::make_shared<GraphView>(); tempGraph->add(node, false); graphView->replace(tempGraph, quantizeMicro); } } - const std::set<Aidge::SinglePassGraphMatching::MatchingResult> wholeQlinearMatches = SinglePassGraphMatching(graphView).match( + const std::set<SinglePassGraphMatching::MatchingResult> wholeQlinearMatches = SinglePassGraphMatching(graphView).match( //Query is subject to change as quantization operators change "Conv2D#0<1-DequantizeLinear#0<-QuantizeLinear#0<1-Producer#0;" - "Conv2D#0<1-DequantizeLinear#0;"//there was a producer here + "Conv2D#0<1-DequantizeLinear#0;" 
"Conv2D#0<2-(DequantizeLinear#1<-QuantizeLinear#1<-Producer#1)?;" "Conv2D#0<2-(DequantizeLinear#1<-QuantizeLinear#1<1-Producer#2)?;" - "Conv2D#0<2-DequantizeLinear#1?;"//there was a producer here + "Conv2D#0<2-DequantizeLinear#1?;" "Conv2D#0->QuantizeLinear#2" ); @@ -197,9 +213,6 @@ void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool qop){ Log::info("found : {} ",wholeQlinearMatches.size()); for (const auto match : wholeQlinearMatches) { - - - for (const auto& node: match.graph->getNodes()){ //Search the convolution node and look for bias presence const bool hasBias = false; @@ -213,12 +226,11 @@ void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool qop){ int idxInput = 1; if(quantizeNode->type() == "QLinearConv") idxInput = 4; - const std::shared_ptr<Aidge::Tensor> quantizeSF = std::static_pointer_cast<OperatorTensor>(quantizeNode->getParent(idxInput)->getOperator())->getOutput(0); + const std::shared_ptr<Tensor> quantizeSF = std::static_pointer_cast<OperatorTensor>(quantizeNode->getParent(idxInput)->getOperator())->getOutput(0); Log::info("quantize dimensions {}",quantizeSF->dims()); - const std::shared_ptr<Node> dequantMetaOp = createDequantizeLinearNode(quantizeSF->clone(),0,quantizeSF->dataType()); - const std::shared_ptr<Aidge::GraphView> dequantGraph = getConnectedGraphView(dequantMetaOp); - + const std::shared_ptr<Node> dequantMetaOp = createDequantizeLinearNode(quantizeSF->clone(),0,quantizeSF->dataType(),node->name()); + const std::shared_ptr<GraphView> dequantGraph = getConnectedGraphView(dequantMetaOp); quantizeNode->addChild(dequantMetaOp,0,0); dequantMetaOp->addChild(node,0,0); @@ -232,13 +244,13 @@ void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool qop){ //bias and bias scaling factor have to be modified so it corresponds to ONNX's bias scaling factor formula: biasSF = inputSF * weightSF //TEMP: placeholder while quantizer node is not present at the input of convolution node - const 
std::shared_ptr<Aidge::Tensor> inputSFTensor = std::make_shared<Tensor>(Array1D<double, 1> {1}); + const std::shared_ptr<Tensor> inputSFTensor = std::make_shared<Tensor>(Array1D<double, 1> {1}); Log::info("inpusf calc"); Log::info("middle middle middle calc"); - const std::shared_ptr<Aidge::Tensor> weightSFTensor = std::static_pointer_cast<OperatorTensor>(node->getParent(1)->getParent(0)->getParent(1)->getOperator())->getOutput(0); + const std::shared_ptr<Tensor> weightSFTensor = std::static_pointer_cast<OperatorTensor>(node->getParent(1)->getParent(0)->getParent(1)->getOperator())->getOutput(0); Log::info("dims: {}",weightSFTensor->dims()); inputSFTensor->setDataType(weightSFTensor->dataType()); @@ -247,8 +259,8 @@ void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool qop){ const std::shared_ptr<Node>& biasProd = node->getParent(2)->getParent(0)->getParent(0); const std::shared_ptr<Node>& biasSFProd = node->getParent(2)->getParent(0)->getParent(1); Log::info("middle calc"); - const std::shared_ptr<Aidge::Tensor> biasTensor = std::static_pointer_cast<OperatorTensor>(biasProd->getOperator())->getOutput(0); - const std::shared_ptr<Aidge::Tensor> biasSFTensor = std::static_pointer_cast<OperatorTensor>(biasSFProd->getOperator())->getOutput(0); + const std::shared_ptr<Tensor> biasTensor = std::static_pointer_cast<OperatorTensor>(biasProd->getOperator())->getOutput(0); + const std::shared_ptr<Tensor> biasSFTensor = std::static_pointer_cast<OperatorTensor>(biasSFProd->getOperator())->getOutput(0); Log::info("middle down calc"); const Tensor newBiasSFTensor = *inputSFTensor* *weightSFTensor; @@ -269,19 +281,28 @@ void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool qop){ graphView->save("/data1/is156025/ll277219/task_quantif_export/export/aidge_models/beforeQOP"); if (qop){ - const std::set<Aidge::SinglePassGraphMatching::MatchingResult> qlinearMatchs = SinglePassGraphMatching(match.graph).match("Conv2D#0<-DequantizeLinear#0;" + const 
std::set<SinglePassGraphMatching::MatchingResult> qlinearMatchs = SinglePassGraphMatching(match.graph).match("Conv2D#0<-DequantizeLinear#0;" "Conv2D#0<1-DequantizeLinear#1;" "Conv2D#0<2-DequantizeLinear#2?;" "Conv2D#0->QuantizeLinear"); Log::info("matches: {}", qlinearMatchs.size()); - Aidge::SinglePassGraphMatching::MatchingResult onlyMatch = *qlinearMatchs.begin(); + SinglePassGraphMatching::MatchingResult onlyMatch = *qlinearMatchs.begin(); - const std::shared_ptr<Aidge::GraphView> qlinearconvGraph = onlyMatch.graph->clone();//deletable?? + std::string convBaseName; + for (const auto node :onlyMatch.graph->getNodes()) + { + if(node->type() == "Conv2D"){ + convBaseName = node->name(); + break; + } + } + + // const std::shared_ptr<GraphView> qlinearconvGraph = onlyMatch.graph->clone();//deletable?? onlyMatch.graph->save("/data1/is156025/ll277219/task_quantif_export/export/aidge_models/oldorder"); - const std::vector<std::pair<std::shared_ptr<Node>, Aidge::IOIndex_t>> qConvOrdInputs = onlyMatch.graph->getOrderedInputs(); - const std::vector<std::pair<std::shared_ptr<Node>, Aidge::IOIndex_t>> newQConvOrder = {qConvOrdInputs[0],//x input + const std::vector<std::pair<std::shared_ptr<Node>, IOIndex_t>> qConvOrdInputs = onlyMatch.graph->getOrderedInputs(); + const std::vector<std::pair<std::shared_ptr<Node>, IOIndex_t>> newQConvOrder = {qConvOrdInputs[0],//x input qConvOrdInputs[1],//x scale qConvOrdInputs[2],//x zero point qConvOrdInputs[3],//w @@ -295,9 +316,9 @@ void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool qop){ onlyMatch.graph->setOrderedInputs(newQConvOrder); onlyMatch.graph->save("/data1/is156025/ll277219/task_quantif_export/export/aidge_models/neworder"); - const std::shared_ptr<Node> qlinearMetaOp = MetaOperator("QLinearConv", onlyMatch.graph->clone()); + const std::shared_ptr<Node> qlinearMetaOp = MetaOperator("QLinearConv", onlyMatch.graph->clone(),{},convBaseName == "" ? 
"" : convBaseName+"_QlinearConv"); - const std::shared_ptr<Aidge::GraphView> metaOpGraph = std::make_shared<GraphView>(); + const std::shared_ptr<GraphView> metaOpGraph = std::make_shared<GraphView>(); metaOpGraph->add(qlinearMetaOp, false); const bool qlinearReplaced = graphView->replace(onlyMatch.graph, metaOpGraph); @@ -307,15 +328,14 @@ void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool qop){ Log::info("nb of parnets {}",qConvOrdInputs[8].first->getParents().size()); std::shared_ptr<Node> first_step = qlinearMetaOp->getParent(8); + if (first_step == nullptr) Log::info("error"); else Log::info("parent type: {}", first_step->type()); - std::shared_ptr<Aidge::DynamicAttributes> second_step = first_step->attributes(); + std::shared_ptr<DynamicAttributes> second_step = first_step->attributes(); second_step->addAttr("outputDtype",6);//6 corresponds to int32 in ONNX's Datatype enum - //6 corresponds to int32 in ONNX's Datatype enum std::static_pointer_cast<OperatorTensor>(first_step->getParent(2)->getOperator())->getOutput(0)->setDataType(DataType::Int32); - nbfusions++; } } @@ -333,24 +353,35 @@ void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool qop){ Log::info("middle"); - const std::shared_ptr<Aidge::Tensor> quantizeSF = std::static_pointer_cast<OperatorTensor>(node->getParent(idxInput)->getOperator())->getOutput(0); + const std::shared_ptr<Tensor> quantizeSF = std::static_pointer_cast<OperatorTensor>(node->getParent(idxInput)->getOperator())->getOutput(0); Log::info("quantize dimensions {}",quantizeSF->dims()); const Tensor tempTensor = Tensor(Array1D<float, 1>{1}); const Tensor dequantizeSF = tempTensor / *quantizeSF; - const std::shared_ptr<Node> dequantMetaOp = createDequantizeLinearNode(dequantizeSF,0,quantizeSF->dataType()); - const std::shared_ptr<Aidge::GraphView> dequantGraph = getConnectedGraphView(dequantMetaOp); + const std::shared_ptr<Node> dequantMetaOp = 
createDequantizeLinearNode(dequantizeSF,0,quantizeSF->dataType(),node->name()); + const std::shared_ptr<GraphView> dequantGraph = getConnectedGraphView(dequantMetaOp); graphView->addChild(dequantGraph,std::pair<NodePtr, IOIndex_t>(node, IOIndex_t(0)),std::pair<NodePtr, IOIndex_t>(dequantMetaOp, IOIndex_t(0))); } + graphView->setBackend("cpu");//get dynamically + Log::notice("Before folding--------------------------"); + graphView->save("/data1/is156025/ll277219/task_quantif_export/export/aidge_models/ENDRECIPE"); - constantFolding(graphView); + const std::set<SinglePassGraphMatching::MatchingResult> foldQuantize = SinglePassGraphMatching(graphView).match( + //Query is subject to change as quantization operators change + "QuantizeLinear#0<-Producer#0;" + "QuantizeLinear#0<1-Producer#1;" + "QuantizeLinear#0<2-Producer#2?" + ); + for(const auto match : foldQuantize){ + constantFolding(match.graph); + } + // constantFolding(graphView); graphView->save("/data1/is156025/ll277219/task_quantif_export/export/aidge_models/folded"); - - Log::info("after last verify"); + Log::notice("after last verify----------------------------"); } } \ No newline at end of file -- GitLab From b8eb48ceb4461092c1ab3eb39ad655892a9de02f Mon Sep 17 00:00:00 2001 From: LOPEZ MAPE Lucas <lucas.lopezmape@cea.fr> Date: Tue, 11 Feb 2025 15:35:33 +0000 Subject: [PATCH 37/44] added bias verification --- src/recipes/ONNXRecipes.cpp | 35 ++++++++++++++++------------------- 1 file changed, 16 insertions(+), 19 deletions(-) diff --git a/src/recipes/ONNXRecipes.cpp b/src/recipes/ONNXRecipes.cpp index f028cfc..56e6083 100644 --- a/src/recipes/ONNXRecipes.cpp +++ b/src/recipes/ONNXRecipes.cpp @@ -213,13 +213,12 @@ void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool qop){ Log::info("found : {} ",wholeQlinearMatches.size()); for (const auto match : wholeQlinearMatches) { + bool hasBias = false; for (const auto& node: match.graph->getNodes()){ //Search the convolution node and look for bias 
presence - const bool hasBias = false; - if(node->getParents().size() > 2) const bool hasBias = true; - if(node->type() == "Conv2D"){ - + if(node->getParents().size() > 2) hasBias = true; + if (node->getParent(0)->type() == "QlinearConv" || node->getParent(0)->type() == "QuantizeLinear"){ const std::shared_ptr<Node> quantizeNode = node->getParent(0); @@ -301,8 +300,10 @@ void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool qop){ // const std::shared_ptr<GraphView> qlinearconvGraph = onlyMatch.graph->clone();//deletable?? onlyMatch.graph->save("/data1/is156025/ll277219/task_quantif_export/export/aidge_models/oldorder"); - const std::vector<std::pair<std::shared_ptr<Node>, IOIndex_t>> qConvOrdInputs = onlyMatch.graph->getOrderedInputs(); - const std::vector<std::pair<std::shared_ptr<Node>, IOIndex_t>> newQConvOrder = {qConvOrdInputs[0],//x input + + if(hasBias){ + const std::vector<std::pair<std::shared_ptr<Node>, IOIndex_t>> qConvOrdInputs = onlyMatch.graph->getOrderedInputs(); + const std::vector<std::pair<std::shared_ptr<Node>, IOIndex_t>> newQConvOrder = {qConvOrdInputs[0],//x input qConvOrdInputs[1],//x scale qConvOrdInputs[2],//x zero point qConvOrdInputs[3],//w @@ -313,7 +314,10 @@ void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool qop){ qConvOrdInputs[6],//b qConvOrdInputs[7],//b scale qConvOrdInputs[8],};//b zero point - onlyMatch.graph->setOrderedInputs(newQConvOrder); + onlyMatch.graph->setOrderedInputs(newQConvOrder); + Log::info("nb of parnets {}",qConvOrdInputs[8].first->getParents().size()); + } + onlyMatch.graph->save("/data1/is156025/ll277219/task_quantif_export/export/aidge_models/neworder"); const std::shared_ptr<Node> qlinearMetaOp = MetaOperator("QLinearConv", onlyMatch.graph->clone(),{},convBaseName == "" ? 
"" : convBaseName+"_QlinearConv"); @@ -324,18 +328,11 @@ void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool qop){ const bool qlinearReplaced = graphView->replace(onlyMatch.graph, metaOpGraph); AIDGE_ASSERT(qlinearReplaced,"Unexpected error, couldn't replace subgraph with QlinearConv operator") - - Log::info("nb of parnets {}",qConvOrdInputs[8].first->getParents().size()); - - std::shared_ptr<Node> first_step = qlinearMetaOp->getParent(8); - - if (first_step == nullptr) Log::info("error"); - else Log::info("parent type: {}", first_step->type()); - - std::shared_ptr<DynamicAttributes> second_step = first_step->attributes(); - second_step->addAttr("outputDtype",6);//6 corresponds to int32 in ONNX's Datatype enum - - std::static_pointer_cast<OperatorTensor>(first_step->getParent(2)->getOperator())->getOutput(0)->setDataType(DataType::Int32); + if(hasBias){ + std::shared_ptr<Node> biasQuantLinear = qlinearMetaOp->getParent(8); + biasQuantLinear->attributes()->addAttr("outputDtype",6);//6 corresponds to int32 in ONNX's Datatype enum + std::static_pointer_cast<OperatorTensor>(biasQuantLinear->getParent(2)->getOperator())->getOutput(0)->setDataType(DataType::Int32); + } nbfusions++; } } -- GitLab From 7794a23052c5481ef456af1e435657e45cff0efc Mon Sep 17 00:00:00 2001 From: LOPEZ MAPE Lucas <lucas.lopezmape@cea.fr> Date: Wed, 12 Feb 2025 10:55:23 +0000 Subject: [PATCH 38/44] matching fixes, tensors modified replaced --- src/recipes/ONNXRecipes.cpp | 41 +++++++++++++++++++++++++++---------- 1 file changed, 30 insertions(+), 11 deletions(-) diff --git a/src/recipes/ONNXRecipes.cpp b/src/recipes/ONNXRecipes.cpp index 56e6083..ee2d147 100644 --- a/src/recipes/ONNXRecipes.cpp +++ b/src/recipes/ONNXRecipes.cpp @@ -206,6 +206,8 @@ void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool qop){ "Conv2D#0<2-(DequantizeLinear#1<-QuantizeLinear#1<-Producer#1)?;" "Conv2D#0<2-(DequantizeLinear#1<-QuantizeLinear#1<1-Producer#2)?;" 
"Conv2D#0<2-DequantizeLinear#1?;" + "Conv2D#0<2-(DequantizeLinear#1<1-Producer#0)?;" + "Conv2D#0<2-(DequantizeLinear#1<2-Producer#1)?;" "Conv2D#0->QuantizeLinear#2" ); @@ -255,8 +257,8 @@ void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool qop){ Log::info("middle middle calc"); - const std::shared_ptr<Node>& biasProd = node->getParent(2)->getParent(0)->getParent(0); - const std::shared_ptr<Node>& biasSFProd = node->getParent(2)->getParent(0)->getParent(1); + const std::shared_ptr<Node> biasProd = node->getParent(2)->getParent(0)->getParent(0); + const std::shared_ptr<Node> biasSFProd = node->getParent(2)->getParent(0)->getParent(1); Log::info("middle calc"); const std::shared_ptr<Tensor> biasTensor = std::static_pointer_cast<OperatorTensor>(biasProd->getOperator())->getOutput(0); const std::shared_ptr<Tensor> biasSFTensor = std::static_pointer_cast<OperatorTensor>(biasSFProd->getOperator())->getOutput(0); @@ -266,8 +268,20 @@ void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool qop){ const Tensor newBiasTensor = (*biasSFTensor* *biasTensor)/newBiasSFTensor; Log::info("down up"); - biasProd->getOperator()->setOutput(0,std::make_shared<Tensor>(newBiasTensor)); - biasSFProd->getOperator()->setOutput(0,std::make_shared<Tensor>(newBiasSFTensor)); + + bool biasProdWasConstant = std::static_pointer_cast<Producer_Op>(biasProd->getOperator())->constant(); + if(biasProdWasConstant){ + const std::shared_ptr<Node> newBiasProd = Producer(std::make_shared<Tensor>(newBiasTensor),biasProd->name(),true); + graphView->replace(std::set<std::shared_ptr<Node>>{biasProd},std::set<std::shared_ptr<Node>>{newBiasProd}); + } + else biasProd->getOperator()->setOutput(0,std::make_shared<Tensor>(newBiasTensor)); + + biasProdWasConstant = std::static_pointer_cast<Producer_Op>(biasSFProd->getOperator())->constant(); + if(biasProdWasConstant){ + const std::shared_ptr<Node> newBiasSFProd = 
Producer(std::make_shared<Tensor>(newBiasSFTensor),biasSFProd->name(),true); + graphView->replace(std::set<std::shared_ptr<Node>>{biasSFProd},std::set<std::shared_ptr<Node>>{newBiasSFProd}); + } + else biasSFProd->getOperator()->setOutput(0,std::make_shared<Tensor>(newBiasSFTensor)); Log::info("Bias and Bias Scaling factor values changed to ONNX standard"); break;//only one conv per match @@ -282,7 +296,8 @@ void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool qop){ if (qop){ const std::set<SinglePassGraphMatching::MatchingResult> qlinearMatchs = SinglePassGraphMatching(match.graph).match("Conv2D#0<-DequantizeLinear#0;" "Conv2D#0<1-DequantizeLinear#1;" - "Conv2D#0<2-DequantizeLinear#2?;" + "Conv2D#0<2-(DequantizeLinear#2<1-Producer#0)?;" + "Conv2D#0<2-(DequantizeLinear#2<2-Producer#1)?;" "Conv2D#0->QuantizeLinear"); Log::info("matches: {}", qlinearMatchs.size()); @@ -299,7 +314,8 @@ void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool qop){ // const std::shared_ptr<GraphView> qlinearconvGraph = onlyMatch.graph->clone();//deletable?? 
onlyMatch.graph->save("/data1/is156025/ll277219/task_quantif_export/export/aidge_models/oldorder"); - + match.graph->save("/data1/is156025/ll277219/task_quantif_export/export/aidge_models/oldordermedium"); + graphView->save("/data1/is156025/ll277219/task_quantif_export/export/aidge_models/OLDORDERBIG"); if(hasBias){ const std::vector<std::pair<std::shared_ptr<Node>, IOIndex_t>> qConvOrdInputs = onlyMatch.graph->getOrderedInputs(); @@ -309,11 +325,11 @@ void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool qop){ qConvOrdInputs[3],//w qConvOrdInputs[4],//w scale qConvOrdInputs[5],//w zero point - qConvOrdInputs[9],//y scale - qConvOrdInputs[10],//y zero point - qConvOrdInputs[6],//b - qConvOrdInputs[7],//b scale - qConvOrdInputs[8],};//b zero point + qConvOrdInputs[7],//y scale + qConvOrdInputs[8],//y zero point + qConvOrdInputs[6]};//b + // qConvOrdInputs[7],//b scale + // qConvOrdInputs[8],};//b zero point onlyMatch.graph->setOrderedInputs(newQConvOrder); Log::info("nb of parnets {}",qConvOrdInputs[8].first->getParents().size()); } @@ -331,7 +347,10 @@ void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool qop){ if(hasBias){ std::shared_ptr<Node> biasQuantLinear = qlinearMetaOp->getParent(8); biasQuantLinear->attributes()->addAttr("outputDtype",6);//6 corresponds to int32 in ONNX's Datatype enum + Log::info("onodatatype"); std::static_pointer_cast<OperatorTensor>(biasQuantLinear->getParent(2)->getOperator())->getOutput(0)->setDataType(DataType::Int32); + Log::info("onodatatype2"); + } nbfusions++; } -- GitLab From 88b3d36e1cfbabac247db96439d5e5b57c6f666c Mon Sep 17 00:00:00 2001 From: LOPEZ MAPE Lucas <lucas.lopezmape@cea.fr> Date: Thu, 13 Feb 2025 08:44:14 +0000 Subject: [PATCH 39/44] fixes for qdq dtypes, manual constant folding --- src/recipes/ONNXRecipes.cpp | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/src/recipes/ONNXRecipes.cpp b/src/recipes/ONNXRecipes.cpp index 
ee2d147..5321b77 100644 --- a/src/recipes/ONNXRecipes.cpp +++ b/src/recipes/ONNXRecipes.cpp @@ -126,15 +126,21 @@ void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool qop){ SinglePassGraphMatching::MatchingResult quantizeLinearSubGraph = *SinglePassGraphMatching(match.graph).match("Mul#0->Round?").begin(); const std::shared_ptr<Node> addNode = Add(mulQuantName == "" ? "" : mulQuantName + "_Add"); - addNode->getOperator()->setDataType(DataType::Float32); + // addNode->getOperator()->setDataType(DataType::Float32); + addNode->getOperator()->setDataType(DataType::Int8); + + const std::shared_ptr<Node> castNode = Cast(DataType::Int8,mulQuantName == "" ? "" : mulQuantName + "_Cast"); + castNode->getOperator()->setDataType(DataType::Int8); const std::shared_ptr<GraphView> qlinearGraph = quantizeLinearSubGraph.graph->clone(); + qlinearGraph->addChild(castNode); qlinearGraph->addChild(addNode); const std::shared_ptr<Node> quantMetaOp = MetaOperator("QuantizeLinear", qlinearGraph, {}, mulQuantName == "" ? "" : mulQuantName + "_QuantLinear"); - //TODO : define datatype of producer tensors + //TODO : define datatype of producer tensors CHANGE BACK + // const std::shared_ptr<Node> addNodeProd = Producer(std::make_shared<Tensor>(Array1D<float, 1>{0}),mulQuantName == "" ? "" : mulQuantName + "_ZeroPoint",true); const std::shared_ptr<Node> addNodeProd = Producer(std::make_shared<Tensor>(Array1D<int8_t, 1>{0}),mulQuantName == "" ? 
"" : mulQuantName + "_ZeroPoint",true); addNodeProd->addChild(quantMetaOp,0,2); @@ -328,8 +334,6 @@ void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool qop){ qConvOrdInputs[7],//y scale qConvOrdInputs[8],//y zero point qConvOrdInputs[6]};//b - // qConvOrdInputs[7],//b scale - // qConvOrdInputs[8],};//b zero point onlyMatch.graph->setOrderedInputs(newQConvOrder); Log::info("nb of parnets {}",qConvOrdInputs[8].first->getParents().size()); } @@ -392,9 +396,18 @@ void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool qop){ "QuantizeLinear#0<2-Producer#2?" ); for(const auto match : foldQuantize){ - constantFolding(match.graph); + auto quantizeFolder = SequentialScheduler(match.graph); + quantizeFolder.forward(); + + const std::shared_ptr<Tensor> foldedTensor = std::make_shared<Tensor>(std::static_pointer_cast<OperatorTensor>((*match.graph->outputNodes().begin())->getOperator())->getOutput(0)->clone()); + const std::shared_ptr<Node> foldedProd = Producer(foldedTensor, "", true); + const std::shared_ptr<GraphView> foldedGraph = std::make_shared<GraphView>(); + + foldedGraph->add(foldedProd); + graphView->replace(match.graph,foldedGraph); + // constantFolding(match.graph); } - // constantFolding(graphView); + graphView->save("/data1/is156025/ll277219/task_quantif_export/export/aidge_models/folded"); Log::notice("after last verify----------------------------"); -- GitLab From 6532e51283dbcfc1719aaff2deef8154c1b44048 Mon Sep 17 00:00:00 2001 From: LOPEZ MAPE Lucas <lucas.lopezmape@cea.fr> Date: Thu, 13 Feb 2025 11:05:21 +0000 Subject: [PATCH 40/44] correct bias dtype for qlinearconv --- src/recipes/ONNXRecipes.cpp | 29 ++++++++++++++++++++++------- 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/src/recipes/ONNXRecipes.cpp b/src/recipes/ONNXRecipes.cpp index 5321b77..6998c95 100644 --- a/src/recipes/ONNXRecipes.cpp +++ b/src/recipes/ONNXRecipes.cpp @@ -349,10 +349,23 @@ void 
quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool qop){ AIDGE_ASSERT(qlinearReplaced,"Unexpected error, couldn't replace subgraph with QlinearConv operator") if(hasBias){ - std::shared_ptr<Node> biasQuantLinear = qlinearMetaOp->getParent(8); - biasQuantLinear->attributes()->addAttr("outputDtype",6);//6 corresponds to int32 in ONNX's Datatype enum + // biasQuantLinear->attributes()->addAttr("outputDtype",6);//6 corresponds to int32 in ONNX's Datatype enum + Log::info("onodatatype"); - std::static_pointer_cast<OperatorTensor>(biasQuantLinear->getParent(2)->getOperator())->getOutput(0)->setDataType(DataType::Int32); + + const std::shared_ptr<Node> quantizeLinearB = qlinearMetaOp->getParent(8); + const auto quantizeNodes = std::static_pointer_cast<MetaOperator_Op>(quantizeLinearB->getOperator())->getMicroGraph()->getNodes(); + + for (const auto node : quantizeNodes){ + //Cast operator configured to castand add to int32 according to onnx bias requirements + + const std::string nodeOPtype= node->type(); + if(nodeOPtype == "Cast" || nodeOPtype == "Add"){ + node->getOperator()->setDataType(DataType::Int32); + if(nodeOPtype == "Cast") std::static_pointer_cast<Cast_Op>(node->getOperator())->targetType() = DataType::Int32; + } + } + std::static_pointer_cast<OperatorTensor>(quantizeLinearB->getParent(2)->getOperator())->getOutput(0)->setDataType(DataType::Int32); Log::info("onodatatype2"); } @@ -385,7 +398,7 @@ void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool qop){ graphView->addChild(dequantGraph,std::pair<NodePtr, IOIndex_t>(node, IOIndex_t(0)),std::pair<NodePtr, IOIndex_t>(dequantMetaOp, IOIndex_t(0))); } - graphView->setBackend("cpu");//get dynamically + graphView->setBackend("cpu");//TODO get dynamically Log::notice("Before folding--------------------------"); graphView->save("/data1/is156025/ll277219/task_quantif_export/export/aidge_models/ENDRECIPE"); @@ -399,13 +412,15 @@ void quantizeMatchingtoExport(std::shared_ptr<GraphView> 
graphView, bool qop){ auto quantizeFolder = SequentialScheduler(match.graph); quantizeFolder.forward(); - const std::shared_ptr<Tensor> foldedTensor = std::make_shared<Tensor>(std::static_pointer_cast<OperatorTensor>((*match.graph->outputNodes().begin())->getOperator())->getOutput(0)->clone()); + const auto quantizeLinearNode = *match.graph->outputNodes().begin(); + + const std::shared_ptr<Tensor> foldedTensor = std::make_shared<Tensor>(std::static_pointer_cast<OperatorTensor>((quantizeLinearNode)->getOperator())->getOutput(0)->clone()); const std::shared_ptr<Node> foldedProd = Producer(foldedTensor, "", true); const std::shared_ptr<GraphView> foldedGraph = std::make_shared<GraphView>(); - + + foldedGraph->add(foldedProd); graphView->replace(match.graph,foldedGraph); - // constantFolding(match.graph); } graphView->save("/data1/is156025/ll277219/task_quantif_export/export/aidge_models/folded"); -- GitLab From 14ccb7907c696109cad32f8285958c9e292017d1 Mon Sep 17 00:00:00 2001 From: LOPEZ MAPE Lucas <lucas.lopezmape@cea.fr> Date: Thu, 20 Feb 2025 14:41:48 +0000 Subject: [PATCH 41/44] order of quantizelinear nodes rewordked, dtype fixe, more comments --- include/aidge/recipes/ONNXRecipes.hpp | 6 +- python_binding/recipes/pybind_ONNXRecipes.cpp | 2 +- src/operator/PTQMetaOps.cpp | 7 +- src/recipes/ONNXRecipes.cpp | 270 +++++++++--------- 4 files changed, 150 insertions(+), 135 deletions(-) diff --git a/include/aidge/recipes/ONNXRecipes.hpp b/include/aidge/recipes/ONNXRecipes.hpp index 68b5123..14bf6bb 100644 --- a/include/aidge/recipes/ONNXRecipes.hpp +++ b/include/aidge/recipes/ONNXRecipes.hpp @@ -24,9 +24,9 @@ namespace Aidge { * @param graphView The GraphView to process. 
* @param qop if true indicates inclusion of metaoperator qlinearconv, if false only quantizelinear and dequantizelinear will be created */ -void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool qop); -std::shared_ptr<Node> createQuantizeLinearNode(float scalingFactor = 1.0, int8_t zeroPoint = 0,const std::string basename = ""); -std::shared_ptr<Node> createDequantizeLinearNode(Tensor descalingFactor, int8_t zeroPoint,Aidge::DataType castDtype,const std::string basename = ""); +void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool qop = true, bool foldWeights = true); +std::shared_ptr<Node> createQuantizeLinearNode(float scalingFactor = 1.0, uint8_t zeroPoint = 0,const std::string basename = ""); +std::shared_ptr<Node> createDequantizeLinearNode(Tensor descalingFactor, uint8_t zeroPoint,Aidge::DataType castDtype,const std::string basename = ""); } #endif //AIDGE_QUANTIZATION_RECIPES_ONNXRECIPES_H_ diff --git a/python_binding/recipes/pybind_ONNXRecipes.cpp b/python_binding/recipes/pybind_ONNXRecipes.cpp index 0e5c8e9..d1ea339 100644 --- a/python_binding/recipes/pybind_ONNXRecipes.cpp +++ b/python_binding/recipes/pybind_ONNXRecipes.cpp @@ -21,7 +21,7 @@ namespace Aidge { void init_ONNXRecipes(py::module &m) { - m.def("quantize_matching_to_export", &quantizeMatchingtoExport, py::arg("graphView"), py::arg("qop")=true); + m.def("quantize_matching_to_export", &quantizeMatchingtoExport, py::arg("graphView"), py::arg("qop")=true,py::arg("fold_weights")=true); } diff --git a/src/operator/PTQMetaOps.cpp b/src/operator/PTQMetaOps.cpp index 4682842..5816a70 100644 --- a/src/operator/PTQMetaOps.cpp +++ b/src/operator/PTQMetaOps.cpp @@ -55,8 +55,11 @@ std::shared_ptr<Node> Quantizer(double scalingFactor, double clipMin, double cli //Producers added to clip to not have dangling inputs - std::shared_ptr<Node> clipMinProd = addProducer<1>(clipNode, 1, {1}, (!clipNode->name().empty()) ? 
clipNode->name() + "_Min" : ""); - std::shared_ptr<Node> clipMaxProd = addProducer<1>(clipNode, 2, {1}, (!clipNode->name().empty()) ? clipNode->name() + "_Max" : ""); + std::shared_ptr<Node> clipMinProd = addProducer<1>(clipNode, 1, {}, "Min"); + std::shared_ptr<Node> clipMaxProd = addProducer<1>(clipNode, 2, {}, "Max"); + clipMinProd->getOperator()->setOutput(0,std::make_shared<Tensor>(clipMin)); + clipMaxProd->getOperator()->setOutput(0,std::make_shared<Tensor>(clipMax)); + std::shared_ptr<GraphView> connectedGraphView = getConnectedGraphView(mulNode); // XXX why not use the graphView ??? diff --git a/src/recipes/ONNXRecipes.cpp b/src/recipes/ONNXRecipes.cpp index 6998c95..ee8c69a 100644 --- a/src/recipes/ONNXRecipes.cpp +++ b/src/recipes/ONNXRecipes.cpp @@ -28,22 +28,24 @@ namespace Aidge { -std::shared_ptr<Node> createQuantizeLinearNode(float scalingFactor, int8_t zeroPoint,const std::string basename){ +std::shared_ptr<Node> createQuantizeLinearNode(float scalingFactor, uint8_t zeroPoint,const std::string basename){ //returns the Graphview of a QuantizeLinear metaop with it's producers const std::shared_ptr<Node> mulNode = Mul(basename == "" ? "" : basename + "_MulQuant"); const std::shared_ptr<Node> roundNode = Round(basename == "" ? "" : basename + "_RoundQuant"); const std::shared_ptr<Node> addNode = Add(basename == "" ? "" : basename + "_AddQuant"); + const std::shared_ptr<Node> castNode = Cast(DataType::UInt8, basename == "" ? "" : basename + "_castQuant"); - const std::shared_ptr<GraphView> qlGraph = Sequential({mulNode, roundNode, addNode}); + const std::shared_ptr<GraphView> qlGraph = Sequential({mulNode, roundNode, addNode, castNode}); const std::shared_ptr<Node> quantizeMetaOp = MetaOperator("QuantizeLinear", qlGraph,{},basename == "" ? 
"" : basename + "_QuantLinear"); mulNode->getOperator()->setDataType(DataType::Float32); roundNode->getOperator()->setDataType(DataType::Float32); - addNode->getOperator()->setDataType(DataType::Int8); + addNode->getOperator()->setDataType(DataType::Float32); + castNode->getOperator()->setDataType(DataType::UInt8); - const std::shared_ptr<Node> addProd = Producer(std::make_shared<Tensor>(Array1D<int8_t, 1>{zeroPoint}),basename == "" ? "" : basename + "_AddZeroPoint",true); - const std::shared_ptr<Node> mulProd = Producer(std::make_shared<Tensor>(Array1D<float, 1>{scalingFactor}),basename == "" ? "" : basename + "_MulScale",true); + const std::shared_ptr<Node> addProd = Producer(std::make_shared<Tensor>(Array1D<uint8_t, 1>{zeroPoint}),addNode->name() == "" ? "" : addNode->name() + "_ZeroPoint",true); + const std::shared_ptr<Node> mulProd = Producer(std::make_shared<Tensor>(Array1D<float, 1>{scalingFactor}),mulNode->name() == "" ? "" : mulNode->name() + "_ScaleFactor",true); mulProd->addChild(quantizeMetaOp,0,1); addProd->addChild(quantizeMetaOp,0,2); @@ -51,42 +53,44 @@ std::shared_ptr<Node> createQuantizeLinearNode(float scalingFactor, int8_t zeroP return quantizeMetaOp; } -std::shared_ptr<Node> createDequantizeLinearNode(Tensor descalingFactor, int8_t zeroPoint,DataType castDtype, const std::string basename){ +std::shared_ptr<Node> createDequantizeLinearNode(Tensor descalingFactor, uint8_t zeroPoint,DataType castDtype, const std::string basename){ //returns the Graphview of a QuantizeLinear metaop with it's producers - const std::shared_ptr<Node> subNode = Sub(basename == "" ? "" : basename + "_SubDequant"); - const std::shared_ptr<Node> castNode = Cast(castDtype,basename == "" ? "" : basename + "_CastDequant"); - const std::shared_ptr<Node> mulNode = Mul(basename == "" ? "" : basename + "_MulDequant"); + const std::shared_ptr<Node> castNode = Cast(castDtype,basename == "" ? 
"" : basename + "_CastDequant"); + const std::shared_ptr<Node> subNode = Sub(basename == "" ? "" : basename + "_SubDequant"); + const std::shared_ptr<Node> mulNode = Mul(basename == "" ? "" : basename + "_MulDequant"); - const std::shared_ptr<GraphView> dequantGraph = Sequential({subNode, castNode, mulNode}); + const std::shared_ptr<GraphView> dequantGraph = Sequential({castNode, subNode, mulNode}); - const std::vector<std::pair<std::shared_ptr<Node>, IOIndex_t>> dequantOrdInputs = dequantGraph->getOrderedInputs(); - const std::vector<std::pair<std::shared_ptr<Node>, IOIndex_t>> newDequantOrder = {dequantOrdInputs[0],//input - dequantOrdInputs[2],//scaling factor - dequantOrdInputs[1]};//zero point - dequantGraph->setOrderedInputs(newDequantOrder); + const std::vector<std::pair<std::shared_ptr<Node>, IOIndex_t>> dequantOrdInputs = dequantGraph->getOrderedInputs(); + const std::vector<std::pair<std::shared_ptr<Node>, IOIndex_t>> newDequantOrder = {dequantOrdInputs[0],//input + dequantOrdInputs[2],//scaling factor + dequantOrdInputs[1]};//zero point + dequantGraph->setOrderedInputs(newDequantOrder); - const std::shared_ptr<Node> dequantMetaOp = MetaOperator("DequantizeLinear", dequantGraph,{},basename == "" ? "" : basename + "_DequantLinear"); + const std::shared_ptr<Node> dequantMetaOp = MetaOperator("DequantizeLinear", dequantGraph,{},basename == "" ? "" : basename + "_DequantLinear"); - subNode->getOperator()->setDataType(DataType::Int8); - castNode->getOperator()->setDataType(DataType::Float32); - mulNode->getOperator()->setDataType(DataType::Float32); + castNode->getOperator()->setDataType(castDtype); + subNode->getOperator()->setDataType(DataType::Float32); + mulNode->getOperator()->setDataType(DataType::Float32); - const std::shared_ptr<Node> subProd = Producer(std::make_shared<Tensor>(Array1D<int8_t, 1>{0}),basename == "" ? 
"" : basename + "_SubZeroPoint",true); - const std::shared_ptr<Node> mulProd = Producer(std::make_shared<Tensor>(descalingFactor),basename == "" ? "" : basename + "_MulScale",true); + //producer must be uint8 for correct zp dtype in onnx export + const std::shared_ptr<Node> subProd = Producer(std::make_shared<Tensor>(Array1D<uint8_t, 1>{0}),subNode->name() == "" ? "" : subNode->name() + "_ZeroPoint",true); + const std::shared_ptr<Node> mulProd = Producer(std::make_shared<Tensor>(descalingFactor),mulNode->name() == "" ? "" : mulNode->name() + "_ScaleFactor",true); - mulProd->addChild(dequantMetaOp,0,1); - subProd->addChild(dequantMetaOp,0,2); + mulProd->addChild(dequantMetaOp,0,1); + subProd->addChild(dequantMetaOp,0,2); - return dequantMetaOp; + return dequantMetaOp; } -void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool qop){ +void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool qop, bool foldWeights){ //Add quantizeLinear Metaop at the beginning of the graph //according to aidge's quantification, the first input is not quantized so Sf of 1 and Zp of 0 is performed //Operator is added to conform with ONNX's quantize models form + // AIDGE_ASSERT( !qop || foldWeights,"QOP format cannot be exported to onnx without folded weight/bias"); + int inptIdx = 0; for (const auto& node : graphView->inputNodes()){ - const std::shared_ptr<Node> quantizeLinearNode = createQuantizeLinearNode(1.0,0,"in"+std::to_string(inptIdx)); const std::shared_ptr<GraphView> quantizeLinearGraph = getConnectedGraphView(quantizeLinearNode); @@ -94,7 +98,7 @@ void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool qop){ quantizeLinearGraph->add(node); //a better function may be used graphView->addChild(quantizeLinearGraph,std::pair<NodePtr, IOIndex_t>(quantizeLinearNode, IOIndex_t(0)),std::pair<NodePtr, IOIndex_t>(node, IOIndex_t(0))); - + inptIdx++; } const std::set<SinglePassGraphMatching::MatchingResult> quantizeMatches = 
SinglePassGraphMatching(graphView).match("Mul#0->Round?;Mul#0<-Producer#0;Mul#0<1-Producer#1"); @@ -104,13 +108,11 @@ void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool qop){ //QuantizeLinear Creation from Mul->Round? //Each quantizeLinear will have an additional Add node(additioning 0) and a DequantizeLinear to conform with quantized ONNX models for (const auto& match : quantizeMatches) { - Log::info("Init 1 loop"); // std::shared_ptr<Node> quantMulProd = nullptr; std::shared_ptr<Node> quantMulOp = nullptr; for (const auto& node: match.graph->getNodes()){ if(node->type() == "Mul"){ quantMulOp = node; - Log::info("got mul"); break; } } @@ -126,43 +128,30 @@ void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool qop){ SinglePassGraphMatching::MatchingResult quantizeLinearSubGraph = *SinglePassGraphMatching(match.graph).match("Mul#0->Round?").begin(); const std::shared_ptr<Node> addNode = Add(mulQuantName == "" ? "" : mulQuantName + "_Add"); - // addNode->getOperator()->setDataType(DataType::Float32); - addNode->getOperator()->setDataType(DataType::Int8); - - const std::shared_ptr<Node> castNode = Cast(DataType::Int8,mulQuantName == "" ? "" : mulQuantName + "_Cast"); - castNode->getOperator()->setDataType(DataType::Int8); + const std::shared_ptr<Node> castNode = Cast(DataType::UInt8,mulQuantName == "" ? "" : mulQuantName + "_Cast"); + addNode->getOperator()->setDataType(DataType::Float32); + castNode->getOperator()->setDataType(DataType::UInt8); const std::shared_ptr<GraphView> qlinearGraph = quantizeLinearSubGraph.graph->clone(); - - qlinearGraph->addChild(castNode); qlinearGraph->addChild(addNode); + qlinearGraph->addChild(castNode); const std::shared_ptr<Node> quantMetaOp = MetaOperator("QuantizeLinear", qlinearGraph, {}, mulQuantName == "" ? 
"" : mulQuantName + "_QuantLinear"); //TODO : define datatype of producer tensors CHANGE BACK - // const std::shared_ptr<Node> addNodeProd = Producer(std::make_shared<Tensor>(Array1D<float, 1>{0}),mulQuantName == "" ? "" : mulQuantName + "_ZeroPoint",true); - const std::shared_ptr<Node> addNodeProd = Producer(std::make_shared<Tensor>(Array1D<int8_t, 1>{0}),mulQuantName == "" ? "" : mulQuantName + "_ZeroPoint",true); + const std::shared_ptr<Node> addNodeProd = Producer(std::make_shared<Tensor>(Array1D<uint8_t, 1>{0}),mulQuantName == "" ? "" : mulQuantName + "_ZeroPoint",true); addNodeProd->addChild(quantMetaOp,0,2); - Log::info("encapsulate static_pointer_cast 1 loop"); - const std::shared_ptr<Tensor> quantizeSF = std::static_pointer_cast<OperatorTensor>(quantMulOp->getParent(1)->clone()->getOperator())->getOutput(0); const Tensor tempTensor = Tensor(Array1D<float, 1>{1}); - //Dequantize Scaling factor is the inverse of quantize scaling factor const Tensor dequantizeSF = tempTensor / *quantizeSF; const std::shared_ptr<Node> dequantMetaOp = createDequantizeLinearNode(dequantizeSF,0,quantizeSF->dataType(),mulQuantName); - quantMetaOp->addChild(dequantMetaOp,0,0); - const std::shared_ptr<GraphView> metaOpGraph = getConnectedGraphView(dequantMetaOp); - Log::info("end 1 loop"); graphView->replace(quantizeLinearSubGraph.graph, metaOpGraph); - - Log::info("END afterReplace"); - nbfusions++; } Log::info("{} QuantizeLinear and DequantizeLinear added", nbfusions); @@ -172,36 +161,91 @@ void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool qop){ //Modify quantizer so it posseses zero point and conforms with expected metaop in output const std::set<std::shared_ptr<Node>> nodeList = graphView->getNodes(); - for(const auto& node: nodeList){ + for(const std::shared_ptr<Node> node: nodeList){ if(node->type() == "Quantizer"){ const std::string quantizerName = node->name(); const std::shared_ptr<MetaOperator_Op> metaNode = 
std::static_pointer_cast<MetaOperator_Op>(node->getOperator()); const std::shared_ptr<GraphView> quantizeMicro = metaNode->getMicroGraph(); + + const std::shared_ptr<Node> addNodeProd = Producer(std::make_shared<Tensor>(Array1D<uint8_t, 1>{0}), quantizerName == "" ? "" : quantizerName + "_ZeroPoint", true); const std::shared_ptr<Node> addNode = Add(quantizerName == "" ? "" : quantizerName + "_Add"); + const std::shared_ptr<Node> castNode = Cast(DataType::UInt8, quantizerName == "" ? "" : quantizerName + "_Cast"); + addNode->getOperator()->setDataType(DataType::Float32); + castNode->getOperator()->setDataType(DataType::UInt8); + quantizeMicro->save("/data1/is156025/ll277219/task_quantif_export/export/aidge_models/1quantizemicro"); for(const auto quantNode : quantizeMicro->getNodes()){ if(quantNode->type() == "Clip"){ + //parent of clip may be a mul or round node const std::shared_ptr<Node> oldParent = quantNode->getParent(0); + + //Clip is float32 + // quantNode->getOperator()->setDataType(DataType::UInt8); + // quantNode->getParent(1)->getOperator()->setDataType(DataType::UInt8); + // quantNode->getParent(2)->getOperator()->setDataType(DataType::UInt8); oldParent->addChild(addNode,0,0); - addNode->addChild(quantNode,0,0); - - const std::shared_ptr<Node> addNodeProd = Producer(std::make_shared<Tensor>(Array1D<int8_t, 1>{0}),"",true); - addNodeProd->addChild(addNode,0,1); - - quantizeMicro->add(addNode); - quantizeMicro->add(addNodeProd); + addNode->addChild(quantNode,0,0); + quantNode->addChild(castNode,0,0); break; } } + addNodeProd->addChild(addNode,0,1); + quantizeMicro->add(castNode); + quantizeMicro->add(addNode); + quantizeMicro->add(addNodeProd); + + quantizeMicro->save("/data1/is156025/ll277219/task_quantif_export/export/aidge_models/2quantizemicro"); + + // Log::info("-one ======"); + // for(const auto& nde2 : quantizeMicro->getNodes() ){ + // if(nde2->type() == "Cast" ){ + // Log::info("dtype {}", 
std::static_pointer_cast<OperatorTensor>(nde2->getOperator())->getOutput(0)->dataType()); + // } + // } + // ======================== + // SinglePassGraphMatching::MatchingResult quantizeLinearNewGraph = *SinglePassGraphMatching(quantizeMicro).match("Clip#0<-Add<-Cast<-Round?<-Mul; Clip#0<1-Producer#0; Clip#0<2-Producer#1").begin(); + // const std::shared_ptr<Node> newQuantizeMetaop = MetaOperator("QuantizeLinear",quantizeLinearNewGraph.graph->clone(),{},""); + // auto metaOpGraph = std::make_shared<GraphView>(); + // metaOpGraph->add(newQuantizeMetaop, false); + // quantizeMicro->replace(quantizeLinearNewGraph.graph, metaOpGraph); + //========================= + + fuseToMetaOps(quantizeMicro,"Clip#0<-Add<-Round?<-Mul; Clip#0<1-Producer#0; Clip#0<2-Producer#1; Clip#0->Cast","QuantizeLinear"); + graphView->add(addNodeProd); + + Log::info("one======"); + for (const auto nde : quantizeMicro->getNodes()){ + if(nde->type() == "QuantizeLinear"){ + Log::info("{} ==================",nde->name()); + const auto quantigraph = std::static_pointer_cast<MetaOperator_Op>(nde->getOperator())->getMicroGraph(); + for(const auto nde2 : quantigraph->getNodes() ){ + if(nde2->type() == "Cast"){ + Log::info("-- type {}",nde2->type()); + Log::info("dtype {}", std::static_pointer_cast<OperatorTensor>(nde2->getOperator())->getOutput(0)->dataType()); + nde2->getOperator()->setDataType(DataType::UInt8); + Log::info("newdtype {}", std::static_pointer_cast<OperatorTensor>(nde2->getOperator())->getOutput(0)->dataType()); + if(nde2->type() == "Clip"){ + quantigraph->save("/data1/is156025/ll277219/task_quantif_export/export/aidge_models/quantizer"); + } + } + } + + } + } + + Log::info("2cast dtype {} =============================================",std::static_pointer_cast<OperatorTensor>(castNode->getOperator())->getOutput(0)->dataType()); + quantizeMicro->save("/data1/is156025/ll277219/task_quantif_export/export/aidge_models/3quantizemicro"); + 
graphView->save("/data1/is156025/ll277219/task_quantif_export/export/aidge_models/1quantize"); - fuseToMetaOps(quantizeMicro,"Clip#0<-Add<-Round?<-Mul; Clip#0<1-Producer#0; Clip#0<2-Producer#1","QuantizeLinear"); + std::shared_ptr<GraphView> replacedGraph = std::make_shared<GraphView>(); + replacedGraph->add(node); + graphView->save("/data1/is156025/ll277219/task_quantif_export/export/aidge_models/2quantize"); - std::shared_ptr<GraphView> tempGraph = std::make_shared<GraphView>(); - tempGraph->add(node, false); - graphView->replace(tempGraph, quantizeMicro); + graphView->replace(replacedGraph, quantizeMicro); + graphView->save("/data1/is156025/ll277219/task_quantif_export/export/aidge_models/3quantize"); } } @@ -222,11 +266,12 @@ void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool qop){ Log::info("found : {} ",wholeQlinearMatches.size()); for (const auto match : wholeQlinearMatches) { bool hasBias = false; + for (const auto& node: match.graph->getNodes()){ //Search the convolution node and look for bias presence if(node->type() == "Conv2D"){ if(node->getParents().size() > 2) hasBias = true; - + //If previous output is quantized add a dequantizelinear node if (node->getParent(0)->type() == "QlinearConv" || node->getParent(0)->type() == "QuantizeLinear"){ const std::shared_ptr<Node> quantizeNode = node->getParent(0); @@ -234,8 +279,6 @@ void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool qop){ if(quantizeNode->type() == "QLinearConv") idxInput = 4; const std::shared_ptr<Tensor> quantizeSF = std::static_pointer_cast<OperatorTensor>(quantizeNode->getParent(idxInput)->getOperator())->getOutput(0); - Log::info("quantize dimensions {}",quantizeSF->dims()); - const std::shared_ptr<Node> dequantMetaOp = createDequantizeLinearNode(quantizeSF->clone(),0,quantizeSF->dataType(),node->name()); const std::shared_ptr<GraphView> dequantGraph = getConnectedGraphView(dequantMetaOp); @@ -244,36 +287,25 @@ void 
quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool qop){ graphView->add(dequantGraph); match.graph->add(dequantGraph); } - graphView->save("/data1/is156025/ll277219/task_quantif_export/export/aidge_models/between_ifs"); + //if conv has bias re calculate values of scaling factor and bias if (qop && hasBias){ - Log::info("start calc"); //bias and bias scaling factor have to be modified so it corresponds to ONNX's bias scaling factor formula: biasSF = inputSF * weightSF //TEMP: placeholder while quantizer node is not present at the input of convolution node const std::shared_ptr<Tensor> inputSFTensor = std::make_shared<Tensor>(Array1D<double, 1> {1}); - Log::info("inpusf calc"); - - Log::info("middle middle middle calc"); - const std::shared_ptr<Tensor> weightSFTensor = std::static_pointer_cast<OperatorTensor>(node->getParent(1)->getParent(0)->getParent(1)->getOperator())->getOutput(0); Log::info("dims: {}",weightSFTensor->dims()); inputSFTensor->setDataType(weightSFTensor->dataType()); - Log::info("middle middle calc"); - const std::shared_ptr<Node> biasProd = node->getParent(2)->getParent(0)->getParent(0); const std::shared_ptr<Node> biasSFProd = node->getParent(2)->getParent(0)->getParent(1); - Log::info("middle calc"); const std::shared_ptr<Tensor> biasTensor = std::static_pointer_cast<OperatorTensor>(biasProd->getOperator())->getOutput(0); const std::shared_ptr<Tensor> biasSFTensor = std::static_pointer_cast<OperatorTensor>(biasSFProd->getOperator())->getOutput(0); - Log::info("middle down calc"); const Tensor newBiasSFTensor = *inputSFTensor* *weightSFTensor; const Tensor newBiasTensor = (*biasSFTensor* *biasTensor)/newBiasSFTensor; - Log::info("down up"); - bool biasProdWasConstant = std::static_pointer_cast<Producer_Op>(biasProd->getOperator())->constant(); if(biasProdWasConstant){ @@ -290,25 +322,24 @@ void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool qop){ else 
biasSFProd->getOperator()->setOutput(0,std::make_shared<Tensor>(newBiasSFTensor)); Log::info("Bias and Bias Scaling factor values changed to ONNX standard"); - break;//only one conv per match } - break; + break; //only one conv per match + } - Log::info("loop"); } - Log::info("qlinearrr"); graphView->save("/data1/is156025/ll277219/task_quantif_export/export/aidge_models/beforeQOP"); + //if qop desired match for qlinearconv form and create the corresponding metaoperator if (qop){ const std::set<SinglePassGraphMatching::MatchingResult> qlinearMatchs = SinglePassGraphMatching(match.graph).match("Conv2D#0<-DequantizeLinear#0;" "Conv2D#0<1-DequantizeLinear#1;" "Conv2D#0<2-(DequantizeLinear#2<1-Producer#0)?;" "Conv2D#0<2-(DequantizeLinear#2<2-Producer#1)?;" "Conv2D#0->QuantizeLinear"); - - Log::info("matches: {}", qlinearMatchs.size()); + //Only one match is present in match.graph SinglePassGraphMatching::MatchingResult onlyMatch = *qlinearMatchs.begin(); - + + //convolution's name to be able to name metaop accordingly std::string convBaseName; for (const auto node :onlyMatch.graph->getNodes()) { @@ -317,13 +348,11 @@ void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool qop){ break; } } - // const std::shared_ptr<GraphView> qlinearconvGraph = onlyMatch.graph->clone();//deletable?? 
onlyMatch.graph->save("/data1/is156025/ll277219/task_quantif_export/export/aidge_models/oldorder"); - match.graph->save("/data1/is156025/ll277219/task_quantif_export/export/aidge_models/oldordermedium"); - graphView->save("/data1/is156025/ll277219/task_quantif_export/export/aidge_models/OLDORDERBIG"); - + if(hasBias){ + //metaop/graph inputs reordered to ONNX standard const std::vector<std::pair<std::shared_ptr<Node>, IOIndex_t>> qConvOrdInputs = onlyMatch.graph->getOrderedInputs(); const std::vector<std::pair<std::shared_ptr<Node>, IOIndex_t>> newQConvOrder = {qConvOrdInputs[0],//x input qConvOrdInputs[1],//x scale @@ -335,60 +364,42 @@ void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool qop){ qConvOrdInputs[8],//y zero point qConvOrdInputs[6]};//b onlyMatch.graph->setOrderedInputs(newQConvOrder); - Log::info("nb of parnets {}",qConvOrdInputs[8].first->getParents().size()); } - - onlyMatch.graph->save("/data1/is156025/ll277219/task_quantif_export/export/aidge_models/neworder"); const std::shared_ptr<Node> qlinearMetaOp = MetaOperator("QLinearConv", onlyMatch.graph->clone(),{},convBaseName == "" ? 
"" : convBaseName+"_QlinearConv"); - const std::shared_ptr<GraphView> metaOpGraph = std::make_shared<GraphView>(); metaOpGraph->add(qlinearMetaOp, false); - const bool qlinearReplaced = graphView->replace(onlyMatch.graph, metaOpGraph); - AIDGE_ASSERT(qlinearReplaced,"Unexpected error, couldn't replace subgraph with QlinearConv operator") - if(hasBias){ - // biasQuantLinear->attributes()->addAttr("outputDtype",6);//6 corresponds to int32 in ONNX's Datatype enum - - Log::info("onodatatype"); + if(hasBias){ + //up to current opset qlnearconv bias input must be in int32 const std::shared_ptr<Node> quantizeLinearB = qlinearMetaOp->getParent(8); const auto quantizeNodes = std::static_pointer_cast<MetaOperator_Op>(quantizeLinearB->getOperator())->getMicroGraph()->getNodes(); + //TODO verify that this change does not impact calculations for (const auto node : quantizeNodes){ - //Cast operator configured to castand add to int32 according to onnx bias requirements - const std::string nodeOPtype= node->type(); - if(nodeOPtype == "Cast" || nodeOPtype == "Add"){ + if(nodeOPtype == "Cast" ){ node->getOperator()->setDataType(DataType::Int32); if(nodeOPtype == "Cast") std::static_pointer_cast<Cast_Op>(node->getOperator())->targetType() = DataType::Int32; } } std::static_pointer_cast<OperatorTensor>(quantizeLinearB->getParent(2)->getOperator())->getOutput(0)->setDataType(DataType::Int32); - Log::info("onodatatype2"); - } nbfusions++; - } + Log::info("{} QlinearConvs added", nbfusions); + } } - Log::info("{} QlinearConvs added", nbfusions); - - graphView->save("/data1/is156025/ll277219/task_quantif_export/export/aidge_models/after1quant"); + //add a dequantize node to every output node of types qlinearconv out quantizelinear: (Float output expected by default) for (const auto& node : graphView->outputNodes()){ - Log::info("start loop"); - int idxInput; if(node->type() == "QLinearConv") idxInput = 4; else if (node->type() == "QuantizeLinear") idxInput = 1; else continue; - - 
Log::info("middle"); const std::shared_ptr<Tensor> quantizeSF = std::static_pointer_cast<OperatorTensor>(node->getParent(idxInput)->getOperator())->getOutput(0); - Log::info("quantize dimensions {}",quantizeSF->dims()); - const Tensor tempTensor = Tensor(Array1D<float, 1>{1}); const Tensor dequantizeSF = tempTensor / *quantizeSF; @@ -396,33 +407,34 @@ void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool qop){ const std::shared_ptr<GraphView> dequantGraph = getConnectedGraphView(dequantMetaOp); graphView->addChild(dequantGraph,std::pair<NodePtr, IOIndex_t>(node, IOIndex_t(0)),std::pair<NodePtr, IOIndex_t>(dequantMetaOp, IOIndex_t(0))); - } graphView->setBackend("cpu");//TODO get dynamically - Log::notice("Before folding--------------------------"); graphView->save("/data1/is156025/ll277219/task_quantif_export/export/aidge_models/ENDRECIPE"); - const std::set<SinglePassGraphMatching::MatchingResult> foldQuantize = SinglePassGraphMatching(graphView).match( - //Query is subject to change as quantization operators change - "QuantizeLinear#0<-Producer#0;" - "QuantizeLinear#0<1-Producer#1;" - "QuantizeLinear#0<2-Producer#2?" 
- ); - for(const auto match : foldQuantize){ - auto quantizeFolder = SequentialScheduler(match.graph); - quantizeFolder.forward(); - const auto quantizeLinearNode = *match.graph->outputNodes().begin(); - - const std::shared_ptr<Tensor> foldedTensor = std::make_shared<Tensor>(std::static_pointer_cast<OperatorTensor>((quantizeLinearNode)->getOperator())->getOutput(0)->clone()); - const std::shared_ptr<Node> foldedProd = Producer(foldedTensor, "", true); - const std::shared_ptr<GraphView> foldedGraph = std::make_shared<GraphView>(); + if(foldWeights){ + //Fold quantize linear of weights and bias, leaving the quantized producer + const std::set<SinglePassGraphMatching::MatchingResult> foldQuantize = SinglePassGraphMatching(graphView).match( + //find quantizelinears with only producers as input, meaning they can be folded + "QuantizeLinear#0<-Producer#0;" + "QuantizeLinear#0<1-Producer#1;" + "QuantizeLinear#0<2-Producer#2?" + ); + for(const auto match : foldQuantize){ + auto quantizeFolder = SequentialScheduler(match.graph); + quantizeFolder.forward(); + + const auto quantizeLinearNode = *match.graph->outputNodes().begin(); + + const std::shared_ptr<Tensor> foldedTensor = std::make_shared<Tensor>(std::static_pointer_cast<OperatorTensor>((quantizeLinearNode)->getOperator())->getOutput(0)->clone()); + const std::shared_ptr<Node> foldedProd = Producer(foldedTensor, quantizeLinearNode->name(), true); + const std::shared_ptr<GraphView> foldedGraph = std::make_shared<GraphView>(); - foldedGraph->add(foldedProd); - graphView->replace(match.graph,foldedGraph); + foldedGraph->add(foldedProd); + graphView->replace(match.graph,foldedGraph); + } } - graphView->save("/data1/is156025/ll277219/task_quantif_export/export/aidge_models/folded"); Log::notice("after last verify----------------------------"); -- GitLab From c3af0b6964b750d85e41f8aad6083c6688e1ae48 Mon Sep 17 00:00:00 2001 From: Lucas Lopez <lucas.lopezmape@cea.fr> Date: Fri, 21 Feb 2025 10:59:34 +0000 Subject: [PATCH 42/44] 
Apply 2 suggestion(s) to 2 file(s) Co-authored-by: Cyril Moineau <cyril.moineau@cea.fr> --- aidge_quantization/freezeProducers.py | 2 +- python_binding/recipes/pybind_ONNXRecipes.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/aidge_quantization/freezeProducers.py b/aidge_quantization/freezeProducers.py index a6ee484..8783971 100644 --- a/aidge_quantization/freezeProducers.py +++ b/aidge_quantization/freezeProducers.py @@ -34,5 +34,5 @@ def freeze_weights(graphview: aidge_core.GraphView, all_producers: bool = False) while(parent_node.type() != "Producer"): parent_node = parent_node.get_parent(0) if parent_node is None: - raise ValueError ("error, producer not found") + raise RuntimeError(f"Could not find a parent producer for node {node.name()}") parent_node.get_operator().attr.set_attr("constant",True) diff --git a/python_binding/recipes/pybind_ONNXRecipes.cpp b/python_binding/recipes/pybind_ONNXRecipes.cpp index d1ea339..9664f76 100644 --- a/python_binding/recipes/pybind_ONNXRecipes.cpp +++ b/python_binding/recipes/pybind_ONNXRecipes.cpp @@ -21,7 +21,7 @@ namespace Aidge { void init_ONNXRecipes(py::module &m) { - m.def("quantize_matching_to_export", &quantizeMatchingtoExport, py::arg("graphView"), py::arg("qop")=true,py::arg("fold_weights")=true); + m.def("quantize_matching_to_export", &quantizeMatchingtoExport, py::arg("graph_view"), py::arg("qop")=true, py::arg("fold_weights")=true); } -- GitLab From 3233a331b372aa7df75025ce089586ec50d21b36 Mon Sep 17 00:00:00 2001 From: LOPEZ MAPE Lucas <lucas.lopezmape@cea.fr> Date: Fri, 21 Feb 2025 14:47:48 +0000 Subject: [PATCH 43/44] added Additional cast operator in QuantizeLinear and DequantizeLinear and removed some debug artifacts --- include/aidge/recipes/ONNXRecipes.hpp | 19 ++++- src/recipes/ONNXRecipes.cpp | 100 +++++++++----------------- 2 files changed, 50 insertions(+), 69 deletions(-) diff --git a/include/aidge/recipes/ONNXRecipes.hpp b/include/aidge/recipes/ONNXRecipes.hpp index 
14bf6bb..058deea 100644 --- a/include/aidge/recipes/ONNXRecipes.hpp +++ b/include/aidge/recipes/ONNXRecipes.hpp @@ -22,10 +22,25 @@ namespace Aidge { /** * @brief Prepare a Aidge model for ONNX export: regroup aidge nodes into quantizelinear,dequantizelinear or qlinearconv operators. * @param graphView The GraphView to process. - * @param qop if true indicates inclusion of metaoperator qlinearconv, if false only quantizelinear and dequantizelinear will be created + * @param QoperatorFormat if true indicates inclusion of metaoperator qlinearconv, if false qdq or QuantizeDequantize format will be used(see https://onnxruntime.ai/docs/performance/model-optimizations/quantization.html#onnx-quantization-representation-format) + */ +void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool QoperatorFormat = true, bool foldWeights = true); + + /** + * @brief Prepare a Aidge model for ONNX export: regroup aidge nodes into quantizelinear,dequantizelinear or qlinearconv operators. + * @param scalingFactor Scaling factor used in the quantization operation + * @param zeroPoint Zero point used in the quantization operation, for aidge quantization this should always be equal to 0 + * @param basename name used as base for the names of the quantizelinear metaoperator and its components */ -void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool qop = true, bool foldWeights = true); std::shared_ptr<Node> createQuantizeLinearNode(float scalingFactor = 1.0, uint8_t zeroPoint = 0,const std::string basename = ""); + + /** + * @brief Prepare a Aidge model for ONNX export: regroup aidge nodes into quantizelinear,dequantizelinear or qlinearconv operators. + * @param descalingFactor Scaling factor used in the quantization operation + * @param zeroPoint Zero point used in the quantization operation, for aidge quantization this should always be equal to 0 + * @param castDtype Dtype of the output of the dequantizelinear metaop. 
This argument may be deprecated in the future because of ONNX's imposed dtypes + * @param basename name used as base for the names of the quantizelinear metaoperator and its components + */ std::shared_ptr<Node> createDequantizeLinearNode(Tensor descalingFactor, uint8_t zeroPoint,Aidge::DataType castDtype,const std::string basename = ""); } diff --git a/src/recipes/ONNXRecipes.cpp b/src/recipes/ONNXRecipes.cpp index ee8c69a..0fdaa16 100644 --- a/src/recipes/ONNXRecipes.cpp +++ b/src/recipes/ONNXRecipes.cpp @@ -29,21 +29,24 @@ namespace Aidge { std::shared_ptr<Node> createQuantizeLinearNode(float scalingFactor, uint8_t zeroPoint,const std::string basename){ - //returns the Graphview of a QuantizeLinear metaop with it's producers const std::shared_ptr<Node> mulNode = Mul(basename == "" ? "" : basename + "_MulQuant"); const std::shared_ptr<Node> roundNode = Round(basename == "" ? "" : basename + "_RoundQuant"); const std::shared_ptr<Node> addNode = Add(basename == "" ? "" : basename + "_AddQuant"); - const std::shared_ptr<Node> castNode = Cast(DataType::UInt8, basename == "" ? "" : basename + "_castQuant"); - - const std::shared_ptr<GraphView> qlGraph = Sequential({mulNode, roundNode, addNode, castNode}); - - const std::shared_ptr<Node> quantizeMetaOp = MetaOperator("QuantizeLinear", qlGraph,{},basename == "" ? "" : basename + "_QuantLinear"); + const std::shared_ptr<Node> castNode = Cast(DataType::UInt8, basename == "" ? "" : basename + "_CastQuant"); + const std::shared_ptr<Node> castAddNode = Cast(DataType::Float32, basename == "" ? 
"" : basename + "_Cast_ZeroPointQuant"); mulNode->getOperator()->setDataType(DataType::Float32); roundNode->getOperator()->setDataType(DataType::Float32); + castAddNode->getOperator()->setDataType(DataType::Float32); addNode->getOperator()->setDataType(DataType::Float32); castNode->getOperator()->setDataType(DataType::UInt8); + const std::shared_ptr<GraphView> qlGraph = Sequential({mulNode, roundNode, addNode, castNode});//Would be less wasteful to just use multiple addChild? + castAddNode->addChild(addNode,0,1); + const std::shared_ptr<GraphView> Quantizegraph = getConnectedGraphView(castNode); + + const std::shared_ptr<Node> quantizeMetaOp = MetaOperator("QuantizeLinear", Quantizegraph,{},basename == "" ? "" : basename + "_QuantLinear"); + const std::shared_ptr<Node> addProd = Producer(std::make_shared<Tensor>(Array1D<uint8_t, 1>{zeroPoint}),addNode->name() == "" ? "" : addNode->name() + "_ZeroPoint",true); const std::shared_ptr<Node> mulProd = Producer(std::make_shared<Tensor>(Array1D<float, 1>{scalingFactor}),mulNode->name() == "" ? "" : mulNode->name() + "_ScaleFactor",true); @@ -54,24 +57,26 @@ std::shared_ptr<Node> createQuantizeLinearNode(float scalingFactor, uint8_t zero } std::shared_ptr<Node> createDequantizeLinearNode(Tensor descalingFactor, uint8_t zeroPoint,DataType castDtype, const std::string basename){ - //returns the Graphview of a QuantizeLinear metaop with it's producers const std::shared_ptr<Node> castNode = Cast(castDtype,basename == "" ? "" : basename + "_CastDequant"); + const std::shared_ptr<Node> castSubNode = Cast(castDtype,basename == "" ? "" : basename + "_Cast_ZeroPointDequant"); const std::shared_ptr<Node> subNode = Sub(basename == "" ? "" : basename + "_SubDequant"); const std::shared_ptr<Node> mulNode = Mul(basename == "" ? 
"" : basename + "_MulDequant"); - const std::shared_ptr<GraphView> dequantGraph = Sequential({castNode, subNode, mulNode}); + castNode->getOperator()->setDataType(castDtype); + subNode->getOperator()->setDataType(DataType::Float32); + mulNode->getOperator()->setDataType(DataType::Float32); - const std::vector<std::pair<std::shared_ptr<Node>, IOIndex_t>> dequantOrdInputs = dequantGraph->getOrderedInputs(); + const std::shared_ptr<GraphView> dequantGraph = Sequential({castNode, subNode, mulNode});//Would be less wasteful to just use multiple addChild? + castSubNode->addChild(subNode,0,1); + const std::shared_ptr<GraphView> dequantizegraph = getConnectedGraphView(mulNode); + + const std::vector<std::pair<std::shared_ptr<Node>, IOIndex_t>> dequantOrdInputs = dequantizegraph->getOrderedInputs(); const std::vector<std::pair<std::shared_ptr<Node>, IOIndex_t>> newDequantOrder = {dequantOrdInputs[0],//input dequantOrdInputs[2],//scaling factor dequantOrdInputs[1]};//zero point - dequantGraph->setOrderedInputs(newDequantOrder); - - const std::shared_ptr<Node> dequantMetaOp = MetaOperator("DequantizeLinear", dequantGraph,{},basename == "" ? "" : basename + "_DequantLinear"); + dequantizegraph->setOrderedInputs(newDequantOrder); - castNode->getOperator()->setDataType(castDtype); - subNode->getOperator()->setDataType(DataType::Float32); - mulNode->getOperator()->setDataType(DataType::Float32); + const std::shared_ptr<Node> dequantMetaOp = MetaOperator("DequantizeLinear", dequantizegraph,{},basename == "" ? "" : basename + "_DequantLinear"); //producer must be uint8 for correct zp dtype in onnx export const std::shared_ptr<Node> subProd = Producer(std::make_shared<Tensor>(Array1D<uint8_t, 1>{0}),subNode->name() == "" ? 
"" : subNode->name() + "_ZeroPoint",true); @@ -83,11 +88,10 @@ std::shared_ptr<Node> createDequantizeLinearNode(Tensor descalingFactor, uint8_t return dequantMetaOp; } -void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool qop, bool foldWeights){ +void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool QoperatorFormat, bool foldWeights){ //Add quantizeLinear Metaop at the beginning of the graph - //according to aidge's quantification, the first input is not quantized so Sf of 1 and Zp of 0 is performed + //according to aidge's quantification, the first input is not quantized so sf of 1 and Zp of 0 is performed //Operator is added to conform with ONNX's quantize models form - // AIDGE_ASSERT( !qop || foldWeights,"QOP format cannot be exported to onnx without folded weight/bias"); int inptIdx = 0; for (const auto& node : graphView->inputNodes()){ @@ -157,8 +161,6 @@ void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool qop, bo Log::info("{} QuantizeLinear and DequantizeLinear added", nbfusions); nbfusions = 0; - graphView->save("/data1/is156025/ll277219/task_quantif_export/export/aidge_models/middlegraph"); - //Modify quantizer so it posseses zero point and conforms with expected metaop in output const std::set<std::shared_ptr<Node>> nodeList = graphView->getNodes(); for(const std::shared_ptr<Node> node: nodeList){ @@ -174,17 +176,12 @@ void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool qop, bo addNode->getOperator()->setDataType(DataType::Float32); castNode->getOperator()->setDataType(DataType::UInt8); - quantizeMicro->save("/data1/is156025/ll277219/task_quantif_export/export/aidge_models/1quantizemicro"); for(const auto quantNode : quantizeMicro->getNodes()){ + //Shape of quantizer may vary so Clip will be used as root if(quantNode->type() == "Clip"){ //parent of clip may be a mul or round node const std::shared_ptr<Node> oldParent = quantNode->getParent(0); - - //Clip is float32 - // 
quantNode->getOperator()->setDataType(DataType::UInt8); - // quantNode->getParent(1)->getOperator()->setDataType(DataType::UInt8); - // quantNode->getParent(2)->getOperator()->setDataType(DataType::UInt8); oldParent->addChild(addNode,0,0); addNode->addChild(quantNode,0,0); @@ -197,55 +194,33 @@ void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool qop, bo quantizeMicro->add(addNode); quantizeMicro->add(addNodeProd); - quantizeMicro->save("/data1/is156025/ll277219/task_quantif_export/export/aidge_models/2quantizemicro"); - - // Log::info("-one ======"); - // for(const auto& nde2 : quantizeMicro->getNodes() ){ - // if(nde2->type() == "Cast" ){ - // Log::info("dtype {}", std::static_pointer_cast<OperatorTensor>(nde2->getOperator())->getOutput(0)->dataType()); - // } - // } - // ======================== - // SinglePassGraphMatching::MatchingResult quantizeLinearNewGraph = *SinglePassGraphMatching(quantizeMicro).match("Clip#0<-Add<-Cast<-Round?<-Mul; Clip#0<1-Producer#0; Clip#0<2-Producer#1").begin(); - // const std::shared_ptr<Node> newQuantizeMetaop = MetaOperator("QuantizeLinear",quantizeLinearNewGraph.graph->clone(),{},""); - // auto metaOpGraph = std::make_shared<GraphView>(); - // metaOpGraph->add(newQuantizeMetaop, false); - // quantizeMicro->replace(quantizeLinearNewGraph.graph, metaOpGraph); - //========================= - fuseToMetaOps(quantizeMicro,"Clip#0<-Add<-Round?<-Mul; Clip#0<1-Producer#0; Clip#0<2-Producer#1; Clip#0->Cast","QuantizeLinear"); graphView->add(addNodeProd); - Log::info("one======"); + //debug code: + // Hard coded visualisation and fix of cast with incorrect type + Log::debug("one======"); for (const auto nde : quantizeMicro->getNodes()){ if(nde->type() == "QuantizeLinear"){ - Log::info("{} ==================",nde->name()); + Log::debug("{} ==================",nde->name()); const auto quantigraph = std::static_pointer_cast<MetaOperator_Op>(nde->getOperator())->getMicroGraph(); for(const auto nde2 : 
quantigraph->getNodes() ){ if(nde2->type() == "Cast"){ - Log::info("-- type {}",nde2->type()); - Log::info("dtype {}", std::static_pointer_cast<OperatorTensor>(nde2->getOperator())->getOutput(0)->dataType()); + Log::debug("-- type {}",nde2->type()); + Log::debug("dtype {}", std::static_pointer_cast<OperatorTensor>(nde2->getOperator())->getOutput(0)->dataType()); nde2->getOperator()->setDataType(DataType::UInt8); - Log::info("newdtype {}", std::static_pointer_cast<OperatorTensor>(nde2->getOperator())->getOutput(0)->dataType()); - if(nde2->type() == "Clip"){ - quantigraph->save("/data1/is156025/ll277219/task_quantif_export/export/aidge_models/quantizer"); - } + Log::debug("newdtype {}", std::static_pointer_cast<OperatorTensor>(nde2->getOperator())->getOutput(0)->dataType()); } } } } + //end debug code - Log::info("2cast dtype {} =============================================",std::static_pointer_cast<OperatorTensor>(castNode->getOperator())->getOutput(0)->dataType()); - quantizeMicro->save("/data1/is156025/ll277219/task_quantif_export/export/aidge_models/3quantizemicro"); - graphView->save("/data1/is156025/ll277219/task_quantif_export/export/aidge_models/1quantize"); - std::shared_ptr<GraphView> replacedGraph = std::make_shared<GraphView>(); replacedGraph->add(node); - graphView->save("/data1/is156025/ll277219/task_quantif_export/export/aidge_models/2quantize"); graphView->replace(replacedGraph, quantizeMicro); - graphView->save("/data1/is156025/ll277219/task_quantif_export/export/aidge_models/3quantize"); } } @@ -289,14 +264,13 @@ void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool qop, bo } //if conv has bias re calculate values of scaling factor and bias - if (qop && hasBias){ + if (QoperatorFormat && hasBias){ //bias and bias scaling factor have to be modified so it corresponds to ONNX's bias scaling factor formula: biasSF = inputSF * weightSF //TEMP: placeholder while quantizer node is not present at the input of convolution node const 
std::shared_ptr<Tensor> inputSFTensor = std::make_shared<Tensor>(Array1D<double, 1> {1}); const std::shared_ptr<Tensor> weightSFTensor = std::static_pointer_cast<OperatorTensor>(node->getParent(1)->getParent(0)->getParent(1)->getOperator())->getOutput(0); - Log::info("dims: {}",weightSFTensor->dims()); inputSFTensor->setDataType(weightSFTensor->dataType()); const std::shared_ptr<Node> biasProd = node->getParent(2)->getParent(0)->getParent(0); @@ -327,10 +301,9 @@ void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool qop, bo } } - graphView->save("/data1/is156025/ll277219/task_quantif_export/export/aidge_models/beforeQOP"); //if qop desired match for qlinearconv form and create the corresponding metaoperator - if (qop){ + if (QoperatorFormat){ const std::set<SinglePassGraphMatching::MatchingResult> qlinearMatchs = SinglePassGraphMatching(match.graph).match("Conv2D#0<-DequantizeLinear#0;" "Conv2D#0<1-DequantizeLinear#1;" "Conv2D#0<2-(DequantizeLinear#2<1-Producer#0)?;" @@ -348,8 +321,6 @@ void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool qop, bo break; } } - // const std::shared_ptr<GraphView> qlinearconvGraph = onlyMatch.graph->clone();//deletable?? 
- onlyMatch.graph->save("/data1/is156025/ll277219/task_quantif_export/export/aidge_models/oldorder"); if(hasBias){ //metaop/graph inputs reordered to ONNX standard @@ -410,9 +381,6 @@ void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool qop, bo } graphView->setBackend("cpu");//TODO get dynamically - graphView->save("/data1/is156025/ll277219/task_quantif_export/export/aidge_models/ENDRECIPE"); - - if(foldWeights){ //Fold quantize linear of weights and bias, leaving the quantized producer const std::set<SinglePassGraphMatching::MatchingResult> foldQuantize = SinglePassGraphMatching(graphView).match( @@ -435,9 +403,7 @@ void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool qop, bo graphView->replace(match.graph,foldedGraph); } } - graphView->save("/data1/is156025/ll277219/task_quantif_export/export/aidge_models/folded"); - Log::notice("after last verify----------------------------"); } } \ No newline at end of file -- GitLab From 3c90c3841fdd9dec94120a24b8d02490386f06a2 Mon Sep 17 00:00:00 2001 From: LOPEZ MAPE Lucas <lucas.lopezmape@cea.fr> Date: Fri, 21 Feb 2025 15:27:26 +0000 Subject: [PATCH 44/44] qlinearconv biasSF correctly taking into account inputSF --- src/recipes/ONNXRecipes.cpp | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/src/recipes/ONNXRecipes.cpp b/src/recipes/ONNXRecipes.cpp index 0fdaa16..87b9d2c 100644 --- a/src/recipes/ONNXRecipes.cpp +++ b/src/recipes/ONNXRecipes.cpp @@ -198,8 +198,8 @@ void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool Qoperat graphView->add(addNodeProd); //debug code: - // Hard coded visualisation and fix of cast with incorrect type - Log::debug("one======"); + // Hard coded visualisation and fix of cast with incorrect type===== + Log::debug("debug======"); for (const auto nde : quantizeMicro->getNodes()){ if(nde->type() == "QuantizeLinear"){ Log::debug("{} ==================",nde->name()); @@ -212,10 +212,9 @@ void 
quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool Qoperat Log::debug("newdtype {}", std::static_pointer_cast<OperatorTensor>(nde2->getOperator())->getOutput(0)->dataType()); } } - } } - //end debug code + //end debug code======== std::shared_ptr<GraphView> replacedGraph = std::make_shared<GraphView>(); replacedGraph->add(node); @@ -238,7 +237,6 @@ void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool Qoperat if(wholeQlinearMatches.size()<1) Log::warn("No quantized convolutions found"); - Log::info("found : {} ",wholeQlinearMatches.size()); for (const auto match : wholeQlinearMatches) { bool hasBias = false; @@ -267,11 +265,16 @@ void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool Qoperat if (QoperatorFormat && hasBias){ //bias and bias scaling factor have to be modified so it corresponds to ONNX's bias scaling factor formula: biasSF = inputSF * weightSF - //TEMP: placeholder while quantizer node is not present at the input of convolution node - const std::shared_ptr<Tensor> inputSFTensor = std::make_shared<Tensor>(Array1D<double, 1> {1}); - const std::shared_ptr<Tensor> weightSFTensor = std::static_pointer_cast<OperatorTensor>(node->getParent(1)->getParent(0)->getParent(1)->getOperator())->getOutput(0); - inputSFTensor->setDataType(weightSFTensor->dataType()); + + std::shared_ptr<Tensor> inputSFTensor; + if(node->getParent(0)->getParent(0)->type() == "QuantizeLinear"){ + inputSFTensor = std::static_pointer_cast<OperatorTensor>(node->getParent(0)->getParent(0)->getParent(1)->getOperator())->getOutput(0); + } + else{ + inputSFTensor = std::make_shared<Tensor>(Array1D<double, 1> {1}); + inputSFTensor->setDataType(weightSFTensor->dataType()); + } const std::shared_ptr<Node> biasProd = node->getParent(2)->getParent(0)->getParent(0); const std::shared_ptr<Node> biasSFProd = node->getParent(2)->getParent(0)->getParent(1); @@ -348,7 +351,7 @@ void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool 
Qoperat const std::shared_ptr<Node> quantizeLinearB = qlinearMetaOp->getParent(8); const auto quantizeNodes = std::static_pointer_cast<MetaOperator_Op>(quantizeLinearB->getOperator())->getMicroGraph()->getNodes(); - //TODO verify that this change does not impact calculations + //TODO: correct overflow and differences when quantization is performed in Int32 and uint8 (may need to fold in int32 or float and skip this quantizelinear node entirely) for (const auto node : quantizeNodes){ const std::string nodeOPtype= node->type(); if(nodeOPtype == "Cast" ){ @@ -379,8 +382,10 @@ void quantizeMatchingtoExport(std::shared_ptr<GraphView> graphView, bool Qoperat graphView->addChild(dequantGraph,std::pair<NodePtr, IOIndex_t>(node, IOIndex_t(0)),std::pair<NodePtr, IOIndex_t>(dequantMetaOp, IOIndex_t(0))); } + graphView->setBackend("cpu");//TODO get dynamically + //TODO: Bias must be always folded, it may be interesting to just fold when possible instead of giving the choice if(foldWeights){ //Fold quantize linear of weights and bias, leaving the quantized producer const std::set<SinglePassGraphMatching::MatchingResult> foldQuantize = SinglePassGraphMatching(graphView).match( -- GitLab