From 96855bd6fc2964749c17c41650671ac09da79739 Mon Sep 17 00:00:00 2001 From: bhalimi <benjamin.halimi@cea.fr> Date: Wed, 8 Jan 2025 10:32:06 +0000 Subject: [PATCH 01/23] improve tensor manipulation routines + enhance insertCompensationNodes --- src/PTQ/CLE.cpp | 8 +- src/PTQ/PTQ.cpp | 6 +- src/QAT/QAT_LSQ.cpp | 228 ++++++++++++++++++++++++++------------------ 3 files changed, 143 insertions(+), 99 deletions(-) diff --git a/src/PTQ/CLE.cpp b/src/PTQ/CLE.cpp index 28858d0..dc60b38 100644 --- a/src/PTQ/CLE.cpp +++ b/src/PTQ/CLE.cpp @@ -30,6 +30,12 @@ #include "aidge/operator/Reshape.hpp" #include "aidge/operator/Round.hpp" +#include "aidge/operator/Mul.hpp" +#include "aidge/operator/ArgMax.hpp" +#include "aidge/operator/Abs.hpp" +#include "aidge/operator/Reshape.hpp" +#include "aidge/operator/Round.hpp" + namespace Aidge { @@ -49,7 +55,7 @@ static void rescaleTensor(std::shared_ptr<Tensor> tensor, double scaling) mulOp.setDataType(tensor->dataType()); mulOp.setBackend(tensor->backend()); - std::shared_ptr<Aidge::Tensor> scalingTensor = std::make_shared<Aidge::Tensor>(scaling); + std::shared_ptr<Aidge::Tensor> scalingTensor = std::make_shared<Aidge::Tensor>(Aidge::Array1D<float, 1> {scaling}); scalingTensor->setDataType(tensor->dataType()); scalingTensor->setBackend(tensor->backend()); diff --git a/src/PTQ/PTQ.cpp b/src/PTQ/PTQ.cpp index 7c29ee0..190f7e2 100644 --- a/src/PTQ/PTQ.cpp +++ b/src/PTQ/PTQ.cpp @@ -72,13 +72,13 @@ bool checkArchitecture(std::shared_ptr<GraphView> graphView) return true; } -static void rescaleTensor(std::shared_ptr<Tensor> tensor, double scaling) +static void rescaleTensor(std::shared_ptr<Tensor> tensor, float scaling) { auto mulOp = Mul_Op(); mulOp.setDataType(tensor->dataType()); mulOp.setBackend(tensor->backend()); - std::shared_ptr<Aidge::Tensor> scalingTensor = std::make_shared<Aidge::Tensor>(scaling); + std::shared_ptr<Aidge::Tensor> scalingTensor = std::make_shared<Aidge::Tensor>(Aidge::Array1D<float, 1> {scaling}); scalingTensor->setDataType(tensor->dataType()); scalingTensor->setBackend(tensor->backend()); @@ -932,7 +932,7 @@ static void insertCompensationNodes(std::shared_ptr<GraphView> graphView, std::u // Add the coeff producer to the multiplier node std::shared_ptr<Node> coeffProducer = addProducer(mulNode, 1, {1}, ""); - std::shared_ptr<Tensor> coeffTensor = std::make_shared<Tensor>(signedMax); + std::shared_ptr<Tensor> coeffTensor = std::make_shared<Tensor>(Array1D<double, 1> {signedMax}); coeffProducer->getOperator()->setOutput(0, coeffTensor); coeffProducer->getOperator()->setDataType(DataType::Float64); diff --git a/src/QAT/QAT_LSQ.cpp b/src/QAT/QAT_LSQ.cpp index 6eae077..a09dbb2 100644 --- a/src/QAT/QAT_LSQ.cpp +++ b/src/QAT/QAT_LSQ.cpp @@ -21,152 +21,190 @@ #include "aidge/graph/Matching.hpp" #include "aidge/recipes/QuantRecipes.hpp" +namespace Aidge { -namespace Aidge +void QuantLSQ::insertQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits, float stepSize) { + const auto matches = SinglePassGraphMatching(graphView).match("(Conv2D#|FC#)"); -static float getTensorAbsMean(std::shared_ptr<Tensor> tensor) -{ - auto valueTensor = (*tensor).abs().mean(); - std::shared_ptr<Tensor> fallback; - const Tensor& localTensor = valueTensor.refCastFrom(fallback, DataType::Float32, "cpu"); - return localTensor.get<float>(0); -} + for (const auto& match : matches) + { + auto linearNode = match.graph->rootNode(); -static float getTensorStd(std::shared_ptr<Tensor> tensor) -{ - auto valueTensor = (*tensor); - - auto skewedTensor = valueTensor - 
valueTensor.mean(); - auto squaredTensor = skewedTensor * skewedTensor; - auto varianceTensor = squaredTensor.mean(); + std::pair<int, int> signedRange = {-std::pow(2, nbBits - 1), std::pow(2, nbBits - 1) - 1}; + std::pair<int, int> unsignedRange = {0, std::pow(2, nbBits) - 1}; - std::shared_ptr<Tensor> fallback; - auto localTensor = varianceTensor.refCastFrom(fallback, DataType::Float32, "cpu"); - - float variance = localTensor.get<float>(0); - return std::sqrt(variance); -} + // INPUT QUANTIZERS INSERTION + // TODO : double check this, and use createUniqueName() + auto inputQuantizerName = makeUniqueName(linearNode->name() + "_lsq_i", graphView); + auto inputQuantizerNode = LSQ(signedRange, inputQuantizerName); -// INIT THE STEP SIZE OF A QUANTIZER NODE + // Set the step size -static bool initStepSize(std::shared_ptr<Node> quantizer) -{ - const auto quantizerOp = std::static_pointer_cast<LSQ_Op>(quantizer->getOperator()); + auto inputStepSizeOp = inputQuantizerNode->getParent(1)->getOperator(); + auto inputStepSizeTensor = std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}})); + inputStepSizeOp->setOutput(0, inputStepSizeTensor); - // This formula is the one proposed in the paper ... + // Absorb the ReLU when possible ... - // float inputAbsMean = getTensorAbsMean(quantizerOp->getInput(0)); - // float stepSize = 2.0f * (inputAbsMean / std::sqrt(quantizerOp->range().second)); + // XXX is this safe ??? + bool nodeHasParent = static_cast<bool> (linearNode->getParents()[0]); + // bool nodeHasParent = (linearNode->getParents().size() != 0); - // .. but this formula seems to work better !!! + if (nodeHasParent) { + auto parentNode = linearNode->getParents()[0]; + if (parentNode->type() == "ReLU") { + auto inputQuantizerOp = std::static_pointer_cast<LSQ_Op> (inputQuantizerNode->getOperator()); + inputQuantizerOp->range() = unsignedRange; + graphView->replace({parentNode}, {}); + } + } - float inputStd = getTensorStd(quantizerOp->getInput(0)); - float stepSize = 8.0f * (inputStd / (quantizerOp->range().second)); + // We need to handle the case where the linear node is the first one ... - // TODO : use the scalar constructor - auto stepSizeTensor = std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}})); + if (nodeHasParent) { + graphView->insertParent(linearNode, inputQuantizerNode, 0, 0, 0); + } else { + inputQuantizerNode->addChild(graphView); + graphView->add(inputQuantizerNode); + } - // XXX Manage backend here ? 
- stepSizeTensor->setBackend(quantizerOp->getInput(0)->backend()); - stepSizeTensor->setDataType(quantizerOp->getInput(0)->dataType()); + // PARAM QUANTIZERS INSERTION - auto stepSizeProducer = quantizer->getParent(1); + // TODO : double check this, and use createUniqueName() + auto paramQuantizerName = makeUniqueName(linearNode->name() + "_lsq_p", graphView); + auto paramQuantizerNode = LSQ(signedRange, paramQuantizerName); + graphView->insertParent(linearNode, paramQuantizerNode, 1, 0, 0); - stepSizeProducer->getOperator()->setOutput(0, stepSizeTensor); + // Set the step size - Log::notice(" [ INIT STEP SIZE = {} ] ", stepSize); + auto paramStepSizeOp = paramQuantizerNode->getParent(1)->getOperator(); + auto paramStepSizeTensor = std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}})); + paramStepSizeOp->setOutput(0, paramStepSizeTensor); + } - return false; } -static void setupInputQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits) +static float getTensorAbsMean(std::shared_ptr<Tensor> tensor) { - const auto matches = SinglePassGraphMatching(graphView).match("(Conv2D#|PaddedConv2D#|FC#)"); + auto backend = tensor->backend(); - for (const auto& match : matches) - { - auto linearNode = match.graph->rootNode(); - - // Log::notice(" SET INPUT QUANTIZER : {} ", linearNode->type()); + if (backend == "cuda") + tensor->setBackend("cpu"); - std::pair<int, int> signedRange = {-std::pow(2, nbBits - 1), std::pow(2, nbBits - 1) - 1}; - std::pair<int, int> unsignedRange = {0, std::pow(2, nbBits) - 1}; + float value = (*tensor).abs().mean().get<float>(0); - // Create the input quantizer node + if (backend == "cuda") + tensor->setBackend("cuda"); - auto quantizerName = makeUniqueName(linearNode->name() + "_lsq_i", graphView); - auto quantizerNode = LSQ(signedRange, quantizerName); + return value; +} - // Init the step-size using the node call stack +static std::map<std::string, float> collectInputStats(std::shared_ptr<GraphView> graphView, std::shared_ptr<Tensor> calibrationData, bool useCuda) +{ + // Propagate the calibration tensor - quantizerNode->addBeforeForward([quantizerNode](){ return initStepSize(quantizerNode); }); + SequentialScheduler scheduler(graphView); + scheduler.resetScheduling(); + scheduler.forward(true, {calibrationData}); - // Absorb the ReLU when possible ... + // Store the input tensor statistics - bool nodeHasParent = static_cast<bool> (linearNode->getParents()[0]); // XXX is this safe ? + if (useCuda) + graphView->setBackend("cpu"); - if (nodeHasParent) + std::map<std::string, float> inputStats; + for (auto node : graphView->getNodes()) + { + if (node->type() == "FC" || node->type() == "Conv2D") // TODO: use graph matching !!! { - bool allParentsAreReLU = true; - for (auto parentNode : linearNode->getParents()) - if (parentNode->type() != "ReLU") - allParentsAreReLU = false; - - if (allParentsAreReLU) { - auto quantizerOp = std::static_pointer_cast<LSQ_Op> (quantizerNode->getOperator()); - quantizerOp->range() = unsignedRange; - } - - // TODO : remove the ReLUs when possible + const auto op = std::static_pointer_cast<LSQ_Op>(node->getOperator()); + float inputAbsMean = getTensorAbsMean(op->getInput(0)); + inputStats.insert(std::make_pair(node->name(), inputAbsMean)); + fmt::println("{} -> {}", node->name(), inputAbsMean); } + } - // Insert the quantizer in the graphView ... 
- // (We need to handle the case where the linear node is the first one) + if (useCuda) + graphView->setBackend("cuda"); - if (nodeHasParent) { - graphView->insertParent(linearNode, quantizerNode, 0, 0, 0); - } else { - quantizerNode->addChild(graphView); - graphView->add(quantizerNode); + return inputStats; +} + +static std::map<std::string, float> collectParamStats(std::shared_ptr<GraphView> graphView, bool useCuda) +{ + if (useCuda) + graphView->setBackend("cpu"); + + std::map<std::string, float> paramStats; + for (auto node : graphView->getNodes()) + { + if (node->type() == "FC" || node->type() == "Conv2D") // TODO: use graph matching !!! + { + const auto op = std::static_pointer_cast<LSQ_Op>(node->getOperator()); + float paramAbsMean = getTensorAbsMean(op->getInput(1)); + paramStats.insert(std::make_pair(node->name(), paramAbsMean)); + fmt::println("{} -> {}", node->name(), paramAbsMean); } } -} + + if (useCuda) + graphView->setBackend("cuda"); -// PARAM QUANTIZERS INSERTION + return paramStats; +} -static void setupParamQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits) +static void adjustQuantizersStepSizes(std::shared_ptr<GraphView> graphView, std::map<std::string, float> inputStats, std::map<std::string, float> paramStats) { - const auto matches = SinglePassGraphMatching(graphView).match("(Conv2D#|PaddedConv2D#|FC#)"); - - std::pair<int, int> signedRange = {-std::pow(2, nbBits - 1), std::pow(2, nbBits - 1) - 1}; + const auto matches = SinglePassGraphMatching(graphView).match("(Conv2D#|FC#)"); for (const auto& match : matches) - { - auto linearNode = match.graph->rootNode(); + { + auto linearNode = match.graph->rootNode(); - // Log::notice(" SET PARAM QUANTIZER : {} ", linearNode->type()); + // INPUT QUANTIZERS STEP-SIZES - // TODO : double check this, and use createUniqueName() - auto quantizerName = makeUniqueName(linearNode->name() + "_lsq_p", graphView); - auto quantizerNode = LSQ(signedRange, quantizerName); + auto inputQuantNode = linearNode->getParent(0); + auto inputQuantOp = std::static_pointer_cast<LSQ_Op>(inputQuantNode->getOperator()); + + float absMean = inputStats[linearNode->name()]; + float stepSize = 2.0f * (absMean / std::sqrt(inputQuantOp->range().second)); - // Init the step-size using the node call stack + auto inputStepSizeOp = inputQuantNode->getParent(1)->getOperator(); + // XXX inputStepSizeOp->setOutput(0, std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}}))); + auto inputStepSizeTensor = std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}})); + inputStepSizeOp->setOutput(0, inputStepSizeTensor); - quantizerNode->addBeforeForward([quantizerNode](){ return initStepSize(quantizerNode); }); + // PARAM QUANTIZERS STEP-SIZES - // Insert the quantizer in the graphView + auto paramQuantNode = linearNode->getParent(1); + auto paramQuantOp = std::static_pointer_cast<LSQ_Op>(paramQuantNode->getOperator()); - graphView->insertParent(linearNode, quantizerNode, 1, 0, 0); + absMean = paramStats[linearNode->name()]; + stepSize = 2.0f * (absMean / std::sqrt(paramQuantOp->range().second)); + + auto paramStepSizeOp = paramQuantNode->getParent(1)->getOperator(); + // XXX paramStepSizeOp->setOutput(0, std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}}))); + auto paramStepSizeTensor = std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}})); + paramStepSizeOp->setOutput(0, paramStepSizeTensor); } } -void QuantLSQ::setupQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits) +void QuantLSQ::insertAndInitQuantizers(std::shared_ptr<GraphView> 
graphView, size_t nbBits, std::shared_ptr<Tensor> calibrationData)
 {
-    sanitizeNodeNames(graphView);
-    setupInputQuantizers(graphView, nbBits);
-    setupParamQuantizers(graphView, nbBits);
+    bool useCuda = (calibrationData->backend() == "cuda");
+
+    // Collect the tensor statistics
+    auto inputStats = collectInputStats(graphView, calibrationData, useCuda);
+
+    auto paramStats = collectParamStats(graphView, useCuda);
+
+    // Insert the quantizers
+    insertQuantizers(graphView, nbBits, 1.0);
+
+    // Adjust the quantizers step-sizes
+    adjustQuantizersStepSizes(graphView, inputStats, paramStats);
 }
 
 }
\ No newline at end of file
-- 
GitLab


From 1935e7f1a481e3ca2e53faf4f7dd8874434c0573 Mon Sep 17 00:00:00 2001
From: bhalimi <benjamin.halimi@cea.fr>
Date: Mon, 13 Jan 2025 13:01:34 +0000
Subject: [PATCH 02/23] rework the LSQ code

---
 include/aidge/quantization/QAT/QAT_LSQ.hpp |  19 +-
 python_binding/pybind_QAT_LSQ.cpp          |   3 +
 src/QAT/QAT_LSQ.cpp                        | 204 +++++++--------------
 3 files changed, 81 insertions(+), 145 deletions(-)

diff --git a/include/aidge/quantization/QAT/QAT_LSQ.hpp b/include/aidge/quantization/QAT/QAT_LSQ.hpp
index 922187a..d7d03ca 100644
--- a/include/aidge/quantization/QAT/QAT_LSQ.hpp
+++ b/include/aidge/quantization/QAT/QAT_LSQ.hpp
@@ -9,14 +9,12 @@
  *
 ********************************************************************************/
 
-#ifndef AIDGE_QUANTIZATION_QUANTIZATION_QAT_LSQ_H_
-#define AIDGE_QUANTIZATION_QUANTIZATION_QAT_LSQ_H_
+#ifndef AIDGE_QUANTIZATION_QAT_LSQ_H_
+#define AIDGE_QUANTIZATION_QAT_LSQ_H_
 
-#include <cstddef>  // std::size_t
-#include <memory>
-
-#include "aidge/data/Tensor.hpp"
+#include "aidge/graph/Node.hpp"
 #include "aidge/graph/GraphView.hpp"
+#include "aidge/data/Tensor.hpp"
 
 namespace Aidge {
 namespace QuantLSQ {
@@ -27,11 +25,12 @@ namespace QuantLSQ {
 * @param graphView The GraphView containing the network to quantize.
 * @param nbBits Number of quantization bits.
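 *
 * A minimal usage sketch (hypothetical setup: `model` stands for an already-built
 * GraphView holding Conv2D / FC layers; the call is the one declared below):
 *
 *   Aidge::QuantLSQ::setupQuantizers(model, 8); // insert 8-bit LSQ quantizers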
*/ - void setupQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits); -} // namespace QuantLSQ -} // namespace Aidge +void devLSQ(std::shared_ptr<Tensor> tensor); + +} +} -#endif /* AIDGE_QUANTIZATION_QUANTIZATION_QAT_LSQ_H_ */ +#endif /* AIDGE_QUANTIZATION_QAT_LSQ_H_ */ diff --git a/python_binding/pybind_QAT_LSQ.cpp b/python_binding/pybind_QAT_LSQ.cpp index 4bba3b6..0b9fcc2 100644 --- a/python_binding/pybind_QAT_LSQ.cpp +++ b/python_binding/pybind_QAT_LSQ.cpp @@ -24,5 +24,8 @@ void init_QAT_LSQ(py::module &m) { auto mQuantLSQ = m.def_submodule("lsq"); mQuantLSQ.def("setup_quantizers", &QuantLSQ::setupQuantizers, py::arg("network"), py::arg("nb_bits")); + + mQuantLSQ.def("dev_lsq", &QuantLSQ::devLSQ, py::arg("tensor")); + } } // namespace Aidge diff --git a/src/QAT/QAT_LSQ.cpp b/src/QAT/QAT_LSQ.cpp index a09dbb2..04f2027 100644 --- a/src/QAT/QAT_LSQ.cpp +++ b/src/QAT/QAT_LSQ.cpp @@ -23,7 +23,42 @@ namespace Aidge { -void QuantLSQ::insertQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits, float stepSize) +static float getTensorAbsMean(std::shared_ptr<Tensor> tensor) +{ + auto valueTensor = (*tensor).abs().mean(); + std::shared_ptr<Tensor> fallback; + const Tensor& localTensor = valueTensor.refCastFrom(fallback, DataType::Float32, "cpu"); + return localTensor.get<float>(0); +} + +// INIT THE STEP SIZE OF A QUANTIZER NODE + +static bool initStepSize(std::shared_ptr<Node> quantizer) +{ + const auto quantizerOp = std::static_pointer_cast<LSQ_Op>(quantizer->getOperator()); + + float inputAbsMean = getTensorAbsMean(quantizerOp->getInput(0)); + + float stepSize = 2.0f * (inputAbsMean / std::sqrt(quantizerOp->range().second)); + + auto stepSizeTensor = std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}})); + + // XXX Manage backend here ? + stepSizeTensor->setBackend(quantizerOp->getInput(0)->backend()); + stepSizeTensor->setDataType(quantizerOp->getInput(0)->dataType()); + + auto stepSizeProducer = quantizer->getParent(1); + + stepSizeProducer->getOperator()->setOutput(0, stepSizeTensor); + + std::cout << " [ INIT STEP SIZE = " << stepSize << " ] " << std::endl; + + return false; +} + +// INPUT QUANTIZERS INSERTION + +static void setupInputQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits) { const auto matches = SinglePassGraphMatching(graphView).match("(Conv2D#|FC#)"); @@ -34,177 +69,76 @@ void QuantLSQ::insertQuantizers(std::shared_ptr<GraphView> graphView, size_t nbB std::pair<int, int> signedRange = {-std::pow(2, nbBits - 1), std::pow(2, nbBits - 1) - 1}; std::pair<int, int> unsignedRange = {0, std::pow(2, nbBits) - 1}; - // INPUT QUANTIZERS INSERTION + // Create the input quantizer node - // TODO : double check this, and use createUniqueName() - auto inputQuantizerName = makeUniqueName(linearNode->name() + "_lsq_i", graphView); - auto inputQuantizerNode = LSQ(signedRange, inputQuantizerName); + auto quantizerName = makeUniqueName(linearNode->name() + "_lsq_i", graphView); + auto quantizerNode = LSQ(signedRange, quantizerName); - // Set the step size + // Init the step-size using the node call stack - auto inputStepSizeOp = inputQuantizerNode->getParent(1)->getOperator(); - auto inputStepSizeTensor = std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}})); - inputStepSizeOp->setOutput(0, inputStepSizeTensor); + quantizerNode->addBeforeForward([quantizerNode](){ return initStepSize(quantizerNode); }); // Absorb the ReLU when possible ... - // XXX is this safe ??? 
- bool nodeHasParent = static_cast<bool> (linearNode->getParents()[0]); - // bool nodeHasParent = (linearNode->getParents().size() != 0); + bool nodeHasParent = static_cast<bool> (linearNode->getParents()[0]); // XXX is this safe ? if (nodeHasParent) { auto parentNode = linearNode->getParents()[0]; if (parentNode->type() == "ReLU") { - auto inputQuantizerOp = std::static_pointer_cast<LSQ_Op> (inputQuantizerNode->getOperator()); - inputQuantizerOp->range() = unsignedRange; + auto quantizerOp = std::static_pointer_cast<LSQ_Op> (quantizerNode->getOperator()); + quantizerOp->range() = unsignedRange; graphView->replace({parentNode}, {}); } } - // We need to handle the case where the linear node is the first one ... + // Insert the quantizer in the graphView ... + // (We need to handle the case where the linear node is the first one) if (nodeHasParent) { - graphView->insertParent(linearNode, inputQuantizerNode, 0, 0, 0); + graphView->insertParent(linearNode, quantizerNode, 0, 0, 0); } else { - inputQuantizerNode->addChild(graphView); - graphView->add(inputQuantizerNode); + quantizerNode->addChild(graphView); + graphView->add(quantizerNode); } - - // PARAM QUANTIZERS INSERTION - - // TODO : double check this, and use createUniqueName() - auto paramQuantizerName = makeUniqueName(linearNode->name() + "_lsq_p", graphView); - auto paramQuantizerNode = LSQ(signedRange, paramQuantizerName); - graphView->insertParent(linearNode, paramQuantizerNode, 1, 0, 0); - - // Set the step size - - auto paramStepSizeOp = paramQuantizerNode->getParent(1)->getOperator(); - auto paramStepSizeTensor = std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}})); - paramStepSizeOp->setOutput(0, paramStepSizeTensor); } - } -static float getTensorAbsMean(std::shared_ptr<Tensor> tensor) -{ - auto backend = tensor->backend(); - - if (backend == "cuda") - tensor->setBackend("cpu"); - - float value = (*tensor).abs().mean().get<float>(0); - - if (backend == "cuda") - tensor->setBackend("cuda"); - - return value; -} +// PARAM QUANTIZERS INSERTION -static std::map<std::string, float> collectInputStats(std::shared_ptr<GraphView> graphView, std::shared_ptr<Tensor> calibrationData, bool useCuda) +static void setupParamQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits) { - // Propagate the calibration tensor + const auto matches = SinglePassGraphMatching(graphView).match("(Conv2D#|FC#)"); - SequentialScheduler scheduler(graphView); - scheduler.resetScheduling(); - scheduler.forward(true, {calibrationData}); + std::pair<int, int> signedRange = {-std::pow(2, nbBits - 1), std::pow(2, nbBits - 1) - 1}; - // Store the input tensor statistics + for (const auto& match : matches) + { + auto linearNode = match.graph->rootNode(); - if (useCuda) - graphView->setBackend("cpu"); + // TODO : double check this, and use createUniqueName() + auto quantizerName = makeUniqueName(linearNode->name() + "_lsq_p", graphView); + auto quantizerNode = LSQ(signedRange, quantizerName); - std::map<std::string, float> inputStats; - for (auto node : graphView->getNodes()) - { - if (node->type() == "FC" || node->type() == "Conv2D") // TODO: use graph matching !!! 
- { - const auto op = std::static_pointer_cast<LSQ_Op>(node->getOperator()); - float inputAbsMean = getTensorAbsMean(op->getInput(0)); - inputStats.insert(std::make_pair(node->name(), inputAbsMean)); - fmt::println("{} -> {}", node->name(), inputAbsMean); - } - } + // Init the step-size using the node call stack - if (useCuda) - graphView->setBackend("cuda"); + quantizerNode->addBeforeForward([quantizerNode](){ return initStepSize(quantizerNode); }); - return inputStats; -} + // Insert the quantizer in the graphView -static std::map<std::string, float> collectParamStats(std::shared_ptr<GraphView> graphView, bool useCuda) -{ - if (useCuda) - graphView->setBackend("cpu"); - - std::map<std::string, float> paramStats; - for (auto node : graphView->getNodes()) - { - if (node->type() == "FC" || node->type() == "Conv2D") // TODO: use graph matching !!! - { - const auto op = std::static_pointer_cast<LSQ_Op>(node->getOperator()); - float paramAbsMean = getTensorAbsMean(op->getInput(1)); - paramStats.insert(std::make_pair(node->name(), paramAbsMean)); - fmt::println("{} -> {}", node->name(), paramAbsMean); - } + graphView->insertParent(linearNode, quantizerNode, 1, 0, 0); } - - if (useCuda) - graphView->setBackend("cuda"); - - return paramStats; } -static void adjustQuantizersStepSizes(std::shared_ptr<GraphView> graphView, std::map<std::string, float> inputStats, std::map<std::string, float> paramStats) +void QuantLSQ::setupQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits) { - const auto matches = SinglePassGraphMatching(graphView).match("(Conv2D#|FC#)"); - - for (const auto& match : matches) - { - auto linearNode = match.graph->rootNode(); - - // INPUT QUANTIZERS STEP-SIZES - - auto inputQuantNode = linearNode->getParent(0); - auto inputQuantOp = std::static_pointer_cast<LSQ_Op>(inputQuantNode->getOperator()); - - float absMean = inputStats[linearNode->name()]; - float stepSize = 2.0f * (absMean / std::sqrt(inputQuantOp->range().second)); - - auto inputStepSizeOp = inputQuantNode->getParent(1)->getOperator(); - // XXX inputStepSizeOp->setOutput(0, std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}}))); - auto inputStepSizeTensor = std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}})); - inputStepSizeOp->setOutput(0, inputStepSizeTensor); - - // PARAM QUANTIZERS STEP-SIZES - - auto paramQuantNode = linearNode->getParent(1); - auto paramQuantOp = std::static_pointer_cast<LSQ_Op>(paramQuantNode->getOperator()); - - absMean = paramStats[linearNode->name()]; - stepSize = 2.0f * (absMean / std::sqrt(paramQuantOp->range().second)); - - auto paramStepSizeOp = paramQuantNode->getParent(1)->getOperator(); - // XXX paramStepSizeOp->setOutput(0, std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}}))); - auto paramStepSizeTensor = std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}})); - paramStepSizeOp->setOutput(0, paramStepSizeTensor); - } + setupInputQuantizers(graphView, nbBits); + setupParamQuantizers(graphView, nbBits); } -void QuantLSQ::insertAndInitQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits, std::shared_ptr<Tensor> calibrationData) +void QuantLSQ::devLSQ(std::shared_ptr<Tensor> tensor) { - bool useCuda = (calibrationData->backend() == "cuda"); - - // Collect the tensor statisics - auto inputStats = collectInputStats(graphView, calibrationData, useCuda); - - auto paramStats = collectParamStats(graphView, useCuda); - - // Insert the quantizers - insertQuantizers(graphView, nbBits, 1.0); - - // Adjust the quantizers step-sizes - 
adjustQuantizersStepSizes(graphView, inputStats, paramStats); + float mean = (tensor->mean()).get<float> (0); + std::cout << " MEAN = " << mean << std::endl; } } \ No newline at end of file -- GitLab From c7a21a914a69cefe610d6018d8e7b15839c4700f Mon Sep 17 00:00:00 2001 From: Noam ZERAH <noam.zerah@cea.fr> Date: Wed, 8 Jan 2025 16:07:59 +0000 Subject: [PATCH 03/23] Adding the isScaling tag in the PTQ pipeline in order to replace the previous and now deprecated Scaling Metaoperator --- aidge_quantization/_version.py | 4 ++ include/aidge/operator/PTQMetaOps.hpp | 14 ++-- include/aidge/quantization_version.h | 6 +- python_binding/pybind_PTQ.cpp | 9 +++ src/PTQ/Clipping.cpp | 2 +- src/PTQ/PTQ.cpp | 96 +++++++++++++++------------ src/operator/PTQMetaOps.cpp | 39 +++++------ 7 files changed, 99 insertions(+), 71 deletions(-) create mode 100644 aidge_quantization/_version.py diff --git a/aidge_quantization/_version.py b/aidge_quantization/_version.py new file mode 100644 index 0000000..d4ec20e --- /dev/null +++ b/aidge_quantization/_version.py @@ -0,0 +1,4 @@ +# file generated by setuptools_scm +# don't change, don't track in version control +__version__ = version = '0.2.1.dev60+g8044e79.d20250106' +__version_tuple__ = version_tuple = (0, 2, 1, 'dev60', 'g8044e79.d20250106') diff --git a/include/aidge/operator/PTQMetaOps.hpp b/include/aidge/operator/PTQMetaOps.hpp index b9bad0d..8fd7f5d 100644 --- a/include/aidge/operator/PTQMetaOps.hpp +++ b/include/aidge/operator/PTQMetaOps.hpp @@ -29,13 +29,13 @@ namespace Aidge { /// @return A shared pointer to an instance of the meta-operator node. std::shared_ptr<Aidge::Node> Quantizer(double scalingFactor, double clipMin, double clipMax, const std::string& name); -/// @brief The purpose of Scaling is to encapsulate the Mul operator and tag it as a PTQ node rather than a regular Mul operator. -/// Therefore, this meta-operator consists solely of a [Mul] operation. -/// -/// @param scalingFactor The scaling factor to apply to the input (a scalar to multiply the input with). -/// @param name The name of the meta-operator node created. -/// @return A shared pointer to an instance of the scaling node. -std::shared_ptr<Aidge::Node> Scaling(double scalingFactor, const std::string& name = ""); +/// @brief Updates the scaling factor of a "Mul" node in a graph if the node is marked as a scaling node. +/// This function multiplies the existing scaling factor by a given coefficient. It verifies that the node is of the correct type ("Mul") +/// and has the `isScaling` attribute. If these conditions are not met, a warning is logged. +/// @param node A shared pointer to an `Aidge::Node` object representing the node to modify. +/// @param coeff A double representing the multiplication coefficient to apply to the scaling factor. +void multiplyScalingFactor(std::shared_ptr<Aidge::Node> node, double coeff); + /// @brief Updates the scaling factor of a PTQ meta-operator node, allowing for dynamic adjustment of the scaling parameter. /// This function sets a new scaling factor for a specified meta-operator node, modifying the scalar applied in the [Mul] operation. 
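///
/// A brief usage sketch (both calls are declared in this header; the numeric values
/// are illustrative only):
///
///   auto q = Quantizer(0.5, -128.0, 127.0, "q0"); // scale by 0.5, clip to [-128, 127]
///   updateScalingFactor(q, 0.25);                 // replace the 0.5 scalar with 0.25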
diff --git a/include/aidge/quantization_version.h b/include/aidge/quantization_version.h index 546263a..f14a045 100644 --- a/include/aidge/quantization_version.h +++ b/include/aidge/quantization_version.h @@ -3,9 +3,9 @@ namespace Aidge { static constexpr const int PROJECT_VERSION_MAJOR = 0; -static constexpr const int PROJECT_VERSION_MINOR = 2; +static constexpr const int PROJECT_VERSION_MINOR = 3; static constexpr const int PROJECT_VERSION_PATCH = 0; -static constexpr const char * PROJECT_VERSION = "0.2.0"; -static constexpr const char * PROJECT_GIT_HASH = "f50c860"; +static constexpr const char * PROJECT_VERSION = "0.3.0"; +static constexpr const char * PROJECT_GIT_HASH = "8c89214"; } #endif // VERSION_H diff --git a/python_binding/pybind_PTQ.cpp b/python_binding/pybind_PTQ.cpp index 1de7976..a717a94 100644 --- a/python_binding/pybind_PTQ.cpp +++ b/python_binding/pybind_PTQ.cpp @@ -17,6 +17,7 @@ #include "aidge/quantization/PTQ/Clipping.hpp" #include "aidge/quantization/PTQ/CLE.hpp" #include "aidge/quantization/PTQ/PTQ.hpp" +#include "aidge/quantization/PTQ/PTQMetaOps.hpp" #include "aidge/graph/GraphView.hpp" @@ -48,6 +49,14 @@ void init_PTQ(py::module &m) { :type network: :py:class:`aidge_core.GraphView` )mydelimiter"); + m.def( "multiply_scaling_factor",&multiplyScalingFactor,py::arg("node"), py::arg("coeff") + R"mydelimiter( + Updates the scaling factor of a "Mul" node in a graph if the node is marked as a scaling node. This function multiplies the existing scaling factor by a given coefficient. + :param node: A node representing the node to modify. + :param coeff: A floating value representing the multiplication coefficient to apply to the scaling factor. + )mydelimiter" + ); + m.def("normalize_parameters", &normalizeParameters, py::arg("network"), R"mydelimiter( Normalize the parameters of each parametrized node, so that they fit in the [-1:1] range. 
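    A short usage sketch (assumes the bindings are imported as ``aidge_quantization``
    and ``network`` is an ``aidge_core.GraphView``):

    >>> aidge_quantization.normalize_parameters(network)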
diff --git a/src/PTQ/Clipping.cpp b/src/PTQ/Clipping.cpp index 66b0ab3..ef34fdc 100644 --- a/src/PTQ/Clipping.cpp +++ b/src/PTQ/Clipping.cpp @@ -222,7 +222,7 @@ std::map<std::string, double> adjustRanges(Clipping clippingMode, std::map<std:: for (std::shared_ptr<Node> node : graphView->getNodes()) { - if (node->type() == "Scaling") + if (node->attributes()->hasAttr("isScaling")) { std::vector<int> histogram = histograms[node->name()]; diff --git a/src/PTQ/PTQ.cpp b/src/PTQ/PTQ.cpp index 190f7e2..aa73f61 100644 --- a/src/PTQ/PTQ.cpp +++ b/src/PTQ/PTQ.cpp @@ -264,12 +264,19 @@ void insertResidualNodes(std::shared_ptr<GraphView> graphView) Log::info(" ### inserting multiplicative node ..."); std::string residualNodeName = makeUniqueName(parentNode->name() + "_Res", graphView); - std::shared_ptr<Node> residualNode = Scaling(1.0, residualNodeName); + std::shared_ptr<Node> residualNode = Mul(residualNodeName); + residualNode->attributes()->addAttr("isScaling", 0.0); + + //Adding the SF as a producer of the node + std::shared_ptr<Tensor> scalingFactorTensor = std::make_shared<Tensor>(Array1D<double, 1> {1.0}); + std::shared_ptr<Node> scalingFactorProducer = addProducer(residualNode, 1, {1}, "ScalingFactor"); + scalingFactorProducer->getOperator()->setOutput(0, scalingFactorTensor); - residualNode->getOperator()->setDataType(DataType::Float64); //getDataType(parentNode) + residualNode->getOperator()->setDataType(DataType::Float64); // getDataType(parentNode) residualNode->getOperator()->setBackend("cpu"); graphView->insertParent(node, residualNode, i, 0, 0); + graphView->add(scalingFactorProducer); } } } @@ -295,7 +302,16 @@ void insertScalingNodes(std::shared_ptr<GraphView> graphView) if (isAffine(parentNode) || isMerging(parentNode)) { std::string scalingNodeName = makeUniqueName(parentNode->name() + "_Scaling", graphView); - std::shared_ptr<Node> scalingNode = Scaling(1.0, scalingNodeName); + //std::shared_ptr<Node> scalingNode = Scaling(1.0, scalingNodeName); + + //Adding Mul operator with tag "isScaling" + std::shared_ptr<Aidge::Node> scalingNode = Mul(scalingNodeName); + scalingNode->attributes()->addAttr("isScaling",0.0); + + //Adding the SF as a producer of the node + std::shared_ptr<Tensor> scalingFactorTensor = std::make_shared<Tensor>(Array1D<double, 1> {1.0}); + std::shared_ptr<Node> scalingFactorProducer = addProducer(scalingNode, 1, {1}, "ScalingFactor"); + scalingFactorProducer->getOperator()->setOutput(0, scalingFactorTensor); scalingNode->getOperator()->setDataType(DataType::Float64); // getDataType(parentNode) scalingNode->getOperator()->setBackend("cpu"); @@ -320,12 +336,14 @@ void insertScalingNodes(std::shared_ptr<GraphView> graphView) for (std::size_t i = 0; i < nextNodes.size(); i++) scalingNode->addChild(nextNodes[i], 0, inputIndices[i]); + graphView->add(scalingFactorProducer); graphView->add(scalingNode); } else { // Log::info(" last node reached ! 
"); parentNode->addChild(scalingNode, 0, 0); + graphView->add(scalingFactorProducer); graphView->add(scalingNode); } } @@ -335,7 +353,7 @@ void insertScalingNodes(std::shared_ptr<GraphView> graphView) static std::shared_ptr<Node> getPreviousScalingNode(std::shared_ptr<Node> mergingNode) { std::shared_ptr<Node> currNode = mergingNode; - while(currNode->type() != "Scaling") + while(!currNode->attributes()->hasAttr("isScaling")) { if (currNode->getParents().size() == 0) { @@ -378,7 +396,7 @@ void normalizeParameters(std::shared_ptr<GraphView> graphView) for (std::shared_ptr<Node> node : nodeVector) { // Scaling nodes still have a ratio of 1, so they are seamless ... - if (node->type() == "ReLU" || node->type() == "Scaling" || isSeamless(node)) + if (node->type() == "ReLU" || node->attributes()->hasAttr("isScaling") || isSeamless(node)) { if (node != firstNode) { @@ -439,8 +457,9 @@ void normalizeParameters(std::shared_ptr<GraphView> graphView) std::shared_ptr<Node> scalingNode = getPreviousScalingNode(mergingNode); - double currScalingFactor = getScalingFactor(scalingNode); - updateScalingFactor(scalingNode, currScalingFactor / rescaling); + //double currScalingFactor = getScalingFactor(scalingNode); + //updateScalingFactor(scalingNode, currScalingFactor / rescaling); + multiplyScalingFactor(scalingNode,1/rescaling); accumulatedRatios[mergingNode->name()] /= rescaling; // optional ... } @@ -465,7 +484,7 @@ std::map<std::string, double> computeRanges(std::shared_ptr<GraphView> graphView std::set<std::shared_ptr<Node>> nodeSet = graphView->getNodes(); for (std::shared_ptr<Node> node : nodeSet) { - if ((scalingNodesOnly && (node->type() == "Scaling")) || (!scalingNodesOnly && (node->type() != "Producer"))) + if ((scalingNodesOnly && (node->attributes()->hasAttr("isScaling"))) || (!scalingNodesOnly && (node->type() != "Producer"))) { std::shared_ptr<Operator> nodeOperator = node->getOperator(); std::shared_ptr<Tensor> valueTensor = std::static_pointer_cast<Tensor> (nodeOperator->getRawOutput(0)); @@ -487,7 +506,7 @@ std::map<std::string, double> computeRanges(std::shared_ptr<GraphView> graphView // std::shared_ptr<Node> inputNode = getFirstNode(graphView); for (std::shared_ptr<Node> node : nodeSet) - if ((scalingNodesOnly && (node->type() == "Scaling")) || (!scalingNodesOnly && (node->type() != "Producer"))) + if ((scalingNodesOnly && (node->attributes()->hasAttr("isScaling"))) || (!scalingNodesOnly && (node->type() != "Producer"))) valueRanges.insert(std::make_pair(node->name(), 0)); if (useCuda) @@ -514,7 +533,7 @@ std::map<std::string, double> computeRanges(std::shared_ptr<GraphView> graphView std::map<std::string, double> sampleRanges; for (std::shared_ptr<Node> node : nodeSet) { - if ((scalingNodesOnly && (node->type() == "Scaling")) || (!scalingNodesOnly && (node->type() != "Producer"))) + if ((scalingNodesOnly && (node->attributes()->hasAttr("isScaling"))) || (!scalingNodesOnly && (node->type() != "Producer"))) { std::shared_ptr<Operator> nodeOperator = node->getOperator(); std::shared_ptr<Tensor> valueTensor = std::static_pointer_cast<Tensor> (nodeOperator->getRawOutput(0)); @@ -536,7 +555,7 @@ std::map<std::string, double> computeRanges(std::shared_ptr<GraphView> graphView for (std::shared_ptr<Node> node : nodeSet) { - if ((scalingNodesOnly && (node->type() == "Scaling")) || (!scalingNodesOnly && (node->type() != "Producer"))) + if ((scalingNodesOnly && (node->attributes()->hasAttr("isScaling"))) || (!scalingNodesOnly && (node->type() != "Producer"))) { std::string nodeName = 
node->name(); if (sampleRanges[nodeName] > valueRanges[nodeName]) @@ -589,7 +608,7 @@ void normalizeActivations(std::shared_ptr<GraphView> graphView, std::map<std::st // Here prevNode is either a 'Affine' or a 'Merging' // => do not split the cases, just handle the bias ... - if (node->type() == "Scaling") + if (node->attributes()->hasAttr("isScaling")) { // retrieve the previous scaling factor ... std::shared_ptr<Node> prevNode = node->getParent(0); @@ -598,8 +617,9 @@ void normalizeActivations(std::shared_ptr<GraphView> graphView, std::map<std::st // ValueRanges must contains all the scaling nodes !!! double scalingFactor = valueRanges[node->name()]; - double currScalingFactor = getScalingFactor(node); - updateScalingFactor(node, currScalingFactor / (scalingFactor / prevScalingFactor)); + //double currScalingFactor = getScalingFactor(node); + //updateScalingFactor(node, currScalingFactor / (scalingFactor / prevScalingFactor)); + multiplyScalingFactor(node,1/(scalingFactor / prevScalingFactor)); scalingFactors[node->name()] = scalingFactor; @@ -642,8 +662,9 @@ void normalizeActivations(std::shared_ptr<GraphView> graphView, std::map<std::st std::shared_ptr<Node> scalingNode = getPreviousScalingNode(mergingNode); //Log::info(" SCALING NODE : {} {}", scalingNode->type(), scalingNode->name()); - double currScalingFactor = getScalingFactor(scalingNode); - updateScalingFactor(scalingNode, currScalingFactor * rescaling); + //double currScalingFactor = getScalingFactor(scalingNode); + //updateScalingFactor(scalingNode, currScalingFactor * rescaling); + multiplyScalingFactor(scalingNode,rescaling) ; } } } @@ -679,7 +700,7 @@ std::map<std::string, std::pair<bool, bool>> computeSignMap(std::shared_ptr<Grap signMap[node->name()].second = false; } - if (node->type() == "Scaling") + if (node->attributes()->hasAttr("isScaling")) { signMap[node->name()].second = false; @@ -726,7 +747,7 @@ std::map<std::string, std::pair<bool, bool>> computeSignMap(std::shared_ptr<Grap // Arbitration : Signed type wins ! for(std::shared_ptr<Node> parent : parentNodes) { - while (parent->type() != "Scaling") + while (!parent->attributes()->hasAttr("isScaling")) { signMap[parent->name()] = std::make_pair(false, false); // We are on a branch so nodes always have 1 parent ... @@ -842,8 +863,9 @@ void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_ std::shared_ptr<Node> scalingNode = *(node->getChildren().begin()); // Assert if scalingNode is a Scaling ... - double currScalingFactor = getScalingFactor(scalingNode); - updateScalingFactor(scalingNode, currScalingFactor * rescaling); + // double currScalingFactor = getScalingFactor(scalingNode); + // updateScalingFactor(scalingNode, currScalingFactor * rescaling); + multiplyScalingFactor(scalingNode,rescaling) ; } if (isMerging(node)) @@ -858,23 +880,27 @@ void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_ std::shared_ptr<Node> scalingNode = *(node->getChildren().begin()); // Assert if scalingNode is a Scaling ... - double currScalingFactor = getScalingFactor(scalingNode); // XXX bad naming - updateScalingFactor(scalingNode, currScalingFactor * rescaling); + // double currScalingFactor = getScalingFactor(scalingNode); // XXX bad naming + // updateScalingFactor(scalingNode, currScalingFactor * rescaling); + multiplyScalingFactor(scalingNode,rescaling) ; } // Handle the Scaling Nodes ... 
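        // (Worked range example for the Quantizer built below: with nbBits = 8,
        //  signedMax = 2^(8-1) - 1 = 127, so the clipping interval
        //  [-(signedMax + 1), signedMax] is [-128, 127].)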
- if (node->type() == "Scaling") + if (node->attributes()->hasAttr("isScaling")) { if (!noQuant) { // Replace the Scaling Node by Quantizer + auto scalingFactorTensor = std::static_pointer_cast<OperatorTensor>(node->getOperator())->getInput(1); + std::shared_ptr<Tensor> fallback; + const Tensor& localTensor = scalingFactorTensor->refCastFrom(fallback, DataType::Float64, "cpu"); + double old_sf = localTensor.get<double>(0);//!\\ - std::shared_ptr<Node> quantizerNode = Quantizer(getScalingFactor(node), -(signedMax + 1), signedMax, node->name()); + std::shared_ptr<Node> quantizerNode = Quantizer(old_sf, -(signedMax + 1), signedMax, node->name()); quantizerNode->getOperator()->setDataType(DataType::Float64); // getDataType(parentNode) quantizerNode->getOperator()->setBackend("cpu"); - - graphView->replace({node}, {quantizerNode}); + graphView->replace({node,node->getParent(1)}, {quantizerNode}); if (optimizeSigns) { @@ -888,6 +914,7 @@ void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_ double currScalingFactor = getScalingFactor(quantizerNode); updateScalingFactor(quantizerNode, currScalingFactor * rescaling); + if(outputIsUnsigned) { @@ -965,7 +992,7 @@ void performSingleShiftApproximation(std::shared_ptr<GraphView> graphView, bool double approx = std::pow(2, std::ceil(std::log2(base))); - updateScalingFactor(scalingNode, approx); + updateScalingFactor(scalingNode,approx); double ratio = base / approx; @@ -988,7 +1015,7 @@ void performSingleShiftApproximation(std::shared_ptr<GraphView> graphView, bool static void printScalingFactors(std::shared_ptr<GraphView> graphView) { for (auto node : retrieveNodeVector(graphView)) - if (node->type() == "Scaling" || node->type() == "Quantizer") + if (node->attributes()->hasAttr("isScaling") || node->type() == "Quantizer") { double scalingFactor = getScalingFactor(node); Log::info(" {:.6f} ({})", scalingFactor, node->name()); @@ -1010,18 +1037,6 @@ static void setupDataType(std::shared_ptr<GraphView> graphView, std::vector<std: tensor->setDataType(dataType); } -static void printRanges(std::shared_ptr<GraphView> graphView, std::map<std::string, double> valueRanges) -{ - SequentialScheduler scheduler(graphView); - scheduler.resetScheduling(); - scheduler.generateScheduling(); - - auto scheduling = scheduler.getStaticScheduling(); - for (auto node : scheduling) - if (node->type() == "Scaling") - Log::info(" {} range = {} ", node->name(), valueRanges[node->name()]); -} - void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits, std::vector<std::shared_ptr<Tensor>> inputDataSet, Clipping clippingMode, bool noQuant, bool optimizeSigns, bool singleShift, bool useCuda, bool verbose) { Log::info(" === QUANT PTQ 0.2.21 === "); @@ -1041,7 +1056,6 @@ void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits, insertScalingNodes(graphView); crossLayerEqualization(graphView); - Log::info(" Normalizing the parameters ..."); normalizeParameters(graphView); diff --git a/src/operator/PTQMetaOps.cpp b/src/operator/PTQMetaOps.cpp index 56245da..7be194c 100644 --- a/src/operator/PTQMetaOps.cpp +++ b/src/operator/PTQMetaOps.cpp @@ -60,23 +60,6 @@ std::shared_ptr<Node> Quantizer(double scalingFactor, double clipMin, double cli return metaopNode; } -std::shared_ptr<Node> Scaling(double scalingFactor, const std::string& name) -{ - std::shared_ptr<Tensor> scalingFactorTensor = std::make_shared<Tensor>(Array1D<double, 1> {scalingFactor}); - - std::shared_ptr<Node> mulNode = Mul((!name.empty()) ? 
name + "_Scaling" : ""); - - std::shared_ptr<Node> scalingFactorProducer = addProducer<1>(mulNode, 1, {1}, "ScalingFactor"); - scalingFactorProducer->getOperator()->setOutput(0, scalingFactorTensor); - - std::shared_ptr<GraphView> graphView = Sequential({mulNode}); - std::shared_ptr<GraphView> connectedGraphView = getConnectedGraphView(mulNode); - - NodePtr metaopNode = MetaOperator("Scaling", connectedGraphView, {}, name); - - return metaopNode; -} - static std::shared_ptr<Node> getSubNode(std::shared_ptr<GraphView> graphView, std::string nodeType) { std::shared_ptr<Node> mulNode = nullptr; @@ -87,9 +70,27 @@ static std::shared_ptr<Node> getSubNode(std::shared_ptr<GraphView> graphView, st return mulNode; } +void multiplyScalingFactor(std::shared_ptr<Aidge::Node> node,double coeff) +{ + if(node->type() == "Mul" && node->attributes()->hasAttr("isScaling")) + { + auto scalingFactorTensor = std::static_pointer_cast<OperatorTensor>(node->getOperator())->getInput(1); + std::shared_ptr<Tensor> fallback; + const Tensor& localTensor = scalingFactorTensor->refCastFrom(fallback, DataType::Float64, "cpu"); + double previousScalingFactor = localTensor.get<double>(0); + std::shared_ptr<Tensor> finalTensor = std::make_shared<Tensor>(Array1D<double, 1> {previousScalingFactor * coeff}); + node->input(1).first->getOperator()->setOutput(0, finalTensor); + } + else + { + Log::warn(" Cannot update the scaling factor on Node of type {} with no scaling tag", node->type()); + } +} + + void updateScalingFactor(std::shared_ptr<Node> metaOpNode, double scalingFactor) { - if(metaOpNode->type() != "Scaling" && metaOpNode->type() != "Quantizer") + if(metaOpNode->type() != "Quantizer") Log::warn(" Cannot update the scaling factor on Node of type {}", metaOpNode->type()); std::shared_ptr<Tensor> scalingFactorTensor = std::make_shared<Tensor>(Array1D<double, 1> {scalingFactor}); @@ -106,7 +107,7 @@ void updateScalingFactor(std::shared_ptr<Node> metaOpNode, double scalingFactor) double getScalingFactor(std::shared_ptr<Node> MetaOpNode) { - if (MetaOpNode->type() != "Scaling" && MetaOpNode->type() != "Quantizer") { + if (MetaOpNode->type() != "Quantizer") { Log::warn(" Cannot get the scaling factor on Node of type {}", MetaOpNode->type()); return 0; } -- GitLab From 6df0775951f98b163ba451fe886bedf80ae8452c Mon Sep 17 00:00:00 2001 From: Noam ZERAH <noam.zerah@cea.fr> Date: Mon, 13 Jan 2025 15:43:30 +0000 Subject: [PATCH 04/23] Refactoring Scaling Metaop deletions by removing old getScalingFactor and updateScalingFactor; Adding clear tag isCompensation for Mul used as compensations nodes --- include/aidge/operator/PTQMetaOps.hpp | 8 ------ include/aidge/quantization/PTQ/PTQ.hpp | 8 ++++++ include/aidge/quantization_version.h | 2 +- python_binding/pybind_PTQ.cpp | 2 +- src/PTQ/PTQ.cpp | 35 +++++++++++++++----------- src/operator/PTQMetaOps.cpp | 16 ------------ 6 files changed, 31 insertions(+), 40 deletions(-) diff --git a/include/aidge/operator/PTQMetaOps.hpp b/include/aidge/operator/PTQMetaOps.hpp index 8fd7f5d..9ca76fb 100644 --- a/include/aidge/operator/PTQMetaOps.hpp +++ b/include/aidge/operator/PTQMetaOps.hpp @@ -29,14 +29,6 @@ namespace Aidge { /// @return A shared pointer to an instance of the meta-operator node. std::shared_ptr<Aidge::Node> Quantizer(double scalingFactor, double clipMin, double clipMax, const std::string& name); -/// @brief Updates the scaling factor of a "Mul" node in a graph if the node is marked as a scaling node. -/// This function multiplies the existing scaling factor by a given coefficient. 
It verifies that the node is of the correct type ("Mul") -/// and has the `isScaling` attribute. If these conditions are not met, a warning is logged. -/// @param node A shared pointer to an `Aidge::Node` object representing the node to modify. -/// @param coeff A double representing the multiplication coefficient to apply to the scaling factor. -void multiplyScalingFactor(std::shared_ptr<Aidge::Node> node, double coeff); - - /// @brief Updates the scaling factor of a PTQ meta-operator node, allowing for dynamic adjustment of the scaling parameter. /// This function sets a new scaling factor for a specified meta-operator node, modifying the scalar applied in the [Mul] operation. /// The meta-operator node must be a PTQ-specific operator, such as a Quantizer or Scaling node. diff --git a/include/aidge/quantization/PTQ/PTQ.hpp b/include/aidge/quantization/PTQ/PTQ.hpp index bfe671e..96fb2ed 100644 --- a/include/aidge/quantization/PTQ/PTQ.hpp +++ b/include/aidge/quantization/PTQ/PTQ.hpp @@ -77,6 +77,14 @@ namespace Aidge { */ bool checkArchitecture(std::shared_ptr<GraphView> graphView); + /** + * @brief This function multiplies the existing scaling factor by a given coefficient. It verifies that the node is of the correct type ("Mul") + * and has the `isScaling` attribute. If these conditions are not met, a warning is logged. + * @param node A shared pointer to an `Aidge::Node` object representing the node to modify. + * @param coeff A double representing the multiplication coefficient to apply to the scaling factor. + */ + void multiplyScalingFactor(std::shared_ptr<Aidge::Node> node, double coeff); + void prepareNetwork(std::shared_ptr<GraphView> graphView); diff --git a/include/aidge/quantization_version.h b/include/aidge/quantization_version.h index f14a045..740621a 100644 --- a/include/aidge/quantization_version.h +++ b/include/aidge/quantization_version.h @@ -6,6 +6,6 @@ static constexpr const int PROJECT_VERSION_MAJOR = 0; static constexpr const int PROJECT_VERSION_MINOR = 3; static constexpr const int PROJECT_VERSION_PATCH = 0; static constexpr const char * PROJECT_VERSION = "0.3.0"; -static constexpr const char * PROJECT_GIT_HASH = "8c89214"; +static constexpr const char * PROJECT_GIT_HASH = "b4af1ce"; } #endif // VERSION_H diff --git a/python_binding/pybind_PTQ.cpp b/python_binding/pybind_PTQ.cpp index a717a94..0ff5859 100644 --- a/python_binding/pybind_PTQ.cpp +++ b/python_binding/pybind_PTQ.cpp @@ -49,7 +49,7 @@ void init_PTQ(py::module &m) { :type network: :py:class:`aidge_core.GraphView` )mydelimiter"); - m.def( "multiply_scaling_factor",&multiplyScalingFactor,py::arg("node"), py::arg("coeff") + m.def( "multiply_scaling_factor",&multiplyScalingFactor,py::arg("node"), py::arg("coeff"), R"mydelimiter( Updates the scaling factor of a "Mul" node in a graph if the node is marked as a scaling node. This function multiplies the existing scaling factor by a given coefficient. :param node: A node representing the node to modify. 
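      Example (a sketch; assumes the bindings are imported as ``aidge_quantization`` and
      ``node`` is a Mul node carrying the ``isScaling`` attribute):

      >>> aidge_quantization.multiply_scaling_factor(node, 0.5)  # halve the scaling factor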
diff --git a/src/PTQ/PTQ.cpp b/src/PTQ/PTQ.cpp index aa73f61..c0ea86c 100644 --- a/src/PTQ/PTQ.cpp +++ b/src/PTQ/PTQ.cpp @@ -72,6 +72,23 @@ bool checkArchitecture(std::shared_ptr<GraphView> graphView) return true; } +void multiplyScalingFactor(std::shared_ptr<Aidge::Node> node,double coeff) +{ + if(node->type() == "Mul" && node->attributes()->hasAttr("isScaling")) + { + auto scalingFactorTensor = std::static_pointer_cast<OperatorTensor>(node->getOperator())->getInput(1); + std::shared_ptr<Tensor> fallback; + const Tensor& localTensor = scalingFactorTensor->refCastFrom(fallback, DataType::Float64, "cpu"); + double previousScalingFactor = localTensor.get<double>(0); + std::shared_ptr<Tensor> finalTensor = std::make_shared<Tensor>(Array1D<double, 1> {previousScalingFactor * coeff}); + node->input(1).first->getOperator()->setOutput(0, finalTensor); + } + else + { + Log::warn(" Cannot update the scaling factor on Node of type {} with no scaling tag", node->type()); + } +} + static void rescaleTensor(std::shared_ptr<Tensor> tensor, float scaling) { auto mulOp = Mul_Op(); @@ -457,8 +474,6 @@ void normalizeParameters(std::shared_ptr<GraphView> graphView) std::shared_ptr<Node> scalingNode = getPreviousScalingNode(mergingNode); - //double currScalingFactor = getScalingFactor(scalingNode); - //updateScalingFactor(scalingNode, currScalingFactor / rescaling); multiplyScalingFactor(scalingNode,1/rescaling); accumulatedRatios[mergingNode->name()] /= rescaling; // optional ... @@ -617,8 +632,6 @@ void normalizeActivations(std::shared_ptr<GraphView> graphView, std::map<std::st // ValueRanges must contains all the scaling nodes !!! double scalingFactor = valueRanges[node->name()]; - //double currScalingFactor = getScalingFactor(node); - //updateScalingFactor(node, currScalingFactor / (scalingFactor / prevScalingFactor)); multiplyScalingFactor(node,1/(scalingFactor / prevScalingFactor)); scalingFactors[node->name()] = scalingFactor; @@ -661,9 +674,7 @@ void normalizeActivations(std::shared_ptr<GraphView> graphView, std::map<std::st std::shared_ptr<Node> scalingNode = getPreviousScalingNode(mergingNode); //Log::info(" SCALING NODE : {} {}", scalingNode->type(), scalingNode->name()); - - //double currScalingFactor = getScalingFactor(scalingNode); - //updateScalingFactor(scalingNode, currScalingFactor * rescaling); + multiplyScalingFactor(scalingNode,rescaling) ; } } @@ -863,8 +874,6 @@ void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_ std::shared_ptr<Node> scalingNode = *(node->getChildren().begin()); // Assert if scalingNode is a Scaling ... - // double currScalingFactor = getScalingFactor(scalingNode); - // updateScalingFactor(scalingNode, currScalingFactor * rescaling); multiplyScalingFactor(scalingNode,rescaling) ; } @@ -880,8 +889,6 @@ void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_ std::shared_ptr<Node> scalingNode = *(node->getChildren().begin()); // Assert if scalingNode is a Scaling ... 
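            // (One way the assert suggested above could look, using the isScaling tag
            //  from this patch series; a hypothetical sketch, not part of the change:
            //  assert(scalingNode->attributes()->hasAttr("isScaling"));)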
- // double currScalingFactor = getScalingFactor(scalingNode); // XXX bad naming - // updateScalingFactor(scalingNode, currScalingFactor * rescaling); multiplyScalingFactor(scalingNode,rescaling) ; } @@ -951,6 +958,8 @@ static void insertCompensationNodes(std::shared_ptr<GraphView> graphView, std::u std::string mulNodeName = makeUniqueName(node->name() + "_Mul", graphView); std::shared_ptr<Node> mulNode = Mul(mulNodeName); + + mulNode->attributes()->addAttr("isCompensation",0.0); mulNode->getOperator()->setDataType(DataType::Float64); // getDataType(parentNode) mulNode->getOperator()->setBackend("cpu"); @@ -982,9 +991,7 @@ void performSingleShiftApproximation(std::shared_ptr<GraphView> graphView, bool for (std::shared_ptr<Node> node : nodeVector) { - // TODO : use Compensation nodes instead of Mul nodes - - if (isAffine(node) || (node->type() == "Mul")) + if (isAffine(node) || (node->type() == "Mul" && node->attributes()->hasAttr("isCompensation"))) { std::shared_ptr<Node> scalingNode = (*node->getChildren().begin()); diff --git a/src/operator/PTQMetaOps.cpp b/src/operator/PTQMetaOps.cpp index 7be194c..937fa48 100644 --- a/src/operator/PTQMetaOps.cpp +++ b/src/operator/PTQMetaOps.cpp @@ -70,22 +70,6 @@ static std::shared_ptr<Node> getSubNode(std::shared_ptr<GraphView> graphView, st return mulNode; } -void multiplyScalingFactor(std::shared_ptr<Aidge::Node> node,double coeff) -{ - if(node->type() == "Mul" && node->attributes()->hasAttr("isScaling")) - { - auto scalingFactorTensor = std::static_pointer_cast<OperatorTensor>(node->getOperator())->getInput(1); - std::shared_ptr<Tensor> fallback; - const Tensor& localTensor = scalingFactorTensor->refCastFrom(fallback, DataType::Float64, "cpu"); - double previousScalingFactor = localTensor.get<double>(0); - std::shared_ptr<Tensor> finalTensor = std::make_shared<Tensor>(Array1D<double, 1> {previousScalingFactor * coeff}); - node->input(1).first->getOperator()->setOutput(0, finalTensor); - } - else - { - Log::warn(" Cannot update the scaling factor on Node of type {} with no scaling tag", node->type()); - } -} void updateScalingFactor(std::shared_ptr<Node> metaOpNode, double scalingFactor) -- GitLab From d677b02f303f8c80c556a1aad47140a7a5c48eb5 Mon Sep 17 00:00:00 2001 From: Noam ZERAH <noam.zerah@cea.fr> Date: Mon, 13 Jan 2025 15:56:11 +0000 Subject: [PATCH 05/23] Changing include in python bindings --- python_binding/pybind_PTQ.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/python_binding/pybind_PTQ.cpp b/python_binding/pybind_PTQ.cpp index 0ff5859..0a37a60 100644 --- a/python_binding/pybind_PTQ.cpp +++ b/python_binding/pybind_PTQ.cpp @@ -17,8 +17,6 @@ #include "aidge/quantization/PTQ/Clipping.hpp" #include "aidge/quantization/PTQ/CLE.hpp" #include "aidge/quantization/PTQ/PTQ.hpp" -#include "aidge/quantization/PTQ/PTQMetaOps.hpp" - #include "aidge/graph/GraphView.hpp" namespace py = pybind11; -- GitLab From fe36b4a13adc28b6b0a2b3ed76a74464f30995c7 Mon Sep 17 00:00:00 2001 From: Noam ZERAH <noam.zerah@cea.fr> Date: Wed, 15 Jan 2025 11:05:21 +0000 Subject: [PATCH 06/23] rebasing with dev --- include/aidge/quantization_version.h | 2 +- src/PTQ/PTQ.cpp | 48 ++++++++++++---------------- 2 files changed, 22 insertions(+), 28 deletions(-) diff --git a/include/aidge/quantization_version.h b/include/aidge/quantization_version.h index 740621a..d773aa8 100644 --- a/include/aidge/quantization_version.h +++ b/include/aidge/quantization_version.h @@ -6,6 +6,6 @@ static constexpr const int PROJECT_VERSION_MAJOR = 0; static constexpr const int 
PROJECT_VERSION_MINOR = 3;
 static constexpr const int PROJECT_VERSION_PATCH = 0;
 static constexpr const char * PROJECT_VERSION = "0.3.0";
-static constexpr const char * PROJECT_GIT_HASH = "b4af1ce";
+static constexpr const char * PROJECT_GIT_HASH = "94747bf";
 }
 #endif // VERSION_H
diff --git a/src/PTQ/PTQ.cpp b/src/PTQ/PTQ.cpp
index c0ea86c..f95a94a 100644
--- a/src/PTQ/PTQ.cpp
+++ b/src/PTQ/PTQ.cpp
@@ -283,6 +283,7 @@ void insertResidualNodes(std::shared_ptr<GraphView> graphView)
                     std::string residualNodeName = makeUniqueName(parentNode->name() + "_Res", graphView);
                     std::shared_ptr<Node> residualNode = Mul(residualNodeName);
                     residualNode->attributes()->addAttr("isScaling", 0.0);
+                    residualNode->attributes()->addAttr("isResidual", 0.0);
 
                     //Adding the SF as a producer of the node
                     std::shared_ptr<Tensor> scalingFactorTensor = std::make_shared<Tensor>(Array1D<double, 1> {1.0});
@@ -944,43 +945,36 @@ static void insertCompensationNodes(std::shared_ptr<GraphView> graphView, std::u
     {
         // A merging node is always followed by a Quantizer node at this point
 
-        if (node->type() == "Quantizer")
+        if (node->type() == "Quantizer" && node->attributes()->hasAttr("isResidual"))
         {
             // check if the Quantizer is a residual one, and insert a compensation node if so ...
+            // create and insert the multiplicative node before the Quantizer
 
-            bool prevNodeIsForking = ((node->getParent(0))->getChildren().size() > 1);
-            bool prevNodeIsAffine = isAffine(node->getParent(0));
-            bool insertNode = prevNodeIsForking || !prevNodeIsAffine;
-
-            if (insertNode)
-            {
-                // create and insert the multiplicative node before the Quantizer
-
-                std::string mulNodeName = makeUniqueName(node->name() + "_Mul", graphView);
-                std::shared_ptr<Node> mulNode = Mul(mulNodeName);
-
-                mulNode->attributes()->addAttr("isCompensation",0.0);
-                mulNode->getOperator()->setDataType(DataType::Float64); // getDataType(parentNode)
-                mulNode->getOperator()->setBackend("cpu");
+            std::string mulNodeName = makeUniqueName(node->name() + "_Mul", graphView);
+            std::shared_ptr<Node> mulNode = Mul(mulNodeName);
+
+            mulNode->attributes()->addAttr("isCompensation",0.0);
+            mulNode->getOperator()->setDataType(DataType::Float64); // getDataType(parentNode)
+            mulNode->getOperator()->setBackend("cpu");
 
-                graphView->insertParent(node, mulNode, 0, 0, 0);
+            graphView->insertParent(node, mulNode, 0, 0, 0);
 
-                // Add the coeff producer to the multiplier node
+            // Add the coeff producer to the multiplier node
 
-                std::shared_ptr<Node> coeffProducer = addProducer(mulNode, 1, {1}, "");
-                std::shared_ptr<Tensor> coeffTensor = std::make_shared<Tensor>(Array1D<double, 1> {signedMax});
-                coeffProducer->getOperator()->setOutput(0, coeffTensor);
+            std::shared_ptr<Node> coeffProducer = addProducer(mulNode, 1, {1}, "");
+            std::shared_ptr<Tensor> coeffTensor = std::make_shared<Tensor>(Array1D<double, 1> {signedMax});
+            coeffProducer->getOperator()->setOutput(0, coeffTensor);
 
-                coeffProducer->getOperator()->setDataType(DataType::Float64);
-                coeffProducer->getOperator()->setBackend("cpu");
+            coeffProducer->getOperator()->setDataType(DataType::Float64);
+            coeffProducer->getOperator()->setBackend("cpu");
 
-                graphView->add(coeffProducer); // needed ?
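            // (Why this keeps the network output unchanged: the inserted Mul scales the
            //  activations by signedMax while the Quantizer's scaling factor is divided
            //  by signedMax just below, so (x * signedMax) * (sf / signedMax) == x * sf.)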
-            // Adapt the scaling factor value accordingly
+        // Adapt the scaling factor value accordingly

-            double currScalingFactor = getScalingFactor(node);
-            updateScalingFactor(node, currScalingFactor / signedMax);
-        }
+        double currScalingFactor = getScalingFactor(node);
+        updateScalingFactor(node, currScalingFactor / signedMax);
+        }
     }
 }
--
GitLab

From d34354f6721d8bd127cb2d9926f3fdbb1cb412bb Mon Sep 17 00:00:00 2001
From: Noam ZERAH <noam.zerah@cea.fr>
Date: Mon, 20 Jan 2025 15:53:11 +0000
Subject: [PATCH 07/23] Fixing isResidual bug in SSA when using tag;
 replacement of std::cout with Log::debug

---
 include/aidge/quantization_version.h |  2 +-
 src/PTQ/PTQ.cpp                      | 14 ++------------
 src/QAT/QAT_LSQ.cpp                  |  5 ++---
 3 files changed, 5 insertions(+), 16 deletions(-)

diff --git a/include/aidge/quantization_version.h b/include/aidge/quantization_version.h
index d773aa8..429e4bd 100644
--- a/include/aidge/quantization_version.h
+++ b/include/aidge/quantization_version.h
@@ -6,6 +6,6 @@ static constexpr const int PROJECT_VERSION_MAJOR = 0;
 static constexpr const int PROJECT_VERSION_MINOR = 3;
 static constexpr const int PROJECT_VERSION_PATCH = 0;
 static constexpr const char * PROJECT_VERSION = "0.3.0";
-static constexpr const char * PROJECT_GIT_HASH = "94747bf";
+static constexpr const char * PROJECT_GIT_HASH = "e464870";
 }
 #endif // VERSION_H
diff --git a/src/PTQ/PTQ.cpp b/src/PTQ/PTQ.cpp
index f95a94a..e8abf75 100644
--- a/src/PTQ/PTQ.cpp
+++ b/src/PTQ/PTQ.cpp
@@ -14,7 +14,6 @@
 #include "aidge/quantization/PTQ/PTQ.hpp"
 #include "aidge/operator/PTQMetaOps.hpp"
-
 #include "aidge/data/Tensor.hpp"
 #include "aidge/graph/GraphView.hpp"
 #include "aidge/graph/Node.hpp"
@@ -945,8 +944,9 @@ static void insertCompensationNodes(std::shared_ptr<GraphView> graphView, std::u
     {
         // A merging node is always followed by a Quantizer node at this point

-        if (node->type() == "Quantizer" && node->attributes()->hasAttr("isResidual"))
+        if (node->type() == "Quantizer" && (node->attributes()->hasAttr("isResidual") || !isAffine(node->getParent(0))))
         {
+
             // check if the Quantizer is a residual one, and insert a compensation node if so ...
// create and insert the multplicative node before the Quantizer @@ -1064,17 +1064,11 @@ void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits, std::map<std::string, double> valueRanges = computeRanges(graphView, inputDataSet, true, useCuda); //Log::info(" === RANGES (BEFORE ADJUST) ==="); - //printRanges(graphView, valueRanges); Log::info(" Optimizing the clipping values ..."); valueRanges = adjustRanges(clippingMode, valueRanges, nbBits, graphView, inputDataSet, useCuda, verbose); //Log::info(" === RANGES (AFTER ADJUST) ==="); - //printRanges(graphView, valueRanges); - - Log::info(" Normalizing the activations ..."); - normalizeActivations(graphView, valueRanges); - Log::info(" Quantizing the normalized network ..."); quantizeNormalizedNetwork(graphView, nbBits, noQuant, optimizeSigns, verbose); @@ -1091,10 +1085,6 @@ void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits, printScalingFactors(graphView); //Log::info(" === SCALINGS (BEFORE CAST) ==="); - //printScalingFactors(graphView); - - setupDataType(graphView, inputDataSet, initialDataType); - if (useCuda) graphView->setBackend("cuda"); diff --git a/src/QAT/QAT_LSQ.cpp b/src/QAT/QAT_LSQ.cpp index 04f2027..8a42770 100644 --- a/src/QAT/QAT_LSQ.cpp +++ b/src/QAT/QAT_LSQ.cpp @@ -13,7 +13,6 @@ #include "aidge/operator/LSQ.hpp" #include "aidge/operator/ReLU.hpp" - #include "aidge/data/Tensor.hpp" #include "aidge/graph/GraphView.hpp" #include "aidge/scheduler/SequentialScheduler.hpp" @@ -51,7 +50,7 @@ static bool initStepSize(std::shared_ptr<Node> quantizer) stepSizeProducer->getOperator()->setOutput(0, stepSizeTensor); - std::cout << " [ INIT STEP SIZE = " << stepSize << " ] " << std::endl; + Log::debug("[ INIT STEP SIZE = {} ]",stepSize); return false; } @@ -138,7 +137,7 @@ void QuantLSQ::setupQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBi void QuantLSQ::devLSQ(std::shared_ptr<Tensor> tensor) { float mean = (tensor->mean()).get<float> (0); - std::cout << " MEAN = " << mean << std::endl; + Log::debug("MEAN = {}",mean); } } \ No newline at end of file -- GitLab From eb4144b1e3142082432b1fb6533ba163eafaceb3 Mon Sep 17 00:00:00 2001 From: Noam ZERAH <noam.zerah@cea.fr> Date: Fri, 17 Jan 2025 15:29:47 +0000 Subject: [PATCH 08/23] Starting Work on adding Scaling Nodes (Tagged Mul) below Producers --- aidge_quantization/_version.py | 2 +- include/aidge/quantization/PTQ/PTQ.hpp | 1 + src/PTQ/CLE.cpp | 43 +++++- src/PTQ/PTQ.cpp | 183 +++++++++++++++++++++---- 4 files changed, 198 insertions(+), 31 deletions(-) diff --git a/aidge_quantization/_version.py b/aidge_quantization/_version.py index d4ec20e..2d34d35 100644 --- a/aidge_quantization/_version.py +++ b/aidge_quantization/_version.py @@ -1,4 +1,4 @@ # file generated by setuptools_scm # don't change, don't track in version control __version__ = version = '0.2.1.dev60+g8044e79.d20250106' -__version_tuple__ = version_tuple = (0, 2, 1, 'dev60', 'g8044e79.d20250106') +__version_tuple__ = version_tuple = (0, 2, 1, 'dev60', 'g8044e79.d20250106') \ No newline at end of file diff --git a/include/aidge/quantization/PTQ/PTQ.hpp b/include/aidge/quantization/PTQ/PTQ.hpp index 96fb2ed..d8d198b 100644 --- a/include/aidge/quantization/PTQ/PTQ.hpp +++ b/include/aidge/quantization/PTQ/PTQ.hpp @@ -69,6 +69,7 @@ namespace Aidge { * @return The scheduled vector of nodes */ std::vector<std::shared_ptr<Node>> retrieveNodeVector(std::shared_ptr<GraphView> graphView, bool newSchedule = true, bool verbose = false); + bool 
insertScalingBelowProducer(std::shared_ptr<Node> node,double sf, std::shared_ptr<GraphView> graphView); /** * @brief Determine whether an input GraphView can be quantized or not. diff --git a/src/PTQ/CLE.cpp b/src/PTQ/CLE.cpp index dc60b38..010ecc5 100644 --- a/src/PTQ/CLE.cpp +++ b/src/PTQ/CLE.cpp @@ -136,17 +136,48 @@ void crossLayerEqualization(std::shared_ptr<GraphView> graphView, double targetD { std::shared_ptr<Node> n1 = affineNodeVector[i]; std::shared_ptr<Node> n2 = affineNodeVector[i+1]; + std::cout << "CLE\n"; + std::cout << "node name is: " << n1->name() << std::endl; + std::cout << "node name is: " << n2->name() << std::endl; + std::cout << "node parent name is: " << n1->name() << std::endl; + std::cout << "node parent name is: " << n2->name() << std::endl; + + std::shared_ptr<Aidge::Tensor> n1localTensor, n2localTensor; + if(n1->getParent(1)->attributes()->hasAttr("isProducerScaling")) + { + std::static_pointer_cast<OperatorTensor>(n1->getParent(1)->getOperator())->getOutput(0)->print(); + n1localTensor = std::static_pointer_cast<OperatorTensor>(n1->getParent(1)->getOperator())->getOutput(0); + } + else + { + n1localTensor = getWeightTensor(n1); + } + + if(n2->getParent(1)->attributes()->hasAttr("isProducerScaling")) + { + n2localTensor = std::static_pointer_cast<OperatorTensor>(n2->getParent(1)->getOperator())->getOutput(0); + + } + else + { + n2localTensor = getWeightTensor(n2); + } + + double r1 = getTensorAbsoluteMax(n1localTensor); + double r2 = getTensorAbsoluteMax(n2localTensor); + std::cout << "valeur: " << r1 <<std::endl; + std::cout << "valeur: " << r2 <<std::endl; - double r1 = getTensorAbsoluteMax(getWeightTensor(n1)); - double r2 = getTensorAbsoluteMax(getWeightTensor(n2)); double s1 = std::sqrt(r1 * r2) / r1; double s2 = std::sqrt(r1 * r2) / r2; - rescaleTensor(getWeightTensor(n1), s1); - rescaleTensor(getWeightTensor(n2), s2); - - rescaleTensor(getBiasTensor(n1), s1); + //rescaleTensor(getWeightTensor(n1), s1); + insertScalingBelowProducer(n1->getParent(1),s1,graphView); + //rescaleTensor(getWeightTensor(n2), s2); + insertScalingBelowProducer(n2->getParent(1),s2,graphView); + //rescaleTensor(getBiasTensor(n1), s1); + insertScalingBelowProducer(n1->getParent(2),s1,graphView); double rangeDelta = std::abs(r1 - r2); if (rangeDelta > maxRangeDelta) diff --git a/src/PTQ/PTQ.cpp b/src/PTQ/PTQ.cpp index e8abf75..c8d7c47 100644 --- a/src/PTQ/PTQ.cpp +++ b/src/PTQ/PTQ.cpp @@ -54,6 +54,120 @@ bool isMerging(std::shared_ptr<Node> node) { return (mergingNodeTypes.find(node->type()) != mergingNodeTypes.end()); } +static int getInputIndex(std::shared_ptr<Node> node, std::shared_ptr<Node> parentNode) +{ + int index = 0; + while (node->getParent(index) != parentNode) + index++; + return index; +} + +void multiplyScalingFactor(std::shared_ptr<Aidge::Node> node,double coeff) +{ + if(node->type() == "Mul" && node->attributes()->hasAttr("isProducerScaling")) + { + auto scalingFactorTensor = std::static_pointer_cast<OperatorTensor>(node->getOperator())->getInput(1); + std::shared_ptr<Tensor> fallback; + const Tensor& localTensor = scalingFactorTensor->refCastFrom(fallback, DataType::Float64, "cpu"); + double previousScalingFactor = localTensor.get<double>(0); + std::shared_ptr<Tensor> finalTensor = std::make_shared<Tensor>(Array1D<double, 1> {previousScalingFactor * coeff}); + node->input(1).first->getOperator()->setOutput(0, finalTensor); + } + else + { + Log::warn(" Cannot update the scaling factor on Node of type {} with no scaling tag", node->type()); + } +} +bool 
insertRoundBelowProducer(std::shared_ptr<Node> node,std::shared_ptr<GraphView> graphView) +{ + std::shared_ptr<Aidge::Node> roundNode = Round(node->name() + "_Round"); + roundNode->getOperator()->setDataType(DataType::Float64); // getDataType(parentNode) + roundNode->getOperator()->setBackend("cpu"); + + if (node->getChildren().size() > 0) + { + // SCALING NODE INSERTION + + // We always have one output from Affine and Add nodes, but possibly multiple childs + std::vector<std::shared_ptr<Node>> nextNodes = node->getChildren(0); + + // For each node in nextNodes store the connexion index + std::vector<int> inputIndices(nextNodes.size()); + for (std::size_t i = 0; i < nextNodes.size(); i++) + inputIndices[i] = getInputIndex(nextNodes[i], node); + + for (std::shared_ptr<Node> nextNode : nextNodes) + node->removeChild(nextNode, 0); + + node->addChild(roundNode, 0, 0); + + for (std::size_t i = 0; i < nextNodes.size(); i++) + roundNode->addChild(nextNodes[i], 0, inputIndices[i]); + graphView->add(roundNode); + } + else + { + Log::warn("Unusual producer "); + node->addChild(roundNode, 0, 0); + graphView->add(roundNode); + } + return true; +} +bool insertScalingBelowProducer(std::shared_ptr<Node> node,double sf, std::shared_ptr<GraphView> graphView) +{ + if(node->attributes()->hasAttr("isProducerScaling")) + { + multiplyScalingFactor(node,sf); + return true; + } + if(node->type() != "Producer") + { + Log::warn(" Cannot apply a scaling factor on a node which is not a producer", node->type()); + return false; + } + std::string scalingNodeName = makeUniqueName(node->name() + "_ProducerScaling", graphView); + + std::shared_ptr<Aidge::Node> scalingNode = Mul(scalingNodeName); + scalingNode->attributes()->addAttr("isProducerScaling",0.0); + + std::shared_ptr<Tensor> scalingFactorTensor = std::make_shared<Tensor>(Array1D<double, 1> {sf}); + std::shared_ptr<Node> scalingFactorProducer = addProducer(scalingNode, 1, {1}, "Factor"); + scalingFactorProducer->getOperator()->setOutput(0, scalingFactorTensor); + graphView->add(scalingFactorProducer); + + scalingNode->getOperator()->setDataType(DataType::Float64); // getDataType(parentNode) + scalingNode->getOperator()->setBackend("cpu"); + + if (node->getChildren().size() > 0) + { + // SCALING NODE INSERTION + + // We always have one output from Affine and Add nodes, but possibly multiple childs + std::vector<std::shared_ptr<Node>> nextNodes = node->getChildren(0); + + // For each node in nextNodes store the connexion index + std::vector<int> inputIndices(nextNodes.size()); + for (std::size_t i = 0; i < nextNodes.size(); i++) + inputIndices[i] = getInputIndex(nextNodes[i], node); + + for (std::shared_ptr<Node> nextNode : nextNodes) + node->removeChild(nextNode, 0); + + node->addChild(scalingNode, 0, 0); + + for (std::size_t i = 0; i < nextNodes.size(); i++) + scalingNode->addChild(nextNodes[i], 0, inputIndices[i]); + + graphView->add(scalingNode); + } + else + { + Log::warn("Unusual producer "); + node->addChild(scalingNode, 0, 0); + graphView->add(scalingNode); + } + return true; +} bool checkArchitecture(std::shared_ptr<GraphView> graphView) { @@ -167,6 +281,15 @@ static std::vector<std::shared_ptr<Node>> removeMatchingNodes(std::vector<std::s return remainingNodes; } +static std::vector<std::shared_ptr<Node>> removeProdScalingNodes(std::vector<std::shared_ptr<Node>> nodeVector) +{ + std::vector<std::shared_ptr<Node>> remainingNodes; + for (std::shared_ptr<Node> node : nodeVector) + if (!node->attributes()->hasAttr("isProducerScaling")) + 
remainingNodes.push_back(node); + + return remainingNodes; +} static void fixScheduling(std::vector<std::shared_ptr<Node>>& nodeVector) { @@ -211,6 +334,7 @@ std::vector<std::shared_ptr<Node>> retrieveNodeVector(std::shared_ptr<GraphView> fixScheduling(nodeVector); nodeVector = removeMatchingNodes(nodeVector, "Producer"); + nodeVector = removeProdScalingNodes(nodeVector); if (verbose) { @@ -300,13 +424,6 @@ void insertResidualNodes(std::shared_ptr<GraphView> graphView) } } -static int getInputIndex(std::shared_ptr<Node> node, std::shared_ptr<Node> parentNode) -{ - int index = 0; - while (node->getParent(index) != parentNode) - index++; - return index; -} void insertScalingNodes(std::shared_ptr<GraphView> graphView) { @@ -429,7 +546,8 @@ void normalizeParameters(std::shared_ptr<GraphView> graphView) std::shared_ptr<Tensor> weightTensor = getWeightTensor(node); double scaling = getTensorAbsoluteMax(weightTensor); double ratio = 1.0 / scaling; - rescaleTensor(weightTensor, ratio); + //rescaleTensor(weightTensor, ratio); + insertScalingBelowProducer(node->getParent(1),ratio,graphView); // Accumulate the ratio if (node == firstNode) @@ -447,7 +565,8 @@ void normalizeParameters(std::shared_ptr<GraphView> graphView) if (nodeHasBias(node)) { std::shared_ptr<Tensor> biasTensor = getBiasTensor(node); - rescaleTensor(biasTensor, accumulatedRatios[node->name()] ); + //rescaleTensor(biasTensor, accumulatedRatios[node->name()] ); + insertScalingBelowProducer(node->getParent(2),accumulatedRatios[node->name()],graphView); } } @@ -606,7 +725,7 @@ void normalizeActivations(std::shared_ptr<GraphView> graphView, std::map<std::st for (std::shared_ptr<Node> node : nodeVector) { // Seamless scaling factor propagation ... - + if (isAffine(node) || isSeamless(node) || node->type() == "ReLU") { if (node == firstNode) @@ -620,11 +739,13 @@ void normalizeActivations(std::shared_ptr<GraphView> graphView, std::map<std::st } } + // Here prevNode is either a 'Affine' or a 'Merging' // => do not split the cases, just handle the bias ... if (node->attributes()->hasAttr("isScaling")) { + // retrieve the previous scaling factor ... 
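removeProdScalingNodes above is an instance of a generic tag filter over the scheduled node vector. A self-contained sketch of the same keep-if-untagged pattern, using plain standard-library types (FakeNode is a stand-in for illustration, not the Aidge API):

#include <string>
#include <unordered_set>
#include <vector>

// Stand-in for an Aidge node: only the attribute tags matter here.
struct FakeNode {
    std::unordered_set<std::string> attrs;
    bool hasAttr(const std::string& name) const { return attrs.count(name) != 0; }
};

// Keep only the nodes that do NOT carry the given tag, preserving order.
std::vector<FakeNode*> removeTagged(const std::vector<FakeNode*>& nodes,
                                    const std::string& tag)
{
    std::vector<FakeNode*> remaining;
    for (FakeNode* node : nodes)
        if (!node->hasAttr(tag))
            remaining.push_back(node);
    return remaining;
}

int main()
{
    FakeNode conv, scaling;
    scaling.attrs.insert("isProducerScaling");
    std::vector<FakeNode*> schedule = {&conv, &scaling};
    return removeTagged(schedule, "isProducerScaling").size() == 1 ? 0 : 1;
}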
std::shared_ptr<Node> prevNode = node->getParent(0); double prevScalingFactor = scalingFactors[prevNode->name()]; @@ -640,11 +761,13 @@ void normalizeActivations(std::shared_ptr<GraphView> graphView, std::map<std::st if (isAffine(prevNode)) { + bool prevNodeHasBias = nodeHasBias(prevNode); if (prevNodeHasBias) - { + { std::shared_ptr<Tensor> biasTensor = getBiasTensor(prevNode); - rescaleTensor(biasTensor, 1.0 / prevScalingFactor); + //rescaleTensor(biasTensor, 1.0 / prevScalingFactor); + insertScalingBelowProducer(prevNode->getParent(2),1.0 / prevScalingFactor,graphView); } } } @@ -842,10 +965,12 @@ void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_ // Rescale the weight tensor std::shared_ptr<Tensor> weightTensor = getWeightTensor(node); - rescaleTensor(weightTensor, signedMax); + //rescaleTensor(weightTensor, signedMax); + insertScalingBelowProducer(node->getParent(1),signedMax,graphView); if (!noQuant) - roundTensor(weightTensor); + insertRoundBelowProducer(node->getParent(1),graphView); + //roundTensor(weightTensor); // Rescale the bias tensor @@ -856,10 +981,12 @@ void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_ std::shared_ptr<Tensor> biasTensor = getBiasTensor(node); - rescaleTensor(biasTensor, rescaling); + //rescaleTensor(biasTensor, rescaling); + insertScalingBelowProducer(node->getParent(2),rescaling,graphView); if (!noQuant) - roundTensor(biasTensor); + insertRoundBelowProducer(node->getParent(2),graphView); + //roundTensor(biasTensor); } // Compensate the rescaling using the next Scaling node @@ -997,17 +1124,20 @@ void performSingleShiftApproximation(std::shared_ptr<GraphView> graphView, bool double ratio = base / approx; - std::shared_ptr<Tensor> weightTensor = getWeightTensor(node); - rescaleTensor(weightTensor, ratio); + //std::shared_ptr<Tensor> weightTensor = getWeightTensor(node); + //rescaleTensor(weightTensor, ratio); + insertScalingBelowProducer(node->getParent(1),ratio,graphView); if (!noQuant) - roundTensor(weightTensor); + insertRoundBelowProducer(node->getParent(1),graphView); if (nodeHasBias(node)) { - std::shared_ptr<Tensor> biasTensor = getBiasTensor(node); - rescaleTensor(biasTensor, ratio); + //std::shared_ptr<Tensor> biasTensor = getBiasTensor(node); + //rescaleTensor(biasTensor, ratio); + insertScalingBelowProducer(node->getParent(2),ratio,graphView); + if (!noQuant) - roundTensor(biasTensor); + insertRoundBelowProducer(node->getParent(2),graphView); } } } @@ -1068,7 +1198,11 @@ void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits, Log::info(" Optimizing the clipping values ..."); valueRanges = adjustRanges(clippingMode, valueRanges, nbBits, graphView, inputDataSet, useCuda, verbose); - //Log::info(" === RANGES (AFTER ADJUST) ==="); + //Log:debug("=== RANGES (AFTER ADJUST) ==="); + //printRanges(graphView, valueRanges); + Log::info(" Normalizing the activations ..."); + normalizeActivations(graphView, valueRanges); + Log::info(" Quantizing the normalized network ..."); quantizeNormalizedNetwork(graphView, nbBits, noQuant, optimizeSigns, verbose); @@ -1120,7 +1254,8 @@ void clearBiases(std::shared_ptr<GraphView> graphView) for (std::shared_ptr<Node> node : graphView->getNodes()) { if (node->type() == "FC" || node->type() == "Conv2D") { std::shared_ptr<Tensor> biasTensor = std::static_pointer_cast<OperatorTensor>(node->getOperator())->getInput(2); - rescaleTensor(biasTensor, 0); + //rescaleTensor(biasTensor, 0); + 
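The ratio = base / approx correction used by performSingleShiftApproximation comes from snapping a scaling factor to a nearby power of two, so the multiplication can be implemented as a bit shift. A standalone sketch under that assumption (hypothetical input value; the exact rounding rule used by the library may differ):

#include <cmath>
#include <cstdio>

int main()
{
    // Hypothetical scaling factor to approximate.
    const double base = 0.0375;

    // Snap it to a power of two: the multiplication then becomes
    // a plain bit shift in integer arithmetic.
    const int    shift  = static_cast<int>(std::round(std::log2(base)));
    const double approx = std::pow(2.0, shift);

    // This is the correction folded back into the weights and biases.
    const double ratio = base / approx;

    std::printf("shift = %d, approx = %g, ratio = %g\n", shift, approx, ratio);
    return 0;
}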
insertScalingBelowProducer(node->getParent(2),0,graphView); } } } -- GitLab From 8a11f8a5ff0c82ab688cffcafd24dc828a44bb0e Mon Sep 17 00:00:00 2001 From: Noam ZERAH <noam.zerah@cea.fr> Date: Mon, 20 Jan 2025 14:22:55 +0000 Subject: [PATCH 09/23] Correction the Single Shift Approximation error with the new method for updating weight and bias --- include/aidge/quantization_version.h | 2 +- src/PTQ/PTQ.cpp | 13 ++++++++++++- src/operator/PTQMetaOps.cpp | 16 ++++++++-------- 3 files changed, 21 insertions(+), 10 deletions(-) diff --git a/include/aidge/quantization_version.h b/include/aidge/quantization_version.h index 429e4bd..37853e3 100644 --- a/include/aidge/quantization_version.h +++ b/include/aidge/quantization_version.h @@ -6,6 +6,6 @@ static constexpr const int PROJECT_VERSION_MAJOR = 0; static constexpr const int PROJECT_VERSION_MINOR = 3; static constexpr const int PROJECT_VERSION_PATCH = 0; static constexpr const char * PROJECT_VERSION = "0.3.0"; -static constexpr const char * PROJECT_GIT_HASH = "e464870"; +static constexpr const char * PROJECT_GIT_HASH = "03286c7"; } #endif // VERSION_H diff --git a/src/PTQ/PTQ.cpp b/src/PTQ/PTQ.cpp index c8d7c47..9e4fdfd 100644 --- a/src/PTQ/PTQ.cpp +++ b/src/PTQ/PTQ.cpp @@ -111,18 +111,27 @@ bool insertRoundBelowProducer(std::shared_ptr<Node> node,std::shared_ptr<GraphVi node->addChild(roundNode, 0, 0); graphView->add(roundNode); } + roundNode->attributes()->addAttr("isProducerRounding",0.0); return true; } bool insertScalingBelowProducer(std::shared_ptr<Node> node,double sf, std::shared_ptr<GraphView> graphView) { + if(node->attributes()->hasAttr("isProducerRounding")) + { + //In this case we 'bump' the node to the one above him (an actual ProducerScaling) + // because the round node is not usable (only used when SSA is enabled) + node = node->getParent(0); + } if(node->attributes()->hasAttr("isProducerScaling")) { + // We accumulate the multiples scaling factors by multiplying the SF of the ProducerScaling node + // (adding new nodes each time would make the graph unusable) multiplyScalingFactor(node,sf); return true; } if(node->type() != "Producer") { - Log::warn(" Cannot apply a scaling factor on a node which is not a producer", node->type()); + Log::warn(" Cannot apply a scaling factor on a node which is not a producer on a node of type {} whose name is {}", node->type(),node->name()); return false; } std::string scalingNodeName = makeUniqueName(node->name() + "_ProducerScaling", graphView); @@ -1126,6 +1135,7 @@ void performSingleShiftApproximation(std::shared_ptr<GraphView> graphView, bool //std::shared_ptr<Tensor> weightTensor = getWeightTensor(node); //rescaleTensor(weightTensor, ratio); + Log::warn("A\n"); insertScalingBelowProducer(node->getParent(1),ratio,graphView); if (!noQuant) insertRoundBelowProducer(node->getParent(1),graphView); @@ -1134,6 +1144,7 @@ void performSingleShiftApproximation(std::shared_ptr<GraphView> graphView, bool { //std::shared_ptr<Tensor> biasTensor = getBiasTensor(node); //rescaleTensor(biasTensor, ratio); + Log::warn("B\n"); insertScalingBelowProducer(node->getParent(2),ratio,graphView); if (!noQuant) diff --git a/src/operator/PTQMetaOps.cpp b/src/operator/PTQMetaOps.cpp index 937fa48..f86d454 100644 --- a/src/operator/PTQMetaOps.cpp +++ b/src/operator/PTQMetaOps.cpp @@ -74,8 +74,8 @@ static std::shared_ptr<Node> getSubNode(std::shared_ptr<GraphView> graphView, st void updateScalingFactor(std::shared_ptr<Node> metaOpNode, double scalingFactor) { - if(metaOpNode->type() != "Quantizer") - Log::warn(" Cannot 
update the scaling factor on Node of type {}", metaOpNode->type()); + if(metaOpNode->type() != "Scaling" && metaOpNode->type() != "Quantizer") + Log::warn("Cannot update the scaling factor on Node of type {}", metaOpNode->type()); std::shared_ptr<Tensor> scalingFactorTensor = std::make_shared<Tensor>(Array1D<double, 1> {scalingFactor}); @@ -84,15 +84,15 @@ void updateScalingFactor(std::shared_ptr<Node> metaOpNode, double scalingFactor) std::shared_ptr<Node> mulNode = getSubNode(metaOp->getMicroGraph(), "Mul"); if (!mulNode) - Log::warn(" Invalid PTQ MetaOperator, no Mul node found inside ! "); + Log::warn("Invalid PTQ MetaOperator, no Mul node found inside ! "); mulNode->input(1).first->getOperator()->setOutput(0, scalingFactorTensor); } double getScalingFactor(std::shared_ptr<Node> MetaOpNode) { - if (MetaOpNode->type() != "Quantizer") { - Log::warn(" Cannot get the scaling factor on Node of type {}", MetaOpNode->type()); + if (MetaOpNode->type() != "Scaling" && MetaOpNode->type() != "Quantizer") { + Log::warn("Cannot get the scaling factor on Node of type {}", MetaOpNode->type()); return 0; } @@ -101,7 +101,7 @@ double getScalingFactor(std::shared_ptr<Node> MetaOpNode) std::shared_ptr<Node> mulNode = getSubNode(metaOp->getMicroGraph(), "Mul"); if (!mulNode) { - Log::warn(" Invalid PTQ MetaOperator, no Mul found inside node of type {}", MetaOpNode->type()); + Log::warn("Invalid PTQ MetaOperator, no Mul found inside node of type {}", MetaOpNode->type()); return 0; } @@ -116,7 +116,7 @@ double getScalingFactor(std::shared_ptr<Node> MetaOpNode) void setClipRange(std::shared_ptr<Node> quantizerNode, double min, double max) { if (quantizerNode->type() != "Quantizer") { - Log::warn(" Cannot set the clipping range on Node of type {}", quantizerNode->type()); + Log::warn("Cannot set the clipping range on Node of type {}", quantizerNode->type()); return; } @@ -125,7 +125,7 @@ void setClipRange(std::shared_ptr<Node> quantizerNode, double min, double max) std::shared_ptr<Node> clipNode = getSubNode(metaOp->getMicroGraph(), "Clip"); if (!clipNode) { - Log::warn(" Invalid PTQ MetaOperator, no Clip found inside node of type {}", quantizerNode->type()); + Log::warn("Invalid PTQ MetaOperator, no Clip found inside node of type {}", quantizerNode->type()); return; } -- GitLab From 676189835655b351cd6f52f55bdc5e1827e70093 Mon Sep 17 00:00:00 2001 From: Noam ZERAH <noam.zerah@cea.fr> Date: Tue, 21 Jan 2025 14:15:26 +0000 Subject: [PATCH 10/23] Fixing bug related to the lower result in resnet(switching the network to float64 solved it --- include/aidge/quantization_version.h | 2 +- src/PTQ/CLE.cpp | 5 ++++- src/PTQ/PTQ.cpp | 22 +--------------------- 3 files changed, 6 insertions(+), 23 deletions(-) diff --git a/include/aidge/quantization_version.h b/include/aidge/quantization_version.h index 37853e3..2e53dfc 100644 --- a/include/aidge/quantization_version.h +++ b/include/aidge/quantization_version.h @@ -6,6 +6,6 @@ static constexpr const int PROJECT_VERSION_MAJOR = 0; static constexpr const int PROJECT_VERSION_MINOR = 3; static constexpr const int PROJECT_VERSION_PATCH = 0; static constexpr const char * PROJECT_VERSION = "0.3.0"; -static constexpr const char * PROJECT_GIT_HASH = "03286c7"; +static constexpr const char * PROJECT_GIT_HASH = "01880af"; } #endif // VERSION_H diff --git a/src/PTQ/CLE.cpp b/src/PTQ/CLE.cpp index 010ecc5..c8f93e0 100644 --- a/src/PTQ/CLE.cpp +++ b/src/PTQ/CLE.cpp @@ -20,7 +20,10 @@ #include "aidge/quantization/PTQ/PTQ.hpp" // retrieveNodeVector #include "aidge/graph/GraphView.hpp" 
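For context, the Quantizer meta-operator that updateScalingFactor and setClipRange rewire contains at least a Mul (the scaling) and a Clip; a Round completes the quantization step. A scalar sketch of that pipeline (illustrative only; the actual micro-graph layout may differ):

#include <algorithm>
#include <cmath>
#include <cstdio>

// Scalar view of the chain: Mul (scaling), then Clip, then Round.
double quantize(double x, double scalingFactor, double clipMin, double clipMax)
{
    const double scaled  = x * scalingFactor;
    const double clipped = std::clamp(scaled, clipMin, clipMax);
    return std::round(clipped);
}

int main()
{
    std::printf("%g\n", quantize(3.7, 10.0, -128.0, 127.0));  // 37: inside the range
    std::printf("%g\n", quantize(99.0, 10.0, -128.0, 127.0)); // 127: clipped
    return 0;
}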
-#include "aidge/graph/Node.hpp" + +#include "aidge/scheduler/SequentialScheduler.hpp" +#include "aidge/scheduler/Scheduler.hpp" +#include "aidge/utils/Log.hpp" #include "aidge/operator/OperatorTensor.hpp" #include "aidge/utils/Log.hpp" diff --git a/src/PTQ/PTQ.cpp b/src/PTQ/PTQ.cpp index 9e4fdfd..9ab77b2 100644 --- a/src/PTQ/PTQ.cpp +++ b/src/PTQ/PTQ.cpp @@ -363,7 +363,6 @@ static std::shared_ptr<Node> getFirstNode(std::shared_ptr<GraphView> graphView) void prepareNetwork(std::shared_ptr<GraphView> graphView) { removeFlatten(graphView); - sanitizeNodeNames(graphView); bool containsBatchNorm = false; @@ -972,30 +971,23 @@ void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_ if (isAffine(node)) { // Rescale the weight tensor - std::shared_ptr<Tensor> weightTensor = getWeightTensor(node); - //rescaleTensor(weightTensor, signedMax); insertScalingBelowProducer(node->getParent(1),signedMax,graphView); if (!noQuant) insertRoundBelowProducer(node->getParent(1),graphView); - //roundTensor(weightTensor); // Rescale the bias tensor - if (nodeHasBias(node)) { bool inputIsUnsigned = signMap[node->name()].first; double rescaling = inputIsUnsigned ? unsignedMax * signedMax : signedMax * signedMax; - - + std::shared_ptr<Tensor> biasTensor = getBiasTensor(node); - //rescaleTensor(biasTensor, rescaling); insertScalingBelowProducer(node->getParent(2),rescaling,graphView); if (!noQuant) insertRoundBelowProducer(node->getParent(2),graphView); - //roundTensor(biasTensor); } // Compensate the rescaling using the next Scaling node @@ -1133,18 +1125,12 @@ void performSingleShiftApproximation(std::shared_ptr<GraphView> graphView, bool double ratio = base / approx; - //std::shared_ptr<Tensor> weightTensor = getWeightTensor(node); - //rescaleTensor(weightTensor, ratio); - Log::warn("A\n"); insertScalingBelowProducer(node->getParent(1),ratio,graphView); if (!noQuant) insertRoundBelowProducer(node->getParent(1),graphView); if (nodeHasBias(node)) { - //std::shared_ptr<Tensor> biasTensor = getBiasTensor(node); - //rescaleTensor(biasTensor, ratio); - Log::warn("B\n"); insertScalingBelowProducer(node->getParent(2),ratio,graphView); if (!noQuant) @@ -1271,10 +1257,4 @@ void clearBiases(std::shared_ptr<GraphView> graphView) } } -void devPTQ(std::shared_ptr<GraphView> graphView) -{ - for (std::shared_ptr<Node> node : graphView->getNodes()) - Log::info(" UUU : {}", node->name()); -} - } -- GitLab From 3c170ec0487a5bc4677dec84521b303afa15211b Mon Sep 17 00:00:00 2001 From: Noam ZERAH <noam.zerah@cea.fr> Date: Wed, 22 Jan 2025 10:27:01 +0000 Subject: [PATCH 11/23] Rebasing on dev --- include/aidge/quantization_version.h | 2 +- src/PTQ/CLE.cpp | 9 +-------- src/PTQ/PTQ.cpp | 22 +--------------------- 3 files changed, 3 insertions(+), 30 deletions(-) diff --git a/include/aidge/quantization_version.h b/include/aidge/quantization_version.h index 2e53dfc..5a7e98b 100644 --- a/include/aidge/quantization_version.h +++ b/include/aidge/quantization_version.h @@ -6,6 +6,6 @@ static constexpr const int PROJECT_VERSION_MAJOR = 0; static constexpr const int PROJECT_VERSION_MINOR = 3; static constexpr const int PROJECT_VERSION_PATCH = 0; static constexpr const char * PROJECT_VERSION = "0.3.0"; -static constexpr const char * PROJECT_GIT_HASH = "01880af"; +static constexpr const char * PROJECT_GIT_HASH = "a749505"; } #endif // VERSION_H diff --git a/src/PTQ/CLE.cpp b/src/PTQ/CLE.cpp index c8f93e0..01dcd33 100644 --- a/src/PTQ/CLE.cpp +++ b/src/PTQ/CLE.cpp @@ -58,7 +58,7 @@ static void 
rescaleTensor(std::shared_ptr<Tensor> tensor, double scaling) mulOp.setDataType(tensor->dataType()); mulOp.setBackend(tensor->backend()); - std::shared_ptr<Aidge::Tensor> scalingTensor = std::make_shared<Aidge::Tensor>(Aidge::Array1D<float, 1> {scaling}); + std::shared_ptr<Aidge::Tensor> scalingTensor = std::make_shared<Aidge::Tensor>(Aidge::Array1D<double, 1> {scaling}); scalingTensor->setDataType(tensor->dataType()); scalingTensor->setBackend(tensor->backend()); @@ -139,11 +139,6 @@ void crossLayerEqualization(std::shared_ptr<GraphView> graphView, double targetD { std::shared_ptr<Node> n1 = affineNodeVector[i]; std::shared_ptr<Node> n2 = affineNodeVector[i+1]; - std::cout << "CLE\n"; - std::cout << "node name is: " << n1->name() << std::endl; - std::cout << "node name is: " << n2->name() << std::endl; - std::cout << "node parent name is: " << n1->name() << std::endl; - std::cout << "node parent name is: " << n2->name() << std::endl; std::shared_ptr<Aidge::Tensor> n1localTensor, n2localTensor; if(n1->getParent(1)->attributes()->hasAttr("isProducerScaling")) @@ -168,8 +163,6 @@ void crossLayerEqualization(std::shared_ptr<GraphView> graphView, double targetD double r1 = getTensorAbsoluteMax(n1localTensor); double r2 = getTensorAbsoluteMax(n2localTensor); - std::cout << "valeur: " << r1 <<std::endl; - std::cout << "valeur: " << r2 <<std::endl; double s1 = std::sqrt(r1 * r2) / r1; diff --git a/src/PTQ/PTQ.cpp b/src/PTQ/PTQ.cpp index 9ab77b2..0217bfd 100644 --- a/src/PTQ/PTQ.cpp +++ b/src/PTQ/PTQ.cpp @@ -26,11 +26,8 @@ #include "aidge/operator/ReLU.hpp" #include "aidge/operator/BatchNorm.hpp" #include "aidge/operator/Conv.hpp" - #include "aidge/operator/ArgMax.hpp" -#include "aidge/operator/Abs.hpp" #include "aidge/operator/Reshape.hpp" -#include "aidge/operator/Round.hpp" #include "aidge/recipes/Recipes.hpp" @@ -64,7 +61,7 @@ static int getInputIndex(std::shared_ptr<Node> node, std::shared_ptr<Node> paren void multiplyScalingFactor(std::shared_ptr<Aidge::Node> node,double coeff) { - if(node->type() == "Mul" && node->attributes()->hasAttr("isProducerScaling")) + if(node->type() == "Mul" && (node->attributes()->hasAttr("isProducerScaling") || node->attributes()->hasAttr("isScaling"))) { auto scalingFactorTensor = std::static_pointer_cast<OperatorTensor>(node->getOperator())->getInput(1); std::shared_ptr<Tensor> fallback; @@ -194,23 +191,6 @@ bool checkArchitecture(std::shared_ptr<GraphView> graphView) return true; } -void multiplyScalingFactor(std::shared_ptr<Aidge::Node> node,double coeff) -{ - if(node->type() == "Mul" && node->attributes()->hasAttr("isScaling")) - { - auto scalingFactorTensor = std::static_pointer_cast<OperatorTensor>(node->getOperator())->getInput(1); - std::shared_ptr<Tensor> fallback; - const Tensor& localTensor = scalingFactorTensor->refCastFrom(fallback, DataType::Float64, "cpu"); - double previousScalingFactor = localTensor.get<double>(0); - std::shared_ptr<Tensor> finalTensor = std::make_shared<Tensor>(Array1D<double, 1> {previousScalingFactor * coeff}); - node->input(1).first->getOperator()->setOutput(0, finalTensor); - } - else - { - Log::warn(" Cannot update the scaling factor on Node of type {} with no scaling tag", node->type()); - } -} - static void rescaleTensor(std::shared_ptr<Tensor> tensor, float scaling) { auto mulOp = Mul_Op(); -- GitLab From 4a1defc5d1df4db67b0e1fcce32d3ded5b8eb17c Mon Sep 17 00:00:00 2001 From: Noam ZERAH <noam.zerah@cea.fr> Date: Wed, 22 Jan 2025 13:06:51 +0000 Subject: [PATCH 12/23] Correcting Log::warn into AIDGE_ASSERT to make the 
code safer
---
 include/aidge/quantization/PTQ/PTQ.hpp |  21 ++++-
 include/aidge/quantization_version.h   |   2 +-
 src/PTQ/PTQ.cpp                        | 110 +++++++++----------------
 3 files changed, 61 insertions(+), 72 deletions(-)

diff --git a/include/aidge/quantization/PTQ/PTQ.hpp b/include/aidge/quantization/PTQ/PTQ.hpp
index d8d198b..1d1b71b 100644
--- a/include/aidge/quantization/PTQ/PTQ.hpp
+++ b/include/aidge/quantization/PTQ/PTQ.hpp
@@ -69,7 +69,26 @@ namespace Aidge {
      * @return The scheduled vector of nodes
      */
     std::vector<std::shared_ptr<Node>> retrieveNodeVector(std::shared_ptr<GraphView> graphView, bool newSchedule = true, bool verbose = false);
-    bool insertScalingBelowProducer(std::shared_ptr<Node> node,double sf, std::shared_ptr<GraphView> graphView);
+
+    /**
+     * @brief Inserts a scaling node below the given producer node in the graph view.
+     * If the node is already a producer scaling node, it accumulates the scaling factor by multiplying its value directly.
+     *
+     * @param node A shared pointer to the producer node below which the scaling node will be inserted.
+     * @param scalingFactor The scaling factor to apply.
+     * @param graphView A shared pointer to the graph view in which the nodes are located.
+     * @return True if the scaling node was successfully inserted or the scaling factor was accumulated; False otherwise.
+     */
+    bool insertScalingBelowProducer(std::shared_ptr<Node> node, double scalingFactor, std::shared_ptr<GraphView> graphView);
+
+    /**
+     * @brief Inserts a rounding node below the given producer node (that is, below its own ProducerScaling node) in the graph view.
+     *
+     * @param node A shared pointer to the producer node where the rounding node will be inserted.
+     * @param graphView A shared pointer to the graph view in which the nodes are located.
+     * @return True if the rounding node was successfully inserted; False otherwise.
+     */
+    bool insertRoundBelowProducer(std::shared_ptr<Node> node, std::shared_ptr<GraphView> graphView);

 /**
  * @brief Determine whether an input GraphView can be quantized or not.
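The insert-once-then-accumulate contract documented above can be modeled with a toy struct; this sketch mirrors the behavior, not the Aidge implementation:

#include <cstdio>

// Toy stand-in for the ProducerScaling state attached below a Producer.
struct ProducerScaling {
    bool   present = false;
    double factor  = 1.0;
};

// First call inserts the scaling; later calls accumulate into it,
// so the graph never grows one Mul node per rescaling request.
bool insertScalingBelowProducer(ProducerScaling& ps, double scalingFactor)
{
    if (ps.present) {
        ps.factor *= scalingFactor;
    } else {
        ps.present = true;
        ps.factor  = scalingFactor;
    }
    return true;
}

int main()
{
    ProducerScaling ps;
    insertScalingBelowProducer(ps, 0.5);
    insertScalingBelowProducer(ps, 4.0); // accumulates: 0.5 * 4.0 = 2.0
    std::printf("factor = %g\n", ps.factor);
    return 0;
}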
diff --git a/include/aidge/quantization_version.h b/include/aidge/quantization_version.h index 5a7e98b..9b4e3de 100644 --- a/include/aidge/quantization_version.h +++ b/include/aidge/quantization_version.h @@ -6,6 +6,6 @@ static constexpr const int PROJECT_VERSION_MAJOR = 0; static constexpr const int PROJECT_VERSION_MINOR = 3; static constexpr const int PROJECT_VERSION_PATCH = 0; static constexpr const char * PROJECT_VERSION = "0.3.0"; -static constexpr const char * PROJECT_GIT_HASH = "a749505"; +static constexpr const char * PROJECT_GIT_HASH = "5ec6543"; } #endif // VERSION_H diff --git a/src/PTQ/PTQ.cpp b/src/PTQ/PTQ.cpp index 0217bfd..7cc08c0 100644 --- a/src/PTQ/PTQ.cpp +++ b/src/PTQ/PTQ.cpp @@ -61,57 +61,39 @@ static int getInputIndex(std::shared_ptr<Node> node, std::shared_ptr<Node> paren void multiplyScalingFactor(std::shared_ptr<Aidge::Node> node,double coeff) { - if(node->type() == "Mul" && (node->attributes()->hasAttr("isProducerScaling") || node->attributes()->hasAttr("isScaling"))) - { - auto scalingFactorTensor = std::static_pointer_cast<OperatorTensor>(node->getOperator())->getInput(1); - std::shared_ptr<Tensor> fallback; - const Tensor& localTensor = scalingFactorTensor->refCastFrom(fallback, DataType::Float64, "cpu"); - double previousScalingFactor = localTensor.get<double>(0); - std::shared_ptr<Tensor> finalTensor = std::make_shared<Tensor>(Array1D<double, 1> {previousScalingFactor * coeff}); - node->input(1).first->getOperator()->setOutput(0, finalTensor); - } - else - { - Log::warn(" Cannot update the scaling factor on Node of type {} with no scaling tag", node->type()); - } + AIDGE_ASSERT(node->type() == "Mul" && (node->attributes()->hasAttr("isProducerScaling") || node->attributes()->hasAttr("isScaling")), + "Cannot update the scaling factor on Node of type {} with no scaling tag",node->type()); + auto scalingFactorTensor = std::static_pointer_cast<OperatorTensor>(node->getOperator())->getInput(1); + std::shared_ptr<Tensor> fallback; + const Tensor& localTensor = scalingFactorTensor->refCastFrom(fallback, DataType::Float64, "cpu"); + double previousScalingFactor = localTensor.get<double>(0); + std::shared_ptr<Tensor> finalTensor = std::make_shared<Tensor>(Array1D<double, 1> {previousScalingFactor * coeff}); + node->input(1).first->getOperator()->setOutput(0, finalTensor); } bool insertRoundBelowProducer(std::shared_ptr<Node> node,std::shared_ptr<GraphView> graphView) { std::shared_ptr<Aidge::Node> roundNode = Round(node->name() + "_Round"); roundNode->getOperator()->setDataType(DataType::Float64); // getDataType(parentNode) roundNode->getOperator()->setBackend("cpu"); - - if (node->getChildren().size() > 0) - { - // SCALING NODE INSERTION + AIDGE_ASSERT(node->getChildren().size() > 0, "The producer node must have at least one child to insert a scaling node."); + std::vector<std::shared_ptr<Node>> nextNodes = node->getChildren(0); + std::vector<int> inputIndices(nextNodes.size()); + for (std::size_t i = 0; i < nextNodes.size(); i++) + inputIndices[i] = getInputIndex(nextNodes[i], node); - // We always have one output from Affine and Add nodes, but possibly multiple childs - std::vector<std::shared_ptr<Node>> nextNodes = node->getChildren(0); + for (std::shared_ptr<Node> nextNode : nextNodes) + node->removeChild(nextNode, 0); - // For each node in nextNodes store the connexion index - std::vector<int> inputIndices(nextNodes.size()); - for (std::size_t i = 0; i < nextNodes.size(); i++) - inputIndices[i] = getInputIndex(nextNodes[i], node); - - for (std::shared_ptr<Node> 
nextNode : nextNodes) - node->removeChild(nextNode, 0); - - node->addChild(roundNode, 0, 0); + node->addChild(roundNode, 0, 0); - for (std::size_t i = 0; i < nextNodes.size(); i++) - roundNode->addChild(nextNodes[i], 0, inputIndices[i]); - graphView->add(roundNode); - } - else - { - Log::warn("Unusual producer "); - node->addChild(roundNode, 0, 0); + for (std::size_t i = 0; i < nextNodes.size(); i++) + roundNode->addChild(nextNodes[i], 0, inputIndices[i]); graphView->add(roundNode); - } + roundNode->attributes()->addAttr("isProducerRounding",0.0); return true; } -bool insertScalingBelowProducer(std::shared_ptr<Node> node,double sf, std::shared_ptr<GraphView> graphView) +bool insertScalingBelowProducer(std::shared_ptr<Node> node,double scalingFactor, std::shared_ptr<GraphView> graphView) { if(node->attributes()->hasAttr("isProducerRounding")) { @@ -123,55 +105,39 @@ bool insertScalingBelowProducer(std::shared_ptr<Node> node,double sf, std::share { // We accumulate the multiples scaling factors by multiplying the SF of the ProducerScaling node // (adding new nodes each time would make the graph unusable) - multiplyScalingFactor(node,sf); + multiplyScalingFactor(node,scalingFactor); return true; } - if(node->type() != "Producer") - { - Log::warn(" Cannot apply a scaling factor on a node which is not a producer on a node of type {} whose name is {}", node->type(),node->name()); - return false; - } + AIDGE_ASSERT(node->type() == "Producer","Cannot apply a scaling factor on node of type: {} which is not a producer", node->type()); std::string scalingNodeName = makeUniqueName(node->name() + "_ProducerScaling", graphView); std::shared_ptr<Aidge::Node> scalingNode = Mul(scalingNodeName); scalingNode->attributes()->addAttr("isProducerScaling",0.0); - std::shared_ptr<Tensor> scalingFactorTensor = std::make_shared<Tensor>(Array1D<double, 1> {sf}); + std::shared_ptr<Tensor> scalingFactorTensor = std::make_shared<Tensor>(Array1D<double, 1> {scalingFactor}); std::shared_ptr<Node> scalingFactorProducer = addProducer(scalingNode, 1, {1}, "Factor"); scalingFactorProducer->getOperator()->setOutput(0, scalingFactorTensor); graphView->add(scalingFactorProducer); scalingNode->getOperator()->setDataType(DataType::Float64); // getDataType(parentNode) scalingNode->getOperator()->setBackend("cpu"); + AIDGE_ASSERT(node->getChildren().size() > 0, "The producer node must have at least one child to insert a scaling node."); + std::vector<std::shared_ptr<Node>> nextNodes = node->getChildren(0); - if (node->getChildren().size() > 0) - { - // SCALING NODE INSERTION + // For each node in nextNodes store the connexion index + std::vector<int> inputIndices(nextNodes.size()); + for (std::size_t i = 0; i < nextNodes.size(); i++) + inputIndices[i] = getInputIndex(nextNodes[i], node); - // We always have one output from Affine and Add nodes, but possibly multiple childs - std::vector<std::shared_ptr<Node>> nextNodes = node->getChildren(0); + for (std::shared_ptr<Node> nextNode : nextNodes) + node->removeChild(nextNode, 0); - // For each node in nextNodes store the connexion index - std::vector<int> inputIndices(nextNodes.size()); - for (std::size_t i = 0; i < nextNodes.size(); i++) - inputIndices[i] = getInputIndex(nextNodes[i], node); - - for (std::shared_ptr<Node> nextNode : nextNodes) - node->removeChild(nextNode, 0); + node->addChild(scalingNode, 0, 0); - node->addChild(scalingNode, 0, 0); + for (std::size_t i = 0; i < nextNodes.size(); i++) + scalingNode->addChild(nextNodes[i], 0, inputIndices[i]); - for (std::size_t i = 0; 
i < nextNodes.size(); i++) - scalingNode->addChild(nextNodes[i], 0, inputIndices[i]); - - graphView->add(scalingNode); - } - else - { - Log::warn("Unusual producer "); - node->addChild(scalingNode, 0, 0); - graphView->add(scalingNode); - } + graphView->add(scalingNode); return true; } @@ -1236,5 +1202,9 @@ void clearBiases(std::shared_ptr<GraphView> graphView) } } } - +void devPTQ(std::shared_ptr<GraphView> graphView) +{ + for (std::shared_ptr<Node> node : graphView->getNodes()) + Log::debug(" UUU : {}", node->name()); +} } -- GitLab From 583be8740747536db8e28aac5a0abad0fc22c2f3 Mon Sep 17 00:00:00 2001 From: Noam ZERAH <noam.zerah@cea.fr> Date: Wed, 22 Jan 2025 14:36:02 +0000 Subject: [PATCH 13/23] Changing the CLE to fit with the new method of ProducerScaling --- include/aidge/quantization_version.h | 2 +- src/PTQ/CLE.cpp | 38 ++++++++++------------------ 2 files changed, 14 insertions(+), 26 deletions(-) diff --git a/include/aidge/quantization_version.h b/include/aidge/quantization_version.h index 9b4e3de..eba0eab 100644 --- a/include/aidge/quantization_version.h +++ b/include/aidge/quantization_version.h @@ -6,6 +6,6 @@ static constexpr const int PROJECT_VERSION_MAJOR = 0; static constexpr const int PROJECT_VERSION_MINOR = 3; static constexpr const int PROJECT_VERSION_PATCH = 0; static constexpr const char * PROJECT_VERSION = "0.3.0"; -static constexpr const char * PROJECT_GIT_HASH = "5ec6543"; +static constexpr const char * PROJECT_GIT_HASH = "c374ce4"; } #endif // VERSION_H diff --git a/src/PTQ/CLE.cpp b/src/PTQ/CLE.cpp index 01dcd33..6b0226f 100644 --- a/src/PTQ/CLE.cpp +++ b/src/PTQ/CLE.cpp @@ -107,6 +107,16 @@ static double getTensorAbsoluteMax(std::shared_ptr<Tensor> tensor) return flatTensor->get<double>(maxIndex); } +//Function used to extraxt the local tensor (from a ProducerScalingNode) +std::shared_ptr<Aidge::Tensor> getLocalTensor(std::shared_ptr<Node> node) { + if (node->getParent(1)->attributes()->hasAttr("isProducerScaling")) { + std::shared_ptr<Aidge::OperatorTensor> operatorTensor = std::static_pointer_cast<OperatorTensor>(node->getParent(1)->getOperator()); + operatorTensor->forward();// We need the forward pass to compute the scaled value of the Tensor + return operatorTensor->getOutput(0); + } else { + return getWeightTensor(node); + } +} void crossLayerEqualization(std::shared_ptr<GraphView> graphView, double targetDelta) { @@ -140,39 +150,17 @@ void crossLayerEqualization(std::shared_ptr<GraphView> graphView, double targetD std::shared_ptr<Node> n1 = affineNodeVector[i]; std::shared_ptr<Node> n2 = affineNodeVector[i+1]; - std::shared_ptr<Aidge::Tensor> n1localTensor, n2localTensor; - if(n1->getParent(1)->attributes()->hasAttr("isProducerScaling")) - { - std::static_pointer_cast<OperatorTensor>(n1->getParent(1)->getOperator())->getOutput(0)->print(); - n1localTensor = std::static_pointer_cast<OperatorTensor>(n1->getParent(1)->getOperator())->getOutput(0); - } - else - { - n1localTensor = getWeightTensor(n1); - } - - if(n2->getParent(1)->attributes()->hasAttr("isProducerScaling")) - { - n2localTensor = std::static_pointer_cast<OperatorTensor>(n2->getParent(1)->getOperator())->getOutput(0); - - } - else - { - n2localTensor = getWeightTensor(n2); - } - + std::shared_ptr<Aidge::Tensor> n1localTensor = getLocalTensor(n1); + std::shared_ptr<Aidge::Tensor> n2localTensor = getLocalTensor(n2); + double r1 = getTensorAbsoluteMax(n1localTensor); double r2 = getTensorAbsoluteMax(n2localTensor); - double s1 = std::sqrt(r1 * r2) / r1; double s2 = std::sqrt(r1 * r2) / r2; - 
//rescaleTensor(getWeightTensor(n1), s1);
         insertScalingBelowProducer(n1->getParent(1),s1,graphView);
-        //rescaleTensor(getWeightTensor(n2), s2);
         insertScalingBelowProducer(n2->getParent(1),s2,graphView);
-        //rescaleTensor(getBiasTensor(n1), s1);
         insertScalingBelowProducer(n1->getParent(2),s1,graphView);

         double rangeDelta = std::abs(r1 - r2);
--
GitLab

From c84e02a232afa67eb425679c4bd070d4c6e20f8d Mon Sep 17 00:00:00 2001
From: Noam ZERAH <noam.zerah@cea.fr>
Date: Thu, 23 Jan 2025 11:37:34 +0000
Subject: [PATCH 14/23] Minor refactoring of PTQ.cpp (deleting deprecated
 functions)

---
 src/PTQ/PTQ.cpp | 34 +---------------------------------
 1 file changed, 1 insertion(+), 33 deletions(-)

diff --git a/src/PTQ/PTQ.cpp b/src/PTQ/PTQ.cpp
index 7cc08c0..b8d4ce2 100644
--- a/src/PTQ/PTQ.cpp
+++ b/src/PTQ/PTQ.cpp
@@ -75,7 +75,7 @@ bool insertRoundBelowProducer(std::shared_ptr<Node> node,std::shared_ptr<GraphVi
     std::shared_ptr<Aidge::Node> roundNode = Round(node->name() + "_Round");
     roundNode->getOperator()->setDataType(DataType::Float64); // getDataType(parentNode)
     roundNode->getOperator()->setBackend("cpu");
-    AIDGE_ASSERT(node->getChildren().size() > 0, "The producer node must have at least one child to insert a scaling node.");
+    AIDGE_ASSERT(node->getChildren().size() > 0, "The producer node must have at least one child to insert a rounding node.");
     std::vector<std::shared_ptr<Node>> nextNodes = node->getChildren(0);
     std::vector<int> inputIndices(nextNodes.size());
     for (std::size_t i = 0; i < nextNodes.size(); i++)
@@ -157,38 +157,6 @@ bool checkArchitecture(std::shared_ptr<GraphView> graphView)
     return true;
 }

-static void rescaleTensor(std::shared_ptr<Tensor> tensor, float scaling)
-{
-    auto mulOp = Mul_Op();
-    mulOp.setDataType(tensor->dataType());
-    mulOp.setBackend(tensor->backend());
-
-    std::shared_ptr<Aidge::Tensor> scalingTensor = std::make_shared<Aidge::Tensor>(Aidge::Array1D<float, 1> {scaling});
-    scalingTensor->setDataType(tensor->dataType());
-    scalingTensor->setBackend(tensor->backend());
-
-    mulOp.associateInput(0, tensor);
-    mulOp.associateInput(1, scalingTensor);
-
-    mulOp.forward();
-
-    auto outTensor = mulOp.getOutput(0);
-    *tensor = *outTensor;
-}
-
-static void roundTensor(std::shared_ptr<Tensor> tensor)
-{
-    auto roundOp = Round_Op();
-    roundOp.setDataType(tensor->dataType());
-    roundOp.setBackend(tensor->backend());
-
-    roundOp.associateInput(0, tensor);
-    roundOp.forward();
-
-    auto outTensor = roundOp.getOutput(0);
-    *tensor = *outTensor;
-}
-
 // TODO : make the retreival of argmax values backend independant (refCastFrom)
 static double getTensorAbsoluteMax(std::shared_ptr<Tensor> tensor)
 {
--
GitLab

From 9902af66343f04b0eb0e4819745292db62174669 Mon Sep 17 00:00:00 2001
From: Noam ZERAH <noam.zerah@cea.fr>
Date: Thu, 23 Jan 2025 12:08:34 +0000
Subject: [PATCH 15/23] Refactoring the code to add the function
 insertNodeBetween, to easily insert a node between two already-connected
 nodes

---
 include/aidge/quantization_version.h |  2 +-
 src/PTQ/PTQ.cpp                      | 79 +++++++++++-----------------
 2 files changed, 33 insertions(+), 48 deletions(-)

diff --git a/include/aidge/quantization_version.h b/include/aidge/quantization_version.h
index eba0eab..909ab28 100644
--- a/include/aidge/quantization_version.h
+++ b/include/aidge/quantization_version.h
@@ -6,6 +6,6 @@ static constexpr const int PROJECT_VERSION_MAJOR = 0;
 static constexpr const int PROJECT_VERSION_MINOR = 3;
 static constexpr const int PROJECT_VERSION_PATCH = 0;
 static constexpr const char * PROJECT_VERSION = "0.3.0";
-static constexpr
const char * PROJECT_GIT_HASH = "c374ce4"; +static constexpr const char * PROJECT_GIT_HASH = "f0f9e60"; } #endif // VERSION_H diff --git a/src/PTQ/PTQ.cpp b/src/PTQ/PTQ.cpp index b8d4ce2..2780fcf 100644 --- a/src/PTQ/PTQ.cpp +++ b/src/PTQ/PTQ.cpp @@ -70,25 +70,42 @@ void multiplyScalingFactor(std::shared_ptr<Aidge::Node> node,double coeff) std::shared_ptr<Tensor> finalTensor = std::make_shared<Tensor>(Array1D<double, 1> {previousScalingFactor * coeff}); node->input(1).first->getOperator()->setOutput(0, finalTensor); } +/* Util function to insert a node below another one already connected */ +void insertNodeBetween(std::shared_ptr<Node> parent, + std::shared_ptr<Node> newNode, + std::shared_ptr<GraphView> graphView) +{ + // Checking the parents always have at least 1 children + AIDGE_ASSERT(parent->getChildren().size() > 0, "The parent node must have at least one child to insert a new node."); + + // Retrieve children connection indexes + std::vector<std::shared_ptr<Node>> nextNodes = parent->getChildren(0); + std::vector<int> inputIndices(nextNodes.size()); + for (std::size_t i = 0; i < nextNodes.size(); i++) { + inputIndices[i] = getInputIndex(nextNodes[i], parent); + } + + // Disconnect childs from parent + for (std::shared_ptr<Node> nextNode : nextNodes) { + parent->removeChild(nextNode, 0); + } + + // Insert the new node between the child and the parent + parent->addChild(newNode, 0, 0); + for (std::size_t i = 0; i < nextNodes.size(); i++) { + newNode->addChild(nextNodes[i], 0, inputIndices[i]); + } + + graphView->add(newNode); +} + bool insertRoundBelowProducer(std::shared_ptr<Node> node,std::shared_ptr<GraphView> graphView) { std::shared_ptr<Aidge::Node> roundNode = Round(node->name() + "_Round"); roundNode->getOperator()->setDataType(DataType::Float64); // getDataType(parentNode) roundNode->getOperator()->setBackend("cpu"); - AIDGE_ASSERT(node->getChildren().size() > 0, "The producer node must have at least one child to insert a rounding node."); - std::vector<std::shared_ptr<Node>> nextNodes = node->getChildren(0); - std::vector<int> inputIndices(nextNodes.size()); - for (std::size_t i = 0; i < nextNodes.size(); i++) - inputIndices[i] = getInputIndex(nextNodes[i], node); - - for (std::shared_ptr<Node> nextNode : nextNodes) - node->removeChild(nextNode, 0); - node->addChild(roundNode, 0, 0); - - for (std::size_t i = 0; i < nextNodes.size(); i++) - roundNode->addChild(nextNodes[i], 0, inputIndices[i]); - graphView->add(roundNode); + insertNodeBetween(node,roundNode,graphView); roundNode->attributes()->addAttr("isProducerRounding",0.0); return true; @@ -121,23 +138,9 @@ bool insertScalingBelowProducer(std::shared_ptr<Node> node,double scalingFactor, scalingNode->getOperator()->setDataType(DataType::Float64); // getDataType(parentNode) scalingNode->getOperator()->setBackend("cpu"); - AIDGE_ASSERT(node->getChildren().size() > 0, "The producer node must have at least one child to insert a scaling node."); - std::vector<std::shared_ptr<Node>> nextNodes = node->getChildren(0); - - // For each node in nextNodes store the connexion index - std::vector<int> inputIndices(nextNodes.size()); - for (std::size_t i = 0; i < nextNodes.size(); i++) - inputIndices[i] = getInputIndex(nextNodes[i], node); - - for (std::shared_ptr<Node> nextNode : nextNodes) - node->removeChild(nextNode, 0); - node->addChild(scalingNode, 0, 0); + insertNodeBetween(node, scalingNode, graphView); - for (std::size_t i = 0; i < nextNodes.size(); i++) - scalingNode->addChild(nextNodes[i], 0, inputIndices[i]); - - 
graphView->add(scalingNode); return true; } @@ -374,26 +377,8 @@ void insertScalingNodes(std::shared_ptr<GraphView> graphView) if (parentNode->getChildren().size() > 0) { - // SCALING NODE INSERTION - - // We always have one output from Affine and Add nodes, but possibly multiple childs - std::vector<std::shared_ptr<Node>> nextNodes = parentNode->getChildren(0); - - // For each node in nextNodes store the connexion index - std::vector<int> inputIndices(nextNodes.size()); - for (std::size_t i = 0; i < nextNodes.size(); i++) - inputIndices[i] = getInputIndex(nextNodes[i], parentNode); - - for (std::shared_ptr<Node> nextNode : nextNodes) - parentNode->removeChild(nextNode, 0); - - parentNode->addChild(scalingNode, 0, 0); - - for (std::size_t i = 0; i < nextNodes.size(); i++) - scalingNode->addChild(nextNodes[i], 0, inputIndices[i]); - + insertNodeBetween(parentNode,scalingNode,graphView); graphView->add(scalingFactorProducer); - graphView->add(scalingNode); } else { -- GitLab From 3fe3dab22681a4e6a6505f04492bd0dcdb637d00 Mon Sep 17 00:00:00 2001 From: Noam ZERAH <noam.zerah@cea.fr> Date: Thu, 23 Jan 2025 14:21:04 +0000 Subject: [PATCH 16/23] Removed _version.py and quantization_version.hpp from tracking --- include/aidge/quantization_version.h | 2 +- src/PTQ/CLE.cpp | 10 ++++++---- src/PTQ/PTQ.cpp | 10 ++++++---- 3 files changed, 13 insertions(+), 9 deletions(-) diff --git a/include/aidge/quantization_version.h b/include/aidge/quantization_version.h index 909ab28..b7e3bb7 100644 --- a/include/aidge/quantization_version.h +++ b/include/aidge/quantization_version.h @@ -6,6 +6,6 @@ static constexpr const int PROJECT_VERSION_MAJOR = 0; static constexpr const int PROJECT_VERSION_MINOR = 3; static constexpr const int PROJECT_VERSION_PATCH = 0; static constexpr const char * PROJECT_VERSION = "0.3.0"; -static constexpr const char * PROJECT_GIT_HASH = "f0f9e60"; +static constexpr const char * PROJECT_GIT_HASH = "487718d"; } #endif // VERSION_H diff --git a/src/PTQ/CLE.cpp b/src/PTQ/CLE.cpp index 6b0226f..4ed87fc 100644 --- a/src/PTQ/CLE.cpp +++ b/src/PTQ/CLE.cpp @@ -76,7 +76,7 @@ static void rescaleTensor(std::shared_ptr<Tensor> tensor, double scaling) static double getTensorAbsoluteMax(std::shared_ptr<Tensor> tensor) { // get the abs tensor - + std::shared_ptr<Tensor> fallback; //Fallback tensor for refCastFR std::shared_ptr<Tensor> absTensor = std::make_shared<Tensor>(tensor->abs()); // flatten the abs tensor @@ -90,6 +90,7 @@ static double getTensorAbsoluteMax(std::shared_ptr<Tensor> tensor) reshapeOp.associateInput(0, absTensor); reshapeOp.forward(); std::shared_ptr<Tensor> flatTensor = reshapeOp.getOutput(0); + const Tensor& localFlatTensor = flatTensor->refCastFrom(fallback, DataType::Float64, "cpu"); // Get the argmax @@ -99,13 +100,14 @@ static double getTensorAbsoluteMax(std::shared_ptr<Tensor> tensor) argmaxOp.associateInput(0, flatTensor); argmaxOp.forward(); - std::shared_ptr<Tensor> argmaxTensor = argmaxOp.getOutput(0); + + const Tensor& argMaxTensor = argmaxOp.getOutput(0)->refCastFrom(fallback, DataType::Float64, "cpu"); // Return the max - int maxIndex = std::round(argmaxTensor->get<double>(0)); + int maxIndex = std::round(argMaxTensor.get<double>(0)); - return flatTensor->get<double>(maxIndex); + return localFlatTensor.get<double>(maxIndex); } //Function used to extraxt the local tensor (from a ProducerScalingNode) std::shared_ptr<Aidge::Tensor> getLocalTensor(std::shared_ptr<Node> node) { diff --git a/src/PTQ/PTQ.cpp b/src/PTQ/PTQ.cpp index 2780fcf..6df3b2d 100644 --- 
a/src/PTQ/PTQ.cpp +++ b/src/PTQ/PTQ.cpp @@ -59,6 +59,7 @@ static int getInputIndex(std::shared_ptr<Node> node, std::shared_ptr<Node> paren return index; } + void multiplyScalingFactor(std::shared_ptr<Aidge::Node> node,double coeff) { AIDGE_ASSERT(node->type() == "Mul" && (node->attributes()->hasAttr("isProducerScaling") || node->attributes()->hasAttr("isScaling")), @@ -160,10 +161,10 @@ bool checkArchitecture(std::shared_ptr<GraphView> graphView) return true; } -// TODO : make the retreival of argmax values backend independant (refCastFrom) static double getTensorAbsoluteMax(std::shared_ptr<Tensor> tensor) { // get the abs tensor + std::shared_ptr<Tensor> fallback; //Fallback tensor for refCastFR std::shared_ptr<Tensor> absTensor = std::make_shared<Tensor>(tensor->abs()); @@ -178,6 +179,7 @@ static double getTensorAbsoluteMax(std::shared_ptr<Tensor> tensor) reshapeOp.associateInput(0, absTensor); reshapeOp.forward(); std::shared_ptr<Tensor> flatTensor = reshapeOp.getOutput(0); + const Tensor& localFlatTensor = flatTensor->refCastFrom(fallback, DataType::Float64, "cpu"); // Get the argmax @@ -187,13 +189,13 @@ static double getTensorAbsoluteMax(std::shared_ptr<Tensor> tensor) argmaxOp.associateInput(0, flatTensor); argmaxOp.forward(); - std::shared_ptr<Tensor> argmaxTensor = argmaxOp.getOutput(0); + const Tensor& argMaxTensor = argmaxOp.getOutput(0)->refCastFrom(fallback, DataType::Float64, "cpu"); // Return the max - int maxIndex = std::round(argmaxTensor->get<double>(0)); + int maxIndex = std::round(argMaxTensor.get<double>(0)); - return flatTensor->get<double>(maxIndex); + return localFlatTensor.get<double>(maxIndex); } -- GitLab From 8b20726a50aae394628b7b94a7a5625d56df06ba Mon Sep 17 00:00:00 2001 From: Noam ZERAH <noam.zerah@cea.fr> Date: Thu, 23 Jan 2025 15:00:21 +0000 Subject: [PATCH 17/23] Removing unwanted file --- include/aidge/quantization/QAT/QAT_LSQ.hpp | 18 +- include/aidge/quantization_version.h | 6 +- src/QAT/QAT_LSQ.cpp | 208 ++++++++++++++------- 3 files changed, 155 insertions(+), 77 deletions(-) diff --git a/include/aidge/quantization/QAT/QAT_LSQ.hpp b/include/aidge/quantization/QAT/QAT_LSQ.hpp index d7d03ca..4970be0 100644 --- a/include/aidge/quantization/QAT/QAT_LSQ.hpp +++ b/include/aidge/quantization/QAT/QAT_LSQ.hpp @@ -20,14 +20,22 @@ namespace Aidge { namespace QuantLSQ { /** - * @brief Given a GraphView with parameters properly initialized, insert - * the LSQ quantizer nodes, and setup the adjustment their step-sizes. - * @param graphView The GraphView containing the network to quantize. + * @brief Insert the LSQ quantizer nodes in a given GraphView + * @param graphView The GraphView containing the graph to quantize. * @param nbBits Number of quantization bits. + * @param span Fixed output span of the quantizers. */ -void setupQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits); +void insertQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits, float step_size); -void devLSQ(std::shared_ptr<Tensor> tensor); +/** + * @brief Given a GraphView with parameters properly initialized and some calibration data, + * insert the LSQ quantizer nodes, and adjust their step-sizes. + * @param graphView The GraphView containing the graph to quantize. + * @param nbBits Number of quantization bits. + * @param calibrationData Calibration data used to adjust the spans. + * @param scale Multiplicative constant applied to the spans. 
+ */
+void insertAndInitQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits, std::shared_ptr<Tensor> calibrationData);

 }
 }
diff --git a/include/aidge/quantization_version.h b/include/aidge/quantization_version.h
index b7e3bb7..546263a 100644
--- a/include/aidge/quantization_version.h
+++ b/include/aidge/quantization_version.h
@@ -3,9 +3,9 @@
 namespace Aidge {
 static constexpr const int PROJECT_VERSION_MAJOR = 0;
-static constexpr const int PROJECT_VERSION_MINOR = 3;
+static constexpr const int PROJECT_VERSION_MINOR = 2;
 static constexpr const int PROJECT_VERSION_PATCH = 0;
-static constexpr const char * PROJECT_VERSION = "0.3.0";
-static constexpr const char * PROJECT_GIT_HASH = "487718d";
+static constexpr const char * PROJECT_VERSION = "0.2.0";
+static constexpr const char * PROJECT_GIT_HASH = "f50c860";
 }
 #endif // VERSION_H
diff --git a/src/QAT/QAT_LSQ.cpp b/src/QAT/QAT_LSQ.cpp
index 8a42770..9b51e84 100644
--- a/src/QAT/QAT_LSQ.cpp
+++ b/src/QAT/QAT_LSQ.cpp
@@ -13,6 +13,7 @@

 #include "aidge/operator/LSQ.hpp"
 #include "aidge/operator/ReLU.hpp"
+
 #include "aidge/data/Tensor.hpp"
 #include "aidge/graph/GraphView.hpp"
 #include "aidge/scheduler/SequentialScheduler.hpp"
@@ -22,42 +23,7 @@

 namespace Aidge {

-static float getTensorAbsMean(std::shared_ptr<Tensor> tensor)
-{
-    auto valueTensor = (*tensor).abs().mean();
-    std::shared_ptr<Tensor> fallback;
-    const Tensor& localTensor = valueTensor.refCastFrom(fallback, DataType::Float32, "cpu");
-    return localTensor.get<float>(0);
-}
-
-// INIT THE STEP SIZE OF A QUANTIZER NODE
-
-static bool initStepSize(std::shared_ptr<Node> quantizer)
-{
-    const auto quantizerOp = std::static_pointer_cast<LSQ_Op>(quantizer->getOperator());
-
-    float inputAbsMean = getTensorAbsMean(quantizerOp->getInput(0));
-
-    float stepSize = 2.0f * (inputAbsMean / std::sqrt(quantizerOp->range().second));
-
-    auto stepSizeTensor = std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}}));
-
-    // XXX Manage backend here ?
-    stepSizeTensor->setBackend(quantizerOp->getInput(0)->backend());
-    stepSizeTensor->setDataType(quantizerOp->getInput(0)->dataType());
-
-    auto stepSizeProducer = quantizer->getParent(1);
-
-    stepSizeProducer->getOperator()->setOutput(0, stepSizeTensor);
-
-    Log::debug("[ INIT STEP SIZE = {} ]",stepSize);
-
-    return false;
-}
-
-// INPUT QUANTIZERS INSERTION
-
-static void setupInputQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits)
+void QuantLSQ::insertQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits, float stepSize)
 {
     const auto matches = SinglePassGraphMatching(graphView).match("(Conv2D#|FC#)");

@@ -68,76 +34,180 @@ static void setupInputQuantizers(std::shared_ptr<GraphView> graphView, size_t nb
         std::pair<int, int> signedRange = {-std::pow(2, nbBits - 1), std::pow(2, nbBits - 1) - 1};
         std::pair<int, int> unsignedRange = {0, std::pow(2, nbBits) - 1};

-        // Create the input quantizer node
+        // INPUT QUANTIZERS INSERTION

-        auto quantizerName = makeUniqueName(linearNode->name() + "_lsq_i", graphView);
-        auto quantizerNode = LSQ(signedRange, quantizerName);
+        // TODO : double check this, and use createUniqueName()
+        auto inputQuantizerName = makeUniqueName(linearNode->name() + "_lsq_i", graphView);
+        auto inputQuantizerNode = LSQ(signedRange, inputQuantizerName);

-        // Init the step-size using the node call stack
+        // Set the step size

-        quantizerNode->addBeforeForward([quantizerNode](){ return initStepSize(quantizerNode); });
+        auto inputStepSizeOp = inputQuantizerNode->getParent(1)->getOperator();
+        auto inputStepSizeTensor = std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}}));
+        inputStepSizeOp->setOutput(0, inputStepSizeTensor);

         // Absorb the ReLU when possible ...

-        bool nodeHasParent = static_cast<bool> (linearNode->getParents()[0]); // XXX is this safe ?
+        // XXX is this safe ???
+        bool nodeHasParent = static_cast<bool> (linearNode->getParents()[0]);
+        // bool nodeHasParent = (linearNode->getParents().size() != 0);

         if (nodeHasParent) {
             auto parentNode = linearNode->getParents()[0];
             if (parentNode->type() == "ReLU") {
-                auto quantizerOp = std::static_pointer_cast<LSQ_Op> (quantizerNode->getOperator());
-                quantizerOp->range() = unsignedRange;
+                auto inputQuantizerOp = std::static_pointer_cast<LSQ_Op> (inputQuantizerNode->getOperator());
+                inputQuantizerOp->range() = unsignedRange;
                 graphView->replace({parentNode}, {});
             }
         }

-        // Insert the quantizer in the graphView ...
-        // (We need to handle the case where the linear node is the first one)
+        // We need to handle the case where the linear node is the first one ...

         if (nodeHasParent) {
-            graphView->insertParent(linearNode, quantizerNode, 0, 0, 0);
+            graphView->insertParent(linearNode, inputQuantizerNode, 0, 0, 0);
         } else {
-            quantizerNode->addChild(graphView);
-            graphView->add(quantizerNode);
+            inputQuantizerNode->addChild(graphView);
+            graphView->add(inputQuantizerNode);
         }
+
+        // PARAM QUANTIZERS INSERTION
+
+        // TODO : double check this, and use createUniqueName()
+        auto paramQuantizerName = makeUniqueName(linearNode->name() + "_lsq_p", graphView);
+        auto paramQuantizerNode = LSQ(signedRange, paramQuantizerName);
+        graphView->insertParent(linearNode, paramQuantizerNode, 1, 0, 0);
+
+        // Set the step size
+
+        auto paramStepSizeOp = paramQuantizerNode->getParent(1)->getOperator();
+        auto paramStepSizeTensor = std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}}));
+        paramStepSizeOp->setOutput(0, paramStepSizeTensor);
     }
+
 }

-// PARAM QUANTIZERS INSERTION
+static float getTensorAbsMean(std::shared_ptr<Tensor> tensor)
+{
+    auto backend = tensor->backend();
+    if (backend == "cuda")
+        tensor->setBackend("cpu");
+
+    float acc = 0;
+    float* castedTensor = static_cast<float *> (tensor->getImpl()->rawPtr());
+    for(std::size_t i = 0; i < tensor->size(); i++)
+        acc += std::abs(castedTensor[i]);
+    acc /= static_cast<float> (tensor->size());
+
+    if (backend == "cuda")
+        tensor->setBackend("cuda");
+
+    return acc;
+}

-static void setupParamQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits)
+static std::map<std::string, float> collectInputStats(std::shared_ptr<GraphView> graphView, std::shared_ptr<Tensor> calibrationData, bool useCuda)
 {
-    const auto matches = SinglePassGraphMatching(graphView).match("(Conv2D#|FC#)");
+    // Propagate the calibration tensor

-    std::pair<int, int> signedRange = {-std::pow(2, nbBits - 1), std::pow(2, nbBits - 1) - 1};
+    SequentialScheduler scheduler(graphView);
+    scheduler.resetScheduling();
+    scheduler.forward(true, {calibrationData});

-    for (const auto& match : matches)
-    {
-        auto linearNode = match.graph->rootNode();
+    // Store the input tensor statistics

-        // TODO : double check this, and use createUniqueName()
-        auto quantizerName = makeUniqueName(linearNode->name() + "_lsq_p", graphView);
-        auto quantizerNode = LSQ(signedRange, quantizerName);
+    if (useCuda)
+        graphView->setBackend("cpu");

-        // Init the step-size using the node call stack
+    std::map<std::string, float> inputStats;
+    for (auto node : graphView->getNodes())
+    {
+        if (node->type() == "FC" || node->type() == "Conv2D") // TODO: use graph matching !!!
+        {
+            const auto op = std::static_pointer_cast<LSQ_Op>(node->getOperator());
+            float inputAbsMean = getTensorAbsMean(op->getInput(0));
+            inputStats.insert(std::make_pair(node->name(), inputAbsMean));
+            fmt::println("{} -> {}", node->name(), inputAbsMean);
+        }
+    }

-        quantizerNode->addBeforeForward([quantizerNode](){ return initStepSize(quantizerNode); });
+    if (useCuda)
+        graphView->setBackend("cuda");

-        // Insert the quantizer in the graphView
+    return inputStats;
+}

-        graphView->insertParent(linearNode, quantizerNode, 1, 0, 0);
+static std::map<std::string, float> collectParamStats(std::shared_ptr<GraphView> graphView, bool useCuda)
+{
+    if (useCuda)
+        graphView->setBackend("cpu");
+
+    std::map<std::string, float> paramStats;
+    for (auto node : graphView->getNodes())
+    {
+        if (node->type() == "FC" || node->type() == "Conv2D") // TODO: use graph matching !!!
+        {
+            const auto op = std::static_pointer_cast<LSQ_Op>(node->getOperator());
+            float paramAbsMean = getTensorAbsMean(op->getInput(1));
+            paramStats.insert(std::make_pair(node->name(), paramAbsMean));
+            fmt::println("{} -> {}", node->name(), paramAbsMean);
+        }
+    }
+
+    if (useCuda)
+        graphView->setBackend("cuda");
+
+    return paramStats;
+}

-void QuantLSQ::setupQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits)
+static void adjustQuantizersStepSizes(std::shared_ptr<GraphView> graphView, std::map<std::string, float> inputStats, std::map<std::string, float> paramStats)
 {
-    setupInputQuantizers(graphView, nbBits);
-    setupParamQuantizers(graphView, nbBits);
+    const auto matches = SinglePassGraphMatching(graphView).match("(Conv2D#|FC#)");
+
+    for (const auto& match : matches)
+    {
+        auto linearNode = match.graph->rootNode();
+
+        // INPUT QUANTIZERS STEP-SIZES
+
+        auto inputQuantNode = linearNode->getParent(0);
+        auto inputQuantOp = std::static_pointer_cast<LSQ_Op>(inputQuantNode->getOperator());
+
+        float absMean = inputStats[linearNode->name()];
+        float stepSize = 2.0f * (absMean / std::sqrt(inputQuantOp->range().second));
+
+        auto inputStepSizeOp = inputQuantNode->getParent(1)->getOperator();
+        // XXX inputStepSizeOp->setOutput(0, std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}})));
+        auto inputStepSizeTensor = std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}}));
+        inputStepSizeOp->setOutput(0, inputStepSizeTensor);
+
+        // PARAM QUANTIZERS STEP-SIZES
+
+        auto paramQuantNode = linearNode->getParent(1);
+        auto paramQuantOp = std::static_pointer_cast<LSQ_Op>(paramQuantNode->getOperator());
+
+        absMean = paramStats[linearNode->name()];
+        stepSize = 2.0f * (absMean / std::sqrt(paramQuantOp->range().second));
+
+        auto paramStepSizeOp = paramQuantNode->getParent(1)->getOperator();
+        // XXX paramStepSizeOp->setOutput(0, std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}})));
+        auto paramStepSizeTensor = std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}}));
+        paramStepSizeOp->setOutput(0, paramStepSizeTensor);
+    }
 }

-void QuantLSQ::devLSQ(std::shared_ptr<Tensor> tensor)
+void QuantLSQ::insertAndInitQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits, std::shared_ptr<Tensor> calibrationData)
 {
-    float mean = (tensor->mean()).get<float> (0);
-    Log::debug("MEAN = {}",mean);
+    bool useCuda = (calibrationData->backend() == "cuda");
+
+    // Collect the tensor statistics
+    auto inputStats = collectInputStats(graphView, calibrationData, useCuda);
+
+    auto paramStats = collectParamStats(graphView, useCuda);
+
+    // Insert the quantizers
+    insertQuantizers(graphView, nbBits, 1.0);
+
+    // Adjust the quantizers step-sizes
+    adjustQuantizersStepSizes(graphView, inputStats, paramStats);
 }
 }
\ No newline at end of file
--
GitLab

From 924f4ed899607a2fb29f109cc85b8bb8b94e5c29 Mon Sep 17 00:00:00 2001
From: Noam ZERAH <noam.zerah@cea.fr>
Date: Thu, 23 Jan 2025 15:02:12 +0000
Subject: [PATCH 18/23] untracking quantization/_version.py

---
 aidge_quantization/_version.py | 4 ----
 1 file changed, 4 deletions(-)
 delete mode 100644 aidge_quantization/_version.py

diff --git a/aidge_quantization/_version.py b/aidge_quantization/_version.py
deleted file mode 100644
index 2d34d35..0000000
--- a/aidge_quantization/_version.py
+++ /dev/null
@@ -1,4 +0,0 @@
-# file generated by setuptools_scm
-# don't change, don't track in version control
-__version__ = version = '0.2.1.dev60+g8044e79.d20250106'
-__version_tuple__ = version_tuple = (0, 2, 1, 'dev60', 'g8044e79.d20250106')
\ No newline at end of file
--
GitLab
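
[Editorial note] The step-size rule used by adjustQuantizersStepSizes() in PATCH 17/23 above is step_size = 2 * mean(|x|) / sqrt(q_max), where q_max is the upper bound of the quantizer range. The following standalone Python sketch reproduces that arithmetic for illustration only; the sample values are made up and nothing here is part of the patch series:

    import math

    def quant_ranges(nb_bits: int):
        # Signed and unsigned integer ranges, as computed in insertQuantizers()
        signed_range = (-2 ** (nb_bits - 1), 2 ** (nb_bits - 1) - 1)
        unsigned_range = (0, 2 ** nb_bits - 1)
        return signed_range, unsigned_range

    def lsq_step_size(values, q_max):
        # step = 2 * mean(|x|) / sqrt(q_max), the rule applied to both the
        # input and the parameter quantizers in adjustQuantizersStepSizes()
        abs_mean = sum(abs(v) for v in values) / len(values)
        return 2.0 * abs_mean / math.sqrt(q_max)

    signed_range, unsigned_range = quant_ranges(8)   # (-128, 127) and (0, 255)
    print(lsq_step_size([0.5, -1.0, 0.25], signed_range[1]))
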
From f54ca69e0ec7f22b3f6fc4a93ac1dc0e7e40c789 Mon Sep 17 00:00:00 2001
From: Noam ZERAH <noam.zerah@cea.fr>
Date: Thu, 23 Jan 2025 17:37:05 +0000
Subject: [PATCH 19/23] Setting up the backend of new inserted node by using
 the parent node backend

---
 python_binding/pybind_PTQ.cpp     |  9 +--------
 python_binding/pybind_QAT_LSQ.cpp |  5 ++---
 src/PTQ/PTQ.cpp                   | 18 +++++++++---------
 3 files changed, 12 insertions(+), 20 deletions(-)

diff --git a/python_binding/pybind_PTQ.cpp b/python_binding/pybind_PTQ.cpp
index 0a37a60..1de7976 100644
--- a/python_binding/pybind_PTQ.cpp
+++ b/python_binding/pybind_PTQ.cpp
@@ -17,6 +17,7 @@
 #include "aidge/quantization/PTQ/Clipping.hpp"
 #include "aidge/quantization/PTQ/CLE.hpp"
 #include "aidge/quantization/PTQ/PTQ.hpp"
+
 #include "aidge/graph/GraphView.hpp"

 namespace py = pybind11;
@@ -47,14 +48,6 @@ void init_PTQ(py::module &m) {
     :type network: :py:class:`aidge_core.GraphView`
     )mydelimiter");

-    m.def( "multiply_scaling_factor",&multiplyScalingFactor,py::arg("node"), py::arg("coeff"),
-    R"mydelimiter(
-    Updates the scaling factor of a "Mul" node in a graph if the node is marked as a scaling node. This function multiplies the existing scaling factor by a given coefficient.
-    :param node: A node representing the node to modify.
-    :param coeff: A floating value representing the multiplication coefficient to apply to the scaling factor.
-    )mydelimiter"
-    );
-
     m.def("normalize_parameters", &normalizeParameters, py::arg("network"),
     R"mydelimiter(
     Normalize the parameters of each parametrized node, so that they fit in the [-1:1] range.
diff --git a/python_binding/pybind_QAT_LSQ.cpp b/python_binding/pybind_QAT_LSQ.cpp
index 0b9fcc2..206985e 100644
--- a/python_binding/pybind_QAT_LSQ.cpp
+++ b/python_binding/pybind_QAT_LSQ.cpp
@@ -23,9 +23,8 @@ void init_QAT_LSQ(py::module &m) {

     auto mQuantLSQ = m.def_submodule("lsq");

-    mQuantLSQ.def("setup_quantizers", &QuantLSQ::setupQuantizers, py::arg("network"), py::arg("nb_bits"));
-
-    mQuantLSQ.def("dev_lsq", &QuantLSQ::devLSQ, py::arg("tensor"));
+    mQuantLSQ.def("insert_quantizers", &QuantLSQ::insertQuantizers, py::arg("network"), py::arg("nb_bits"), py::arg("step_size"));
+    mQuantLSQ.def("insert_and_init_quantizers", &QuantLSQ::insertAndInitQuantizers, py::arg("network"), py::arg("nb_bits"), py::arg("calibration_data"));
 }

 } // namespace Aidge
diff --git a/src/PTQ/PTQ.cpp b/src/PTQ/PTQ.cpp
index 6df3b2d..27f2fcc 100644
--- a/src/PTQ/PTQ.cpp
+++ b/src/PTQ/PTQ.cpp
@@ -104,10 +104,9 @@ bool insertRoundBelowProducer(std::shared_ptr<Node> node,std::shared_ptr<GraphVi
 {
     std::shared_ptr<Aidge::Node> roundNode = Round(node->name() + "_Round");
     roundNode->getOperator()->setDataType(DataType::Float64); // getDataType(parentNode)
-    roundNode->getOperator()->setBackend("cpu");
+    roundNode->getOperator()->setBackend(node->getOperator()->backend());

     insertNodeBetween(node,roundNode,graphView);
-
     roundNode->attributes()->addAttr("isProducerRounding",0.0);
     return true;
 }
@@ -137,8 +136,9 @@ bool insertScalingBelowProducer(std::shared_ptr<Node> node,double scalingFactor,
     scalingFactorProducer->getOperator()->setOutput(0, scalingFactorTensor);
     graphView->add(scalingFactorProducer);

-    scalingNode->getOperator()->setDataType(DataType::Float64); // getDataType(parentNode)
-    scalingNode->getOperator()->setBackend("cpu");
+    scalingNode->getOperator()->setDataType(DataType::Float64);
+    std::string producerBackend = std::static_pointer_cast<OperatorTensor>(node->getOperator())->getOutput(0)->backend();
+    scalingNode->getOperator()->setBackend(producerBackend);

     insertNodeBetween(node, scalingNode, graphView);

@@ -341,7 +341,7 @@ void insertResidualNodes(std::shared_ptr<GraphView> graphView)
             scalingFactorProducer->getOperator()->setOutput(0, scalingFactorTensor);

             residualNode->getOperator()->setDataType(DataType::Float64); // getDataType(parentNode)
-            residualNode->getOperator()->setBackend("cpu");
+            residualNode->getOperator()->setBackend(parentNode->getOperator()->backend());

             graphView->insertParent(node, residualNode, i, 0, 0);
             graphView->add(scalingFactorProducer);
@@ -375,7 +375,7 @@ void insertScalingNodes(std::shared_ptr<GraphView> graphView)
         scalingFactorProducer->getOperator()->setOutput(0, scalingFactorTensor);

         scalingNode->getOperator()->setDataType(DataType::Float64); // getDataType(parentNode)
-        scalingNode->getOperator()->setBackend("cpu");
+        scalingNode->getOperator()->setBackend(parentNode->getOperator()->backend());

         if (parentNode->getChildren().size() > 0)
         {
@@ -935,7 +935,7 @@ void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_
             std::shared_ptr<Node> quantizerNode = Quantizer(old_sf, -(signedMax + 1), signedMax, node->name());
             quantizerNode->getOperator()->setDataType(DataType::Float64); // getDataType(parentNode)
-            quantizerNode->getOperator()->setBackend("cpu");
+            quantizerNode->getOperator()->setBackend(node->getOperator()->backend());
             graphView->replace({node,node->getParent(1)}, {quantizerNode});

             if (optimizeSigns)
@@ -984,7 +984,7 @@ static void insertCompensationNodes(std::shared_ptr<GraphView> graphView, std::u
             mulNode->attributes()->addAttr("isCompensation",0.0);

             mulNode->getOperator()->setDataType(DataType::Float64); // getDataType(parentNode)
-            mulNode->getOperator()->setBackend("cpu");
+            mulNode->getOperator()->setBackend(node->getOperator()->backend());

             graphView->insertParent(node, mulNode, 0, 0, 0);

@@ -995,7 +995,7 @@ static void insertCompensationNodes(std::shared_ptr<GraphView> graphView, std::u
             coeffProducer->getOperator()->setOutput(0, coeffTensor);

             coeffProducer->getOperator()->setDataType(DataType::Float64);
-            coeffProducer->getOperator()->setBackend("cpu");
+            coeffProducer->getOperator()->setBackend(node->getOperator()->backend());

             graphView->add(coeffProducer); // needed ?
--
GitLab
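
[Editorial note] With PATCH 19/23 applied, the LSQ pipeline is reachable from Python through the lsq submodule bound above. A usage sketch under stated assumptions: the import name aidge_quantization is inferred from the package layout seen in PATCH 18/23, and graph_view / calib are placeholders for a prepared GraphView and a calibration Tensor obtained elsewhere:

    import aidge_core           # assumed available
    import aidge_quantization   # module name assumed from the package layout

    # graph_view: an aidge_core.GraphView with initialized parameters (assumed given)
    # calib:      an aidge_core.Tensor holding one calibration batch (assumed given)

    # Variant 1: insert LSQ quantizers with a fixed step size
    aidge_quantization.lsq.insert_quantizers(network=graph_view, nb_bits=8, step_size=0.05)

    # Variant 2: insert the quantizers and size their steps from calibration data
    # (use one variant or the other, not both, to avoid inserting quantizers twice)
    # aidge_quantization.lsq.insert_and_init_quantizers(network=graph_view, nb_bits=8, calibration_data=calib)
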
From 8106adf1f0d86ae7b81ab3178b3d5a869ac7b5f7 Mon Sep 17 00:00:00 2001
From: Noam ZERAH <noam.zerah@cea.fr>
Date: Tue, 18 Feb 2025 10:34:02 +0000
Subject: [PATCH 20/23] Fixing double Rounding bug

---
 src/PTQ/PTQ.cpp | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/src/PTQ/PTQ.cpp b/src/PTQ/PTQ.cpp
index 27f2fcc..4a7f740 100644
--- a/src/PTQ/PTQ.cpp
+++ b/src/PTQ/PTQ.cpp
@@ -102,13 +102,17 @@ void insertNodeBetween(std::shared_ptr<Node> parent,

 bool insertRoundBelowProducer(std::shared_ptr<Node> node,std::shared_ptr<GraphView> graphView)
 {
-    std::shared_ptr<Aidge::Node> roundNode = Round(node->name() + "_Round");
-    roundNode->getOperator()->setDataType(DataType::Float64); // getDataType(parentNode)
-    roundNode->getOperator()->setBackend(node->getOperator()->backend());
+    if(node->attributes()->hasAttr("isProducerScaling") && node->type() != "Round")
+    {
+        std::shared_ptr<Aidge::Node> roundNode = Round(node->name() + "_Round");
+        roundNode->getOperator()->setDataType(DataType::Float64); // getDataType(parentNode)
+        roundNode->getOperator()->setBackend(node->getOperator()->backend());

-    insertNodeBetween(node,roundNode,graphView);
-    roundNode->attributes()->addAttr("isProducerRounding",0.0);
-    return true;
+        insertNodeBetween(node,roundNode,graphView);
+        roundNode->attributes()->addAttr("isProducerRounding",0.0);
+        return true;
+    }
+    return false;
 }
--
GitLab

From 094abfd2091483f9528a29f7e14f587a68cf8061 Mon Sep 17 00:00:00 2001
From: Noam ZERAH <noam.zerah@cea.fr>
Date: Tue, 18 Feb 2025 16:04:12 +0000
Subject: [PATCH 21/23] Rebase on dev

---
 src/PTQ/PTQ.cpp | 9 ---------
 1 file changed, 9 deletions(-)

diff --git a/src/PTQ/PTQ.cpp b/src/PTQ/PTQ.cpp
index 4a7f740..bb6c66c 100644
--- a/src/PTQ/PTQ.cpp
+++ b/src/PTQ/PTQ.cpp
@@ -1120,13 +1120,9 @@ void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits,
     if (verbose)
         printScalingFactors(graphView);

-    //Log::info(" === SCALINGS (BEFORE CAST) ===");
     if (useCuda)
         graphView->setBackend("cuda");

-    //Log::info(" === SCALINGS (AFTER CAST) ===");
-    //printScalingFactors(graphView);
-
     Log::info(" Reseting the scheduler ...");
     SequentialScheduler scheduler(graphView);
     scheduler.resetScheduling();
@@ -1161,9 +1157,4 @@ void clearBiases(std::shared_ptr<GraphView> graphView)
         }
     }
 }
-void devPTQ(std::shared_ptr<GraphView> graphView)
-{
-    for (std::shared_ptr<Node> node : graphView->getNodes())
-        Log::debug(" UUU : {}", node->name());
-}
 }
--
GitLab

From 748966015292964f43f0f0dd9e409116b2ad0c03 Mon Sep 17 00:00:00 2001
From: Noam ZERAH <noam.zerah@cea.fr>
Date: Wed, 19 Feb 2025 13:35:31 +0000
Subject: [PATCH 22/23] Fixing rebase bugs

---
 python_binding/pybind_PTQ.cpp | 7 -------
 src/PTQ/PTQ.cpp               | 2 ++
 2 files changed, 2 insertions(+), 7 deletions(-)

diff --git a/python_binding/pybind_PTQ.cpp b/python_binding/pybind_PTQ.cpp
index 1de7976..ae0a0de 100644
--- a/python_binding/pybind_PTQ.cpp
+++ b/python_binding/pybind_PTQ.cpp
@@ -213,13 +213,6 @@ void init_PTQ(py::module &m) {
     :type network: :py:class:`aidge_core.GraphView`
     )mydelimiter");

-    m.def("dev_ptq", &devPTQ, py::arg("network"),
-    R"mydelimiter(
-    Developement and test routine.
-    :param network: The GraphView under test.
-    :type network: :py:class:`aidge_core.GraphView`
-    )mydelimiter");
-
     m.def("prepare_network", &prepareNetwork, py::arg("network"), "prepare the network for the PTQ");
 }
diff --git a/src/PTQ/PTQ.cpp b/src/PTQ/PTQ.cpp
index bb6c66c..cfdbbdd 100644
--- a/src/PTQ/PTQ.cpp
+++ b/src/PTQ/PTQ.cpp
@@ -21,8 +21,10 @@
 #include "aidge/scheduler/Scheduler.hpp"
 #include "aidge/utils/Log.hpp"

+
 #include "aidge/operator/Producer.hpp"
 #include "aidge/operator/Mul.hpp"
+#include "aidge/operator/Round.hpp"
 #include "aidge/operator/ReLU.hpp"
 #include "aidge/operator/BatchNorm.hpp"
 #include "aidge/operator/Conv.hpp"
--
GitLab

From 944fd3e79f61a2b81f985d54c773374c0c5d470e Mon Sep 17 00:00:00 2001
From: Noam ZERAH <noam.zerah@cea.fr>
Date: Wed, 19 Feb 2025 13:54:22 +0000
Subject: [PATCH 23/23] Adding a namespace to all quantization ptq tags

---
 src/PTQ/CLE.cpp      |  2 +-
 src/PTQ/Clipping.cpp |  2 +-
 src/PTQ/PTQ.cpp      | 50 ++++++++++++++++++++++----------------------
 3 files changed, 27 insertions(+), 27 deletions(-)

diff --git a/src/PTQ/CLE.cpp b/src/PTQ/CLE.cpp
index 4ed87fc..2738f8a 100644
--- a/src/PTQ/CLE.cpp
+++ b/src/PTQ/CLE.cpp
@@ -111,7 +111,7 @@ static double getTensorAbsoluteMax(std::shared_ptr<Tensor> tensor)
 }
 //Function used to extract the local tensor (from a ProducerScalingNode)
 std::shared_ptr<Aidge::Tensor> getLocalTensor(std::shared_ptr<Node> node) {
-    if (node->getParent(1)->attributes()->hasAttr("isProducerScaling")) {
+    if (node->getParent(1)->attributes()->hasAttr("quantization.ptq.isProducerScaling")) {
         std::shared_ptr<Aidge::OperatorTensor> operatorTensor = std::static_pointer_cast<OperatorTensor>(node->getParent(1)->getOperator());
         operatorTensor->forward();// We need the forward pass to compute the scaled value of the Tensor
         return operatorTensor->getOutput(0);
diff --git a/src/PTQ/Clipping.cpp b/src/PTQ/Clipping.cpp
index ef34fdc..a4e7fed 100644
--- a/src/PTQ/Clipping.cpp
+++ b/src/PTQ/Clipping.cpp
@@ -222,7 +222,7 @@ std::map<std::string, double> adjustRanges(Clipping clippingMode, std::map<std::

     for (std::shared_ptr<Node> node : graphView->getNodes())
     {
-        if (node->attributes()->hasAttr("isScaling"))
+        if (node->attributes()->hasAttr("quantization.ptq.isScaling"))
         {
             std::vector<int> histogram = histograms[node->name()];

diff --git a/src/PTQ/PTQ.cpp b/src/PTQ/PTQ.cpp
index cfdbbdd..f03fc7b 100644
--- a/src/PTQ/PTQ.cpp
+++ b/src/PTQ/PTQ.cpp
@@ -64,7 +64,7 @@ static int getInputIndex(std::shared_ptr<Node> node, std::shared_ptr<Node> paren

 void multiplyScalingFactor(std::shared_ptr<Aidge::Node> node,double coeff)
 {
-    AIDGE_ASSERT(node->type() == "Mul" && (node->attributes()->hasAttr("isProducerScaling") || node->attributes()->hasAttr("isScaling")),
+    AIDGE_ASSERT(node->type() == "Mul" && (node->attributes()->hasAttr("quantization.ptq.isProducerScaling") || node->attributes()->hasAttr("quantization.ptq.isScaling")),
     "Cannot update the scaling factor on Node of type {} with no scaling tag",node->type());
     auto scalingFactorTensor = std::static_pointer_cast<OperatorTensor>(node->getOperator())->getInput(1);
     std::shared_ptr<Tensor> fallback;
@@ -104,27 +104,27 @@ void insertNodeBetween(std::shared_ptr<Node> parent,

 bool insertRoundBelowProducer(std::shared_ptr<Node> node,std::shared_ptr<GraphView> graphView)
 {
-    if(node->attributes()->hasAttr("isProducerScaling") && node->type() != "Round")
+    if(node->attributes()->hasAttr("quantization.ptq.isProducerScaling") && node->type() != "Round")
     {
         std::shared_ptr<Aidge::Node> roundNode = Round(node->name() + "_Round");
         roundNode->getOperator()->setDataType(DataType::Float64); // getDataType(parentNode)
         roundNode->getOperator()->setBackend(node->getOperator()->backend());

         insertNodeBetween(node,roundNode,graphView);
-        roundNode->attributes()->addAttr("isProducerRounding",0.0);
+        roundNode->attributes()->addAttr("quantization.ptq.isProducerRounding",0.0);
         return true;
     }
     return false;
 }

 bool insertScalingBelowProducer(std::shared_ptr<Node> node,double scalingFactor, std::shared_ptr<GraphView> graphView)
 {
-    if(node->attributes()->hasAttr("isProducerRounding"))
+    if(node->attributes()->hasAttr("quantization.ptq.isProducerRounding"))
     {
         //In this case we 'bump' the node to the one above it (an actual ProducerScaling)
         // because the round node is not usable (only used when SSA is enabled)
         node = node->getParent(0);
     }
-    if(node->attributes()->hasAttr("isProducerScaling"))
+    if(node->attributes()->hasAttr("quantization.ptq.isProducerScaling"))
     {
         // We accumulate the multiple scaling factors by multiplying the SF of the ProducerScaling node
         // (adding new nodes each time would make the graph unusable)
@@ -135,7 +135,7 @@ bool insertScalingBelowProducer(std::shared_ptr<Node> node,double scalingFactor,
     std::string scalingNodeName = makeUniqueName(node->name() + "_ProducerScaling", graphView);
     std::shared_ptr<Aidge::Node> scalingNode = Mul(scalingNodeName);
-    scalingNode->attributes()->addAttr("isProducerScaling",0.0);
+    scalingNode->attributes()->addAttr("quantization.ptq.isProducerScaling",0.0);

     std::shared_ptr<Tensor> scalingFactorTensor = std::make_shared<Tensor>(Array1D<double, 1> {scalingFactor});
     std::shared_ptr<Node> scalingFactorProducer = addProducer(scalingNode, 1, {1}, "Factor");
@@ -219,7 +219,7 @@ static std::vector<std::shared_ptr<Node>> removeProdScalingNodes(std::vector<std
 {
     std::vector<std::shared_ptr<Node>> remainingNodes;
     for (std::shared_ptr<Node> node : nodeVector)
-        if (!node->attributes()->hasAttr("isProducerScaling"))
+        if (!node->attributes()->hasAttr("quantization.ptq.isProducerScaling"))
             remainingNodes.push_back(node);

     return remainingNodes;
@@ -338,8 +338,8 @@ void insertResidualNodes(std::shared_ptr<GraphView> graphView)
             std::string residualNodeName = makeUniqueName(parentNode->name() + "_Res", graphView);
             std::shared_ptr<Node> residualNode = Mul(residualNodeName);
-            residualNode->attributes()->addAttr("isScaling", 0.0);
-            residualNode->attributes()->addAttr("isResidual", 0.0);
+            residualNode->attributes()->addAttr("quantization.ptq.isScaling", 0.0);
+            residualNode->attributes()->addAttr("quantization.ptq.isResidual", 0.0);

             //Adding the SF as a producer of the node
             std::shared_ptr<Tensor> scalingFactorTensor = std::make_shared<Tensor>(Array1D<double, 1> {1.0});
@@ -371,9 +371,9 @@ void insertScalingNodes(std::shared_ptr<GraphView> graphView)
         std::string scalingNodeName = makeUniqueName(parentNode->name() + "_Scaling", graphView);
         //std::shared_ptr<Node> scalingNode = Scaling(1.0, scalingNodeName);

-        //Adding Mul operator with tag "isScaling"
+        //Adding Mul operator with tag "quantization.ptq.isScaling"
         std::shared_ptr<Aidge::Node> scalingNode = Mul(scalingNodeName);
-        scalingNode->attributes()->addAttr("isScaling",0.0);
+        scalingNode->attributes()->addAttr("quantization.ptq.isScaling",0.0);

         //Adding the SF as a producer of the node
         std::shared_ptr<Tensor> scalingFactorTensor = std::make_shared<Tensor>(Array1D<double, 1> {1.0});
@@ -402,7 +402,7 @@ void insertScalingNodes(std::shared_ptr<GraphView> graphView)

 static std::shared_ptr<Node> getPreviousScalingNode(std::shared_ptr<Node> mergingNode)
 {
     std::shared_ptr<Node> currNode = mergingNode;
-    while(!currNode->attributes()->hasAttr("isScaling"))
+    while(!currNode->attributes()->hasAttr("quantization.ptq.isScaling"))
     {
         if (currNode->getParents().size() == 0)
         {
@@ -445,7 +445,7 @@ void normalizeParameters(std::shared_ptr<GraphView> graphView)
     for (std::shared_ptr<Node> node : nodeVector)
     {
         // Scaling nodes still have a ratio of 1, so they are seamless ...
-        if (node->type() == "ReLU" || node->attributes()->hasAttr("isScaling") || isSeamless(node))
+        if (node->type() == "ReLU" || node->attributes()->hasAttr("quantization.ptq.isScaling") || isSeamless(node))
         {
             if (node != firstNode)
             {
@@ -533,7 +533,7 @@ std::map<std::string, double> computeRanges(std::shared_ptr<GraphView> graphView
     std::set<std::shared_ptr<Node>> nodeSet = graphView->getNodes();
     for (std::shared_ptr<Node> node : nodeSet)
     {
-        if ((scalingNodesOnly && (node->attributes()->hasAttr("isScaling"))) || (!scalingNodesOnly && (node->type() != "Producer")))
+        if ((scalingNodesOnly && (node->attributes()->hasAttr("quantization.ptq.isScaling"))) || (!scalingNodesOnly && (node->type() != "Producer")))
         {
             std::shared_ptr<Operator> nodeOperator = node->getOperator();
             std::shared_ptr<Tensor> valueTensor = std::static_pointer_cast<Tensor> (nodeOperator->getRawOutput(0));
@@ -555,7 +555,7 @@ std::map<std::string, double> computeRanges(std::shared_ptr<GraphView> graphView
     // std::shared_ptr<Node> inputNode = getFirstNode(graphView);

     for (std::shared_ptr<Node> node : nodeSet)
-        if ((scalingNodesOnly && (node->attributes()->hasAttr("isScaling"))) || (!scalingNodesOnly && (node->type() != "Producer")))
+        if ((scalingNodesOnly && (node->attributes()->hasAttr("quantization.ptq.isScaling"))) || (!scalingNodesOnly && (node->type() != "Producer")))
             valueRanges.insert(std::make_pair(node->name(), 0));

     if (useCuda)
@@ -582,7 +582,7 @@ std::map<std::string, double> computeRanges(std::shared_ptr<GraphView> graphView
         std::map<std::string, double> sampleRanges;
         for (std::shared_ptr<Node> node : nodeSet)
         {
-            if ((scalingNodesOnly && (node->attributes()->hasAttr("isScaling"))) || (!scalingNodesOnly && (node->type() != "Producer")))
+            if ((scalingNodesOnly && (node->attributes()->hasAttr("quantization.ptq.isScaling"))) || (!scalingNodesOnly && (node->type() != "Producer")))
             {
                 std::shared_ptr<Operator> nodeOperator = node->getOperator();
                 std::shared_ptr<Tensor> valueTensor = std::static_pointer_cast<Tensor> (nodeOperator->getRawOutput(0));
@@ -604,7 +604,7 @@ std::map<std::string, double> computeRanges(std::shared_ptr<GraphView> graphView

         for (std::shared_ptr<Node> node : nodeSet)
         {
-            if ((scalingNodesOnly && (node->attributes()->hasAttr("isScaling"))) || (!scalingNodesOnly && (node->type() != "Producer")))
+            if ((scalingNodesOnly && (node->attributes()->hasAttr("quantization.ptq.isScaling"))) || (!scalingNodesOnly && (node->type() != "Producer")))
             {
                 std::string nodeName = node->name();
                 if (sampleRanges[nodeName] > valueRanges[nodeName])
@@ -658,7 +658,7 @@ void normalizeActivations(std::shared_ptr<GraphView> graphView, std::map<std::st
         // Here prevNode is either an 'Affine' or a 'Merging'
        // => do not split the cases, just handle the bias ...

-        if (node->attributes()->hasAttr("isScaling"))
+        if (node->attributes()->hasAttr("quantization.ptq.isScaling"))
        {
            // retrieve the previous scaling factor ...
@@ -749,7 +749,7 @@ std::map<std::string, std::pair<bool, bool>> computeSignMap(std::shared_ptr<Grap
             signMap[node->name()].second = false;
         }

-        if (node->attributes()->hasAttr("isScaling"))
+        if (node->attributes()->hasAttr("quantization.ptq.isScaling"))
         {
             signMap[node->name()].second = false;

@@ -796,7 +796,7 @@ std::map<std::string, std::pair<bool, bool>> computeSignMap(std::shared_ptr<Grap
             // Arbitration : Signed type wins !
             for(std::shared_ptr<Node> parent : parentNodes)
             {
-                while (!parent->attributes()->hasAttr("isScaling"))
+                while (!parent->attributes()->hasAttr("quantization.ptq.isScaling"))
                 {
                     signMap[parent->name()] = std::make_pair(false, false);
                     // We are on a branch so nodes always have 1 parent ...
@@ -929,7 +929,7 @@ void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_

         // Handle the Scaling Nodes ...

-        if (node->attributes()->hasAttr("isScaling"))
+        if (node->attributes()->hasAttr("quantization.ptq.isScaling"))
         {
             if (!noQuant)
             {
@@ -979,7 +979,7 @@ static void insertCompensationNodes(std::shared_ptr<GraphView> graphView, std::u
     {
         // A merging node is always followed by a Quantizer node at this point

-        if (node->type() == "Quantizer" && (node->attributes()->hasAttr("isResidual") || !isAffine(node->getParent(0))))
+        if (node->type() == "Quantizer" && (node->attributes()->hasAttr("quantization.ptq.isResidual") || !isAffine(node->getParent(0))))
         {
             // check if the Quantizer is a residual one, and insert a compensation node if so ...

@@ -988,7 +988,7 @@ static void insertCompensationNodes(std::shared_ptr<GraphView> graphView, std::u
             std::string mulNodeName = makeUniqueName(node->name() + "_Mul", graphView);
             std::shared_ptr<Node> mulNode = Mul(mulNodeName);

-            mulNode->attributes()->addAttr("isCompensation",0.0);
+            mulNode->attributes()->addAttr("quantization.ptq.isCompensation",0.0);
             mulNode->getOperator()->setDataType(DataType::Float64); // getDataType(parentNode)
             mulNode->getOperator()->setBackend(node->getOperator()->backend());

@@ -1020,7 +1020,7 @@ void performSingleShiftApproximation(std::shared_ptr<GraphView> graphView, bool

     for (std::shared_ptr<Node> node : nodeVector)
     {
-        if (isAffine(node) || (node->type() == "Mul" && node->attributes()->hasAttr("isCompensation")))
+        if (isAffine(node) || (node->type() == "Mul" && node->attributes()->hasAttr("quantization.ptq.isCompensation")))
         {
             std::shared_ptr<Node> scalingNode = (*node->getChildren().begin());

@@ -1050,7 +1050,7 @@ static void printScalingFactors(std::shared_ptr<GraphView> graphView)
 {
     for (auto node : retrieveNodeVector(graphView))
-        if (node->attributes()->hasAttr("isScaling") || node->type() == "Quantizer")
+        if (node->attributes()->hasAttr("quantization.ptq.isScaling") || node->type() == "Quantizer")
         {
             double scalingFactor = getScalingFactor(node);
             Log::info(" {:.6f} ({})", scalingFactor, node->name());
--
GitLab
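
[Editorial note] PATCH 23/23 moves every PTQ marker attribute under the "quantization.ptq." namespace, so a tag such as isScaling is now looked up as quantization.ptq.isScaling. A minimal Python sketch of the naming convention (ptq_tag is a hypothetical helper for illustration, not part of the aidge bindings):

    # Mirrors the renaming applied throughout PTQ.cpp, CLE.cpp and Clipping.cpp
    PTQ_TAG_PREFIX = "quantization.ptq."

    def ptq_tag(name: str) -> str:
        # "isScaling" -> "quantization.ptq.isScaling"
        return PTQ_TAG_PREFIX + name

    assert ptq_tag("isScaling") == "quantization.ptq.isScaling"
    assert ptq_tag("isProducerRounding") == "quantization.ptq.isProducerRounding"
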