diff --git a/README.md b/README.md
index 467c8d1668201bdd32ca905971b27eda198ea5ee..1e4c14b3b75e7765747f7d1d543fba8d90c15672 100644
--- a/README.md
+++ b/README.md
@@ -52,7 +52,7 @@ In that case, here is the standard pipeline:
 5) perform the output value normalization, over a calibration dataset
 6) quantize the normalized network
 
-## Further functionalities
+## Further work
 
-The next feature of this module will consist in providing smart clipping methods for the normalizations.
-Later works will provide Quantization Aware Training (QAT).
+* add smart clipping methods for the normalizations.
+* add Quantization Aware Training (QAT).
diff --git a/include/aidge/QuantPTQ.hpp b/include/aidge/QuantPTQ.hpp
index 14731ba4add3389f3ca06c42d0c9b68f68a5a321..499ded0d7f9054acddec09f6c0ed90b4ed4140a4 100644
--- a/include/aidge/QuantPTQ.hpp
+++ b/include/aidge/QuantPTQ.hpp
@@ -64,7 +64,7 @@ namespace Aidge {
      * @param graphView The GraphView to be quantized.
      * @param nbBits The desired number of bits of the quantization.
      */
-    void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, int nbBits);
+    void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits);
 
     /**
      * @brief Main quantization routine. Performs every step of the quantization pipeline.
@@ -72,7 +72,7 @@ namespace Aidge {
      * @param nbBits The desired number of bits of the quantization.
      * @param inputDataSet The input dataset on which the value ranges are computed.
      */
-    void quantizeNetwork(std::shared_ptr<GraphView> graphView, int nbBits, std::vector<std::shared_ptr<Tensor>> inputDataSet);
+    void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits, std::vector<std::shared_ptr<Tensor>> inputDataSet);
 
     /**
      * @brief Compute the weight ranges of every affine node. Provided for debuging purposes.
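Reviewer note: since both entry points now take `std::uint8_t` for `nbBits`, call sites pass a small unsigned value. The sketch below shows a minimal caller against the updated header; only `Aidge::quantizeNetwork` and the `GraphView`/`Tensor` types come from the patch, while the driver function and its arguments are hypothetical stand-ins for the application's own model-loading code.

```cpp
#include <cstdint>
#include <memory>
#include <vector>

#include "aidge/QuantPTQ.hpp"  // declares Aidge::quantizeNetwork()

// Hypothetical driver: the graphView and calibration tensors are assumed to
// come from whatever import / data-loading code the application already has.
void runPTQ(std::shared_ptr<Aidge::GraphView> graphView,
            std::vector<std::shared_ptr<Aidge::Tensor>> calibrationSet)
{
    // nbBits is now std::uint8_t instead of int (8-bit PTQ here)
    std::uint8_t nbBits = 8;

    // Runs the full pipeline: architecture check, normalizations, quantization
    Aidge::quantizeNetwork(graphView, nbBits, calibrationSet);
}
```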
diff --git a/src/QuantPTQ.cpp b/src/QuantPTQ.cpp
index ff657c4e9a96f1df5c4e31822819b86685cc788f..2f12d0febaae49b0c589295bf39521c4783b01d7 100644
--- a/src/QuantPTQ.cpp
+++ b/src/QuantPTQ.cpp
@@ -37,6 +37,7 @@
 #include "aidge/operator/ReLU.hpp"
 #include "aidge/operator/Scaling.hpp"
 #include "aidge/recipes/Recipes.hpp"
+#include "aidge/operator/MetaOperator.hpp"
 
 namespace Aidge{
 
@@ -51,43 +52,46 @@ static std::string makeUniqueName(std::string baseName, std::shared_ptr<GraphVie
     if (!isInside)
         return baseName;
 
+    int index = 1;
     std::string newName = baseName;
-    for (std::uint32_t index = 1; isInside; ++index)
+    while (isInside)
     {
         newName = baseName + "_" + std::to_string(index);
-        isInside = (existingNames.find(newName) != existingNames.cend());
+        isInside = (existingNames.find(newName) != existingNames.end());
+        index++;
     }
 
     return newName;
 }
 
+static bool isAffine(std::shared_ptr<Node> node)
+{
+    std::set<std::string> affineNodeTypes({"FC", "Conv", "ConvDepthWise", "PaddedConv", "PaddedConvDepthWise"});
+    return (affineNodeTypes.find(node->type()) != affineNodeTypes.end());
+}
+
+static bool isSeamless(std::shared_ptr<Node> node)
+{
+    std::set<std::string> seamlessNodeTypes({"Pad", "MaxPooling", "AvgPooling", "PaddedMaxPooling", "PaddedAvgPooling", "GlobalAveragePooling"});
+    return (seamlessNodeTypes.find(node->type()) != seamlessNodeTypes.end());
+}
+
 bool checkArchitecture(std::shared_ptr<GraphView> graphView)
 {
-    std::set<std::string> supportedNodeTypes(
-        {"FC", "Conv", "ConvDepthWise", "Pad", "MaxPooling", "AvgPooling", "Add", "Concat", "Softmax", "ReLU", "Producer"}
-    );
+    std::set<std::string> otherNodeTypes({"Add", "Concat", "Softmax", "ReLU", "Producer"});
 
     for (std::shared_ptr<Node> node : graphView->getNodes())
-        if (supportedNodeTypes.find(node->type()) == supportedNodeTypes.end()) {
+    {
+        bool isOther = otherNodeTypes.find(node->type()) != otherNodeTypes.end();
+        if (!isOther && !isAffine(node) && !isSeamless(node)) {
             Log::info(" GraphView can't be quantized : node type {} is not supported !", node->type());
             return false;
         }
+    }
 
     return true;
 }
 
-static bool isAffine(std::shared_ptr<Node> node)
-{
-    std::set<std::string> affineNodeTypes({"FC", "Conv", "ConvDepthWise"});
-    return (affineNodeTypes.find(node->type()) != affineNodeTypes.end());
-}
-
-static bool isSeamless(std::shared_ptr<Node> node)
-{
-    std::set<std::string> seamlessNodeTypes({"Pad", "MaxPooling", "AvgPooling"});
-    return (seamlessNodeTypes.find(node->type()) != seamlessNodeTypes.end());
-}
-
 static std::shared_ptr<Node> getFirstNode(std::shared_ptr<GraphView> graphView)
 {
     std::shared_ptr<Node> currNode = graphView->rootNode();
@@ -95,7 +99,7 @@ static std::shared_ptr<Node> getFirstNode(std::shared_ptr<GraphView> graphView)
         currNode = *(currNode->getChildren()).begin();
 
     std::shared_ptr<Node> parentNode = currNode->getParent(0);
-    while (parentNode->type() != Producer_Op::Type) {
+    while (parentNode->type() != "Producer") {
         currNode = parentNode;
         parentNode = currNode->getParent(0);
     }
@@ -143,7 +147,7 @@ static void roundTensor(std::shared_ptr<Tensor> tensor)
 static float getTensorAbsoluteMax(std::shared_ptr <Tensor> tensor)
 {
     // Get the tensor data pointer and edit it
-    float* castedTensor = static_cast<float*>(tensor->getImpl()->rawPtr());
+    float * castedTensor = static_cast<float*>(tensor->getImpl()->rawPtr());
 
     // Get the tensor absolute max value
     float maxValue = 0.0f;
@@ -225,7 +229,7 @@ std::vector<std::shared_ptr<Node>> extractNodeVector(std::shared_ptr<GraphView>
 
     fixScheduling(nodeVector);
 
-    removeMatchingNodes(nodeVector, Producer_Op::Type);
+    removeMatchingNodes(nodeVector, "Producer");
 
     if (verbose)
     {
@@ -243,7 +247,7 @@ void insertResidualNodes(std::shared_ptr<GraphView> graphView)
 
     for (std::shared_ptr<Node> node : nodeVector)
     {
-        if (node->type() == Add_Op::Type || node->type() == Concat_Op::Type)
+        if (node->type() == "Add" || node->type() == "Concat")
         {
             int nbParents = node->getParents().size();
             for (int i = 0; i < nbParents; i++)
@@ -304,7 +308,7 @@ void insertScalingNodes(std::shared_ptr<GraphView> graphView)
 
     graphView->forwardDims();
 
-    // XXX XXX XXX Append identity if needed ...
+    // XXX Append identity if needed ...
     if (getLastNode(graphView)->type() == "Scaling")
         appendIdentity(graphView);
 
@@ -314,7 +318,7 @@ static std::shared_ptr<Node> getPreviousScalingNode(std::shared_ptr<Node> mergingNode)
 {
     std::shared_ptr<Node> currNode = mergingNode;
-    while(currNode->type() != Scaling_Op::Type)
+    while(currNode->type() != "Scaling")
         currNode = currNode->getParents()[0];
     return currNode;
 }
 
@@ -373,7 +377,7 @@ void normalizeParameters(std::shared_ptr<GraphView> graphView)
 
         }
 
-        if (node->type() == Add_Op::Type || node->type() == Concat_Op::Type)
+        if (node->type() == "Add" || node->type() == "Concat")
         {
             // We should assert if merging nodes are all scalings !
             std::vector<std::shared_ptr<Node>> mergingNodes = node->getParents();
@@ -498,7 +502,7 @@ void normalizeActivations(std::shared_ptr<GraphView> graphView, std::vector<std:
             }
         }
 
-        if (node->type() == Scaling_Op::Type)
+        if (node->type() == "Scaling")
        {
             // Retreive the previous scaling factor ...
             std::shared_ptr<Node> prevNode = node->getParent(0);
@@ -527,7 +531,7 @@
             // Compute the max scaling ...
             float maxScaling = 0;
             int maxNodeIndex = 0;
-            for (size_t i = 0; i < mergingNodes.size(); i++)
+            for (std::size_t i = 0; i < mergingNodes.size(); i++)
             {
                 float merginNodeScaling = scalingFactors[mergingNodes[i]->name()];
                 if (merginNodeScaling > maxScaling) {
@@ -561,7 +565,7 @@ void normalizeActivations(std::shared_ptr<GraphView> graphView, std::vector<std:
     }
 }
 
-void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, int nbBits)
+void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits)
 {
     float signedMax = (1 << (nbBits - 1)) - 1;
 
@@ -592,7 +596,7 @@ void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, int nbBits)
             std::shared_ptr<Node> scalingNode = *(node->getChildren().begin());
             std::shared_ptr<Scaling_Op> scalingOperator = std::static_pointer_cast<Scaling_Op> (scalingNode->getOperator());
             scalingOperator->getAttr<float>("scalingFactor") /= signedMax;
-            scalingOperator->getAttr<std::size_t>("quantizedNbBits") = static_cast<std::size_t>(nbBits);
+            scalingOperator->getAttr<std::size_t>("quantizedNbBits") = nbBits;
         }
     }
 
@@ -602,15 +606,17 @@
         if (node->type() == "Scaling")
         {
             std::shared_ptr<Scaling_Op> scalingOperator = std::static_pointer_cast<Scaling_Op> (node->getOperator());
-            scalingOperator->getAttr<size_t>("quantizedNbBits") = nbBits; // XXX HERE !!!
+            scalingOperator->getAttr<std::size_t>("quantizedNbBits") = nbBits; // XXX HERE !!!
         }
     }
 }
 
 void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits, std::vector<std::shared_ptr<Tensor>> inputDataSet)
 {
-    Log::info(" Expanding the meta-ops ... ");
-    expandMetaOps(graphView, false);
+    Log::info(" === QUANT PTQ 0.2.7 === ");
+
+    if (!checkArchitecture(graphView))
+        return;
 
     Log::info(" Removing the flatten nodes ... ");
     removeFlatten(graphView);
@@ -653,7 +659,7 @@ std::map<std::string, float> getWeightRanges(std::shared_ptr<GraphVie
 void clearBiases(std::shared_ptr<GraphView> graphView) {
 
     for (std::shared_ptr<Node> node : graphView->getNodes()) {
-        if (node->type() == FC_Op::Type || node->type() == Conv_Op<2>::Type) {
+        if (node->type() == "FC" || node->type() == "Conv") {
             std::shared_ptr<Tensor> biasTensor = std::static_pointer_cast<OperatorTensor>(node->getOperator())->getInput(2);
             rescaleTensor(biasTensor, 0);
         }
@@ -662,20 +668,11 @@ void clearBiases(std::shared_ptr<GraphView> graphView) {
 
 void devPTQ(std::shared_ptr<GraphView> graphView)
 {
-    //for (std::shared_ptr<Node> node : graphView->getNodes())
-    //    if (node->type() == "GlobalAveragePool")
-    //        graphView->replace({node}, {});
-
-    std::cout << " Expanding the meta-ops ... " << std::endl;
-    expandMetaOps(graphView, false);
-    std::cout << " Done ! " << std::endl;
-
     for (std::shared_ptr<Node> node : graphView->getNodes())
         std::cout << " ### node : " << node->type() << std::endl;
 }
 
-}
@@ -725,7 +722,7 @@ std::map<std::string, std::vector<int>> getValueHistograms(std::shared_ptr<Graph
 
         // Fill the histogram ...
 
-        for (size_t i = 0; i < valueTensor->size(); i++)
+        for (std::size_t i = 0; i < valueTensor->size(); i++)
         {
             float ratio = std::abs(castedTensor[i]) / valueRanges[node->name()];
             int bin_index = (int) (ratio * (nb_bins - 1));
@@ -743,7 +740,7 @@ void printHistograms(std::map<std::string, std::vector<int>> histograms)
     for (it = histograms.begin(); it != histograms.end(); it++)
     {
         std::cout << " Node : " << it->first << " -> ";
-        for (size_t i = 0; i < it->second.size(); i++)
+        for (std::size_t i = 0; i < it->second.size(); i++)
             std::cout << (it->second)[i] << " ";
         std::cout << std::endl;
     }
diff --git a/version.txt b/version.txt
index 8a9ecc2ea99d607e92feae1656ddbf6fdd82a2c1..0ea3a944b399d25f7e1b8fe684d754eb8da9fe7f 100644
--- a/version.txt
+++ b/version.txt
@@ -1 +1 @@
-0.0.1
\ No newline at end of file
+0.2.0