diff --git a/include/aidge/quantization/PTQ/Clipping.hpp b/include/aidge/quantization/PTQ/Clipping.hpp index 159b64f12f8c6ae2bb3e88592b29f211e15fa614..35f23f5f2022128238e1991717876d6462d0b6da 100644 --- a/include/aidge/quantization/PTQ/Clipping.hpp +++ b/include/aidge/quantization/PTQ/Clipping.hpp @@ -33,10 +33,10 @@ namespace Aidge * @param valueRanges A map associating each considered node name to its corresponding output range. * @param nbBins Desired number of bins of the returned histograms. * @param graphView The GraphView containing the considered nodes. - * @param inputDataSet The input dataset, consisting of a vector of input samples. + * @param calibrationSet The calibration dataset, consisting of a vector of input samples. * @return A map associating each node name to it's corresponding activation histogram. */ - std::unordered_map<std::shared_ptr<Node>, std::vector<int>> computeHistograms(std::unordered_map<std::shared_ptr<Node>, double> valueRanges, int nbBins, std::shared_ptr<GraphView> graphView, std::vector<std::shared_ptr<Tensor>> inputDataSet, bool useCuda); + std::unordered_map<std::shared_ptr<Node>, std::vector<int>> computeHistograms(std::unordered_map<std::shared_ptr<Node>, double> valueRanges, int nbBins, std::shared_ptr<GraphView> graphView, std::vector<std::shared_ptr<Tensor>> calibrationSet, bool useCuda); /** * @brief Given an input activation histogram, compute the optimal clipping value in the sense of the Lp norm. @@ -63,11 +63,11 @@ namespace Aidge * @param valueRanges The map associating each affine node to its output range. * @param nbBits The quantization number of bits. * @param graphView The GraphView containing the considered nodes. - * @param inputDataSet The input dataset, consisting of a vector of input samples. + * @param calibrationSet The calibration dataset, consisting of a vector of input samples. * @param verbose Whether to print the clipping values or not. 
* @return The corrected map associating each provided node to its clipped range. */ - std::unordered_map<std::shared_ptr<Node>, double> adjustRanges(Clipping clippingMode, std::unordered_map<std::shared_ptr<Node>, double> valueRanges, std::uint8_t nbBits, std::shared_ptr<GraphView> graphView, std::vector<std::shared_ptr<Tensor>> inputDataSet, bool useCuda, bool verbose); + std::unordered_map<std::shared_ptr<Node>, double> adjustRanges(Clipping clippingMode, std::unordered_map<std::shared_ptr<Node>, double> valueRanges, std::uint8_t nbBits, std::shared_ptr<GraphView> graphView, std::vector<std::shared_ptr<Tensor>> calibrationSet, bool useCuda, bool verbose); } diff --git a/include/aidge/quantization/PTQ/PTQ.hpp b/include/aidge/quantization/PTQ/PTQ.hpp index 71a4754a72a85d16b771a78f20726284874e3959..968f9b5cd2d6c93892052a2e740e11388d5edef6 100644 --- a/include/aidge/quantization/PTQ/PTQ.hpp +++ b/include/aidge/quantization/PTQ/PTQ.hpp @@ -131,11 +131,11 @@ namespace Aidge { /** * @brief Compute the activation ranges of every affine node, given an input dataset. * @param graphView The GraphView containing the affine nodes, on which the inferences are performed. - * @param inputDataSet The input dataset, consisting of a vector of input samples. + * @param calibrationSet The calibration dataset, consisting of a vector of input samples. * @param scalingNodesOnly Whether to restrain the retreival of the ranges to scaling nodes only or not. * @return A map associating each affine node name to it's corresponding output range. 
*/ - std::unordered_map<std::shared_ptr<Node>, double> computeRanges(std::shared_ptr<GraphView> graphView, std::vector<std::shared_ptr<Tensor>> inputDataSet, bool scalingNodesOnly, bool useCuda); + std::unordered_map<std::shared_ptr<Node>, double> computeRanges(std::shared_ptr<GraphView> graphView, std::vector<std::shared_ptr<Tensor>> calibrationSet, bool scalingNodesOnly, bool useCuda); /** * @brief Normalize the activations of each affine node so that they fit in the [-1:1] range. @@ -179,7 +179,7 @@ namespace Aidge { * @brief Main quantization routine. Performs every step of the quantization pipeline. * @param graphView The GraphView to be quantized. * @param nbBits The desired number of bits of the quantization. - * @param inputDataSet The input dataset used for the activations calibration. + * @param calibrationSet The calibration dataset used for the activations calibration. * @param targetType The desired data-type of the outputed GraphView. * @param clippingMode: Type of the clipping optimization. Can be either 'MAX', 'MSE', 'AA' or 'KL'. * @param noQuant Whether to apply the rounding operations or not. @@ -189,7 +189,7 @@ namespace Aidge { * @param foldGraph Whether to fold the parameter quantizers after the quantization or not. * @param verbose Whether to print internal informations about the quantization process or not. 
*/ - void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits, std::vector<std::shared_ptr<Tensor>> inputDataSet, DataType targetType, Clipping clippingMode, bool noQuant, bool optimizeSigns, bool singleShift, bool useCuda, bool foldGraph, bool verbose); + void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits, std::vector<std::shared_ptr<Tensor>> calibrationSet, DataType targetType, Clipping clippingMode, bool noQuant, bool optimizeSigns, bool singleShift, bool useCuda, bool foldGraph, bool verbose); /** diff --git a/python_binding/pybind_PTQ.cpp b/python_binding/pybind_PTQ.cpp index d125f1d3678ad2a3b128c6f72ea640116e66cbdd..ad6931c8f6dcc9e6f3dd8d16fb57e6cadf06efe6 100644 --- a/python_binding/pybind_PTQ.cpp +++ b/python_binding/pybind_PTQ.cpp @@ -55,13 +55,13 @@ void init_PTQ(py::module &m) { :type network: :py:class:`aidge_core.GraphView` )mydelimiter"); - m.def("compute_ranges", &computeRanges, py::arg("network"), py::arg("input_dataset"), py::arg("scaling_nodes_only"), py::arg("use_cuda"), + m.def("compute_ranges", &computeRanges, py::arg("network"), py::arg("calibration_set"), py::arg("scaling_nodes_only"), py::arg("use_cuda"), R"mydelimiter( Compute the activation ranges of every affine node, given an input dataset. :param network: The GraphView containing the affine nodes, on which the inferences are performed. :type network: :py:class:`aidge_core.GraphView` - :param input_dataset: The input dataset, consisting of a vector of input samples. - :type input_dataset: list of :py:class:`aidge_core.Tensor` + :param calibration_set: The calibration dataset, consisting of a list of input samples. + :type calibration_set: list of :py:class:`aidge_core.Tensor` :param scaling_nodes_only: Whether to restrain the retreival of the ranges to scaling nodes only or not :type scaling_nodes_only: bool :return: A map associating each considered node name to it's corresponding output range. 
@@ -78,15 +78,15 @@ void init_PTQ(py::module &m) { :type value_ranges: list of float. )mydelimiter"); - m.def("quantize_normalized_network", &quantizeNormalizedNetwork, py::arg("network"), py::arg("nb_bits"), py::arg("no_quantization")=false, py::arg("optimize_signs"), py::arg("verbose") = false, + m.def("quantize_normalized_network", &quantizeNormalizedNetwork, py::arg("network"), py::arg("nb_bits"), py::arg("no_quant")=false, py::arg("optimize_signs"), py::arg("verbose") = false, R"mydelimiter( Quantize an already normalized (in term of parameters and activations) network. :param network: The GraphView to be quantized. :type network: :py:class:`aidge_core.GraphView` :param nb_bits: The desired number of bits of the quantization. :type nb_bits: int - :param apply_rounding: Whether to apply the rounding operations or not. - :type apply_rounding: bool + :param no_quant: Whether to apply the rounding operations or not. + :type no_quant: bool :param optimize_signs: Whether to take account of the IO signs of the operators or not. :type optimize_signs: bool :param verbose: Whether to print the sign map or not. @@ -105,21 +105,21 @@ void init_PTQ(py::module &m) { :type single_shift: bool )mydelimiter"); - m.def("quantize_network", &quantizeNetwork, py::arg("network"), py::arg("nb_bits"), py::arg("input_dataset"), py::arg("target_type"), py::arg("clipping_mode") = Clipping::MAX , py::arg("no_quantization") = false, py::arg("optimize_signs") = false, py::arg("single_shift") = false, py::arg("use_cuda") = false, py::arg("fold_graph") = true, py::arg("verbose") = false, + m.def("quantize_network", &quantizeNetwork, py::arg("network"), py::arg("nb_bits"), py::arg("calibration_set"), py::arg("target_type"), py::arg("clipping_mode") = Clipping::MAX , py::arg("no_quant") = false, py::arg("optimize_signs") = false, py::arg("single_shift") = false, py::arg("use_cuda") = false, py::arg("fold_graph") = true, py::arg("verbose") = false, R"mydelimiter( Main quantization routine. 
Performs every step of the quantization pipeline. :param network: The GraphView to be quantized. :type network: :py:class:`aidge_core.GraphView` :param nb_bits: The desired number of bits of the quantization. :type nb_bits: int - :param input_dataset: The input dataset used for the activations calibration. - :type input_dataset: list of :py:class:`aidge_core.Tensor` + :param calibration_set: The calibration dataset used for the activations calibration. + :type calibration_set: list of :py:class:`aidge_core.Tensor` :param target_type: The desired data-type of the outputed GraphView. :type target_type: :py:class:`aidge_core.DataType` :param clipping_mode: Type of the clipping optimization. Can be either 'MAX', 'MSE', 'AA' or 'KL'. :type clipping_mode: :py:class:`aidge_quantization.Clipping` - :param no_quantization: Whether to apply the rounding operations or not. - :type no_quantization: bool + :param no_quant: Whether to apply the rounding operations or not. + :type no_quant: bool :param optimize_signs: Whether to take account of the IO signs of the operators or not. :type optimize_signs: bool :param single_shift: Whether to convert the scaling factors into powers of two. If true the approximations are compensated using the previous nodes parameters. @@ -132,7 +132,7 @@ void init_PTQ(py::module &m) { :type verbose: bool )mydelimiter"); - m.def("compute_histograms", &computeHistograms, py::arg("value_ranges"), py::arg("nb_bins"), py::arg("network"), py::arg("input_dataset"), py::arg("use_cuda"), + m.def("compute_histograms", &computeHistograms, py::arg("value_ranges"), py::arg("nb_bins"), py::arg("network"), py::arg("calibration_set"), py::arg("use_cuda"), R"mydelimiter( Compute the histograms of the activations of each node contained in the map of the ranges (passed as argument). :param value_ranges: A map associating each considered node name to its corresponding output range. 
@@ -141,8 +141,8 @@ void init_PTQ(py::module &m) { :type nb_bins: int :param network: The GraphView containing the considered nodes. :type network: :py:class:`aidge_core.GraphView` - :param input_dataset: The input dataset, consisting of a list of input samples. - :type input_dataset: list of :py:class:`aidge_core.Tensor` + :param calibration_set: The calibration dataset, consisting of a list of input samples. + :type calibration_set: list of :py:class:`aidge_core.Tensor` :return: A map associating each node name to it's corresponding activation histogram. :rtype: dict )mydelimiter"); @@ -171,7 +171,7 @@ void init_PTQ(py::module &m) { :rtype: float )mydelimiter"); - m.def("adjust_ranges", &adjustRanges, py::arg("clipping_mode"), py::arg("value_ranges"), py::arg("nb_bits"), py::arg("network"), py::arg("input_dataset"), py::arg("use_cuda"), py::arg("verbose") = false, + m.def("adjust_ranges", &adjustRanges, py::arg("clipping_mode"), py::arg("value_ranges"), py::arg("nb_bits"), py::arg("network"), py::arg("calibration_set"), py::arg("use_cuda"), py::arg("verbose") = false, R"mydelimiter( Return a corrected map of the provided activation ranges. To do so compute the optimal clipping values for every node and multiply the input ranges by those values. @@ -184,8 +184,8 @@ void init_PTQ(py::module &m) { :type nb_bits: int :param network: The GraphView containing the considered nodes. :type network: :py:class:`aidge_core.GraphView` - :param input_dataset: The input dataset, consisting of a list of input samples. - :type input_dataset: list of :py:class:`aidge_core.Tensor` + :param calibration_set: The calibration dataset, consisting of a list of input samples. + :type calibration_set: list of :py:class:`aidge_core.Tensor` :param verbose: Whether to print the clipping values or not. :type verbose: bool :return: The corrected map associating to each provided node its clipped range. 
diff --git a/src/PTQ/Clipping.cpp b/src/PTQ/Clipping.cpp index c54bfb07eb303b3a5aacad45914179b7aa10245e..41a1d24ff76e30d81d078f111f636ed5f3f97eca 100644 --- a/src/PTQ/Clipping.cpp +++ b/src/PTQ/Clipping.cpp @@ -19,7 +19,7 @@ namespace Aidge { -std::unordered_map<std::shared_ptr<Node>, std::vector<int>> computeHistograms(std::unordered_map<std::shared_ptr<Node>, double> valueRanges, int nbBins, std::shared_ptr<GraphView> graphView, std::vector<std::shared_ptr<Tensor>> inputDataSet, bool useCuda) +std::unordered_map<std::shared_ptr<Node>, std::vector<int>> computeHistograms(std::unordered_map<std::shared_ptr<Node>, double> valueRanges, int nbBins, std::shared_ptr<GraphView> graphView, std::vector<std::shared_ptr<Tensor>> calibrationSet, bool useCuda) { if (useCuda) graphView->setBackend("cuda"); @@ -47,7 +47,7 @@ std::unordered_map<std::shared_ptr<Node>, std::vector<int>> computeHistograms(st int it = 0; - for (std::shared_ptr<Tensor> inputTensor : inputDataSet) + for (std::shared_ptr<Tensor> inputTensor : calibrationSet) { Log::debug(" IT (BIS) : {}", it++); @@ -197,7 +197,7 @@ double computeKLClipping(std::vector<int> refHistogram, std::uint8_t nbBits) } -std::unordered_map<std::shared_ptr<Node>, double> adjustRanges(Clipping clippingMode, std::unordered_map<std::shared_ptr<Node>, double> valueRanges, std::uint8_t nbBits, std::shared_ptr<GraphView> graphView, std::vector<std::shared_ptr<Tensor>> inputDataSet, bool useCuda, bool verbose) +std::unordered_map<std::shared_ptr<Node>, double> adjustRanges(Clipping clippingMode, std::unordered_map<std::shared_ptr<Node>, double> valueRanges, std::uint8_t nbBits, std::shared_ptr<GraphView> graphView, std::vector<std::shared_ptr<Tensor>> calibrationSet, bool useCuda, bool verbose) { double clipping = 1.0f; @@ -208,7 +208,7 @@ std::unordered_map<std::shared_ptr<Node>, double> adjustRanges(Clipping clipping if (verbose) Log::info(" === CLIPPING VALUES === "); - std::unordered_map<std::shared_ptr<Node>, std::vector<int>> 
histograms = computeHistograms(valueRanges, nbBins, graphView, inputDataSet, useCuda); + std::unordered_map<std::shared_ptr<Node>, std::vector<int>> histograms = computeHistograms(valueRanges, nbBins, graphView, calibrationSet, useCuda); for (std::shared_ptr<Node> node : graphView->getNodes()) { diff --git a/src/PTQ/PTQ.cpp b/src/PTQ/PTQ.cpp index a99dc179c35a785fc1a66fb92fe681cd641dacf1..52edce0a1cad3453bf7dc0ba3f7ec2de0590bf47 100644 --- a/src/PTQ/PTQ.cpp +++ b/src/PTQ/PTQ.cpp @@ -714,7 +714,7 @@ void normalizeParameters(std::shared_ptr<GraphView> graphView) } } -std::unordered_map<std::shared_ptr<Node>, double> computeRanges(std::shared_ptr<GraphView> graphView, std::vector<std::shared_ptr<Tensor>> inputDataSet, bool scalingNodesOnly, bool useCuda) +std::unordered_map<std::shared_ptr<Node>, double> computeRanges(std::shared_ptr<GraphView> graphView, std::vector<std::shared_ptr<Tensor>> calibrationSet, bool scalingNodesOnly, bool useCuda) { std::unordered_map<std::shared_ptr<Node>, double> valueRanges; std::set<std::shared_ptr<Node>> nodeSet = graphView->getNodes(); @@ -733,7 +733,7 @@ std::unordered_map<std::shared_ptr<Node>, double> computeRanges(std::shared_ptr< int it = 0; - for (std::shared_ptr<Tensor> sample : inputDataSet) + for (std::shared_ptr<Tensor> sample : calibrationSet) { //Log::info(" IT : {}", it++); @@ -1251,7 +1251,7 @@ static void printScalingFactors(std::shared_ptr<GraphView> graphView) } } -static void setupDataType(std::shared_ptr<GraphView> graphView, std::vector<std::shared_ptr<Tensor>> inputDataSet, DataType dataType) +static void setupDataType(std::shared_ptr<GraphView> graphView, std::vector<std::shared_ptr<Tensor>> calibrationSet, DataType dataType) { graphView->setDataType(dataType); @@ -1262,11 +1262,11 @@ static void setupDataType(std::shared_ptr<GraphView> graphView, std::vector<std: inputTensor->setDataType(dataType); } - for (auto tensor : inputDataSet) + for (auto tensor : calibrationSet) tensor->setDataType(dataType); } -void 
quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits, std::vector<std::shared_ptr<Tensor>> inputDataSet, DataType targetType, Clipping clippingMode, bool noQuant, bool optimizeSigns, bool singleShift, bool useCuda, bool foldGraph, bool verbose) +void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits, std::vector<std::shared_ptr<Tensor>> calibrationSet, DataType targetType, Clipping clippingMode, bool noQuant, bool optimizeSigns, bool singleShift, bool useCuda, bool foldGraph, bool verbose) { Log::notice(" === QUANT PTQ 0.2.21 === "); @@ -1275,8 +1275,8 @@ void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits, if (!checkArchitecture(graphView)) return; - DataType initialDataType = (inputDataSet[0])->dataType(); - setupDataType(graphView, inputDataSet, DataType::Float64); + DataType initialDataType = (calibrationSet[0])->dataType(); + setupDataType(graphView, calibrationSet, DataType::Float64); Log::notice(" Preparing the network for the PTQ ... 
"); prepareNetwork(graphView); @@ -1291,10 +1291,10 @@ void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits, normalizeParameters(graphView); Log::notice(" Computing the value ranges ..."); - std::unordered_map<std::shared_ptr<Node>, double> valueRanges = computeRanges(graphView, inputDataSet, true, useCuda); + std::unordered_map<std::shared_ptr<Node>, double> valueRanges = computeRanges(graphView, calibrationSet, true, useCuda); Log::notice(" Optimizing the clipping values ..."); - valueRanges = adjustRanges(clippingMode, valueRanges, nbBits, graphView, inputDataSet, useCuda, verbose); + valueRanges = adjustRanges(clippingMode, valueRanges, nbBits, graphView, calibrationSet, useCuda, verbose); Log::notice(" Normalizing the activations ..."); normalizeActivations(graphView, valueRanges); diff --git a/src/operator/PTQMetaOps.cpp b/src/operator/PTQMetaOps.cpp index 0f59d842a9b70ab14bea798d4a19d2938899ab0e..450880d2fd8d553192146e611e6a084abbf73eb7 100644 --- a/src/operator/PTQMetaOps.cpp +++ b/src/operator/PTQMetaOps.cpp @@ -45,7 +45,7 @@ static void addAttr(std::shared_ptr<Aidge::Node> node, std::string attr, double node->attributes()->addAttr("quantization.ptq." + attr, value); } -// XXX TODO : rework this +// TODO : rework this static void copyDynamicAttributes(std::shared_ptr<Aidge::Node> prevNode, std::shared_ptr<Aidge::Node> newNode) { if (hasAttr(prevNode, "isProducerQuantizer")) @@ -66,10 +66,9 @@ std::shared_ptr<Node> Quantizer(double scalingFactor, const std::string& name) scalingFactorProducer->getOperator()->setOutput(0, scalingFactorTensor); // TODO : the above could be replaced by : -/* - std::shared_ptr<Node> scalingFactorProducer = Producer(scalingFactorTensor); - scalingFactorProducer->addChild(mulNode, 0, 1); -*/ + // std::shared_ptr<Node> scalingFactorProducer = Producer(scalingFactorTensor); + // scalingFactorProducer->addChild(mulNode, 0, 1); + // create the graphView ... 
std::shared_ptr<GraphView> graphView = Sequential({mulNode}); @@ -132,13 +131,13 @@ void appendRoundClip(std::shared_ptr<Node>& quantizer, double clipMin, double cl // append round - auto roundNode = Round(); + auto roundNode = Round(quantizer->name() + "_RoundQuant"); outputNode->addChild(roundNode, 0, 0); microGraph->add(roundNode); // append clip - auto clipNode = Clip(); + auto clipNode = Clip(quantizer->name() + "_ClipQuant"); auto minTensor = std::make_shared<Tensor>(clipMin); auto minNode = Producer(minTensor); @@ -310,10 +309,10 @@ void replaceScalingWithBitShift(std::shared_ptr<Node>& quantizer) // create the replacement bit-shift nodes - auto bitShiftNode = BitShift(bitShiftDirection, bitShiftRounding, ""); // XXX add a name !!! + auto bitShiftNode = BitShift(bitShiftDirection, bitShiftRounding, quantizer->name() + "_BitShiftQuant"); auto bitShiftTensor = std::make_shared<Tensor>(Array1D<int, 1> {bitShiftAmount}); - auto bitShiftProducer = Producer(bitShiftTensor, ""); // XXX add a name !!! + auto bitShiftProducer = Producer(bitShiftTensor, "bitShiftAmount"); bitShiftProducer->addChild(bitShiftNode, 0, 1); // edit the micrograph @@ -349,8 +348,8 @@ void castQuantizerIOs(std::shared_ptr<Node>& quantizer, Aidge::DataType external auto mulOp = std::static_pointer_cast<OperatorTensor> (mulNode->getOperator()); auto internalType = mulOp->getOutput(0)->dataType(); - auto castInputNode = Cast(internalType, ""); // add a name ! - auto castOutputNode = Cast(externalType, ""); // add a name ! + auto castInputNode = Cast(internalType, quantizer->name() + "_CastIn"); + auto castOutputNode = Cast(externalType, quantizer->name() + "_CastOut"); microGraph = Sequential({castInputNode, microGraph, castOutputNode});