Skip to content
Snippets Groups Projects
Commit 67057f5b authored by Benjamin Halimi's avatar Benjamin Halimi
Browse files

minor changes

parent 225f380b
No related branches found
No related tags found
2 merge requests!54Update 0.3.1 -> 0.4.0,!50Enhancement : Quantizer only PTQ
Pipeline #70994 failed
......@@ -33,10 +33,10 @@ namespace Aidge
* @param valueRanges A map associating each considered node name to its corresponding output range.
* @param nbBins Desired number of bins of the returned histograms.
* @param graphView The GraphView containing the considered nodes.
* @param inputDataSet The input dataset, consisting of a vector of input samples.
* @param calibrationSet The calibration dataset, consisting of a vector of input samples.
 * @return A map associating each node name to its corresponding activation histogram.
*/
std::unordered_map<std::shared_ptr<Node>, std::vector<int>> computeHistograms(std::unordered_map<std::shared_ptr<Node>, double> valueRanges, int nbBins, std::shared_ptr<GraphView> graphView, std::vector<std::shared_ptr<Tensor>> inputDataSet, bool useCuda);
std::unordered_map<std::shared_ptr<Node>, std::vector<int>> computeHistograms(std::unordered_map<std::shared_ptr<Node>, double> valueRanges, int nbBins, std::shared_ptr<GraphView> graphView, std::vector<std::shared_ptr<Tensor>> calibrationSet, bool useCuda);
/**
* @brief Given an input activation histogram, compute the optimal clipping value in the sense of the Lp norm.
......@@ -63,11 +63,11 @@ namespace Aidge
* @param valueRanges The map associating each affine node to its output range.
* @param nbBits The quantization number of bits.
* @param graphView The GraphView containing the considered nodes.
* @param inputDataSet The input dataset, consisting of a vector of input samples.
* @param calibrationSet The calibration dataset, consisting of a vector of input samples.
* @param verbose Whether to print the clipping values or not.
* @return The corrected map associating each provided node to its clipped range.
*/
std::unordered_map<std::shared_ptr<Node>, double> adjustRanges(Clipping clippingMode, std::unordered_map<std::shared_ptr<Node>, double> valueRanges, std::uint8_t nbBits, std::shared_ptr<GraphView> graphView, std::vector<std::shared_ptr<Tensor>> inputDataSet, bool useCuda, bool verbose);
std::unordered_map<std::shared_ptr<Node>, double> adjustRanges(Clipping clippingMode, std::unordered_map<std::shared_ptr<Node>, double> valueRanges, std::uint8_t nbBits, std::shared_ptr<GraphView> graphView, std::vector<std::shared_ptr<Tensor>> calibrationSet, bool useCuda, bool verbose);
}
......
......@@ -131,11 +131,11 @@ namespace Aidge {
/**
* @brief Compute the activation ranges of every affine node, given an input dataset.
* @param graphView The GraphView containing the affine nodes, on which the inferences are performed.
* @param inputDataSet The input dataset, consisting of a vector of input samples.
* @param calibrationSet The calibration dataset, consisting of a vector of input samples.
 * @param scalingNodesOnly Whether to restrict the retrieval of the ranges to scaling nodes only or not.
 * @return A map associating each affine node name to its corresponding output range.
*/
std::unordered_map<std::shared_ptr<Node>, double> computeRanges(std::shared_ptr<GraphView> graphView, std::vector<std::shared_ptr<Tensor>> inputDataSet, bool scalingNodesOnly, bool useCuda);
std::unordered_map<std::shared_ptr<Node>, double> computeRanges(std::shared_ptr<GraphView> graphView, std::vector<std::shared_ptr<Tensor>> calibrationSet, bool scalingNodesOnly, bool useCuda);
/**
* @brief Normalize the activations of each affine node so that they fit in the [-1:1] range.
......@@ -179,7 +179,7 @@ namespace Aidge {
* @brief Main quantization routine. Performs every step of the quantization pipeline.
* @param graphView The GraphView to be quantized.
* @param nbBits The desired number of bits of the quantization.
* @param inputDataSet The input dataset used for the activations calibration.
* @param calibrationSet The calibration dataset used for the activations calibration.
 * @param targetType The desired data-type of the output GraphView.
* @param clippingMode: Type of the clipping optimization. Can be either 'MAX', 'MSE', 'AA' or 'KL'.
 * @param noQuant Whether to skip the rounding operations or not (if true, no rounding is applied).
......@@ -189,7 +189,7 @@ namespace Aidge {
* @param foldGraph Whether to fold the parameter quantizers after the quantization or not.
 * @param verbose Whether to print internal information about the quantization process or not.
*/
void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits, std::vector<std::shared_ptr<Tensor>> inputDataSet, DataType targetType, Clipping clippingMode, bool noQuant, bool optimizeSigns, bool singleShift, bool useCuda, bool foldGraph, bool verbose);
void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits, std::vector<std::shared_ptr<Tensor>> calibrationSet, DataType targetType, Clipping clippingMode, bool noQuant, bool optimizeSigns, bool singleShift, bool useCuda, bool foldGraph, bool verbose);
/**
......
......@@ -55,13 +55,13 @@ void init_PTQ(py::module &m) {
:type network: :py:class:`aidge_core.GraphView`
)mydelimiter");
m.def("compute_ranges", &computeRanges, py::arg("network"), py::arg("input_dataset"), py::arg("scaling_nodes_only"), py::arg("use_cuda"),
m.def("compute_ranges", &computeRanges, py::arg("network"), py::arg("calibration_set"), py::arg("scaling_nodes_only"), py::arg("use_cuda"),
R"mydelimiter(
Compute the activation ranges of every affine node, given an input dataset.
:param network: The GraphView containing the affine nodes, on which the inferences are performed.
:type network: :py:class:`aidge_core.GraphView`
:param input_dataset: The input dataset, consisting of a vector of input samples.
:type input_dataset: list of :py:class:`aidge_core.Tensor`
    :param calibration_set: The calibration dataset, consisting of a list of input samples.
:type calibration_set: list of :py:class:`aidge_core.Tensor`
    :param scaling_nodes_only: Whether to restrict the retrieval of the ranges to scaling nodes only or not
:type scaling_nodes_only: bool
    :return: A map associating each considered node name to its corresponding output range.
......@@ -78,15 +78,15 @@ void init_PTQ(py::module &m) {
:type value_ranges: list of float.
)mydelimiter");
m.def("quantize_normalized_network", &quantizeNormalizedNetwork, py::arg("network"), py::arg("nb_bits"), py::arg("no_quantization")=false, py::arg("optimize_signs"), py::arg("verbose") = false,
m.def("quantize_normalized_network", &quantizeNormalizedNetwork, py::arg("network"), py::arg("nb_bits"), py::arg("no_quant")=false, py::arg("optimize_signs"), py::arg("verbose") = false,
R"mydelimiter(
    Quantize an already normalized (in terms of parameters and activations) network.
:param network: The GraphView to be quantized.
:type network: :py:class:`aidge_core.GraphView`
:param nb_bits: The desired number of bits of the quantization.
:type nb_bits: int
:param apply_rounding: Whether to apply the rounding operations or not.
:type apply_rounding: bool
    :param no_quant: Whether to skip the rounding operations or not (if True, no rounding is applied).
:type no_quant: bool
:param optimize_signs: Whether to take account of the IO signs of the operators or not.
:type optimize_signs: bool
:param verbose: Whether to print the sign map or not.
......@@ -105,21 +105,21 @@ void init_PTQ(py::module &m) {
:type single_shift: bool
)mydelimiter");
m.def("quantize_network", &quantizeNetwork, py::arg("network"), py::arg("nb_bits"), py::arg("input_dataset"), py::arg("target_type"), py::arg("clipping_mode") = Clipping::MAX , py::arg("no_quantization") = false, py::arg("optimize_signs") = false, py::arg("single_shift") = false, py::arg("use_cuda") = false, py::arg("fold_graph") = true, py::arg("verbose") = false,
m.def("quantize_network", &quantizeNetwork, py::arg("network"), py::arg("nb_bits"), py::arg("calibration_set"), py::arg("target_type"), py::arg("clipping_mode") = Clipping::MAX , py::arg("no_quant") = false, py::arg("optimize_signs") = false, py::arg("single_shift") = false, py::arg("use_cuda") = false, py::arg("fold_graph") = true, py::arg("verbose") = false,
R"mydelimiter(
Main quantization routine. Performs every step of the quantization pipeline.
:param network: The GraphView to be quantized.
:type network: :py:class:`aidge_core.GraphView`
:param nb_bits: The desired number of bits of the quantization.
:type nb_bits: int
:param input_dataset: The input dataset used for the activations calibration.
:type input_dataset: list of :py:class:`aidge_core.Tensor`
    :param calibration_set: The calibration dataset used for the activations calibration.
:type calibration_set: list of :py:class:`aidge_core.Tensor`
    :param target_type: The desired data-type of the output GraphView.
:type target_type: :py:class:`aidge_core.DataType`
:param clipping_mode: Type of the clipping optimization. Can be either 'MAX', 'MSE', 'AA' or 'KL'.
:type clipping_mode: :py:class:`aidge_quantization.Clipping`
:param no_quantization: Whether to apply the rounding operations or not.
:type no_quantization: bool
    :param no_quant: Whether to skip the rounding operations or not (if True, no rounding is applied).
:type no_quant: bool
:param optimize_signs: Whether to take account of the IO signs of the operators or not.
:type optimize_signs: bool
:param single_shift: Whether to convert the scaling factors into powers of two. If true the approximations are compensated using the previous nodes parameters.
......@@ -132,7 +132,7 @@ void init_PTQ(py::module &m) {
:type verbose: bool
)mydelimiter");
m.def("compute_histograms", &computeHistograms, py::arg("value_ranges"), py::arg("nb_bins"), py::arg("network"), py::arg("input_dataset"), py::arg("use_cuda"),
m.def("compute_histograms", &computeHistograms, py::arg("value_ranges"), py::arg("nb_bins"), py::arg("network"), py::arg("calibration_set"), py::arg("use_cuda"),
R"mydelimiter(
Compute the histograms of the activations of each node contained in the map of the ranges (passed as argument).
:param value_ranges: A map associating each considered node name to its corresponding output range.
......@@ -141,8 +141,8 @@ void init_PTQ(py::module &m) {
:type nb_bins: int
:param network: The GraphView containing the considered nodes.
:type network: :py:class:`aidge_core.GraphView`
:param input_dataset: The input dataset, consisting of a list of input samples.
:type input_dataset: list of :py:class:`aidge_core.Tensor`
    :param calibration_set: The calibration dataset, consisting of a list of input samples.
:type calibration_set: list of :py:class:`aidge_core.Tensor`
    :return: A map associating each node name to its corresponding activation histogram.
:rtype: dict
)mydelimiter");
......@@ -171,7 +171,7 @@ void init_PTQ(py::module &m) {
:rtype: float
)mydelimiter");
m.def("adjust_ranges", &adjustRanges, py::arg("clipping_mode"), py::arg("value_ranges"), py::arg("nb_bits"), py::arg("network"), py::arg("input_dataset"), py::arg("use_cuda"), py::arg("verbose") = false,
m.def("adjust_ranges", &adjustRanges, py::arg("clipping_mode"), py::arg("value_ranges"), py::arg("nb_bits"), py::arg("network"), py::arg("calibration_set"), py::arg("use_cuda"), py::arg("verbose") = false,
R"mydelimiter(
Return a corrected map of the provided activation ranges.
To do so compute the optimal clipping values for every node and multiply the input ranges by those values.
......@@ -184,8 +184,8 @@ void init_PTQ(py::module &m) {
:type nb_bits: int
:param network: The GraphView containing the considered nodes.
:type network: :py:class:`aidge_core.GraphView`
:param input_dataset: The input dataset, consisting of a list of input samples.
:type input_dataset: list of :py:class:`aidge_core.Tensor`
    :param calibration_set: The calibration dataset, consisting of a list of input samples.
:type calibration_set: list of :py:class:`aidge_core.Tensor`
:param verbose: Whether to print the clipping values or not.
:type verbose: bool
:return: The corrected map associating to each provided node its clipped range.
......
......@@ -19,7 +19,7 @@
namespace Aidge
{
std::unordered_map<std::shared_ptr<Node>, std::vector<int>> computeHistograms(std::unordered_map<std::shared_ptr<Node>, double> valueRanges, int nbBins, std::shared_ptr<GraphView> graphView, std::vector<std::shared_ptr<Tensor>> inputDataSet, bool useCuda)
std::unordered_map<std::shared_ptr<Node>, std::vector<int>> computeHistograms(std::unordered_map<std::shared_ptr<Node>, double> valueRanges, int nbBins, std::shared_ptr<GraphView> graphView, std::vector<std::shared_ptr<Tensor>> calibrationSet, bool useCuda)
{
if (useCuda)
graphView->setBackend("cuda");
......@@ -47,7 +47,7 @@ std::unordered_map<std::shared_ptr<Node>, std::vector<int>> computeHistograms(st
int it = 0;
for (std::shared_ptr<Tensor> inputTensor : inputDataSet)
for (std::shared_ptr<Tensor> inputTensor : calibrationSet)
{
Log::debug(" IT (BIS) : {}", it++);
......@@ -197,7 +197,7 @@ double computeKLClipping(std::vector<int> refHistogram, std::uint8_t nbBits)
}
std::unordered_map<std::shared_ptr<Node>, double> adjustRanges(Clipping clippingMode, std::unordered_map<std::shared_ptr<Node>, double> valueRanges, std::uint8_t nbBits, std::shared_ptr<GraphView> graphView, std::vector<std::shared_ptr<Tensor>> inputDataSet, bool useCuda, bool verbose)
std::unordered_map<std::shared_ptr<Node>, double> adjustRanges(Clipping clippingMode, std::unordered_map<std::shared_ptr<Node>, double> valueRanges, std::uint8_t nbBits, std::shared_ptr<GraphView> graphView, std::vector<std::shared_ptr<Tensor>> calibrationSet, bool useCuda, bool verbose)
{
double clipping = 1.0f;
......@@ -208,7 +208,7 @@ std::unordered_map<std::shared_ptr<Node>, double> adjustRanges(Clipping clipping
if (verbose)
Log::info(" === CLIPPING VALUES === ");
std::unordered_map<std::shared_ptr<Node>, std::vector<int>> histograms = computeHistograms(valueRanges, nbBins, graphView, inputDataSet, useCuda);
std::unordered_map<std::shared_ptr<Node>, std::vector<int>> histograms = computeHistograms(valueRanges, nbBins, graphView, calibrationSet, useCuda);
for (std::shared_ptr<Node> node : graphView->getNodes())
{
......
......@@ -714,7 +714,7 @@ void normalizeParameters(std::shared_ptr<GraphView> graphView)
}
}
std::unordered_map<std::shared_ptr<Node>, double> computeRanges(std::shared_ptr<GraphView> graphView, std::vector<std::shared_ptr<Tensor>> inputDataSet, bool scalingNodesOnly, bool useCuda)
std::unordered_map<std::shared_ptr<Node>, double> computeRanges(std::shared_ptr<GraphView> graphView, std::vector<std::shared_ptr<Tensor>> calibrationSet, bool scalingNodesOnly, bool useCuda)
{
std::unordered_map<std::shared_ptr<Node>, double> valueRanges;
std::set<std::shared_ptr<Node>> nodeSet = graphView->getNodes();
......@@ -733,7 +733,7 @@ std::unordered_map<std::shared_ptr<Node>, double> computeRanges(std::shared_ptr<
int it = 0;
for (std::shared_ptr<Tensor> sample : inputDataSet)
for (std::shared_ptr<Tensor> sample : calibrationSet)
{
//Log::info(" IT : {}", it++);
......@@ -1251,7 +1251,7 @@ static void printScalingFactors(std::shared_ptr<GraphView> graphView)
}
}
static void setupDataType(std::shared_ptr<GraphView> graphView, std::vector<std::shared_ptr<Tensor>> inputDataSet, DataType dataType)
static void setupDataType(std::shared_ptr<GraphView> graphView, std::vector<std::shared_ptr<Tensor>> calibrationSet, DataType dataType)
{
graphView->setDataType(dataType);
......@@ -1262,11 +1262,11 @@ static void setupDataType(std::shared_ptr<GraphView> graphView, std::vector<std:
inputTensor->setDataType(dataType);
}
for (auto tensor : inputDataSet)
for (auto tensor : calibrationSet)
tensor->setDataType(dataType);
}
void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits, std::vector<std::shared_ptr<Tensor>> inputDataSet, DataType targetType, Clipping clippingMode, bool noQuant, bool optimizeSigns, bool singleShift, bool useCuda, bool foldGraph, bool verbose)
void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits, std::vector<std::shared_ptr<Tensor>> calibrationSet, DataType targetType, Clipping clippingMode, bool noQuant, bool optimizeSigns, bool singleShift, bool useCuda, bool foldGraph, bool verbose)
{
Log::notice(" === QUANT PTQ 0.2.21 === ");
......@@ -1275,8 +1275,8 @@ void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits,
if (!checkArchitecture(graphView))
return;
DataType initialDataType = (inputDataSet[0])->dataType();
setupDataType(graphView, inputDataSet, DataType::Float64);
DataType initialDataType = (calibrationSet[0])->dataType();
setupDataType(graphView, calibrationSet, DataType::Float64);
Log::notice(" Preparing the network for the PTQ ... ");
prepareNetwork(graphView);
......@@ -1291,10 +1291,10 @@ void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits,
normalizeParameters(graphView);
Log::notice(" Computing the value ranges ...");
std::unordered_map<std::shared_ptr<Node>, double> valueRanges = computeRanges(graphView, inputDataSet, true, useCuda);
std::unordered_map<std::shared_ptr<Node>, double> valueRanges = computeRanges(graphView, calibrationSet, true, useCuda);
Log::notice(" Optimizing the clipping values ...");
valueRanges = adjustRanges(clippingMode, valueRanges, nbBits, graphView, inputDataSet, useCuda, verbose);
valueRanges = adjustRanges(clippingMode, valueRanges, nbBits, graphView, calibrationSet, useCuda, verbose);
Log::notice(" Normalizing the activations ...");
normalizeActivations(graphView, valueRanges);
......
......@@ -45,7 +45,7 @@ static void addAttr(std::shared_ptr<Aidge::Node> node, std::string attr, double
node->attributes()->addAttr("quantization.ptq." + attr, value);
}
// XXX TODO : rework this
// TODO : rework this
static void copyDynamicAttributes(std::shared_ptr<Aidge::Node> prevNode, std::shared_ptr<Aidge::Node> newNode)
{
if (hasAttr(prevNode, "isProducerQuantizer"))
......@@ -66,10 +66,9 @@ std::shared_ptr<Node> Quantizer(double scalingFactor, const std::string& name)
scalingFactorProducer->getOperator()->setOutput(0, scalingFactorTensor);
// TODO : the above could be replaced by :
/*
std::shared_ptr<Node> scalingFactorProducer = Producer(scalingFactorTensor);
scalingFactorProducer->addChild(mulNode, 0, 1);
*/
// std::shared_ptr<Node> scalingFactorProducer = Producer(scalingFactorTensor);
// scalingFactorProducer->addChild(mulNode, 0, 1);
// create the graphView ...
std::shared_ptr<GraphView> graphView = Sequential({mulNode});
......@@ -132,13 +131,13 @@ void appendRoundClip(std::shared_ptr<Node>& quantizer, double clipMin, double cl
// append round
auto roundNode = Round();
auto roundNode = Round(quantizer->name() + "_RoundQuant");
outputNode->addChild(roundNode, 0, 0);
microGraph->add(roundNode);
// append clip
auto clipNode = Clip();
auto clipNode = Clip(quantizer->name() + "_ClipQuant");
auto minTensor = std::make_shared<Tensor>(clipMin);
auto minNode = Producer(minTensor);
......@@ -310,10 +309,10 @@ void replaceScalingWithBitShift(std::shared_ptr<Node>& quantizer)
// create the replacement bit-shift nodes
auto bitShiftNode = BitShift(bitShiftDirection, bitShiftRounding, ""); // XXX add a name !!!
auto bitShiftNode = BitShift(bitShiftDirection, bitShiftRounding, quantizer->name() + "_BitShiftQuant");
auto bitShiftTensor = std::make_shared<Tensor>(Array1D<int, 1> {bitShiftAmount});
auto bitShiftProducer = Producer(bitShiftTensor, ""); // XXX add a name !!!
auto bitShiftProducer = Producer(bitShiftTensor, "bitShiftAmount");
bitShiftProducer->addChild(bitShiftNode, 0, 1);
// edit the micrograph
......@@ -349,8 +348,8 @@ void castQuantizerIOs(std::shared_ptr<Node>& quantizer, Aidge::DataType external
auto mulOp = std::static_pointer_cast<OperatorTensor> (mulNode->getOperator());
auto internalType = mulOp->getOutput(0)->dataType();
auto castInputNode = Cast(internalType, ""); // add a name !
auto castOutputNode = Cast(externalType, ""); // add a name !
auto castInputNode = Cast(internalType, quantizer->name() + "_CastIn");
auto castOutputNode = Cast(externalType, quantizer->name() + "_CastOut");
microGraph = Sequential({castInputNode, microGraph, castOutputNode});
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment