Skip to content
Snippets Groups Projects

Quantization Polish

Merged Benjamin Halimi requested to merge bhalimi/aidge_quantization:DevPTQ into main
5 files
+ 409
159
Compare changes
  • Side-by-side
  • Inline
Files
5
+ 61
4
@@ -23,15 +23,72 @@
namespace Aidge {
// NOTE(review): clearBiases is declared again, with its documentation, further
// down in this namespace; this earlier declaration has the same signature.
void clearBiases(std::shared_ptr<GraphView> graphView);
// NOTE(review): undocumented here. Presumably inserts residual nodes into the
// GraphView -- confirm against the implementation.
void insertResidualNodes(std::shared_ptr<GraphView> graphView);
/**
* @brief Determine whether an input GraphView can be quantized or not.
* @param graphView The GraphView to be checked.
* @return True if the GraphView can be quantized, else false.
*/
bool checkArchitecture(std::shared_ptr<GraphView> graphView);
/**
* @brief Insert a scaling node after each affine node of the GraphView.
* @param graphView The GraphView containing the affine nodes.
*/
void insertScalingNodes(std::shared_ptr<GraphView> graphView);
/**
* @brief Normalize the parameters of each parametrized node, so that they fit in the [-1:1] range.
* @param graphView The GraphView containing the parametrized nodes.
*/
void normalizeParameters(std::shared_ptr<GraphView> graphView);
/**
* @brief Compute the value ranges of every affine node output, given an input dataset.
* @param graphView The GraphView containing the affine nodes, on which the inferences are performed.
* @param inputDataSet The input dataset, consisting of a vector of input samples.
* @return A map associating each affine node name to its corresponding output range.
*/
std::map<std::string, float> computeRanges(std::shared_ptr<GraphView> graphView, std::vector<std::shared_ptr<Tensor>> inputDataSet);
/**
* @brief Normalize the activations of each affine node so that their range becomes equal to one.
* This is done by reconfiguring the scaling nodes, as well as rescaling the weights and biases tensors.
* @param graphView The GraphView containing the affine nodes.
* @param inputDataSet The input dataset on which the value ranges are computed.
*/
void normalizeActivations(std::shared_ptr<GraphView> graphView, std::vector<std::shared_ptr<Tensor>> inputDataSet);
// NOTE(review): the two declarations below take nbBits as std::uint8_t, while
// documented declarations with the same names and an int nbBits follow further
// down. As written they are distinct overloads; they look like the pre-change
// versions of the same functions -- confirm whether both are meant to exist.
void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits);
void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits, std::vector<std::shared_ptr<Tensor>> inputDataSet);
/**
* @brief Quantize an already normalized (in terms of parameters and activations) network.
* @param graphView The GraphView to be quantized.
* @param nbBits The desired number of bits of the quantization.
*/
void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, int nbBits);
/**
* @brief Main quantization routine. Performs every step of the quantization pipeline.
* @param graphView The GraphView to be quantized.
* @param nbBits The desired number of bits of the quantization.
* @param inputDataSet The input dataset on which the value ranges are computed.
*/
void quantizeNetwork(std::shared_ptr<GraphView> graphView, int nbBits, std::vector<std::shared_ptr<Tensor>> inputDataSet);
/**
* @brief Compute the weight ranges of every affine node. Provided for debugging purposes.
* @param graphView The GraphView containing the affine nodes.
* @return A map associating each affine node name to its corresponding weight range.
*/
std::map<std::string, float> getWeightRanges(std::shared_ptr<GraphView> graphView);
/**
* @brief Clear the affine nodes biases. Provided for debugging purposes.
* @param graphView The GraphView containing the affine nodes.
*/
void clearBiases(std::shared_ptr<GraphView> graphView);
// NOTE(review): undocumented here. The name suggests a development/debug entry
// point for the PTQ code -- confirm its purpose and whether it belongs in the
// public header.
void devPTQ(std::shared_ptr<GraphView> graphView);
}
#endif /* AIDGE_QUANTIZATION_QUANTPTQ_H_ */
Loading