Skip to content
Snippets Groups Projects
Commit 7307439f authored by Benjamin Halimi's avatar Benjamin Halimi
Browse files

refactor the LSQ code

parent e6d14185
No related branches found
No related tags found
2 merge requests!54Update 0.3.1 -> 0.4.0,!36Global Quantization Improvements
......@@ -29,7 +29,6 @@ namespace QuantLSQ {
*/
void setupQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits);
void devLSQ(std::shared_ptr<Tensor> tensor);
} // namespace QuantLSQ
} // namespace Aidge
......
......@@ -23,11 +23,6 @@ void init_QAT_LSQ(py::module &m) {
auto mQuantLSQ = m.def_submodule("lsq");
mQuantLSQ.def("setup_quantizers", &QuantLSQ::setupQuantizers, py::arg("network"), py::arg("nb_bits"));
mQuantLSQ.def("dev_lsq", &QuantLSQ::devLSQ, py::arg("tensor"));
//mQuantLSQ.def("insert_and_init_quantizers", &QuantLSQ::insertAndInitQuantizers, py::arg("network"), py::arg("nb_bits"), py::arg("calibration_data"));
}
} // namespace Aidge
......@@ -21,25 +21,50 @@
#include "aidge/graph/Matching.hpp"
#include "aidge/recipes/QuantRecipes.hpp"
namespace Aidge {
namespace Aidge
{
// Compute the mean of the absolute values of a tensor and return it
// as a plain host-side float (result is cast to Float32 on the "cpu" backend).
static float getTensorAbsMean(std::shared_ptr<Tensor> tensor)
{
    auto absMeanTensor = tensor->abs().mean();
    // refCastFrom may need scratch storage when a cast/copy is required.
    std::shared_ptr<Tensor> fallback;
    const Tensor& hostTensor = absMeanTensor.refCastFrom(fallback, DataType::Float32, "cpu");
    return hostTensor.get<float>(0);
}
// Compute the (population) standard deviation of a tensor's values and
// return it as a plain host-side float.
static float getTensorStd(std::shared_ptr<Tensor> tensor)
{
    // Work on a local copy of the tensor, as the original code did.
    auto localCopy = (*tensor);
    auto centered = localCopy - localCopy.mean();
    auto varianceTensor = (centered * centered).mean();

    // Bring the scalar variance back as Float32 on the "cpu" backend.
    std::shared_ptr<Tensor> fallback;
    auto hostVariance = varianceTensor.refCastFrom(fallback, DataType::Float32, "cpu");
    return std::sqrt(hostVariance.get<float>(0));
}
// INIT THE STEP SIZE OF A QUANTIZER NODE
static bool initStepSize(std::shared_ptr<Node> quantizer)
{
const auto quantizerOp = std::static_pointer_cast<LSQ_Op>(quantizer->getOperator());
float inputAbsMean = getTensorAbsMean(quantizerOp->getInput(0));
// This formula is the one proposed in the paper ...
// float inputAbsMean = getTensorAbsMean(quantizerOp->getInput(0));
// float stepSize = 2.0f * (inputAbsMean / std::sqrt(quantizerOp->range().second));
float stepSize = 2.0f * (inputAbsMean / std::sqrt(quantizerOp->range().second));
// .. but this formula seems to work better !!!
float inputStd = getTensorStd(quantizerOp->getInput(0));
float stepSize = 8.0f * (inputStd / (quantizerOp->range().second));
auto stepSizeTensor = std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}}));
......@@ -56,8 +81,6 @@ static bool initStepSize(std::shared_ptr<Node> quantizer)
return false;
}
// INPUT QUANTIZERS INSERTION
static void setupInputQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits)
{
const auto matches = SinglePassGraphMatching(graphView).match("(Conv2D#|FC#)");
......@@ -137,207 +160,9 @@ static void setupParamQuantizers(std::shared_ptr<GraphView> graphView, size_t nb
// Entry point of the LSQ setup: insert and initialize the LSQ quantizer
// nodes of a graph, for both activations (inputs) and parameters (weights).
// @param graphView the graph to quantize.
// @param nbBits    target quantization bit-width, used to derive the quantizer ranges.
void QuantLSQ::setupQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits)
{
// Make node names unique/valid first, since the quantizer insertion derives
// new node names from the existing ones.
sanitizeNodeNames(graphView);
setupInputQuantizers(graphView, nbBits);
setupParamQuantizers(graphView, nbBits);
}
// Development/debug helper: print the mean value of the given tensor.
void QuantLSQ::devLSQ(std::shared_ptr<Tensor> tensor)
{
    const float meanValue = tensor->mean().get<float>(0);
    std::cout << " MEAN = " << meanValue << std::endl;
}
}
/*
namespace Aidge {
static void insertQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits, float stepSize)
{
const auto matches = SinglePassGraphMatching(graphView).match("(Conv2D#|FC#)");
for (const auto& match : matches)
{
auto linearNode = match.graph->rootNode();
std::pair<int, int> signedRange = {-std::pow(2, nbBits - 1), std::pow(2, nbBits - 1) - 1};
std::pair<int, int> unsignedRange = {0, std::pow(2, nbBits) - 1};
// INPUT QUANTIZERS INSERTION
// TODO : double check this, and use createUniqueName()
auto inputQuantizerName = makeUniqueName(linearNode->name() + "_lsq_i", graphView);
auto inputQuantizerNode = LSQ(signedRange, inputQuantizerName);
// Set the step size
auto inputStepSizeOp = inputQuantizerNode->getParent(1)->getOperator();
auto inputStepSizeTensor = std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}}));
inputStepSizeOp->setOutput(0, inputStepSizeTensor);
// Absorb the ReLU when possible ...
// XXX is this safe ???
bool nodeHasParent = static_cast<bool> (linearNode->getParents()[0]);
// bool nodeHasParent = (linearNode->getParents().size() != 0);
if (nodeHasParent) {
auto parentNode = linearNode->getParents()[0];
if (parentNode->type() == "ReLU") {
auto inputQuantizerOp = std::static_pointer_cast<LSQ_Op> (inputQuantizerNode->getOperator());
inputQuantizerOp->range() = unsignedRange;
graphView->replace({parentNode}, {});
}
}
// We need to handle the case where the linear node is the first one ...
if (nodeHasParent) {
graphView->insertParent(linearNode, inputQuantizerNode, 0, 0, 0);
} else {
inputQuantizerNode->addChild(graphView);
graphView->add(inputQuantizerNode);
}
// PARAM QUANTIZERS INSERTION
// TODO : double check this, and use createUniqueName()
auto paramQuantizerName = makeUniqueName(linearNode->name() + "_lsq_p", graphView);
auto paramQuantizerNode = LSQ(signedRange, paramQuantizerName);
graphView->insertParent(linearNode, paramQuantizerNode, 1, 0, 0);
// Set the step size
auto paramStepSizeOp = paramQuantizerNode->getParent(1)->getOperator();
auto paramStepSizeTensor = std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}}));
paramStepSizeOp->setOutput(0, paramStepSizeTensor);
}
}
static float getTensorAbsMean(std::shared_ptr<Tensor> tensor)
{
auto backend = tensor->backend();
if (backend == "cuda")
tensor->setBackend("cpu");
float acc = 0;
float* castedTensor = static_cast<float *> (tensor->getImpl()->rawPtr());
for(std::size_t i = 0; i < tensor->size(); i++)
acc += std::abs(castedTensor[i]);
acc /= static_cast<float> (tensor->size());
if (backend == "cuda")
tensor->setBackend("cuda");
return acc;
}
static std::map<std::string, float> collectInputStats(std::shared_ptr<GraphView> graphView, std::shared_ptr<Tensor> calibrationData, bool useCuda)
{
// Propagate the calibration tensor
SequentialScheduler scheduler(graphView);
scheduler.resetScheduling();
scheduler.forward(true, {calibrationData});
// Store the input tensor statistics
if (useCuda)
graphView->setBackend("cpu");
std::map<std::string, float> inputStats;
for (auto node : graphView->getNodes())
{
if (node->type() == "FC" || node->type() == "Conv2D") // TODO: use graph matching !!!
{
const auto op = std::static_pointer_cast<LSQ_Op>(node->getOperator());
float inputAbsMean = getTensorAbsMean(op->getInput(0));
inputStats.insert(std::make_pair(node->name(), inputAbsMean));
std::cout << node->name() << " -> " << inputAbsMean << std::endl;
}
}
if (useCuda)
graphView->setBackend("cuda");
return inputStats;
}
static std::map<std::string, float> collectParamStats(std::shared_ptr<GraphView> graphView, bool useCuda)
{
if (useCuda)
graphView->setBackend("cpu");
std::map<std::string, float> paramStats;
for (auto node : graphView->getNodes())
{
if (node->type() == "FC" || node->type() == "Conv2D") // TODO: use graph matching !!!
{
const auto op = std::static_pointer_cast<LSQ_Op>(node->getOperator());
float paramAbsMean = getTensorAbsMean(op->getInput(1));
paramStats.insert(std::make_pair(node->name(), paramAbsMean));
std::cout << node->name() << " -> " << paramAbsMean << std::endl;
}
}
if (useCuda)
graphView->setBackend("cuda");
return paramStats;
}
static void adjustQuantizersStepSizes(std::shared_ptr<GraphView> graphView, std::map<std::string, float> inputStats, std::map<std::string, float> paramStats)
{
const auto matches = SinglePassGraphMatching(graphView).match("(Conv2D#|FC#)");
for (const auto& match : matches)
{
auto linearNode = match.graph->rootNode();
// INPUT QUANTIZERS STEP-SIZES
auto inputQuantNode = linearNode->getParent(0);
auto inputQuantOp = std::static_pointer_cast<LSQ_Op>(inputQuantNode->getOperator());
float absMean = inputStats[linearNode->name()];
float stepSize = 2.0f * (absMean / std::sqrt(inputQuantOp->range().second));
auto inputStepSizeOp = inputQuantNode->getParent(1)->getOperator();
// XXX inputStepSizeOp->setOutput(0, std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}})));
auto inputStepSizeTensor = std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}}));
inputStepSizeOp->setOutput(0, inputStepSizeTensor);
// PARAM QUANTIZERS STEP-SIZES
auto paramQuantNode = linearNode->getParent(1);
auto paramQuantOp = std::static_pointer_cast<LSQ_Op>(paramQuantNode->getOperator());
absMean = paramStats[linearNode->name()];
stepSize = 2.0f * (absMean / std::sqrt(paramQuantOp->range().second));
auto paramStepSizeOp = paramQuantNode->getParent(1)->getOperator();
// XXX paramStepSizeOp->setOutput(0, std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}})));
auto paramStepSizeTensor = std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}}));
paramStepSizeOp->setOutput(0, paramStepSizeTensor);
}
}
void QuantLSQ::insertAndInitQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits, std::shared_ptr<Tensor> calibrationData)
{
bool useCuda = (calibrationData->backend() == "cuda");
// Collect the tensor statisics
auto inputStats = collectInputStats(graphView, calibrationData, useCuda);
auto paramStats = collectParamStats(graphView, useCuda);
// Insert the quantizers
insertQuantizers(graphView, nbBits, 1.0);
// Adjust the quantizers step-sizes
adjustQuantizersStepSizes(graphView, inputStats, paramStats);
}
}
*/
\ No newline at end of file
}
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment