Skip to content
Snippets Groups Projects
Commit 7307439f authored by Benjamin Halimi's avatar Benjamin Halimi
Browse files

refactor the LSQ code

parent e6d14185
No related branches found
No related tags found
2 merge requests!54Update 0.3.1 -> 0.4.0,!36Global Quantization Improvements
...@@ -29,7 +29,6 @@ namespace QuantLSQ { ...@@ -29,7 +29,6 @@ namespace QuantLSQ {
*/ */
void setupQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits); void setupQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits);
void devLSQ(std::shared_ptr<Tensor> tensor);
} // namespace QuantLSQ } // namespace QuantLSQ
} // namespace Aidge } // namespace Aidge
......
...@@ -23,11 +23,6 @@ void init_QAT_LSQ(py::module &m) { ...@@ -23,11 +23,6 @@ void init_QAT_LSQ(py::module &m) {
auto mQuantLSQ = m.def_submodule("lsq"); auto mQuantLSQ = m.def_submodule("lsq");
mQuantLSQ.def("setup_quantizers", &QuantLSQ::setupQuantizers, py::arg("network"), py::arg("nb_bits")); mQuantLSQ.def("setup_quantizers", &QuantLSQ::setupQuantizers, py::arg("network"), py::arg("nb_bits"));
mQuantLSQ.def("dev_lsq", &QuantLSQ::devLSQ, py::arg("tensor"));
//mQuantLSQ.def("insert_and_init_quantizers", &QuantLSQ::insertAndInitQuantizers, py::arg("network"), py::arg("nb_bits"), py::arg("calibration_data"));
} }
} // namespace Aidge } // namespace Aidge
...@@ -21,25 +21,50 @@ ...@@ -21,25 +21,50 @@
#include "aidge/graph/Matching.hpp" #include "aidge/graph/Matching.hpp"
#include "aidge/recipes/QuantRecipes.hpp" #include "aidge/recipes/QuantRecipes.hpp"
namespace Aidge {
namespace Aidge
{
static float getTensorAbsMean(std::shared_ptr<Tensor> tensor) static float getTensorAbsMean(std::shared_ptr<Tensor> tensor)
{ {
//std::cout << " GET TENSOR ABS MEAN " << std::endl;
auto valueTensor = (*tensor).abs().mean(); auto valueTensor = (*tensor).abs().mean();
std::shared_ptr<Tensor> fallback; std::shared_ptr<Tensor> fallback;
const Tensor& localTensor = valueTensor.refCastFrom(fallback, DataType::Float32, "cpu"); const Tensor& localTensor = valueTensor.refCastFrom(fallback, DataType::Float32, "cpu");
return localTensor.get<float>(0); return localTensor.get<float>(0);
} }
// Compute the (population) standard deviation of all values of a tensor.
// The result is brought back to the host as a Float32 scalar.
static float getTensorStd(std::shared_ptr<Tensor> tensor)
{
    // Center the values around their mean, then average the squares
    // to obtain the variance.
    auto centeredTensor = (*tensor) - (*tensor).mean();
    auto varianceTensor = (centeredTensor * centeredTensor).mean();

    // Pull the scalar variance back to a CPU Float32 tensor so it can be read.
    std::shared_ptr<Tensor> fallback;
    auto hostTensor = varianceTensor.refCastFrom(fallback, DataType::Float32, "cpu");

    return std::sqrt(hostTensor.get<float>(0));
}
// INIT THE STEP SIZE OF A QUANTIZER NODE // INIT THE STEP SIZE OF A QUANTIZER NODE
static bool initStepSize(std::shared_ptr<Node> quantizer) static bool initStepSize(std::shared_ptr<Node> quantizer)
{ {
const auto quantizerOp = std::static_pointer_cast<LSQ_Op>(quantizer->getOperator()); const auto quantizerOp = std::static_pointer_cast<LSQ_Op>(quantizer->getOperator());
float inputAbsMean = getTensorAbsMean(quantizerOp->getInput(0)); // This formula is the one proposed in the paper ...
// float inputAbsMean = getTensorAbsMean(quantizerOp->getInput(0));
// float stepSize = 2.0f * (inputAbsMean / std::sqrt(quantizerOp->range().second));
float stepSize = 2.0f * (inputAbsMean / std::sqrt(quantizerOp->range().second)); // .. but this formula seems to work better !!!
float inputStd = getTensorStd(quantizerOp->getInput(0));
float stepSize = 8.0f * (inputStd / (quantizerOp->range().second));
auto stepSizeTensor = std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}})); auto stepSizeTensor = std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}}));
...@@ -56,8 +81,6 @@ static bool initStepSize(std::shared_ptr<Node> quantizer) ...@@ -56,8 +81,6 @@ static bool initStepSize(std::shared_ptr<Node> quantizer)
return false; return false;
} }
// INPUT QUANTIZERS INSERTION
static void setupInputQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits) static void setupInputQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits)
{ {
const auto matches = SinglePassGraphMatching(graphView).match("(Conv2D#|FC#)"); const auto matches = SinglePassGraphMatching(graphView).match("(Conv2D#|FC#)");
...@@ -137,207 +160,9 @@ static void setupParamQuantizers(std::shared_ptr<GraphView> graphView, size_t nb ...@@ -137,207 +160,9 @@ static void setupParamQuantizers(std::shared_ptr<GraphView> graphView, size_t nb
void QuantLSQ::setupQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits) void QuantLSQ::setupQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits)
{ {
sanitizeNodeNames(graphView);
setupInputQuantizers(graphView, nbBits); setupInputQuantizers(graphView, nbBits);
setupParamQuantizers(graphView, nbBits); setupParamQuantizers(graphView, nbBits);
} }
void QuantLSQ::devLSQ(std::shared_ptr<Tensor> tensor) }
{ \ No newline at end of file
float mean = (tensor->mean()).get<float> (0);
std::cout << " MEAN = " << mean << std::endl;
}
}
/*
namespace Aidge {
// Insert one input quantizer and one parameter quantizer in front of every
// Conv2D / FC node of the graph, all initialized with the same step size.
// Note: this whole routine is part of the commented-out legacy implementation.
static void insertQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits, float stepSize)
{
const auto matches = SinglePassGraphMatching(graphView).match("(Conv2D#|FC#)");
for (const auto& match : matches)
{
auto linearNode = match.graph->rootNode();
// Integer code books for nbBits: signed for inputs/params, unsigned after a ReLU.
std::pair<int, int> signedRange = {-std::pow(2, nbBits - 1), std::pow(2, nbBits - 1) - 1};
std::pair<int, int> unsignedRange = {0, std::pow(2, nbBits) - 1};
// INPUT QUANTIZERS INSERTION
// TODO : double check this, and use createUniqueName()
auto inputQuantizerName = makeUniqueName(linearNode->name() + "_lsq_i", graphView);
auto inputQuantizerNode = LSQ(signedRange, inputQuantizerName);
// Set the step size
// The step size lives in the quantizer's second parent (a producer node);
// overwrite that producer's output tensor with the requested initial value.
auto inputStepSizeOp = inputQuantizerNode->getParent(1)->getOperator();
auto inputStepSizeTensor = std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}}));
inputStepSizeOp->setOutput(0, inputStepSizeTensor);
// Absorb the ReLU when possible ...
// If the node is fed by a ReLU, drop the ReLU and switch the input
// quantizer to the unsigned range instead (the clamp subsumes the ReLU).
// XXX is this safe ???
bool nodeHasParent = static_cast<bool> (linearNode->getParents()[0]);
// bool nodeHasParent = (linearNode->getParents().size() != 0);
if (nodeHasParent) {
auto parentNode = linearNode->getParents()[0];
if (parentNode->type() == "ReLU") {
auto inputQuantizerOp = std::static_pointer_cast<LSQ_Op> (inputQuantizerNode->getOperator());
inputQuantizerOp->range() = unsignedRange;
graphView->replace({parentNode}, {});
}
}
// We need to handle the case where the linear node is the first one ...
// A graph input has no parent to splice behind, so the quantizer is
// prepended as a new graph entry point instead.
if (nodeHasParent) {
graphView->insertParent(linearNode, inputQuantizerNode, 0, 0, 0);
} else {
inputQuantizerNode->addChild(graphView);
graphView->add(inputQuantizerNode);
}
// PARAM QUANTIZERS INSERTION
// Same pattern for the weights: splice a quantizer on input #1.
// TODO : double check this, and use createUniqueName()
auto paramQuantizerName = makeUniqueName(linearNode->name() + "_lsq_p", graphView);
auto paramQuantizerNode = LSQ(signedRange, paramQuantizerName);
graphView->insertParent(linearNode, paramQuantizerNode, 1, 0, 0);
// Set the step size
auto paramStepSizeOp = paramQuantizerNode->getParent(1)->getOperator();
auto paramStepSizeTensor = std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}}));
paramStepSizeOp->setOutput(0, paramStepSizeTensor);
}
}
// Mean of the absolute values of a tensor, computed on the raw CPU buffer.
// If the tensor lives on CUDA it is temporarily moved to CPU and restored after.
static float getTensorAbsMean(std::shared_ptr<Tensor> tensor)
{
    // Remember where the data lives so it can be moved back afterwards.
    auto originalBackend = tensor->backend();
    if (originalBackend == "cuda")
        tensor->setBackend("cpu");

    // Accumulate |x| over the raw buffer (assumed Float32 — see the cast).
    float* data = static_cast<float *> (tensor->getImpl()->rawPtr());
    float sum = 0;
    for (std::size_t i = 0; i < tensor->size(); ++i)
        sum += std::abs(data[i]);
    float absMean = sum / static_cast<float> (tensor->size());

    // Restore the original backend if we moved the tensor.
    if (originalBackend == "cuda")
        tensor->setBackend("cuda");

    return absMean;
}
// Run one forward pass over the calibration data, then record the mean
// absolute value of each Conv2D / FC input tensor, keyed by node name.
// Part of the commented-out legacy implementation.
static std::map<std::string, float> collectInputStats(std::shared_ptr<GraphView> graphView, std::shared_ptr<Tensor> calibrationData, bool useCuda)
{
// Propagate the calibration tensor
SequentialScheduler scheduler(graphView);
scheduler.resetScheduling();
scheduler.forward(true, {calibrationData});
// Store the input tensor statistics
// The stats helper reads raw CPU buffers, so move the graph off CUDA first.
if (useCuda)
graphView->setBackend("cpu");
std::map<std::string, float> inputStats;
for (auto node : graphView->getNodes())
{
if (node->type() == "FC" || node->type() == "Conv2D") // TODO: use graph matching !!!
{
// NOTE(review): the node is an FC/Conv operator, not an LSQ one — this
// cast to LSQ_Op looks wrong, even though only getInput(0) is used; verify.
const auto op = std::static_pointer_cast<LSQ_Op>(node->getOperator());
float inputAbsMean = getTensorAbsMean(op->getInput(0));
inputStats.insert(std::make_pair(node->name(), inputAbsMean));
std::cout << node->name() << " -> " << inputAbsMean << std::endl;
}
}
// Put the graph back on its original backend.
if (useCuda)
graphView->setBackend("cuda");
return inputStats;
}
// Record the mean absolute value of each Conv2D / FC parameter tensor
// (input #1, i.e. the weights), keyed by node name.
static std::map<std::string, float> collectParamStats(std::shared_ptr<GraphView> graphView, bool useCuda)
{
    // The stats helper reads raw CPU buffers, so move the graph off CUDA first.
    if (useCuda)
        graphView->setBackend("cpu");

    std::map<std::string, float> paramStats;
    for (auto node : graphView->getNodes())
    {
        // TODO: use graph matching !!!
        if (node->type() != "FC" && node->type() != "Conv2D")
            continue;

        const auto op = std::static_pointer_cast<LSQ_Op>(node->getOperator());
        float paramAbsMean = getTensorAbsMean(op->getInput(1));
        paramStats.emplace(node->name(), paramAbsMean);
        std::cout << node->name() << " -> " << paramAbsMean << std::endl;
    }

    // Put the graph back on its original backend.
    if (useCuda)
        graphView->setBackend("cuda");

    return paramStats;
}
// Recompute each quantizer's step size from the collected statistics,
// using the LSQ paper's initialization: s = 2 * E[|x|] / sqrt(Qmax).
// Assumes insertQuantizers() already ran: parent #0 of a Conv2D/FC node is
// its input quantizer and parent #1 its parameter quantizer.
static void adjustQuantizersStepSizes(std::shared_ptr<GraphView> graphView, std::map<std::string, float> inputStats, std::map<std::string, float> paramStats)
{
const auto matches = SinglePassGraphMatching(graphView).match("(Conv2D#|FC#)");
for (const auto& match : matches)
{
auto linearNode = match.graph->rootNode();
// INPUT QUANTIZERS STEP-SIZES
auto inputQuantNode = linearNode->getParent(0);
auto inputQuantOp = std::static_pointer_cast<LSQ_Op>(inputQuantNode->getOperator());
float absMean = inputStats[linearNode->name()];
float stepSize = 2.0f * (absMean / std::sqrt(inputQuantOp->range().second));
// The step size lives in the quantizer's second parent (a producer node).
auto inputStepSizeOp = inputQuantNode->getParent(1)->getOperator();
// XXX inputStepSizeOp->setOutput(0, std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}})));
auto inputStepSizeTensor = std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}}));
inputStepSizeOp->setOutput(0, inputStepSizeTensor);
// PARAM QUANTIZERS STEP-SIZES
// Same formula, fed with the parameter statistics.
auto paramQuantNode = linearNode->getParent(1);
auto paramQuantOp = std::static_pointer_cast<LSQ_Op>(paramQuantNode->getOperator());
absMean = paramStats[linearNode->name()];
stepSize = 2.0f * (absMean / std::sqrt(paramQuantOp->range().second));
auto paramStepSizeOp = paramQuantNode->getParent(1)->getOperator();
// XXX paramStepSizeOp->setOutput(0, std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}})));
auto paramStepSizeTensor = std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}}));
paramStepSizeOp->setOutput(0, paramStepSizeTensor);
}
}
// Full legacy LSQ setup: gather calibration statistics, insert the quantizer
// nodes, then set their step sizes from the statistics.
void QuantLSQ::insertAndInitQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits, std::shared_ptr<Tensor> calibrationData)
{
    // The statistics helpers need CPU buffers; remember where the data lives.
    bool useCuda = (calibrationData->backend() == "cuda");

    // 1) Collect per-node statistics for inputs and parameters.
    auto inputStats = collectInputStats(graphView, calibrationData, useCuda);
    auto paramStats = collectParamStats(graphView, useCuda);

    // 2) Insert the quantizers with a placeholder step size of 1.0 ...
    insertQuantizers(graphView, nbBits, 1.0);

    // 3) ... then overwrite the step sizes from the collected statistics.
    adjustQuantizersStepSizes(graphView, inputStats, paramStats);
}
}
*/
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment