From 64fbc46a258fcb5689f46d20da7c542bec9be1da Mon Sep 17 00:00:00 2001
From: cmoineau <cyril.moineau@cea.fr>
Date: Mon, 23 Oct 2023 07:58:05 +0000
Subject: [PATCH] Update hook, add comment and fix minor compilation issues.

---
 aidge_quantization/__init__.py            |   1 +
 aidge_quantization/unit_tests/test_ptq.py |  20 +-
 include/aidge/QuantPTQ.hpp                |   1 -
 python_binding/pybind_QuantPTQ.cpp        |  17 +-
 src/QuantPTQ.cpp                          | 324 +++++-----------------
 unit_tests/hook/Test_execTime.cpp         |   8 +-
 unit_tests/hook/Test_outputRange.cpp      |   8 +-
 7 files changed, 84 insertions(+), 295 deletions(-)
 create mode 100644 aidge_quantization/__init__.py

diff --git a/aidge_quantization/__init__.py b/aidge_quantization/__init__.py
new file mode 100644
index 0000000..b00fae1
--- /dev/null
+++ b/aidge_quantization/__init__.py
@@ -0,0 +1 @@
+from aidge_quantization.aidge_quantization import *  # import the .so module generated by PyBind
diff --git a/aidge_quantization/unit_tests/test_ptq.py b/aidge_quantization/unit_tests/test_ptq.py
index e2e5180..519fb10 100644
--- a/aidge_quantization/unit_tests/test_ptq.py
+++ b/aidge_quantization/unit_tests/test_ptq.py
@@ -2,12 +2,9 @@ import aidge_core
 import aidge_backend_cpu
 import aidge_onnx
+import aidge_quantization
 import numpy as np
 import matplotlib.pyplot as plt
-from jinja2 import Environment, FileSystemLoader
-import os
-import pathlib
-import onnx
 
 model = aidge_onnx.load_onnx("MNIST_model/LeNet_MNIST.onnx")
 
@@ -29,18 +26,7 @@ input_node.get_operator().set_backend("cpu")
 ## Link Producer to the Graph
 input_node.add_child(model)
 
-nodesRegexes = {}
-nodesRegexes["Flatten"] = aidge_core.NodeRegex("Flatten")
-
-# Graph Regex
-graphRegex = ["Flatten;"]
-
-graphMatching = aidge_core.GRegex(nodesRegexes, graphRegex)
-all_match = graphMatching.match(model)
-print('Number of match : ', all_match.get_nb_match())
-
-for mn in all_match.get_match_nodes():
-    aidge_core.remove_flatten(mn)
+aidge_core.remove_flatten(model)
 
 model.save("my_supported_LeNet")
 
@@ -73,4 +59,4 @@ for outNode in model.get_output_nodes():
 
 #add PTQ part for testing
 #created ordered graph, as in cpp and do
-#quantizeNetwork(ordered_graph_view, 8, verbose);
\ No newline at end of file
+aidge_quantization.quantize_network(scheduler.get_static_scheduling(), 8, True)
diff --git a/include/aidge/QuantPTQ.hpp b/include/aidge/QuantPTQ.hpp
index e1efa9a..21c78f6 100644
--- a/include/aidge/QuantPTQ.hpp
+++ b/include/aidge/QuantPTQ.hpp
@@ -32,7 +32,6 @@ namespace Aidge{
 float getCellThreshold(std::shared_ptr<Node> node);
 float getMaxParentsScaling(std::shared_ptr<Node> node);
 void rescaleParentsToScaling(std::shared_ptr<Node> node, std::unordered_map<std::string, long double>& scalingForCells, long double scaling);
-long double quantizeFreeParams(std::shared_ptr<Node> node, std::size_t nbBits, std::unordered_map<std::string, long double> biasScalings, bool verbose);
 long double quantizeActivation(std::shared_ptr<Node> node, std::size_t nbBits, std::unordered_map<std::string, long double> biasScalings, std::unordered_map<std::string, long double> activationScalings, bool verbose);
 void quantizeNetwork(std::vector<std::shared_ptr<Node>> orderedGraphView, std::size_t nbBits, bool verbose);
diff --git a/python_binding/pybind_QuantPTQ.cpp b/python_binding/pybind_QuantPTQ.cpp
index c89605e..3722a86 100644
--- a/python_binding/pybind_QuantPTQ.cpp
+++ b/python_binding/pybind_QuantPTQ.cpp
@@ -15,20 +15,23 @@
 #include <string>
 
 #include "aidge/QuantPTQ.hpp"
+#include "aidge/hook/hook.hpp"
 
 namespace py = pybind11;
 
 namespace Aidge {
 
 void init_QuantPTQ(py::module &m) {
     m.def("quantize_network", &quantizeNetwork, py::arg("ordered_graph_view"), py::arg("nb_bits")=8, py::arg("verbose")=false,
     R"mydelimiter(
-    void quantizeNetwork(std::vector<std::shared_ptr<Node>> orderedGraphView, std::size_t nbBits, bool verbose){
-    Parameters
-    ----------
-    std::vector<std::shared_ptr<Node>> orderedGraphView: Ordered Graph View,
-    std::size_t nbBits : number of bits
-    bool verbose : print debug messages
+    Quantize a network over a given number of bits, using post-training quantization (PTQ).
+
+    :param ordered_graph_view: Ordered (scheduled) list of nodes on which to apply quantization
+    :type ordered_graph_view: List[:py:class:`aidge_core.Node`]
+    :param nb_bits: Number of bits to quantize on, default=8
+    :type nb_bits: int, optional
+    :param verbose: if True, log information, default=False
+    :type verbose: bool, optional
     )mydelimiter");
-
+    }
 
 PYBIND11_MODULE(aidge_quantization, m) {
diff --git a/src/QuantPTQ.cpp b/src/QuantPTQ.cpp
index 7704183..528f2b5 100644
--- a/src/QuantPTQ.cpp
+++ b/src/QuantPTQ.cpp
@@ -18,6 +18,7 @@
 #include <cmath>
 #include <cstdint>
 #include <unordered_map>
+#include <iostream>
 
 #include "aidge/QuantPTQ.hpp"
 
 //using namespace Aidge;
@@ -29,31 +30,23 @@
 //namespace Aidge_HELPER{
 namespace Aidge{
 
-//getMaxParentsScaling
-long double getMaxParentsScaling(std::shared_ptr<Node> node, 
+long double getMaxParentsScaling(std::shared_ptr<Node> node,
                                  std::unordered_map<std::string, long double>& scalingForCells){
     long double maxParentsScaling = 0.0;
-    //std::cout << " inside getMaxParentsScaling " << std::endl;
-    const std::vector<std::shared_ptr<Node>> parentsNodes = node->getParents(); 
-    //std::cout << "after parentsNodes " << std::endl;
-    //std::cout << "size of parents nodes = " << parentsNodes.size() << std::endl;
+    const std::vector<std::shared_ptr<Node>> parentsNodes = node->getParents();
     for(const std::shared_ptr<Node>& parentNode: parentsNodes) {
-        //std::cout << " parent node type = " << parentNode->type() << std::endl;
-        //std::cout << " parent node name = " << parentNode->name() << std::endl;
-        const long double parentScaling = (parentNode->type()=="Producer" || scalingForCells.empty())?1.0:scalingForCells.at(parentNode->name());
-        //std::cout << " parentScaling = " << parentScaling << std::endl;
+        const long double parentScaling = (
+            parentNode->type()=="Producer" || scalingForCells.empty()) ? 1.0 : scalingForCells.at(parentNode->name());
         maxParentsScaling = std::max(maxParentsScaling, parentScaling);
-        //std::cout << " maxParentsScaling = " << maxParentsScaling << std::endl;
         assert(parentScaling > 0.0);
     }
-
     return maxParentsScaling;
 }
 
 //rescaleParentsToScaling
-void rescaleParentsToScaling(std::shared_ptr<Node> node, 
+void rescaleParentsToScaling(std::shared_ptr<Node> node,
                              std::unordered_map<std::string, long double>& scalingForCells, long double scaling) {
@@ -74,10 +67,10 @@ void rescaleParentsToScaling(std::shared_ptr<Node> node,
 
         //std::cout << "parentScaling = " << parentScaling << std::endl;
         //std::cout << "scaling = " << scaling << std::endl;
-        
+
         assert(parentScaling < scaling);
 
-        //IF parentScaling is smaller than scaling 
+        //IF parentScaling is smaller than scaling
         //create scaling operator and add it to the graphView
         /*
         std::shared_ptr<Node> scaling_op = Scaling(scaling, parentCell->name()+"rescale_branch");
         scaling_op->getOperator()->setDatatype(DataType::Float32);
         scaling_op->getOperator()->setBackend("cpu");
         */
 
-        //TODO : get all graphviews node is attached to and insert a scaling cell before it. 
+ //TODO : get all graphviews node is attached to and insert a scaling cell before it. /* void Aidge::GraphView::insert(Node & newNode, Node & inNode, std::initializer_list<Node> outNodes, @@ -99,11 +92,11 @@ void rescaleParentsToScaling(std::shared_ptr<Node> node, //N2D2 version /* auto scalingCell = Registrar<ScalingCell>::create<Float_T>(getCellModelType(*parentCell)) - (mDeepNet, - mDeepNet.generateNewCellName(parentCell->getName() + "_rescale_branch"), - parentCell->getNbOutputs(), + (mDeepNet, + mDeepNet.generateNewCellName(parentCell->getName() + "_rescale_branch"), + parentCell->getNbOutputs(), Scaling::floatingPointScaling( - std::vector<Float_T>(parentCell->getNbOutputs(), + std::vector<Float_T>(parentCell->getNbOutputs(), parentScaling/scaling), false, std::vector<Float_T>(0.0f)) ); @@ -112,71 +105,17 @@ void rescaleParentsToScaling(std::shared_ptr<Node> node, } } -//N2D2 version -/* -std::unordered_map<std::string, long double> N2D2::DeepNetQuantization::quantizeFreeParemeters(std::size_t nbBits) { -#ifdef VERBOSE_QUANT - std::cout << " Quantizing free parameters:" << std::endl; -#endif - - std::unordered_map<std::string, long double> biasScalings; - - std::vector<std::vector<std::string>> layers = mDeepNet.getLayers(); - for (auto itLayer = layers.begin() + 1; itLayer != layers.end(); ++itLayer) { - for (auto itCell = itLayer->begin(); itCell != itLayer->end(); ++itCell) { - std::shared_ptr<Cell> cell = mDeepNet.getCell(*itCell); - if(!cell) { - throw std::runtime_error("Invalid cell."); - } - - long double biasScaling = getMaxParentsScaling(cell, biasScalings); - rescaleParentsToScaling(cell, biasScalings, biasScaling); - - - const long double wQuantScaling = std::pow(2, nbBits - 1) - 1; - const long double bQuantScaling = DeepNetExport::isCellInputsUnsigned(*cell, mDeepNet)? 
- wQuantScaling*(std::pow(2, nbBits) - 1): - wQuantScaling*(std::pow(2, nbBits - 1) - 1); - - - const std::pair<Float_T, Float_T> wMinMax - = cell->getFreeParametersRange(Cell::Multiplicative); - const Float_T wScalingCell = Utils::max_abs(wMinMax.first, wMinMax.second); - if(wScalingCell != 0.0) { - cell->processFreeParameters([&](Float_T w) { return w*(wQuantScaling/wScalingCell); }, - Cell::Multiplicative); - - biasScaling *= wScalingCell; - } - - cell->processFreeParameters([&](Float_T b) { return b*(bQuantScaling/biasScaling); }, - Cell::Additive); - biasScalings[cell->getName()] = biasScaling; - -#ifdef VERBOSE_QUANT - std::cout << " - " << cell->getName() << ": " << biasScaling - << std::endl; -#endif - } - } - - fuseScalingCells(); - - return biasScalings; -} -*/ - long double quantizeFreeParams(std::shared_ptr<Node> node, std::size_t nbBits, std::unordered_map<std::string, long double> biasScalings, bool verbose){ - + long double biasScaling = getMaxParentsScaling(node, biasScalings); rescaleParentsToScaling(node, biasScalings, biasScaling); //weights are input 1 std::shared_ptr<Tensor> weight_tensor = node->getOperator()->getInput(1); - if(verbose){ + if(verbose){ printf("Weight init :\n"); - weight_tensor->print(); + weight_tensor->print(); } float max_value = 0.; @@ -188,7 +127,7 @@ long double quantizeFreeParams(std::shared_ptr<Node> node, std::size_t nbBits, s } const long double wQuantScaling = std::pow(2, nbBits - 1) - 1; - //find out if the input to the cell is signed/unsigned, n2d2 example : + //TODO :find out if the input to the cell is signed/unsigned, n2d2 example : //return cellFrame.getOutputsRange().first >= 0.0; /* virtual std::pair<double, double> getOutputsRange() const { @@ -210,15 +149,15 @@ long double quantizeFreeParams(std::shared_ptr<Node> node, std::size_t nbBits, s static_cast<float *>(quant_weight_tensor->getImpl()->rawPtr())[i] = std::round(static_cast<float *>(weight_tensor->getImpl()->rawPtr())[i]*wQuantScaling/max_value); } - if(verbose){ + if(verbose){ printf("Weight quantized :\n"); - quant_weight_tensor->print(); + quant_weight_tensor->print(); } std::shared_ptr<Tensor> bias_tensor = node->getOperator()->getInput(2); - if(verbose){ + if(verbose){ printf("Bias init :\n"); - bias_tensor->print(); + bias_tensor->print(); } std::shared_ptr<Tensor> quant_bias_tensor = std::static_pointer_cast<Tensor>(bias_tensor); @@ -226,9 +165,9 @@ long double quantizeFreeParams(std::shared_ptr<Node> node, std::size_t nbBits, s static_cast<float *>(quant_bias_tensor->getImpl()->rawPtr())[i] = static_cast<float *>(bias_tensor->getImpl()->rawPtr())[i]*bQuantScaling/biasScaling; } - if(verbose){ + if(verbose){ printf("Bias quantized :\n"); - quant_bias_tensor->print(); + quant_bias_tensor->print(); } //replace weights in the node @@ -239,155 +178,14 @@ long double quantizeFreeParams(std::shared_ptr<Node> node, std::size_t nbBits, s return biasScaling; } -//N2D2 version with Histogram -/* - double N2D2::DeepNetQuantization::getCellThreshold(const std::string& cellName, - const std::unordered_map<std::string, Histogram>& outputsHistogram, - const std::unordered_map<std::string, RangeStats>& outputsRange, - std::size_t nbBits, ClippingMode actClippingMode, double quantileValue) - { - switch(actClippingMode) { - case ClippingMode::KL_DIVERGENCE: - return outputsHistogram.at(cellName).calibrateKLDivergence(nbBits); - case ClippingMode::MSE: - return outputsHistogram.at(cellName).calibrateMSE(nbBits); - case ClippingMode::QUANTILE: - return 
outputsHistogram.at(cellName).getQuantileValue(quantileValue); - default: { - const auto& range = outputsRange.at(cellName); - return Utils::max_abs(range.minVal(), range.maxVal()); - } - } - } -*/ + float getCellThreshold(std::shared_ptr<Node> node){ float max_output = std::static_pointer_cast<OutputRange>(node->getOperator()->getHook("output_range"))->getOutput(0); return max_output; } -//N2D2 quantizeActivation method /* -void N2D2::DeepNetQuantization::quantizeActivations( - const std::unordered_map<std::string, Histogram>& outputsHistogram, - const std::unordered_map<std::string, RangeStats>& outputsRange, - std::unordered_map<std::string, long double>& biasScalings, - std::size_t nbBits, ClippingMode actClippingMode, double quantileValue) -{ -#ifdef VERBOSE_QUANT - std::cout << " Quantizing activations:" << std::endl; -#endif - - std::unordered_map<std::string, long double> activationScalings; - - std::vector<std::vector<std::string>> layers = mDeepNet.getLayers(); - for (auto itLayer = layers.begin() + 1; itLayer != layers.end(); ++itLayer) { - for (auto itCell = itLayer->begin(); itCell != itLayer->end(); ++itCell) { - std::shared_ptr<Cell> cell = mDeepNet.getCell(*itCell); - std::shared_ptr<Cell_Frame_Top> cellFrame = std::dynamic_pointer_cast<Cell_Frame_Top>(cell); - if(!cell || !cellFrame) { - throw std::runtime_error("Invalid cell."); - } - - const long double prevActivationScaling = getMaxParentsScaling(cell, activationScalings); - rescaleParentsToScaling(cell, activationScalings, prevActivationScaling); - - - long double activationScaling; - - const std::shared_ptr<Activation>& activation = cellFrame->getActivation(); - if(cell->getType() == ElemWiseCell::Type) { - activationScaling = getCellThreshold(cell->getName(), - outputsHistogram, outputsRange, - nbBits, ClippingMode::NONE); - } - else if(cell->getType() == PaddingCell::Type || - cell->getType() == PoolCell::Type || - cell->getType() == ResizeCell::Type || - cell->getType() == ScalingCell::Type || - cell->getType() == SoftmaxCell::Type || - cell->getType() == TransposeCell::Type || - cell->getType() == ReshapeCell::Type) - { - activationScalings[cell->getName()] = prevActivationScaling; - continue; - } - else if(activation) { - const bool clip = cell->getNbOutputs() > 2 && - (activation->getType() == RectifierActivation::Type || - activation->getType() == LinearActivation::Type || - activation->getType() == SaturationActivation::Type); - - - auto childrenCells = mDeepNet.getChildCells(cell->getName()); - const bool isNextCellMaxPool = childrenCells.size() == 1 && - childrenCells[0]->getType() == PoolCell::Type && - dynamic_cast<const PoolCell&>(*childrenCells[0]).getPooling() == PoolCell::Max; - - - const std::string cellStatsName = clip && isNextCellMaxPool?childrenCells[0]->getName(): - cell->getName(); - activationScaling = getCellThreshold(cellStatsName, - outputsHistogram, outputsRange, - nbBits, clip?actClippingMode:ClippingMode::NONE, quantileValue); - } - else { - throw std::runtime_error("Quantization of cell '" + cell->getName() + "' of type '" + - cell->getType() + "' is not supported yet."); - } - - const long double biasScaling = biasScalings.at(cell->getName()); - -#ifdef VERBOSE_QUANT - std::cout << " - " << cell->getName() << ": " - << "prev=" << prevActivationScaling - << ", act=" << activationScaling - << ", bias=" << biasScaling << std::endl; -#endif - - activationScaling /= biasScaling; - activationScaling = (activationScaling == 0.0)?1.0:activationScaling; - - activationScalings[cell->getName()] = 
activationScaling; - - cell->processFreeParameters([&](Float_T d) { return d/prevActivationScaling; }, - Cell::Additive); - - - const long double actQuantScaling = getActivationQuantizationScaling(*cell, nbBits); - auto scalingCell = Registrar<ScalingCell>::create<Float_T>(getCellModelType(*cell)) - (mDeepNet, - mDeepNet.generateNewCellName(cell->getName() + "_rescale_act"), - cell->getNbOutputs(), - Scaling::floatingPointScaling( - std::vector<Float_T>(cell->getNbOutputs(), - (prevActivationScaling/activationScaling)/actQuantScaling - ), - false, - std::vector<Float_T>(0.0f) - ) - ); - - mDeepNet.addCellAfter(scalingCell, cell); - - activationScalings[scalingCell->getName()] = activationScalings[cell->getName()]; - biasScalings[scalingCell->getName()] = biasScaling; - -#ifdef VERBOSE_QUANT - std::cout << " quant=" << actQuantScaling - << ", global scaling=" << Utils::cnotice << activationScaling - << Utils::cdef << " -> cell scaling=" << Utils::cwarning - << ((prevActivationScaling/activationScaling) - /actQuantScaling) - << Utils::cdef << std::endl; -#endif - } - } - - fuseScalingCells(); -} - - //TODO: add this !!! double N2D2::DeepNetQuantization::getActivationQuantizationScaling(const Cell& cell, std::size_t nbBits) const { const double unsignedMax = std::pow(2, nbBits) - 1; @@ -401,9 +199,9 @@ double N2D2::DeepNetQuantization::getActivationQuantizationScaling(const Cell& c } const std::string activationType = activation->getType(); - - if(activationType == LogisticActivation::Type || - activationType == LogisticActivation::TypeWithLoss) + + if(activationType == LogisticActivation::Type || + activationType == LogisticActivation::TypeWithLoss) { return 2*(DeepNetExport::isCellInputsUnsigned(cell, mDeepNet)? signedMax*unsignedMax/signedMax: @@ -425,63 +223,64 @@ double N2D2::DeepNetQuantization::getActivationQuantizationScaling(const Cell& c long double quantizeActivation(std::shared_ptr<Node> node, std::size_t /*nbBits*/, std::unordered_map<std::string, long double> biasScalings, std::unordered_map<std::string, long double> activationScalings, bool /*verbose*/){ const long double prevActivationScaling = getMaxParentsScaling(node, activationScalings); + rescaleParentsToScaling(node, activationScalings, prevActivationScaling); long double activationScaling; long double biasScaling = 1.0; if(node->type() == "ElemWise") { - /* - activationScaling = getCellThreshold(node->name(), - outputsHistogram, outputsRange, - nbBits, ClippingMode::NONE); - */ activationScaling = getCellThreshold(node); } - else if(node->type() == "Padding" || - node->type() == "Pool" || - node->type() == "Resize" || - node->type() == "Scaling" || - node->type() == "Softmax" || + else if(node->type() == "Padding" || + node->type() == "Pool" || + node->type() == "Resize" || + node->type() == "Scaling" || + node->type() == "Softmax" || node->type() == "Transpose" || node->type() == "Reshape") { return prevActivationScaling; } - else if(node->type() == "ReLU" - || node->type() == "Linear" + else if(node->type() == "ReLU" + || node->type() == "Linear" || node->type() == "Saturation") { - //TODO :: nbOutputs > 2 is relevant for clip, check it + //TODO :: nbOutputs > 2 is relevant for clip, check it //const bool clip = node->nbOutputs() > 2 && isLinearActivation; activationScaling = getCellThreshold(node); } else { - throw std::runtime_error("Quantization of cell '" + node->name() + "' of type '" + + throw std::runtime_error("Quantization of cell '" + node->name() + "' of type '" + node->type() + "' is not supported yet."); 
} //get the parent conv/fc bias - const std::vector<std::shared_ptr<Node>> parentsNodes = node->getParents(); + const std::vector<std::shared_ptr<Node>> parentsNodes = node->getParents(); for(const std::shared_ptr<Node>& parentNode: parentsNodes) { if(parentNode->type() == "Conv"){ biasScaling = biasScalings[parentNode->name()]; + }else{ + throw std::runtime_error("Quantization of activation with parent: '" + parentNode->type() + "' is not supported yet."); } } activationScaling /= biasScaling; - activationScaling = (activationScaling == 0.0)?1.0:activationScaling; + activationScaling = (activationScaling == 0.0) ? 1.0 : activationScaling; return activationScaling; } +// TODO: Give directly a scheduler object ? void quantizeNetwork(std::vector<std::shared_ptr<Node>> orderedGraphView, std::size_t nbBits, bool verbose){ //void quantizeNetwork(std::shared_ptr<GraphView> g1, std::size_t nbBits, bool verbose){ //keep all bias scalings here + // Map: node name <-> bias scaling std::unordered_map<std::string, long double> biasScalings; - //and activations + //and activations + // Map: node name <-> activation scaling std::unordered_map<std::string, long double> activationScalings; //loop on all nodes for weights/bias quantization @@ -493,28 +292,29 @@ void quantizeNetwork(std::vector<std::shared_ptr<Node>> orderedGraphView, std::s std::set<std::shared_ptr<Node>> childrenNodes = nodePtr->getChildren(); for (const std::shared_ptr<Node>& child_node : childrenNodes) { - printf("- type %s, and name %s \n", child_node->type().c_str(), child_node->name().c_str()); + printf("\t- type %s, and name %s \n", child_node->type().c_str(), child_node->name().c_str()); } const std::vector<std::shared_ptr<Node>> parentsNodes = nodePtr->getParents(); printf("And %zu parents: \n", parentsNodes.size()); for(const std::shared_ptr<Node>& parent_node: parentsNodes) { - printf("- name %s : \n", parent_node->name().c_str()); + printf("\t- name %s : \n", parent_node->name().c_str()); } } if (nodePtr->type() == "Conv") { + // TODO: pass biasScaling by reference and update it in quantizeFreeParams biasScalings[nodePtr->name()] = quantizeFreeParams(nodePtr, nbBits, biasScalings, verbose); - if(verbose){ + if(verbose){ std::cout << "outside quantizeFreeParams :: biasScalings[node->name()] = " << biasScalings[nodePtr->name()] << std::endl; } } else if(nodePtr->type() == "ReLU") { activationScalings[nodePtr->name()] = quantizeActivation(nodePtr, nbBits, biasScalings, activationScalings, verbose); - if(verbose){ + if(verbose){ std::cout << "outside quantizeActivation :: activationScalings[node->name()] = " << activationScalings[nodePtr->name()] << std::endl; } - //TODO : - //correct bias for previous activation scaling here ... + //TODO : + //correct bias for previous activation scaling here ... /* cell->processFreeParameters([&](Float_T d) { return d/prevActivationScaling; }, Cell::Additive); @@ -524,18 +324,18 @@ void quantizeNetwork(std::vector<std::shared_ptr<Node>> orderedGraphView, std::s scaling_node->getOperator()->setDatatype(DataType::Float32); scaling_node->getOperator()->setBackend("cpu"); - for (auto& graphPtr : nodePtr->views()) { + for (auto& graphPtr : nodePtr->views()) { // TODO : use maxence/thibualt insert ! 
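+                // For now, simply attach the new Scaling node as a child in every GraphView
+                // this node belongs to; inserting it between the ReLU and its children
+                // (see TODO above) is still missing.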
graphPtr->addChild(scaling_node); } //add scaling cell /* const long double actQuantScaling = getActivationQuantizationScaling(*cell, nbBits); auto scalingCell = Registrar<ScalingCell>::create<Float_T>(getCellModelType(*cell)) - (mDeepNet, - mDeepNet.generateNewCellName(cell->getName() + "_rescale_act"), - cell->getNbOutputs(), + (mDeepNet, + mDeepNet.generateNewCellName(cell->getName() + "_rescale_act"), + cell->getNbOutputs(), Scaling::floatingPointScaling( - std::vector<Float_T>(cell->getNbOutputs(), + std::vector<Float_T>(cell->getNbOutputs(), (prevActivationScaling/activationScaling)/actQuantScaling ), false, @@ -552,13 +352,13 @@ void quantizeNetwork(std::vector<std::shared_ptr<Node>> orderedGraphView, std::s } else{ std::cout << "this is not Conv or ReLu, moving on" << std::endl; - } + } } - + // TODO : add fuseScaling recipes ? } } -//TODO : -//Scaling cell insert missing - for now it's a problem for activation quantization, as in simple cases there is no need to additional rescaling in rescaleParentsToScaling ... -//activation is no more attached to cell, now it's the child of conv/fc - quantize only after Relu/? +//TODO : +//Scaling cell insert missing - for now it's a problem for activation quantization, as in simple cases there is no need to additional rescaling in rescaleParentsToScaling ... +//activation is no more attached to cell, now it's the child of conv/fc - quantize only after Relu/? //Figure out how to check the range of operator : need to add the function to every possible operator ? diff --git a/unit_tests/hook/Test_execTime.cpp b/unit_tests/hook/Test_execTime.cpp index a9fb8bd..c71d69f 100644 --- a/unit_tests/hook/Test_execTime.cpp +++ b/unit_tests/hook/Test_execTime.cpp @@ -67,7 +67,7 @@ TEST_CASE("[hook] ExecTime(forward)") { } }; Tensor myBias = Array1D<float,4> {{7.,0.,9.,0.}}; - Tensor myInput = Array4D<float,2,3,5,5> { + Tensor myInput = Array4D<float,2,3,5,5> { { { {{ 0., 1., 2., 3., 4.}, @@ -109,7 +109,7 @@ TEST_CASE("[hook] ExecTime(forward)") { } } }; - Tensor myOutput = Array4D<float,2,4,3,3> { + Tensor myOutput = Array4D<float,2,4,3,3> { { { {{ 15226., 15577., 15928.}, @@ -147,7 +147,7 @@ TEST_CASE("[hook] ExecTime(forward)") { myConv1->getOperator()->computeOutputDims(); myConv1->getOperator()->addHook("execution_time"); myConv1->forward(); - + //std::static_pointer_cast<Tensor>(myConv1->getOperator()->getOutput(0))->print(); assert(*std::static_pointer_cast<Tensor>(myConv1->getOperator()->getOutput(0)) == myOutput); //std::static_pointer_cast<Tensor>(myConv1->getOperator()->getInput(1))->print(); @@ -156,4 +156,4 @@ TEST_CASE("[hook] ExecTime(forward)") { const std::time_t t_c = std::chrono::system_clock::to_time_t(time_conv); //std::cout << "the execution time of the module was " << std::put_time(std::localtime(&t_c), "%F %T.\n") << std::flush; -} \ No newline at end of file +} diff --git a/unit_tests/hook/Test_outputRange.cpp b/unit_tests/hook/Test_outputRange.cpp index 18916e9..9a7de72 100644 --- a/unit_tests/hook/Test_outputRange.cpp +++ b/unit_tests/hook/Test_outputRange.cpp @@ -66,7 +66,7 @@ TEST_CASE("[hook] OutputRange(forward)") { } }; Tensor myBias = Array1D<float,4> {{7.,0.,9.,0.}}; - Tensor myInput = Array4D<float,2,3,5,5> { + Tensor myInput = Array4D<float,2,3,5,5> { { { {{ 0., 1., 2., 3., 4.}, @@ -108,7 +108,7 @@ TEST_CASE("[hook] OutputRange(forward)") { } } }; - Tensor myOutput = Array4D<float,2,4,3,3> { + Tensor myOutput = Array4D<float,2,4,3,3> { { { {{ 15226., 15577., 15928.}, @@ -146,11 +146,11 @@ TEST_CASE("[hook] 
OutputRange(forward)") { myConv1->getOperator()->computeOutputDims(); myConv1->getOperator()->addHook("output_range"); myConv1->forward(); - + //std::static_pointer_cast<Tensor>(myConv->getOperator()->getOutput(0))->print(); assert(*std::static_pointer_cast<Tensor>(myConv1->getOperator()->getOutput(0)) == myOutput); float max_output = std::static_pointer_cast<OutputRange>(myConv1->getOperator()->getHook("output_range"))->getOutput(0); //std::cout << "the output of the conv was " << max_output << std::flush; -} \ No newline at end of file +} -- GitLab
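
Worked example (illustrative, not part of the patch above). The free-parameter step
implemented in quantizeFreeParams is plain symmetric scaling: weights are multiplied by
(2^(nbBits-1) - 1) / max|w| and rounded, and max|w| is folded into the running bias
scaling, following the N2D2 code quoted in the diff (signed-input branch, where
bQuantScaling = wQuantScaling * (2^(nbBits-1) - 1)). A minimal NumPy sketch of that
arithmetic, with made-up tensor values and a hypothetical helper name:

    import numpy as np

    def quantize_free_params(weights, bias, nb_bits=8, parent_scaling=1.0):
        # Signed integer maximum, e.g. 127 for 8 bits.
        w_quant_scaling = 2.0 ** (nb_bits - 1) - 1
        # Map the largest weight magnitude onto that maximum, then round.
        max_value = np.abs(weights).max()
        q_weights = np.round(weights * w_quant_scaling / max_value)
        # Fold max|w| into the bias scaling ("biasScaling *= wScalingCell" in N2D2).
        bias_scaling = parent_scaling * max_value
        # Signed-input branch of the quoted N2D2 code.
        b_quant_scaling = w_quant_scaling * (2.0 ** (nb_bits - 1) - 1)
        q_bias = bias * b_quant_scaling / bias_scaling
        return q_weights, q_bias, bias_scaling

    w = np.array([[0.5, -1.0], [0.25, 0.75]], dtype=np.float32)
    b = np.array([0.1, -0.2], dtype=np.float32)
    qw, qb, s = quantize_free_params(w, b)
    # qw -> [[ 64., -127.], [ 32., 95.]], s -> 1.0 (max|w| of this toy tensor)

The returned bias scaling is what quantizeNetwork stores in biasScalings[node->name()],
and what quantizeActivation later divides the activation threshold (read from the
"output_range" hook via getCellThreshold) by.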