diff --git a/aidge_quantization/unit_tests/test_ptq.py b/aidge_quantization/unit_tests/test_ptq.py
index dfdedd8394913c0b205bbb1084b4dfb3c95b24a3..56080bff0d1f4a95248fa983316dbafd35565501 100644
--- a/aidge_quantization/unit_tests/test_ptq.py
+++ b/aidge_quantization/unit_tests/test_ptq.py
@@ -21,7 +21,7 @@ ACCURACIES = (95.4, 94.4) # (97.9, 97.7)
 NB_BITS = 4
 
 # --------------------------------------------------------------
-# UTILS 
+# UTILS
 # --------------------------------------------------------------
 
 def propagate(model, scheduler, sample):
@@ -50,7 +50,7 @@ def compute_accuracy(model, samples, labels):
 # --------------------------------------------------------------
 
 class test_ptq(unittest.TestCase):
-    
+
     def setUp(self):
 
         # load the samples / labels (numpy)
@@ -70,19 +70,20 @@ class test_ptq(unittest.TestCase):
 
     def tearDown(self):
         pass
-    
+
     def test_model(self):
 
         Log.set_console_level(Level.Info)
         # compute the base accuracy
         accuracy = compute_accuracy(self.model, self.samples[0:NB_SAMPLES], self.labels)
         self.assertAlmostEqual(accuracy * 100, ACCURACIES[0], msg='base accuracy does not meet the baseline !', delta=0.1)
-    
+
    def test_quant_model(self):
 
-        Log.set_console_level(Level.Info)
+        Log.set_console_level(Level.Debug)
 
         # create the calibration dataset
 
+        tensors = []
         for sample in self.samples[0:NB_SAMPLES]:
             sample = prepare_sample(sample)
@@ -91,14 +92,14 @@
 
         # quantize the model
 
-        aidge_quantization.quantize_network(
-            self.model, 
-            NB_BITS, 
-            tensors, 
-            clipping_mode=aidge_quantization.Clipping.MSE,
+        aidge_quantization.quantize_network(
+            self.model,
+            NB_BITS,
+            tensors,
+            clipping_mode=aidge_quantization.Clipping.MSE,
             no_quantization=False,
-            optimize_signs=True, 
+            optimize_signs=True,
             single_shift=False
         )
diff --git a/include/aidge/operator/FixedQ.hpp b/include/aidge/operator/FixedQ.hpp
index 96a52b4592bc05f34a47e04e664df27847a48e85..3d46dcfacc59e98ae193a9238a9474c6df015b7d 100644
--- a/include/aidge/operator/FixedQ.hpp
+++ b/include/aidge/operator/FixedQ.hpp
@@ -9,11 +9,12 @@
  *
 ********************************************************************************/
 
-#ifndef AIDGE_CORE_OPERATOR_FIXEDQ_H_
-#define AIDGE_CORE_OPERATOR_FIXEDQ_H_
+#ifndef AIDGE_QUANTIZATION_OPERATOR_FIXEDQ_H_
+#define AIDGE_QUANTIZATION_OPERATOR_FIXEDQ_H_
 
-#include <cassert>
+#include <cstddef> // std::size_t
 #include <memory>
+#include <string>
 #include <vector>
 
 #include "aidge/backend/OperatorImpl.hpp"
@@ -21,8 +22,8 @@
 #include "aidge/operator/OperatorTensor.hpp"
 #include "aidge/utils/ErrorHandling.hpp"
 #include "aidge/utils/Registrar.hpp"
-#include "aidge/utils/Types.h"
 #include "aidge/utils/StaticAttributes.hpp"
+#include "aidge/utils/Types.h"
 
 namespace Aidge {
@@ -43,24 +44,20 @@ private:
 
 public:
 
-    FixedQ_Op(std::size_t nbBits, float span, bool isOutputUnsigned) :
-        OperatorTensor(Type, {InputCategory::Data}, 1),
-        mAttributes(std::make_shared<Attributes_>(attr<FixedQAttr::NbBits>(nbBits), attr<FixedQAttr::Span>(span), attr<FixedQAttr::IsOutputUnsigned>(isOutputUnsigned)))
+    FixedQ_Op(std::size_t nbBits = 8, float span = 4.0f, bool isOutputUnsigned = false) :
+        OperatorTensor(Type, {InputCategory::Data}, 1),
+        mAttributes(std::make_shared<Attributes_>(
+            attr<FixedQAttr::NbBits>(nbBits),
+            attr<FixedQAttr::Span>(span),
+            attr<FixedQAttr::IsOutputUnsigned>(isOutputUnsigned)))
     {}
 
     /**
-     * @brief Copy-constructor. Copy the operator attributes and its output tensor(s), but not its input tensors (the new operator has no input associated).
+     * @brief Copy-constructor. Copy the operator attributes and its output
+     * tensor(s), but not its input tensors (the new operator has no input associated).
      * @param op Operator to copy.
      */
-    FixedQ_Op(const FixedQ_Op& op)
-        : OperatorTensor(op), mAttributes(op.mAttributes)
-    {
-        if (op.mImpl){
-            SET_IMPL_MACRO(FixedQ_Op, *this, op.backend());
-        }else{
-            mImpl = nullptr;
-        }
-    }
+    FixedQ_Op(const FixedQ_Op& op);
 
     /**
      * @brief Clone the operator using its copy-constructor.
@@ -88,14 +85,16 @@ public:
 };
 
-inline std::shared_ptr<Node> FixedQ(std::size_t nbBits = 8, float span = 4.0f, bool isOutputUnsigned = false, const std::string& name = "") {
-    return std::make_shared<Node>(std::make_shared<FixedQ_Op>(nbBits, span, isOutputUnsigned), name);
-}
-}
+std::shared_ptr<Node> FixedQ(std::size_t nbBits = 8,
+                             float span = 4.0f,
+                             bool isOutputUnsigned = false,
+                             const std::string& name = "");
+
+} // namespace Aidge
 
 namespace {
 template <>
 const char* const EnumStrings<Aidge::FixedQAttr>::data[] = {"nb_bits", "span", "is_output_unsigned"};
 }
 
-#endif /* AIDGE_CORE_OPERATOR_FIXEDQ_H_ */
+#endif /* AIDGE_QUANTIZATION_OPERATOR_FIXEDQ_H_ */
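Note: call sites are unchanged by moving the factory out of line. A minimal usage sketch of the API as declared above (the helper name is mine, not part of the patch):

```cpp
#include <memory>

#include "aidge/graph/Node.hpp"
#include "aidge/operator/FixedQ.hpp"

// Hypothetical helper (illustration only): build a 4-bit FixedQ node with a
// span of 2.0 and an unsigned output. The defaults (8 bits, span 4.0f,
// signed output) mirror the declaration in the header above.
std::shared_ptr<Aidge::Node> makeFixedQNode() {
    return Aidge::FixedQ(4, 2.0f, /*isOutputUnsigned=*/true, "my_fixedq");
}
```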
diff --git a/include/aidge/operator/LSQ.hpp b/include/aidge/operator/LSQ.hpp
index eb266bc4fe21820faa5d1cc9d843251236c041b9..970c476cb7be18b8d001edb27d60079de85b9349 100644
--- a/include/aidge/operator/LSQ.hpp
+++ b/include/aidge/operator/LSQ.hpp
@@ -9,8 +9,8 @@
  *
 ********************************************************************************/
 
-#ifndef AIDGE_CORE_OPERATOR_LSQ_H_
-#define AIDGE_CORE_OPERATOR_LSQ_H_
+#ifndef AIDGE_QUANTIZATION_OPERATOR_LSQ_H_
+#define AIDGE_QUANTIZATION_OPERATOR_LSQ_H_
 
 #include <cassert>
 #include <memory>
@@ -105,4 +105,4 @@
 template <>
 const char *const EnumStrings<Aidge::LSQAttr>::data[] = {"range"};
 }
 
-#endif /* AIDGE_CORE_OPERATOR_LSQ_H_ */
+#endif /* AIDGE_QUANTIZATION_OPERATOR_LSQ_H_ */
diff --git a/include/aidge/operator/SAT/DoReFa.hpp b/include/aidge/operator/SAT/DoReFa.hpp
index 92ce1677b1b28e303c8488b55dd00cfafb519457..d168c38bf4f21a64f0007f2f65b0dfc4820d8297 100644
--- a/include/aidge/operator/SAT/DoReFa.hpp
+++ b/include/aidge/operator/SAT/DoReFa.hpp
@@ -9,17 +9,15 @@
  *
 ********************************************************************************/
 
-#ifndef AIDGE_CORE_OPERATOR_DOREFA_H_
-#define AIDGE_CORE_OPERATOR_DOREFA_H_
+#ifndef AIDGE_QUANTIZATION_OPERATOR_SAT_DOREFA_H_
+#define AIDGE_QUANTIZATION_OPERATOR_SAT_DOREFA_H_
 
-#include <cassert>
 #include <memory>
 #include <vector>
 
 #include "aidge/backend/OperatorImpl.hpp"
 #include "aidge/graph/Node.hpp"
 #include "aidge/operator/OperatorTensor.hpp"
-#include "aidge/utils/ErrorHandling.hpp"
 #include "aidge/utils/Registrar.hpp"
 #include "aidge/utils/StaticAttributes.hpp"
 #include "aidge/utils/Types.h"
@@ -43,12 +41,17 @@ public:
     static const std::string Type;
 
 private:
-    using Attributes_ = StaticAttributes<DoReFaAttr, size_t, DoReFaMode>;
+    using Attributes_ = StaticAttributes<DoReFaAttr, std::size_t, DoReFaMode>;
     template <DoReFaAttr e> using attr = typename Attributes_::template attr<e>;
     const std::shared_ptr<Attributes_> mAttributes;
 
 public:
-    DoReFa_Op(size_t range = 255, DoReFaMode mode = DoReFaMode::Default)
+    /**
+     * @brief Constructor for DoReFa_Op
+     * @param range The quantization range (default: 255)
+     * @param mode The quantization mode (default: Default)
+     */
+    DoReFa_Op(std::size_t range = 255, DoReFaMode mode = DoReFaMode::Default)
       : OperatorTensor(Type, {InputCategory::Param}, 1),
         mAttributes(std::make_shared<Attributes_>(
             attr<DoReFaAttr::Range>(range),
@@ -59,30 +62,34 @@
     /**
      * @brief Copy-constructor. Copy the operator attributes and its output tensor(s), but not its input tensors (the new operator has no input associated).
      * @param op Operator to copy.
      */
-    DoReFa_Op(const DoReFa_Op& op)
-      : OperatorTensor(op),
-        mAttributes(op.mAttributes)
-    {
-        if (op.mImpl){
-            SET_IMPL_MACRO(DoReFa_Op, *this, op.backend());
-        }else{
-            mImpl = nullptr;
-        }
-    }
+    DoReFa_Op(const DoReFa_Op& op);
 
     /**
      * @brief Clone the operator using its copy-constructor.
      * @see Operator::DoReFa_Op
+     * @return std::shared_ptr<Operator> A deep copy of the operator
      */
-    std::shared_ptr<Operator> clone() const override {
-        return std::make_shared<DoReFa_Op>(*this);
-    }
+    std::shared_ptr<Operator> clone() const override;
 
+    /**
+     * @brief Get available backends for this operator
+     * @return std::set<std::string> Set of supported backend names
+     */
     std::set<std::string> getAvailableBackends() const override final;
+
+    /**
+     * @brief Set the backend for this operator
+     * @param name Backend name
+     * @param device Device index (default: 0)
+     */
     void setBackend(const std::string& name, DeviceIdx_t device = 0) override final;
 
+    /**
+     * @brief Get operator attributes
+     * @return std::shared_ptr<Attributes> Shared pointer to operator attributes
+     */
     inline std::shared_ptr<Attributes> attributes() const override { return mAttributes; }
-    inline size_t& range() const noexcept { return mAttributes->getAttr<DoReFaAttr::Range>(); }
+    inline std::size_t& range() const noexcept { return mAttributes->getAttr<DoReFaAttr::Range>(); }
    inline DoReFaMode& mode() const noexcept { return mAttributes->getAttr<DoReFaAttr::Mode>(); }
 
     static const std::vector<std::string> getInputsName(){
@@ -93,10 +100,20 @@
     }
 };
 
-inline std::shared_ptr<Node> DoReFa(size_t range = 255, DoReFaMode mode = DoReFaMode::Default, const std::string& name = "") {
-    return std::make_shared<Node>(std::make_shared<DoReFa_Op>(range, mode), name);
-}
-}
+/**
+ * @brief Factory function to create a DoReFa operator node
+ *
+ * @param range Quantization range (default: 255)
+ * @param mode Quantization mode (default: Default)
+ * @param name Node name (default: empty)
+ *
+ * @return std::shared_ptr<Node> Shared pointer to the created node
+ */
+std::shared_ptr<Node> DoReFa(std::size_t range = 255,
+                             DoReFaMode mode = DoReFaMode::Default,
+                             const std::string& name = "");
+
+} // namespace Aidge
 
 namespace {
 template <>
@@ -106,4 +123,4 @@
 const char *const EnumStrings<Aidge::DoReFaMode>::data[] = {"default", "symmetric", "asymmetric", "full_range"};
 }
 
-#endif /* AIDGE_CORE_OPERATOR_DOREFA_H_ */
+#endif /* AIDGE_QUANTIZATION_OPERATOR_SAT_DOREFA_H_ */
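Note: same deal for the DoReFa factory; a minimal sketch of the declared API (helper name mine):

```cpp
#include <memory>

#include "aidge/graph/Node.hpp"
#include "aidge/operator/SAT/DoReFa.hpp"

// Hypothetical helper (illustration only): a DoReFa weight quantizer over the
// default 255-step range. DoReFaMode::Default corresponds to the "default"
// entry of EnumStrings<DoReFaMode> above.
std::shared_ptr<Aidge::Node> makeDoReFaNode() {
    return Aidge::DoReFa(255, Aidge::DoReFaMode::Default, "w_quant");
}
```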
diff --git a/include/aidge/operator/SAT/TanhClamp.hpp b/include/aidge/operator/SAT/TanhClamp.hpp
index def43b872c021e539efe5658b592ceec9b3b5d4d..9d99d7024905332ff7336c62aaaa14d09c51e6d1 100644
--- a/include/aidge/operator/SAT/TanhClamp.hpp
+++ b/include/aidge/operator/SAT/TanhClamp.hpp
@@ -9,20 +9,18 @@
  *
 ********************************************************************************/
 
-#ifndef AIDGE_CORE_OPERATOR_TANHCLAMP_H_
-#define AIDGE_CORE_OPERATOR_TANHCLAMP_H_
+#ifndef AIDGE_QUANTIZATION_OPERATOR_SAT_TANHCLAMP_H_
+#define AIDGE_QUANTIZATION_OPERATOR_SAT_TANHCLAMP_H_
 
-#include <cassert>
 #include <memory>
+#include <set>
+#include <string>
 #include <vector>
 
 #include "aidge/backend/OperatorImpl.hpp"
 #include "aidge/graph/Node.hpp"
 #include "aidge/operator/OperatorTensor.hpp"
-#include "aidge/operator/Producer.hpp"
-#include "aidge/utils/ErrorHandling.hpp"
 #include "aidge/utils/Registrar.hpp"
-#include "aidge/utils/StaticAttributes.hpp"
 #include "aidge/utils/Types.h"
 
 namespace Aidge {
@@ -44,23 +42,13 @@ public:
     /**
      * @brief Copy-constructor. Copy the operator attributes and its output tensor(s), but not its input tensors (the new operator has no input associated).
      * @param op Operator to copy.
      */
-    TanhClamp_Op(const TanhClamp_Op& op)
-      : OperatorTensor(op)
-    {
-        if (op.mImpl){
-            SET_IMPL_MACRO(TanhClamp_Op, *this, op.backend());
-        }else{
-            mImpl = nullptr;
-        }
-    }
+    TanhClamp_Op(const TanhClamp_Op& op);
 
     /**
      * @brief Clone the operator using its copy-constructor.
      * @see Operator::TanhClamp_Op
      */
-    std::shared_ptr<Operator> clone() const override {
-        return std::make_shared<TanhClamp_Op>(*this);
-    }
+    std::shared_ptr<Operator> clone() const override;
 
     bool forwardDims(bool allowDataDependency = false) override final;
 
     std::set<std::string> getAvailableBackends() const override final;
@@ -75,9 +63,8 @@
     }
 };
 
-inline std::shared_ptr<Node> TanhClamp(const std::string& name = "") {
-    return std::make_shared<Node>(std::make_shared<TanhClamp_Op>(), name);
-}
-}
+std::shared_ptr<Node> TanhClamp(const std::string& name = "");
+
+} // namespace Aidge
 
-#endif /* AIDGE_CORE_OPERATOR_TANHCLAMP_H_ */
+#endif /* AIDGE_QUANTIZATION_OPERATOR_SAT_TANHCLAMP_H_ */
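Note: FixedQ, DoReFa and TanhClamp all get the same treatment — the copy-constructor and clone() bodies leave the header so the SET_IMPL_MACRO backend lookup is resolved in a single translation unit. The shape of the pattern, condensed with a placeholder Foo_Op (illustration only, not a file in this patch):

```cpp
// Foo.hpp -- declarations only; no backend lookup in the header anymore.
class Foo_Op : public OperatorTensor {
public:
    Foo_Op(const Foo_Op& op);                        // defined in Foo.cpp
    std::shared_ptr<Operator> clone() const override;
};

// Foo.cpp -- backend implementation is rebound at copy time.
Foo_Op::Foo_Op(const Foo_Op& op)
    : OperatorTensor(op)
{
    if (op.mImpl) {
        SET_IMPL_MACRO(Foo_Op, *this, op.backend()); // rebind backend impl
    } else {
        mImpl = nullptr;                             // no backend chosen yet
    }
}

std::shared_ptr<Operator> Foo_Op::clone() const {
    return std::make_shared<Foo_Op>(*this);
}
```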
"aidge/utils/Registrar.hpp" -#include "aidge/utils/StaticAttributes.hpp" #include "aidge/utils/Types.h" namespace Aidge { @@ -44,23 +42,13 @@ public: * @brief Copy-constructor. Copy the operator attributes and its output tensor(s), but not its input tensors (the new operator has no input associated). * @param op Operator to copy. */ - TanhClamp_Op(const TanhClamp_Op& op) - : OperatorTensor(op) - { - if (op.mImpl){ - SET_IMPL_MACRO(TanhClamp_Op, *this, op.backend()); - }else{ - mImpl = nullptr; - } - } + TanhClamp_Op(const TanhClamp_Op& op); /** * @brief Clone the operator using its copy-constructor. * @see Operator::TanhClamp_Op */ - std::shared_ptr<Operator> clone() const override { - return std::make_shared<TanhClamp_Op>(*this); - } + std::shared_ptr<Operator> clone() const override; bool forwardDims(bool allowDataDependency = false) override final; std::set<std::string> getAvailableBackends() const override final; @@ -75,9 +63,8 @@ public: } }; -inline std::shared_ptr<Node> TanhClamp(const std::string& name = "") { - return std::make_shared<Node>(std::make_shared<TanhClamp_Op>(), name); -} -} +std::shared_ptr<Node> TanhClamp(const std::string& name = ""); + +} // namespace Aidge -#endif /* AIDGE_CORE_OPERATOR_TANHCLAMP_H_ */ +#endif /* AIDGE_QUANTIZATION_OPERATOR_SAT_TANHCLAMP_H_ */ diff --git a/include/aidge/quantization/PTQ/CLE.hpp b/include/aidge/quantization/PTQ/CLE.hpp index 77eaf7ff36168add1f4e815cf318286ec82cc046..f4dc073ee5ed02799a75505a2dc0a3a519e66548 100644 --- a/include/aidge/quantization/PTQ/CLE.hpp +++ b/include/aidge/quantization/PTQ/CLE.hpp @@ -9,29 +9,33 @@ * ********************************************************************************/ -#ifndef AIDGE_QUANTIZATION_PTQ_CLE_H_ -#define AIDGE_QUANTIZATION_PTQ_CLE_H_ +#ifndef AIDGE_QUANTIZATION_QUANTIZATION_PTQ_CLE_H_ +#define AIDGE_QUANTIZATION_QUANTIZATION_PTQ_CLE_H_ -//#include <cstdint> -//#include <map> -//#include <memory> -//#include <string> -//#include <vector> +#include <memory> -#include "aidge/data/Tensor.hpp" #include "aidge/graph/GraphView.hpp" namespace Aidge { /** - * @brief Equalize the ranges of the nodes parameters by proceding iteratively. - * Can only be applied to single branch networks (otherwise does not edit the graphView). + * @brief Equalize the ranges of the nodes parameters by proceding iteratively. + * Can only be applied to single branch networks (otherwise does not edit the GraphView). + * + * Cross Layer Equalization (CLE) is used to balance the weights between consecutive + * layers to improve quantization performance. It works by iteratively scaling weights + * and biases of adjacent layers while preserving the overall function of the network. + * + * @note The operation modifies weights and biases in-place but preserves the mathematical + * function computed by the network. + * * @param graphView The GraphView to process. - * @param targetDelta the stopping criterion (typical value : 0.01) + * @param targetDelta the stopping criterion (typical value : 0.01). Smaller values lead + * to more precise equalization but may require more iterations. 
diff --git a/include/aidge/quantization/PTQ/Clipping.hpp b/include/aidge/quantization/PTQ/Clipping.hpp
index d0622f48be4f0cf6d39de60adb88d79ddd83e55b..3f65c42eb2032da10c4d337b53fb1bdd08a7aa55 100644
--- a/include/aidge/quantization/PTQ/Clipping.hpp
+++ b/include/aidge/quantization/PTQ/Clipping.hpp
@@ -9,14 +9,14 @@
  *
 ********************************************************************************/
 
-#ifndef AIDGE_QUANTIZATION_PTQ_CLIP_H_
-#define AIDGE_QUANTIZATION_PTQ_CLIP_H_
+#ifndef AIDGE_QUANTIZATION_QUANTIZATION_PTQ_CLIP_H_
+#define AIDGE_QUANTIZATION_QUANTIZATION_PTQ_CLIP_H_
 
-//#include <cstdint>
-//#include <map>
-//#include <memory>
-//#include <string>
-//#include <vector>
+#include <cstdint> // std::uint8_t
+#include <map>
+#include <memory>
+#include <string>
+#include <vector>
 
 #include "aidge/data/Tensor.hpp"
 #include "aidge/graph/GraphView.hpp"
@@ -56,9 +56,9 @@ namespace Aidge
 double computeKLClipping(std::vector<int> histogram, std::uint8_t nbBits);
 
 /**
- * @brief Return a corrected map of the provided activation ranges. 
- * To do so compute the optimal clipping values for every node and multiply the input ranges by those values. 
- * The method used to compute the clippings can be eihter 'MSE', 'AA', 'KL' or 'MAX'. 
+ * @brief Return a corrected map of the provided activation ranges.
+ * To do so compute the optimal clipping values for every node and multiply the input ranges by those values.
+ * The method used to compute the clippings can be either 'MSE', 'AA', 'KL' or 'MAX'.
 * @param clippingMode The method used to compute the optimal clippings.
 * @param valueRanges The map associating each affine node to its output range.
 * @param nbBits The quantization number of bits.
@@ -71,5 +71,5 @@
 
 }
 
-#endif /* AIDGE_QUANTIZATION_PTQ_CLIP_H_ */
+#endif /* AIDGE_QUANTIZATION_QUANTIZATION_PTQ_CLIP_H_ */
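Note: a minimal calibration-side sketch of the declared KL helper (the histogram values are made-up illustration data; a real histogram is collected over the calibration set):

```cpp
#include <cstdint>
#include <vector>

#include "aidge/quantization/PTQ/Clipping.hpp"

// Compute the KL-optimal clipping ratio for an 8-bit quantizer from a
// histogram of activation magnitudes (placeholder bin counts below).
double klClipDemo() {
    std::vector<int> histogram = {1200, 800, 350, 120, 40, 12, 3, 1};
    return Aidge::computeKLClipping(histogram, std::uint8_t{8});
}
```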
diff --git a/include/aidge/quantization/PTQ/PTQ.hpp b/include/aidge/quantization/PTQ/PTQ.hpp
index d2b8b7f78fccc15cf4afd598b02f0f7b391375e9..4fc38bc3b959ec8264ddaddbd4673fbe1f75e4ab 100644
--- a/include/aidge/quantization/PTQ/PTQ.hpp
+++ b/include/aidge/quantization/PTQ/PTQ.hpp
@@ -9,16 +9,19 @@
  *
 ********************************************************************************/
 
-#ifndef AIDGE_QUANTIZATION_PTQ_PTQ_H_
-#define AIDGE_QUANTIZATION_PTQ_PTQ_H_
+#ifndef AIDGE_QUANTIZATION_QUANTIZATION_PTQ_PTQ_H_
+#define AIDGE_QUANTIZATION_QUANTIZATION_PTQ_PTQ_H_
 
-//#include <cstdint>
-//#include <map>
-//#include <memory>
-//#include <string>
-//#include <vector>
+#include <cstdint> // std::uint8_t
+#include <map>
+#include <memory>
+#include <set>
+#include <string>
+#include <utility> // std::pair
+#include <vector>
 
 #include "aidge/data/Tensor.hpp"
+#include "aidge/quantization/PTQ/Clipping.hpp"
 #include "aidge/graph/GraphView.hpp"
 
 namespace Aidge {
@@ -104,12 +107,12 @@ namespace Aidge {
 * @brief Normalize the activations of each affine node so that they fit in the [-1:1] range.
 * This is done by reconfiguring the scaling nodes, as well as rescaling the weights and biases tensors.
 * @param graphView The GraphView containing the affine nodes.
- * @param valueRanges The node output value ranges computed over the calibration dataset. 
+ * @param valueRanges The node output value ranges computed over the calibration dataset.
 */
 void normalizeActivations(std::shared_ptr<GraphView> graphView, std::map<std::string, double> valueRanges);
 
 /**
- * @brief For each node, compute the sign of its input and output values. 
+ * @brief For each node, compute the sign of its input and output values.
 * The goal of the routine is to maximize the number of unsigned IOs in order to double the value resolution when possible.
 * @param graphView The GraphView to analyze.
 * @param verbose Whether to print the sign map or not.
@@ -135,7 +138,7 @@ namespace Aidge {
 * @param clippingMode: Type of the clipping optimization. Can be either 'MAX', 'MSE', 'AA' or 'KL'.
 * @param applyRounding Whether to apply the rounding operations or not.
 * @param optimizeSigns Whether to take account of the IO signs of the operators or not.
- * @param singleShift Whether to convert the scaling factors into powers of two. If true the approximations are compensated using the previous nodes weights. 
+ * @param singleShift Whether to convert the scaling factors into powers of two. If true, the approximations are compensated using the previous nodes' weights.
 * @param verbose Whether to print internal informations about the quantization process.
 */
 void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits, std::vector<std::shared_ptr<Tensor>> inputDataSet, Clipping clippingMode, bool applyRounding, bool optimizeSigns, bool singleShift, bool useCuda, bool verbose);
@@ -157,8 +160,8 @@
 * @brief Developement and test routine.
 * @param graphView The GraphView under test.
 */
- void devPTQ(std::shared_ptr<GraphView> graphView); 
+ void devPTQ(std::shared_ptr<GraphView> graphView);
 
 }
 
-#endif /* AIDGE_QUANTIZATION_PTQ_PTQ_H_ */
+#endif /* AIDGE_QUANTIZATION_QUANTIZATION_PTQ_PTQ_H_ */
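Note: the full quantizeNetwork signature shown above can be exercised like this — a minimal sketch assuming the model and calibration tensors are prepared elsewhere, and assuming Clipping::MSE is the C++ spelling of the 'MSE' mode (the Python test uses aidge_quantization.Clipping.MSE):

```cpp
#include <cstdint>
#include <memory>
#include <vector>

#include "aidge/data/Tensor.hpp"
#include "aidge/graph/GraphView.hpp"
#include "aidge/quantization/PTQ/PTQ.hpp"

// 8-bit post-training quantization over a calibration set, mirroring the
// declaration above (positional flags spelled out for readability).
void quantizeDemo(std::shared_ptr<Aidge::GraphView> model,
                  std::vector<std::shared_ptr<Aidge::Tensor>> calibrationSet) {
    Aidge::quantizeNetwork(model,
                           std::uint8_t{8},
                           calibrationSet,
                           Aidge::Clipping::MSE,
                           /*applyRounding=*/true,
                           /*optimizeSigns=*/true,
                           /*singleShift=*/false,
                           /*useCuda=*/false,
                           /*verbose=*/false);
}
```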
diff --git a/include/aidge/quantization/PTQ/PTQMetaOps.hpp b/include/aidge/quantization/PTQ/PTQMetaOps.hpp
index 62fac873235f2b89a242042de9260fc350ad6aa8..b9bad0d18f099e94d4c52254b08629c7f947db6a 100644
--- a/include/aidge/quantization/PTQ/PTQMetaOps.hpp
+++ b/include/aidge/quantization/PTQ/PTQMetaOps.hpp
@@ -8,22 +8,14 @@
  * SPDX-License-Identifier: EPL-2.0
  *
 ********************************************************************************/
 
-#ifndef AIDGE_QUANTIZATION_PTQ_PTQMETAOPS_H_
-#define AIDGE_QUANTIZATION_PTQ_PTQMETAOPS_H_
+#ifndef AIDGE_QUANTIZATION_QUANTIZATION_PTQ_PTQMETAOPS_H_
+#define AIDGE_QUANTIZATION_QUANTIZATION_PTQ_PTQMETAOPS_H_
 
-#include <array>
 #include <memory>
 #include <string>
-#include <utility>
-
-#include "aidge/operator/Clip.hpp"
-#include "aidge/operator/Mul.hpp"
-#include "aidge/operator/Round.hpp"
 
 #include "aidge/graph/GraphView.hpp"
 #include "aidge/graph/Node.hpp"
-#include "aidge/graph/OpArgs.hpp" // Sequential
-#include "aidge/operator/MetaOperator.hpp"
 
 namespace Aidge {
 
@@ -55,7 +47,7 @@ std::shared_ptr<Aidge::Node> Scaling(double scalingFactor, const std::string& na
 void updateScalingFactor(std::shared_ptr<Aidge::Node> MetaOpNode, double newScalingFactor);
 
 /// @brief Retrieves the current scaling factor of a PTQ meta-operator node.
-/// This function returns the scaling factor associated with the specified PTQ meta-operator node, 
+/// This function returns the scaling factor associated with the specified PTQ meta-operator node,
 /// allowing inspection of the current scalar applied in the [Mul] operation.
 ///
 /// @param MetaOpNode A shared pointer to the PTQ meta-operator node whose scaling factor is being queried.
@@ -66,7 +58,7 @@ double getScalingFactor(std::shared_ptr<Aidge::Node> MetaOpNode);
 /// This function modifies the clip range of a Quantizer node, allowing adjustment of the range within which values are clipped
 /// in the [Clip] operation of the Quantizer sequence.
 ///
-/// @param QuantizerNode A shared pointer to the Quantizer node whose clip range is being set. 
+/// @param QuantizerNode A shared pointer to the Quantizer node whose clip range is being set.
 ///        This node should have been created using the Quantizer function.
 /// @param min The minimum value for the clip range. Values below this will be clipped to this minimum.
 /// @param max The maximum value for the clip range. Values above this will be clipped to this maximum.
@@ -75,4 +67,4 @@ void setClipRange(std::shared_ptr<Aidge::Node> QuantizerNode, double min, double
 
 }
 
-#endif /* AIDGE_QUANTIZATION_PTQ_PTQMETAOPS_H_ */
+#endif /* AIDGE_QUANTIZATION_QUANTIZATION_PTQ_PTQMETAOPS_H_ */
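Note: how the four helpers compose, as a sketch — names exactly as declared above, values arbitrary:

```cpp
#include <memory>

#include "aidge/graph/Node.hpp"
#include "aidge/quantization/PTQ/PTQMetaOps.hpp"

// Build a Quantizer meta-operator, then retune it: the kind of post-hoc
// adjustment the PTQ pipeline performs as ranges are refined.
void quantizerDemo() {
    std::shared_ptr<Aidge::Node> q = Aidge::Quantizer(0.125, -128.0, 127.0, "q0");
    Aidge::updateScalingFactor(q, 0.0625);  // swap in a tighter scale
    Aidge::setClipRange(q, -64.0, 63.0);    // narrow the clip window
    double s = Aidge::getScalingFactor(q);  // reads back 0.0625
    (void)s;
}
```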
diff --git a/include/aidge/quantization/QAT/QAT_FixedQ.hpp b/include/aidge/quantization/QAT/QAT_FixedQ.hpp
index ecbe7422ea85db1771d91e161c93740993ebbe2b..6a2aa249892d58fcbd5a45a8d7bb8de67effabaf 100644
--- a/include/aidge/quantization/QAT/QAT_FixedQ.hpp
+++ b/include/aidge/quantization/QAT/QAT_FixedQ.hpp
@@ -9,8 +9,10 @@
  *
 ********************************************************************************/
 
-#ifndef AIDGE_QUANTIZATION_QAT_FIXEDQ_H_
-#define AIDGE_QUANTIZATION_QAT_FIXEDQ_H_
+#ifndef AIDGE_QUANTIZATION_QUANTIZATION_QAT_FIXEDQ_H_
+#define AIDGE_QUANTIZATION_QUANTIZATION_QAT_FIXEDQ_H_
+
+#include <memory>
 
 #include "aidge/graph/Node.hpp"
 #include "aidge/graph/GraphView.hpp"
@@ -41,10 +43,10 @@ void insertAndInitQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits
 * @brief Developement and test routine.
 * @param graphView The GraphView under test.
 */
-void devQAT(std::shared_ptr<GraphView> graphView); 
+void devQAT(std::shared_ptr<GraphView> graphView);
 
 }
 }
 
-#endif /* AIDGE_QUANTIZATION_QAT_FIXEDQ_H_ */
+#endif /* AIDGE_QUANTIZATION_QUANTIZATION_QAT_FIXEDQ_H_ */
diff --git a/include/aidge/quantization/QAT/QAT_LSQ.hpp b/include/aidge/quantization/QAT/QAT_LSQ.hpp
index 4970be07fae8737a1c2863600757bb81ff3a65f9..a44c71b04ca9e9c6a8fba27c615c99b4893d3d8c 100644
--- a/include/aidge/quantization/QAT/QAT_LSQ.hpp
+++ b/include/aidge/quantization/QAT/QAT_LSQ.hpp
@@ -9,12 +9,14 @@
  *
 ********************************************************************************/
 
-#ifndef AIDGE_QUANTIZATION_QAT_LSQ_H_
-#define AIDGE_QUANTIZATION_QAT_LSQ_H_
+#ifndef AIDGE_QUANTIZATION_QUANTIZATION_QAT_LSQ_H_
+#define AIDGE_QUANTIZATION_QUANTIZATION_QAT_LSQ_H_
+
+#include <cstddef> // std::size_t
+#include <memory>
 
-#include "aidge/graph/Node.hpp"
-#include "aidge/graph/GraphView.hpp"
 #include "aidge/data/Tensor.hpp"
+#include "aidge/graph/GraphView.hpp"
 
 namespace Aidge {
 namespace QuantLSQ {
@@ -25,7 +27,7 @@ namespace QuantLSQ {
 * @param nbBits Number of quantization bits.
 * @param span Fixed output span of the quantizers.
 */
-void insertQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits, float step_size);
+void insertQuantizers(std::shared_ptr<GraphView> graphView, std::size_t nbBits, float step_size);
 
 /**
 * @brief Given a GraphView with parameters properly initialized and some calibration data,
@@ -35,10 +37,10 @@
 * insert the LSQ quantizer nodes, and adjust their step-sizes.
 * @param graphView The GraphView containing the graph to quantize.
 * @param calibrationData Calibration data used to adjust the spans.
 * @param scale Multiplicative constant applied to the spans.
 */
-void insertAndInitQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits, std::shared_ptr<Tensor> calibrationData);
+void insertAndInitQuantizers(std::shared_ptr<GraphView> graphView, std::size_t nbBits, std::shared_ptr<Tensor> calibrationData);
 
-}
-}
+} // namespace QuantLSQ
+} // namespace Aidge
 
-#endif /* AIDGE_QUANTIZATION_QAT_LSQ_H_ */
+#endif /* AIDGE_QUANTIZATION_QUANTIZATION_QAT_LSQ_H_ */
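Note: a minimal sketch of the LSQ entry point as declared (4 bits and the 0.01 step size are arbitrary illustration values):

```cpp
#include <cstddef>
#include <memory>

#include "aidge/graph/GraphView.hpp"
#include "aidge/quantization/QAT/QAT_LSQ.hpp"

// Prepare a graph for LSQ quantization-aware training: insert 4-bit
// quantizer nodes with a hypothetical initial step size of 0.01.
void lsqDemo(std::shared_ptr<Aidge::GraphView> graphView) {
    Aidge::QuantLSQ::insertQuantizers(graphView, std::size_t{4}, 0.01f);
}
```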
diff --git a/src/PTQ/CLE.cpp b/src/PTQ/CLE.cpp
index 2c818155877349ad5e5a141469de9f6657873be7..5265d9c9b1326e73ee4080fe5f69fed5047a0dbb 100644
--- a/src/PTQ/CLE.cpp
+++ b/src/PTQ/CLE.cpp
@@ -10,14 +10,19 @@
 ********************************************************************************/
 
 #include "aidge/quantization/PTQ/CLE.hpp"
+
+#include <cmath>   // std::abs, std::fabs, std::sqrt
+#include <cstddef> // std::size_t
+#include <memory>
+#include <vector>
+
 #include "aidge/quantization/PTQ/Clipping.hpp"
-#include "aidge/quantization/PTQ/PTQ.hpp"
+#include "aidge/quantization/PTQ/PTQ.hpp" // retrieveNodeVector
 
 #include "aidge/graph/GraphView.hpp"
-#include "aidge/scheduler/SequentialScheduler.hpp"
-#include "aidge/scheduler/Scheduler.hpp"
-#include "aidge/utils/Log.hpp"
+#include "aidge/graph/Node.hpp"
 #include "aidge/operator/OperatorTensor.hpp"
+#include "aidge/utils/Log.hpp"
 
 namespace Aidge {
 
@@ -42,13 +47,13 @@ static void rescaleTensor(std::shared_ptr<Tensor> tensor, double scaling)
         castedTensor[i] *= scaling;
 }
 
-static double getTensorAbsoluteMax(std::shared_ptr <Tensor> tensor)
+static double getTensorAbsoluteMax(std::shared_ptr<Tensor> tensor)
 {
     // Get the tensor data pointer and edit it
     double * castedTensor = static_cast<double*> (tensor->getImpl()->rawPtr());
 
     // Get the tensor absolute max value
-    double maxValue = 0.0f;
+    double maxValue = 0.0;
     for(std::size_t i = 0; i < tensor->size(); ++i) {
         if(std::fabs(castedTensor[i]) > maxValue) {
             maxValue = std::fabs(castedTensor[i]);
@@ -62,15 +67,14 @@ void crossLayerEqualization(std::shared_ptr<GraphView> graphView, double targetD
     std::vector<std::shared_ptr<Node>> nodeVector = retrieveNodeVector(graphView);
 
     // Check if the CLE can be applied ...
-    for (std::shared_ptr<Node> node : nodeVector)
-    if (node->getChildren().size() > 1)
-    {
-        Log::info(" Network have multiple branches, skipping the CLE ... ");
-        return;
-    } 
+    for (std::shared_ptr<Node> node : nodeVector)
+        if (node->getChildren().size() > 1) {
+            Log::notice("Network has multiple branches, skipping the CLE ... ");
+            return;
+        }
 
-    Log::info(" Applying the Cross-Layer Equalization ... ");
+    Log::info("Applying the Cross-Layer Equalization ... ");
 
     // Get the vector of affine nodes
@@ -79,17 +83,22 @@
         if (isAffine(node))
             affineNodeVector.push_back(node);
 
+    if (affineNodeVector.empty()) {
+        Log::notice("No affine nodes found in the network. CLE cannot be applied.");
+        return;
+    }
 
     double maxRangeDelta;
+    int iteration = 0;
 
-    do
-    {
+    do {
+        ++iteration;
         maxRangeDelta = 0.0;
-        //std::cout << " ----- " << std::endl;
         //for (std::shared_ptr<Node> node : affineNodeVector)
         //    std::cout << getTensorAbsoluteMax(getWeightTensor(node)) << std::endl;
-        
-        for (size_t i = 0; i < (affineNodeVector.size() - 1); i++)
+
+        for (std::size_t i = 0; i < (affineNodeVector.size() - 1); i++)
         {
             std::shared_ptr<Node> n1 = affineNodeVector[i];
             std::shared_ptr<Node> n2 = affineNodeVector[i+1];
@@ -111,6 +120,9 @@
         }
     }
     while (maxRangeDelta > targetDelta);
+
+    Log::notice("CLE completed after {} iterations. Final max range delta: {:.6f}",
+                iteration, maxRangeDelta);
 }
 
 }
\ No newline at end of file
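Note: the pairwise update inside the do-while sits in elided hunk context. A hypothetical reconstruction of one step, using the file-local helpers defined above — getBiasTensor() and the exact statements are assumptions, not the patch's literal code:

```cpp
// Hypothetical sketch of one CLE step between consecutive affine nodes n1
// and n2 (conceptually lives inside CLE.cpp, next to the helpers above).
static double equalizePair(std::shared_ptr<Aidge::Node> n1,
                           std::shared_ptr<Aidge::Node> n2)
{
    double r1 = getTensorAbsoluteMax(getWeightTensor(n1));
    double r2 = getTensorAbsoluteMax(getWeightTensor(n2));
    double s  = std::sqrt(r1 * r2);              // common target range

    rescaleTensor(getWeightTensor(n1), s / r1);  // r1 -> sqrt(r1 * r2)
    rescaleTensor(getBiasTensor(n1),   s / r1);  // bias follows its weights
    rescaleTensor(getWeightTensor(n2), s / r2);  // r2 -> sqrt(r1 * r2)

    return std::fabs(r1 - r2);                   // feeds maxRangeDelta
}
```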
name + "_Scaling" : ""); - std::shared_ptr<Node> scalingFactorProducer = addProducer<1>(mulNode, 1, {1}, "ScalingFactor"); + std::shared_ptr<Node> scalingFactorProducer = addProducer<1>(mulNode, 1, {1}, "ScalingFactor"); scalingFactorProducer->getOperator()->setOutput(0, scalingFactorTensor); std::shared_ptr<GraphView> graphView = Sequential({mulNode}); @@ -96,7 +96,7 @@ void updateScalingFactor(std::shared_ptr<Node> metaOpNode, double scalingFactor) std::shared_ptr<Tensor> scalingFactorTensor = std::make_shared<Tensor>(Array1D<double, 1> {scalingFactor}); std::shared_ptr<MetaOperator_Op> metaOp = std::static_pointer_cast<MetaOperator_Op>(metaOpNode->getOperator()); - + std::shared_ptr<Node> mulNode = getSubNode(metaOp->getMicroGraph(), "Mul"); if (!mulNode) @@ -113,7 +113,7 @@ double getScalingFactor(std::shared_ptr<Node> MetaOpNode) } std::shared_ptr<MetaOperator_Op> metaOp = std::static_pointer_cast<MetaOperator_Op>(MetaOpNode->getOperator()); - + std::shared_ptr<Node> mulNode = getSubNode(metaOp->getMicroGraph(), "Mul"); if (!mulNode) { @@ -123,8 +123,8 @@ double getScalingFactor(std::shared_ptr<Node> MetaOpNode) auto scalingFactorTensor = std::static_pointer_cast<OperatorTensor>(mulNode->getOperator())->getInput(1); std::shared_ptr<Tensor> fallback; - const Tensor& localTensor = scalingFactorTensor->refCastFrom(fallback, DataType::Float64, "cpu"); - + const Tensor& localTensor = scalingFactorTensor->refCastFrom(fallback, DataType::Float64, "cpu"); + return localTensor.get<double>(0); } diff --git a/src/operator/FixedQ.cpp b/src/operator/FixedQ.cpp index 879174032bfcf5b2958b0950d0ed7410ba83331c..9828ce98f4918b3d2336c57fe018c9129804cf01 100644 --- a/src/operator/FixedQ.cpp +++ b/src/operator/FixedQ.cpp @@ -20,6 +20,17 @@ const std::string Aidge::FixedQ_Op::Type = "FixedQ"; +Aidge::FixedQ_Op::FixedQ_Op(const Aidge::FixedQ_Op& op) + : OperatorTensor(op), + mAttributes(op.mAttributes) +{ + if (op.mImpl){ + SET_IMPL_MACRO(FixedQ_Op, *this, op.backend()); + }else{ + mImpl = nullptr; + } +} + std::set<std::string> Aidge::FixedQ_Op::getAvailableBackends() const { return Registrar<FixedQ_Op>::getKeys(); } @@ -28,3 +39,12 @@ void Aidge::FixedQ_Op::setBackend(const std::string& name, DeviceIdx_t device) { SET_IMPL_MACRO(FixedQ_Op, *this, name); mOutputs[0]->setBackend(name, device); } + +//////////////////////////////////////////////////////////////////////////////// + +std::shared_ptr<Aidge::Node> Aidge::FixedQ(std::size_t nbBits, + float span, + bool isOutputUnsigned, + const std::string& name) { + return std::make_shared<Node>(std::make_shared<FixedQ_Op>(nbBits, span, isOutputUnsigned), name); +} \ No newline at end of file diff --git a/src/operator/SAT/DoReFa.cpp b/src/operator/SAT/DoReFa.cpp index b6124bad0e5f04c8e22e2d16c48dd4fe5de7945a..426e330e7f8426d256ca76a843548a91a62b036a 100644 --- a/src/operator/SAT/DoReFa.cpp +++ b/src/operator/SAT/DoReFa.cpp @@ -17,13 +17,38 @@ #include "aidge/data/Tensor.hpp" #include "aidge/utils/Types.h" -const std::string Aidge::DoReFa_Op::Type = "DoReFa"; +namespace Aidge { -std::set<std::string> Aidge::DoReFa_Op::getAvailableBackends() const { +const std::string DoReFa_Op::Type = "DoReFa"; + +DoReFa_Op::DoReFa_Op(const DoReFa_Op& op) + : OperatorTensor(op), + mAttributes(op.mAttributes) +{ + if (op.mImpl) { + SET_IMPL_MACRO(DoReFa_Op, *this, op.backend()); + } else { + mImpl = nullptr; + } +} + +std::shared_ptr<Operator> DoReFa_Op::clone() const { + return std::make_shared<DoReFa_Op>(*this); +} + +std::set<std::string> DoReFa_Op::getAvailableBackends() const 
diff --git a/src/operator/FixedQ.cpp b/src/operator/FixedQ.cpp
index 879174032bfcf5b2958b0950d0ed7410ba83331c..9828ce98f4918b3d2336c57fe018c9129804cf01 100644
--- a/src/operator/FixedQ.cpp
+++ b/src/operator/FixedQ.cpp
@@ -20,6 +20,17 @@
 
 const std::string Aidge::FixedQ_Op::Type = "FixedQ";
 
+Aidge::FixedQ_Op::FixedQ_Op(const Aidge::FixedQ_Op& op)
+    : OperatorTensor(op),
+      mAttributes(op.mAttributes)
+{
+    if (op.mImpl){
+        SET_IMPL_MACRO(FixedQ_Op, *this, op.backend());
+    }else{
+        mImpl = nullptr;
+    }
+}
+
 std::set<std::string> Aidge::FixedQ_Op::getAvailableBackends() const {
     return Registrar<FixedQ_Op>::getKeys();
 }
@@ -28,3 +39,12 @@ void Aidge::FixedQ_Op::setBackend(const std::string& name, DeviceIdx_t device) {
     SET_IMPL_MACRO(FixedQ_Op, *this, name);
     mOutputs[0]->setBackend(name, device);
 }
+
+////////////////////////////////////////////////////////////////////////////////
+
+std::shared_ptr<Aidge::Node> Aidge::FixedQ(std::size_t nbBits,
+                                           float span,
+                                           bool isOutputUnsigned,
+                                           const std::string& name) {
+    return std::make_shared<Node>(std::make_shared<FixedQ_Op>(nbBits, span, isOutputUnsigned), name);
+}
\ No newline at end of file
diff --git a/src/operator/SAT/DoReFa.cpp b/src/operator/SAT/DoReFa.cpp
index b6124bad0e5f04c8e22e2d16c48dd4fe5de7945a..426e330e7f8426d256ca76a843548a91a62b036a 100644
--- a/src/operator/SAT/DoReFa.cpp
+++ b/src/operator/SAT/DoReFa.cpp
@@ -17,13 +17,38 @@
 #include "aidge/data/Tensor.hpp"
 #include "aidge/utils/Types.h"
 
-const std::string Aidge::DoReFa_Op::Type = "DoReFa";
+namespace Aidge {
 
-std::set<std::string> Aidge::DoReFa_Op::getAvailableBackends() const {
+const std::string DoReFa_Op::Type = "DoReFa";
+
+DoReFa_Op::DoReFa_Op(const DoReFa_Op& op)
+    : OperatorTensor(op),
+      mAttributes(op.mAttributes)
+{
+    if (op.mImpl) {
+        SET_IMPL_MACRO(DoReFa_Op, *this, op.backend());
+    } else {
+        mImpl = nullptr;
+    }
+}
+
+std::shared_ptr<Operator> DoReFa_Op::clone() const {
+    return std::make_shared<DoReFa_Op>(*this);
+}
+
+std::set<std::string> DoReFa_Op::getAvailableBackends() const {
     return Registrar<DoReFa_Op>::getKeys();
 }
 
-void Aidge::DoReFa_Op::setBackend(const std::string& name, DeviceIdx_t device) {
+void DoReFa_Op::setBackend(const std::string& name, DeviceIdx_t device) {
     SET_IMPL_MACRO(DoReFa_Op, *this, name);
     mOutputs[0]->setBackend(name, device);
-}
\ No newline at end of file
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+std::shared_ptr<Node> DoReFa(std::size_t range, DoReFaMode mode, const std::string& name) {
+    return std::make_shared<Node>(std::make_shared<DoReFa_Op>(range, mode), name);
+}
+
+} // namespace Aidge
\ No newline at end of file
diff --git a/src/operator/SAT/TanhClamp.cpp b/src/operator/SAT/TanhClamp.cpp
index 2b8d63d7136c45589cba92018d2ecafe17d54e4e..a03fc7d3c602c3ff86551da19defe083a5cc6e3a 100644
--- a/src/operator/SAT/TanhClamp.cpp
+++ b/src/operator/SAT/TanhClamp.cpp
@@ -20,6 +20,20 @@
 
 const std::string Aidge::TanhClamp_Op::Type = "TanhClamp";
 
+Aidge::TanhClamp_Op::TanhClamp_Op(const Aidge::TanhClamp_Op& op)
+    : OperatorTensor(op)
+{
+    if (op.mImpl) {
+        SET_IMPL_MACRO(TanhClamp_Op, *this, op.backend());
+    } else {
+        mImpl = nullptr;
+    }
+}
+
+std::shared_ptr<Aidge::Operator> Aidge::TanhClamp_Op::clone() const {
+    return std::make_shared<TanhClamp_Op>(*this);
+}
+
 bool Aidge::TanhClamp_Op::forwardDims(bool /*allowDataDependency*/) {
 
     if (inputsAssociated()) {
@@ -40,5 +54,11 @@ void Aidge::TanhClamp_Op::setBackend(const std::string& name, DeviceIdx_t device
     mOutputs[0]->setBackend(name, device);
 
     // Scale output is always on CPU for now
-    mOutputs[1]->setBackend("cpu"); // XXX why ? 
+    mOutputs[1]->setBackend("cpu"); // XXX why ?
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+std::shared_ptr<Aidge::Node> Aidge::TanhClamp(const std::string& name) {
+    return std::make_shared<Node>(std::make_shared<TanhClamp_Op>(), name);
 }
\ No newline at end of file
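Note: the TanhClamp factory mirrors the others; a minimal sketch (helper name mine):

```cpp
#include <memory>

#include "aidge/graph/Node.hpp"
#include "aidge/operator/SAT/TanhClamp.hpp"

// Hypothetical helper (illustration only): TanhClamp emits the clamped data
// plus a scale output (output #1), which setBackend() above pins to CPU.
std::shared_ptr<Aidge::Node> makeTanhClampNode() {
    return Aidge::TanhClamp("w_clamp");
}
```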