From 4ccc769df3dc3b7f1c825908e2b0ed470463ab0b Mon Sep 17 00:00:00 2001 From: thibault allenet <thibault.allenet@cea.fr> Date: Mon, 9 Dec 2024 16:40:56 +0000 Subject: [PATCH 01/21] Fix Optimized Signs --- src/PTQ/PTQ.cpp | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/PTQ/PTQ.cpp b/src/PTQ/PTQ.cpp index 76fe8f2..4f88aed 100644 --- a/src/PTQ/PTQ.cpp +++ b/src/PTQ/PTQ.cpp @@ -699,8 +699,12 @@ std::map<std::string, std::pair<bool, bool>> computeSignMap(std::shared_ptr<Grap { // Thoses nodes always have a single parent std::shared_ptr<Node> parent = node->getParent(0); - signMap[node->name()].first = signMap[parent->name()].second; - signMap[node->name()].second = signMap[node->name()].first; + if (parent) + { + signMap[node->name()].first = signMap[parent->name()].second; + signMap[node->name()].second = signMap[node->name()].first; + } + } } -- GitLab From f9ab3a932391bebc97ceda5f7d47ec6e841d9e4d Mon Sep 17 00:00:00 2001 From: cmoineau <cyril.moineau@cea.fr> Date: Wed, 11 Dec 2024 10:08:53 +0000 Subject: [PATCH 02/21] Update quantization with https://gitlab.eclipse.org/eclipse/aidge/aidge_core/-/merge_requests/277 --- .gitignore | 3 +- CMakeLists.txt | 26 +++++++++++++ include/aidge/quantization_version.h | 11 ++++++ .../sys_info/QuantizationVersionInfo.hpp | 38 +++++++++++++++++++ include/aidge/version.h.in | 11 ++++++ pyproject.toml | 18 +++++---- python_binding/pybind_Quantization.cpp | 6 ++- .../pybind_QuantizationVersionInfo.cpp | 11 ++++++ setup.cfg | 3 ++ 9 files changed, 115 insertions(+), 12 deletions(-) create mode 100644 include/aidge/quantization_version.h create mode 100644 include/aidge/utils/sys_info/QuantizationVersionInfo.hpp create mode 100644 include/aidge/version.h.in create mode 100644 python_binding/utils/sys_info/pybind_QuantizationVersionInfo.cpp create mode 100644 setup.cfg diff --git a/.gitignore b/.gitignore index 18f1583..ba5c593 100644 --- a/.gitignore +++ b/.gitignore @@ -4,17 +4,16 @@ # C++ Build build*/ 
install*/ +include/aidge/backend/quantization_version.h # VSCode .vscode # Python -aidge_quantization/_version.py *.so __pycache__ *.pyc *.egg-info -aidge_quantization/_version.py wheelhouse/* # Mermaid diff --git a/CMakeLists.txt b/CMakeLists.txt index 905a2a2..7a2b168 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -5,13 +5,39 @@ set(CXX_STANDARD 14) file(STRINGS "${CMAKE_SOURCE_DIR}/version.txt" version) file(STRINGS "${CMAKE_SOURCE_DIR}/project_name.txt" project) +# Parse version.txt to retrieve Major, Minor and Patch +string(REGEX MATCH "([0-9]+)\\.([0-9]+)\\.([0-9]+)" _ MATCHES ${version}) +set(PROJECT_VERSION_MAJOR ${CMAKE_MATCH_1}) +set(PROJECT_VERSION_MINOR ${CMAKE_MATCH_2}) +set(PROJECT_VERSION_PATCH ${CMAKE_MATCH_3}) + +# Retrieve latest git commit +execute_process( + COMMAND git rev-parse --short HEAD + WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} + OUTPUT_VARIABLE GIT_COMMIT_HASH + OUTPUT_STRIP_TRAILING_WHITESPACE + ERROR_QUIET +) + message(STATUS "Project name: ${project}") message(STATUS "Project version: ${version}") +message(STATUS "Latest git commit: ${GIT_COMMIT_HASH}") + +message(STATUS "Creating ${CMAKE_CURRENT_SOURCE_DIR}/include/aidge/quantization_version.h") project(${project} VERSION ${version} DESCRIPTION "Quantization methods for the Aidge framework." LANGUAGES CXX) +# Note: Using configure_file later in the code makes it so that version variables are lost... +# I tried to set in internal cache but it failed. +# Current code is working, but there might be a scope issue. 
+# Generate version.h file from config file version.h.in +configure_file( + "${CMAKE_CURRENT_SOURCE_DIR}/include/aidge/version.h.in" + "${CMAKE_CURRENT_SOURCE_DIR}/include/aidge/quantization_version.h" +) # Note : project name is {project} and python module name is also {project} set(module_name _${project}) # target name diff --git a/include/aidge/quantization_version.h b/include/aidge/quantization_version.h new file mode 100644 index 0000000..546263a --- /dev/null +++ b/include/aidge/quantization_version.h @@ -0,0 +1,11 @@ +#ifndef VERSION_H +#define VERSION_H + +namespace Aidge { +static constexpr const int PROJECT_VERSION_MAJOR = 0; +static constexpr const int PROJECT_VERSION_MINOR = 2; +static constexpr const int PROJECT_VERSION_PATCH = 0; +static constexpr const char * PROJECT_VERSION = "0.2.0"; +static constexpr const char * PROJECT_GIT_HASH = "f50c860"; +} +#endif // VERSION_H diff --git a/include/aidge/utils/sys_info/QuantizationVersionInfo.hpp b/include/aidge/utils/sys_info/QuantizationVersionInfo.hpp new file mode 100644 index 0000000..6b4deb8 --- /dev/null +++ b/include/aidge/utils/sys_info/QuantizationVersionInfo.hpp @@ -0,0 +1,38 @@ +#ifndef AIDGE_UTILS_SYS_INFO_OPENCV_VERSION_INFO_H +#define AIDGE_UTILS_SYS_INFO_OPENCV_VERSION_INFO_H + +#include "aidge/utils/Log.hpp" +#include "aidge/quantization_version.h" + +namespace Aidge { + +constexpr inline const char * getQuantizationProjectVersion(){ + return PROJECT_VERSION; +} + +constexpr inline const char * getQuantizationGitHash(){ + return PROJECT_GIT_HASH; +} + +void showQuantizationVersion() { + Log::info("Aidge quantization: {} ({}), {} {}", getQuantizationProjectVersion(), getQuantizationGitHash(), __DATE__, __TIME__); + // Compiler version + #if defined(__clang__) + /* Clang/LLVM. 
---------------------------------------------- */ + Log::info("Clang/LLVM compiler version: {}.{}.{}\n", __clang_major__ , __clang_minor__, __clang_patchlevel__); + #elif defined(__ICC) || defined(__INTEL_COMPILER) + /* Intel ICC/ICPC. ------------------------------------------ */ + Log::info("Intel ICC/ICPC compiler version: {}\n", __INTEL_COMPILER); + #elif defined(__GNUC__) || defined(__GNUG__) + /* GNU GCC/G++. --------------------------------------------- */ + Log::info("GNU GCC/G++ compiler version: {}.{}.{}", __GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__); + #elif defined(_MSC_VER) + /* Microsoft Visual Studio. --------------------------------- */ + Log::info("Microsoft Visual Studio compiler version: {}\n", _MSC_VER); + #else + Log::info("Unknown compiler\n"); + #endif + +} +} // namespace Aidge +#endif // AIDGE_UTILS_SYS_INFO_OPENCV_VERSION_INFO_H diff --git a/include/aidge/version.h.in b/include/aidge/version.h.in new file mode 100644 index 0000000..4b876f6 --- /dev/null +++ b/include/aidge/version.h.in @@ -0,0 +1,11 @@ +#ifndef VERSION_H +#define VERSION_H + +namespace Aidge { +static constexpr const int PROJECT_VERSION_MAJOR = @PROJECT_VERSION_MAJOR@; +static constexpr const int PROJECT_VERSION_MINOR = @PROJECT_VERSION_MINOR@; +static constexpr const int PROJECT_VERSION_PATCH = @PROJECT_VERSION_PATCH@; +static constexpr const char * PROJECT_VERSION = "@PROJECT_VERSION_MAJOR@.@PROJECT_VERSION_MINOR@.@PROJECT_VERSION_PATCH@"; +static constexpr const char * PROJECT_GIT_HASH = "@GIT_COMMIT_HASH@"; +} +#endif // VERSION_H diff --git a/pyproject.toml b/pyproject.toml index fc745eb..deb91c7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -11,18 +11,24 @@ classifiers = [ "Development Status :: 2 - Pre-Alpha", "Programming Language :: Python :: 3" ] -dynamic = ["version"] # defined in tool.setuptools_scm -# version="1" +dynamic = ["version"] # defined by pbr [build-system] requires = [ "setuptools>=64", - "setuptools_scm[toml]==7.1.0", 
"cmake>=3.15.3.post1", - "toml" + "toml", + "pbr" ] build-backend = "setuptools.build_meta" +[project.urls] +Homepage = "https://www.deepgreen.ai/en/platform" +Documentation = "https://eclipse-aidge.readthedocs.io/en/latest/" +Repository = "https://gitlab.eclipse.org/eclipse/aidge/aidge_quantization" +Issues = "https://gitlab.eclipse.org/eclipse/aidge/aidge_quantization/-/issues/" +Changelog = "https://gitlab.eclipse.org/eclipse/aidge/aidge_quantization/-/releases" + ##################################################### # SETUPTOOLS [tool.setuptools] @@ -35,10 +41,6 @@ exclude = [ "aidge_quantization.unit_tests.assets" ] # exclude packages matching these glob patterns (empty by default) -# SETUPTOOLS_SCM -[tool.setuptools_scm] -write_to = "aidge_quantization/_version.py" - ##################################################### # CIBUILDWHEEL [tool.cibuildwheel] diff --git a/python_binding/pybind_Quantization.cpp b/python_binding/pybind_Quantization.cpp index cd18cf8..7ac344d 100644 --- a/python_binding/pybind_Quantization.cpp +++ b/python_binding/pybind_Quantization.cpp @@ -20,7 +20,7 @@ namespace py = pybind11; -namespace Aidge +namespace Aidge { // operators @@ -35,8 +35,9 @@ void init_QAT_FixedQ(py::module &m); void init_QAT_LSQ(py::module &m); void init_QuantRecipes(py::module &m); +void init_QuantizationVersionInfo(py::module &m); -PYBIND11_MODULE(aidge_quantization, m) +PYBIND11_MODULE(aidge_quantization, m) { init_FixedQ(m); init_LSQ(m); @@ -47,6 +48,7 @@ PYBIND11_MODULE(aidge_quantization, m) init_QAT_FixedQ(m); init_QAT_LSQ(m); init_QuantRecipes(m); + init_QuantizationVersionInfo(m); } } // namespace Aidge diff --git a/python_binding/utils/sys_info/pybind_QuantizationVersionInfo.cpp b/python_binding/utils/sys_info/pybind_QuantizationVersionInfo.cpp new file mode 100644 index 0000000..abed12b --- /dev/null +++ b/python_binding/utils/sys_info/pybind_QuantizationVersionInfo.cpp @@ -0,0 +1,11 @@ +#include <pybind11/pybind11.h> +#include 
"aidge/utils/sys_info/QuantizationVersionInfo.hpp" + +namespace py = pybind11; +namespace Aidge { +void init_QuantizationVersionInfo(py::module& m){ + m.def("show_version", &showQuantizationVersion); + m.def("get_project_version", &getQuantizationProjectVersion); + m.def("get_git_hash", &getQuantizationGitHash); +} +} diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 0000000..aa0f227 --- /dev/null +++ b/setup.cfg @@ -0,0 +1,3 @@ +# pbr file +[metadata] +version = file: version.txt -- GitLab From 8936e1650e4b853ed0ec9c767b68c3d2b7ca2c0f Mon Sep 17 00:00:00 2001 From: bhalimi <benjamin.halimi@cea.fr> Date: Tue, 17 Dec 2024 15:40:19 +0000 Subject: [PATCH 03/21] fix the PTQ for float64 support and multi-outputs handling --- include/aidge/quantization/PTQ/Clipping.hpp | 8 +- include/aidge/quantization/PTQ/PTQ.hpp | 8 +- include/aidge/quantization/PTQ/PTQMetaOps.hpp | 10 +- src/PTQ/Clipping.cpp | 69 +++--- src/PTQ/PTQ.cpp | 202 +++++++++++------- src/PTQ/PTQMetaOps.cpp | 37 ++-- 6 files changed, 198 insertions(+), 136 deletions(-) diff --git a/include/aidge/quantization/PTQ/Clipping.hpp b/include/aidge/quantization/PTQ/Clipping.hpp index 08a0b0a..d0622f4 100644 --- a/include/aidge/quantization/PTQ/Clipping.hpp +++ b/include/aidge/quantization/PTQ/Clipping.hpp @@ -36,7 +36,7 @@ namespace Aidge * @param inputDataSet The input dataset, consisting of a vector of input samples. * @return A map associating each node name to it's corresponding activation histogram. 
*/ - std::map<std::string, std::vector<int>> computeHistograms(std::map<std::string, float> valueRanges, int nbBins, std::shared_ptr<GraphView> graphView, std::vector<std::shared_ptr<Tensor>> inputDataSet, bool useCuda); + std::map<std::string, std::vector<int>> computeHistograms(std::map<std::string, double> valueRanges, int nbBins, std::shared_ptr<GraphView> graphView, std::vector<std::shared_ptr<Tensor>> inputDataSet, bool useCuda); /** * @brief Given an input activation histogram, compute the optimal clipping value in the sense of the Lp norm. @@ -45,7 +45,7 @@ namespace Aidge * @param exponent: The exponent of the Lp norm (e.g. 2 for the MSE). * @return The optimal clipping value. */ - float computeMEClipping(std::vector<int> histogram, std::uint8_t nbBits, float exponent); + double computeMEClipping(std::vector<int> histogram, std::uint8_t nbBits, double exponent); /** * @brief Given an input activation histogram, compute the optimal clipping value in the sense of the KL divergence. @@ -53,7 +53,7 @@ namespace Aidge * @param nbBits: The quantization number of bits. * @return The optimal clipping value. */ - float computeKLClipping(std::vector<int> histogram, std::uint8_t nbBits); + double computeKLClipping(std::vector<int> histogram, std::uint8_t nbBits); /** * @brief Return a corrected map of the provided activation ranges. @@ -67,7 +67,7 @@ namespace Aidge * @param verbose Whether to print the clipping values or not. * @return The corrected map associating each provided node to its clipped range. 
*/ - std::map<std::string, float> adjustRanges(Clipping clippingMode, std::map<std::string, float> valueRanges, std::uint8_t nbBits, std::shared_ptr<GraphView> graphView, std::vector<std::shared_ptr<Tensor>> inputDataSet, bool useCuda, bool verbose); + std::map<std::string, double> adjustRanges(Clipping clippingMode, std::map<std::string, double> valueRanges, std::uint8_t nbBits, std::shared_ptr<GraphView> graphView, std::vector<std::shared_ptr<Tensor>> inputDataSet, bool useCuda, bool verbose); } diff --git a/include/aidge/quantization/PTQ/PTQ.hpp b/include/aidge/quantization/PTQ/PTQ.hpp index 52d83d6..d2b8b7f 100644 --- a/include/aidge/quantization/PTQ/PTQ.hpp +++ b/include/aidge/quantization/PTQ/PTQ.hpp @@ -31,7 +31,7 @@ namespace Aidge { /** * @brief Set of the types of the nodes which does not affect the PTQ process */ - static const std::set<std::string> seamlessNodeTypes({"LeakyReLU", "Pad2D", "MaxPooling2D", "AvgPooling2D", "PaddedMaxPooling2D", "PaddedAvgPooling2D", "GlobalAveragePooling", "Reshape", "Transpose", "Gather"}); + static const std::set<std::string> seamlessNodeTypes({"LeakyReLU", "Pad2D", "MaxPooling2D", "AvgPooling2D", "PaddedMaxPooling2D", "PaddedAvgPooling2D", "GlobalAveragePooling", "Reshape", "Transpose", "Gather", "Resize"}); /** * @brief Set of the types of the nodes that merge multiple branches into one @@ -98,7 +98,7 @@ namespace Aidge { * @param scalingNodesOnly Whether to restrain the retreival of the ranges to scaling nodes only or not. * @return A map associating each affine node name to it's corresponding output range. 
*/ - std::map<std::string, float> computeRanges(std::shared_ptr<GraphView> graphView, std::vector<std::shared_ptr<Tensor>> inputDataSet, bool scalingNodesOnly, bool useCuda); + std::map<std::string, double> computeRanges(std::shared_ptr<GraphView> graphView, std::vector<std::shared_ptr<Tensor>> inputDataSet, bool scalingNodesOnly, bool useCuda); /** * @brief Normalize the activations of each affine node so that they fit in the [-1:1] range. @@ -106,7 +106,7 @@ namespace Aidge { * @param graphView The GraphView containing the affine nodes. * @param valueRanges The node output value ranges computed over the calibration dataset. */ - void normalizeActivations(std::shared_ptr<GraphView> graphView, std::map<std::string, float> valueRanges); + void normalizeActivations(std::shared_ptr<GraphView> graphView, std::map<std::string, double> valueRanges); /** * @brief For each node, compute the sign of its input and output values. @@ -145,7 +145,7 @@ namespace Aidge { * @param graphView The GraphView containing the affine nodes. * @return A map associating each affine node name to it's corresponding weight range. */ - std::map<std::string, float> getWeightRanges(std::shared_ptr<GraphView> graphView); + std::map<std::string, double> getWeightRanges(std::shared_ptr<GraphView> graphView); /** * @brief Clear the affine nodes biases. Provided form debugging purposes. diff --git a/include/aidge/quantization/PTQ/PTQMetaOps.hpp b/include/aidge/quantization/PTQ/PTQMetaOps.hpp index c4f2ac7..29bb7f2 100644 --- a/include/aidge/quantization/PTQ/PTQMetaOps.hpp +++ b/include/aidge/quantization/PTQ/PTQMetaOps.hpp @@ -33,7 +33,7 @@ /// @param clip_max The maximum value for the clip operation. /// @param name The name of the meta-operator node created. /// @return A shared pointer to an instance of the meta-operator node. 
-std::shared_ptr<Aidge::Node> Quantizer(float scalingFactor, float clip_min,float clip_max,const std::string& name); +std::shared_ptr<Aidge::Node> Quantizer(double scalingFactor, double clip_min,double clip_max,const std::string& name); /// @brief The purpose of Scaling is to encapsulate the Mul operator and tag it as a PTQ node rather than a regular Mul operator. /// Therefore, this meta-operator consists solely of a [Mul] operation. @@ -41,7 +41,7 @@ std::shared_ptr<Aidge::Node> Quantizer(float scalingFactor, float clip_min,float /// @param scalingFactor The scaling factor to apply to the input (a scalar to multiply the input with). /// @param name The name of the meta-operator node created. /// @return A shared pointer to an instance of the scaling node. -std::shared_ptr<Aidge::Node> Scaling(float scalingFactor,const std::string& name = ""); +std::shared_ptr<Aidge::Node> Scaling(double scalingFactor,const std::string& name = ""); /// @brief Updates the scaling factor of a PTQ meta-operator node, allowing for dynamic adjustment of the scaling parameter. /// This function sets a new scaling factor for a specified meta-operator node, modifying the scalar applied in the [Mul] operation. @@ -50,7 +50,7 @@ std::shared_ptr<Aidge::Node> Scaling(float scalingFactor,const std::string& name /// @param MetaOpNode A shared pointer to the PTQ meta-operator node whose scaling factor will be updated. /// @param newScalingFactor The new scaling factor to apply to the meta-operator node. /// @return True if the scaling factor was successfully updated, false if the operation failed (e.g., if MetaOpNode is null or incompatible). -bool updateScalingFactor(std::shared_ptr<Aidge::Node> MetaOpNode, float newScalingFactor); +bool updateScalingFactor(std::shared_ptr<Aidge::Node> MetaOpNode, double newScalingFactor); /// @brief Retrieves the current scaling factor of a PTQ meta-operator node. 
/// This function returns the scaling factor associated with the specified PTQ meta-operator node, @@ -58,7 +58,7 @@ bool updateScalingFactor(std::shared_ptr<Aidge::Node> MetaOpNode, float newScali /// /// @param MetaOpNode A shared pointer to the PTQ meta-operator node whose scaling factor is being queried. /// @return The scaling factor currently applied to the meta-operator node, or -1 if the operation fails (e.g., if MetaOpNode is null or incompatible). -float getScalingFactor(std::shared_ptr<Aidge::Node> MetaOpNode); +double getScalingFactor(std::shared_ptr<Aidge::Node> MetaOpNode); /// @brief Sets the clip range for an existing Quantizer node by specifying minimum and maximum clipping values. /// This function modifies the clip range of a Quantizer node, allowing adjustment of the range within which values are clipped @@ -69,6 +69,6 @@ float getScalingFactor(std::shared_ptr<Aidge::Node> MetaOpNode); /// @param min The minimum value for the clip range. Values below this will be clipped to this minimum. /// @param max The maximum value for the clip range. Values above this will be clipped to this maximum. /// @return True if the clip range was successfully set, false if the operation failed (e.g., if QuantizerNode is null). 
-bool setClipRange(std::shared_ptr<Aidge::Node> QuantizerNode, float min, float max); +bool setClipRange(std::shared_ptr<Aidge::Node> QuantizerNode, double min, double max); #endif /* AIDGE_QUANTIZATION_PTQ_PTQMETAOPS_H_ */ diff --git a/src/PTQ/Clipping.cpp b/src/PTQ/Clipping.cpp index e001408..f8765f3 100644 --- a/src/PTQ/Clipping.cpp +++ b/src/PTQ/Clipping.cpp @@ -19,7 +19,7 @@ namespace Aidge { -std::map<std::string, std::vector<int>> computeHistograms(std::map<std::string, float> valueRanges, int nbBins, std::shared_ptr<GraphView> graphView, std::vector<std::shared_ptr<Tensor>> inputDataSet, bool useCuda) +std::map<std::string, std::vector<int>> computeHistograms(std::map<std::string, double> valueRanges, int nbBins, std::shared_ptr<GraphView> graphView, std::vector<std::shared_ptr<Tensor>> inputDataSet, bool useCuda) { if (useCuda) graphView->setBackend("cuda"); @@ -72,7 +72,7 @@ std::map<std::string, std::vector<int>> computeHistograms(std::map<std::string, bool isInsideRanges = (valueRanges.find(node->name()) != valueRanges.end()); if (isInsideRanges) { - float valueRange = valueRanges[node->name()]; + double valueRange = valueRanges[node->name()]; std::shared_ptr<Operator> nodeOperator = node->getOperator(); std::shared_ptr<Tensor> valueTensor = std::static_pointer_cast<Tensor> (nodeOperator->getRawOutput(0)); @@ -80,15 +80,17 @@ std::map<std::string, std::vector<int>> computeHistograms(std::map<std::string, if (useCuda) valueTensor->setBackend("cpu"); - float * castedTensor = static_cast<float *> (valueTensor->getImpl()->rawPtr()); + double * castedTensor = static_cast<double *> (valueTensor->getImpl()->rawPtr()); std::vector<int> nodeHistogram = histograms[node->name()]; for(std::size_t i = 0; i < valueTensor->size(); i++) { - int bin = std::round(std::abs(castedTensor[i] / valueRange * nbBins)); + std::size_t bin = std::round(std::abs(castedTensor[i] / valueRange * nbBins)); + bin = std::min(bin, nodeHistogram.size() - 1); nodeHistogram[bin]++; } - 
histograms[node->name()] = nodeHistogram; + + histograms[node->name()] = nodeHistogram; if (useCuda) valueTensor->setBackend("cuda"); @@ -105,52 +107,52 @@ std::map<std::string, std::vector<int>> computeHistograms(std::map<std::string, return histograms; } -float computeMEClipping(std::vector<int> histogram, std::uint8_t nbBits, float exponent) +double computeMEClipping(std::vector<int> histogram, std::uint8_t nbBits, double exponent) { int nbBins = histogram.size(); int nbIter = 100; int signedMax = (1 << (nbBits - 1)) - 1; - std::vector<float> clippingErrors; + std::vector<double> clippingErrors; for (int it = 1; it < nbIter; it++) { // Compute the rounding cost of this particular clipping ... - float accumulatedError = 0.0; - float clipping = it / static_cast<float> (nbIter); + double accumulatedError = 0.0; + double clipping = it / static_cast<double> (nbIter); for (int bin = 0; bin < nbBins; bin++) { - float value = (bin + 0.5) / nbBins; - float scaling = signedMax / clipping; - float rounded = std::round(value * scaling) / scaling; - float clipped = std::min(clipping, rounded); + double value = (bin + 0.5) / nbBins; + double scaling = signedMax / clipping; + double rounded = std::round(value * scaling) / scaling; + double clipped = std::min(clipping, rounded); - float approxError = std::abs(clipped - value); + double approxError = std::abs(clipped - value); accumulatedError += std::pow(approxError, exponent) * histogram[bin]; } clippingErrors.push_back(accumulatedError); } - std::vector<float>::iterator it = std::min_element(clippingErrors.begin(), clippingErrors.end()); - float bestClipping = static_cast<float> (std::distance(clippingErrors.begin(), it)) / static_cast<float> (nbIter); + std::vector<double>::iterator it = std::min_element(clippingErrors.begin(), clippingErrors.end()); + double bestClipping = static_cast<double> (std::distance(clippingErrors.begin(), it)) / static_cast<double> (nbIter); return bestClipping; } -float 
computeKLClipping(std::vector<int> refHistogram, std::uint8_t nbBits) +double computeKLClipping(std::vector<int> refHistogram, std::uint8_t nbBits) { // KL Clipping int nbIter = 100; int signedMax = (1 << (nbBits - 1)) - 1; - float refNorm = 0; + double refNorm = 0; for (int n : refHistogram) - refNorm += static_cast<float> (n); + refNorm += static_cast<double> (n); - std::vector<float> clippingErrors; + std::vector<double> clippingErrors; for (int it = 1; it < nbIter; it++) { - float clipping = it / static_cast<float> (nbIter); + double clipping = it / static_cast<double> (nbIter); // Create the histogram for this particular clipping ... @@ -160,7 +162,7 @@ float computeKLClipping(std::vector<int> refHistogram, std::uint8_t nbBits) for (std::size_t refBin = 0; refBin < refHistogram.size(); refBin++) { - float value = (static_cast<float> (refBin) + 0.5f) / static_cast<float> (refHistogram.size()); + double value = (static_cast<double> (refBin) + 0.5f) / static_cast<double> (refHistogram.size()); int quantBin = std::floor(value / clipping * signedMax); quantBin = std::min(quantBin, signedMax-1); quantHistogram[quantBin] += refHistogram[refBin]; @@ -168,10 +170,10 @@ float computeKLClipping(std::vector<int> refHistogram, std::uint8_t nbBits) // Compute the mass of the histogram - float quantNorm = 0; + double quantNorm = 0; for (std::size_t refBin = 0; refBin < refHistogram.size(); refBin++) { - float value = (static_cast<float> (refBin) + 0.5f) / static_cast<float> (refHistogram.size()); + double value = (static_cast<double> (refBin) + 0.5f) / static_cast<double> (refHistogram.size()); int quantBin = std::floor(value / clipping * signedMax); if (quantBin < static_cast<int> (quantHistogram.size())) quantNorm += quantHistogram[quantBin]; @@ -179,15 +181,15 @@ float computeKLClipping(std::vector<int> refHistogram, std::uint8_t nbBits) // Compute the KL divergence - float accumulatedError = 0.0; + double accumulatedError = 0.0; for (std::size_t refBin = 0; refBin < 
refHistogram.size(); refBin++) { - float value = (static_cast<float> (refBin) + 0.5f) / static_cast<float> (refHistogram.size()); + double value = (static_cast<double> (refBin) + 0.5f) / static_cast<double> (refHistogram.size()); int quantBin = std::floor(value / clipping * signedMax); - float p = static_cast<float> (refHistogram[refBin]) / refNorm; - float q = (quantBin < static_cast<int> (quantHistogram.size())) ? - static_cast<float> (quantHistogram[quantBin]) / quantNorm : 0; + double p = static_cast<double> (refHistogram[refBin]) / refNorm; + double q = (quantBin < static_cast<int> (quantHistogram.size())) ? + static_cast<double> (quantHistogram[quantBin]) / quantNorm : 0; if (p != 0 && q != 0) accumulatedError += q * std::log(q / p); @@ -196,16 +198,16 @@ float computeKLClipping(std::vector<int> refHistogram, std::uint8_t nbBits) clippingErrors.push_back(accumulatedError); } - std::vector<float>::iterator it = std::min_element(clippingErrors.begin() + 1, clippingErrors.end()); - float bestClipping = static_cast<float> (std::distance(clippingErrors.begin(), it)) / static_cast<float> (nbIter); + std::vector<double>::iterator it = std::min_element(clippingErrors.begin() + 1, clippingErrors.end()); + double bestClipping = static_cast<double> (std::distance(clippingErrors.begin(), it)) / static_cast<double> (nbIter); return bestClipping; } -std::map<std::string, float> adjustRanges(Clipping clippingMode, std::map<std::string, float> valueRanges, std::uint8_t nbBits, std::shared_ptr<GraphView> graphView, std::vector<std::shared_ptr<Tensor>> inputDataSet, bool useCuda, bool verbose) +std::map<std::string, double> adjustRanges(Clipping clippingMode, std::map<std::string, double> valueRanges, std::uint8_t nbBits, std::shared_ptr<GraphView> graphView, std::vector<std::shared_ptr<Tensor>> inputDataSet, bool useCuda, bool verbose) { - float clipping = 1.0f; + double clipping = 1.0f; int nbBins = (1 << (nbBits + 4)) ; // XXX Enhance this !!! 
@@ -213,6 +215,7 @@ std::map<std::string, float> adjustRanges(Clipping clippingMode, std::map<std::s { if (verbose) Log::info(" === CLIPPING VALUES === "); + std::map<std::string, std::vector<int>> histograms = computeHistograms(valueRanges, nbBins, graphView, inputDataSet, useCuda); for (std::shared_ptr<Node> node : graphView->getNodes()) diff --git a/src/PTQ/PTQ.cpp b/src/PTQ/PTQ.cpp index 4f88aed..c3e02dc 100644 --- a/src/PTQ/PTQ.cpp +++ b/src/PTQ/PTQ.cpp @@ -66,20 +66,20 @@ bool checkArchitecture(std::shared_ptr<GraphView> graphView) return true; } -static void fillTensor(std::shared_ptr<Tensor> tensor, float value) +static void fillTensor(std::shared_ptr<Tensor> tensor, double value) { // Get the tensor data pointer - float * castedTensor = static_cast <float *> (tensor->getImpl()->rawPtr()); + double * castedTensor = static_cast <double *> (tensor->getImpl()->rawPtr()); // Fill the tensor for(std::size_t i = 0; i < tensor->size(); i++) castedTensor[i] = value; } -static void rescaleTensor(std::shared_ptr<Tensor> tensor, float scaling) +static void rescaleTensor(std::shared_ptr<Tensor> tensor, double scaling) { // Get the tensor data pointer - float * castedTensor = static_cast <float *> (tensor->getImpl()->rawPtr()); + double * castedTensor = static_cast <double *> (tensor->getImpl()->rawPtr()); // Rescale the tensor for(std::size_t i = 0; i < tensor->size(); i++) @@ -89,20 +89,20 @@ static void rescaleTensor(std::shared_ptr<Tensor> tensor, float scaling) static void roundTensor(std::shared_ptr<Tensor> tensor) { // Get the tensor data pointer - float * castedTensor = static_cast <float *> (tensor->getImpl()->rawPtr()); + double * castedTensor = static_cast <double *> (tensor->getImpl()->rawPtr()); // Rescale the tensor for(std::size_t i = 0; i < tensor->size(); i++) castedTensor[i] = std::nearbyint(castedTensor[i]);//Round } -static float getTensorAbsoluteMax(std::shared_ptr <Tensor> tensor) +static double getTensorAbsoluteMax(std::shared_ptr <Tensor> tensor) 
{ // Get the tensor data pointer and edit it - float * castedTensor = static_cast<float*>(tensor->getImpl()->rawPtr()); + double * castedTensor = static_cast<double*>(tensor->getImpl()->rawPtr()); // Get the tensor absolute max value - float maxValue = 0.0f; + double maxValue = 0.0f; for(std::size_t i = 0; i < tensor->size(); ++i) { if(std::fabs(castedTensor[i]) > maxValue) { maxValue = std::fabs(castedTensor[i]); @@ -187,6 +187,7 @@ void prepareNetwork(std::shared_ptr<GraphView> graphView) bool containsBatchNorm = false; std::vector<std::shared_ptr<Node>> nodeVector = retrieveNodeVector(graphView); + for (std::shared_ptr<Node> node : nodeVector) if (node->type() == "BatchNorm") { @@ -200,6 +201,12 @@ void prepareNetwork(std::shared_ptr<GraphView> graphView) popSoftMax(graphView); } +// TODO : enhance this by modifying OperatorImpl in "core" ... +static DataType getDataType(std::shared_ptr<Node> node) +{ + auto op = std::static_pointer_cast<OperatorTensor>(node->getOperator()); + return op->getOutput(0)->dataType(); +} // XXX HERE : Branches containing only Seamless nodes should be considered as residual too !!! void insertResidualNodes(std::shared_ptr<GraphView> graphView) @@ -217,6 +224,7 @@ void insertResidualNodes(std::shared_ptr<GraphView> graphView) { std::shared_ptr<Node> parentNode = node->getParent(i); bool parentIsForking = (parentNode->getChildren().size() > 1); + if (parentIsForking) { // temporary verbose ... 
@@ -224,8 +232,9 @@ void insertResidualNodes(std::shared_ptr<GraphView> graphView) Log::info(" ### inserting multiplicative node ..."); std::string residualNodeName = makeUniqueName(parentNode->name() + "_Res", graphView); - std::shared_ptr<Node> residualNode = Scaling(1.0,residualNodeName); - residualNode->getOperator()->setDataType(DataType::Float32); + std::shared_ptr<Node> residualNode = Scaling(1.0, residualNodeName); + + residualNode->getOperator()->setDataType(DataType::Float64); //getDataType(parentNode) residualNode->getOperator()->setBackend("cpu"); graphView->insertParent(node, residualNode, i, 0, 0); @@ -255,7 +264,8 @@ void insertScalingNodes(std::shared_ptr<GraphView> graphView) { std::string scalingNodeName = makeUniqueName(parentNode->name() + "_Scaling", graphView); std::shared_ptr<Node> scalingNode = Scaling(1.0, scalingNodeName); - scalingNode->getOperator()->setDataType(DataType::Float32); + + scalingNode->getOperator()->setDataType(DataType::Float64); // getDataType(parentNode) scalingNode->getOperator()->setBackend("cpu"); if (parentNode->getChildren().size() > 0) @@ -283,7 +293,8 @@ void insertScalingNodes(std::shared_ptr<GraphView> graphView) else { // Log::info(" last node reached ! 
"); - graphView->addChild(scalingNode); + parentNode->addChild(scalingNode, 0, 0); + graphView->add(scalingNode); } } } @@ -322,7 +333,7 @@ void normalizeParameters(std::shared_ptr<GraphView> graphView) std::vector<std::shared_ptr<Node>> nodeVector = retrieveNodeVector(graphView); - std::map<std::string, float> accumulatedRatios; + std::map<std::string, double> accumulatedRatios; for (std::shared_ptr<Node> node : nodeVector) { accumulatedRatios.insert(std::make_pair(node->name(), 1.0)); @@ -349,8 +360,8 @@ void normalizeParameters(std::shared_ptr<GraphView> graphView) { // Rescale the weight tensor std::shared_ptr<Tensor> weightTensor = getWeightTensor(node); - float scaling = getTensorAbsoluteMax(weightTensor); - float ratio = 1.0 / scaling; + double scaling = getTensorAbsoluteMax(weightTensor); + double ratio = 1.0 / scaling; rescaleTensor(weightTensor, ratio); // Accumulate the ratio @@ -378,10 +389,10 @@ void normalizeParameters(std::shared_ptr<GraphView> graphView) std::vector<std::shared_ptr<Node>> mergingNodes = node->getParents(); // Compute the max ratio ... 
- float maxRatio = 0; + double maxRatio = 0; for (std::shared_ptr<Node> mergingNode : mergingNodes) { - float merginNodeRatio = accumulatedRatios[mergingNode->name()]; + double merginNodeRatio = accumulatedRatios[mergingNode->name()]; if (merginNodeRatio > maxRatio) maxRatio = merginNodeRatio; } @@ -391,12 +402,12 @@ void normalizeParameters(std::shared_ptr<GraphView> graphView) // Rescale the previous scaling Nodes for (std::shared_ptr<Node> mergingNode : mergingNodes) { - float mergingNodeRatio = accumulatedRatios[mergingNode->name()]; - float rescaling = mergingNodeRatio / maxRatio; + double mergingNodeRatio = accumulatedRatios[mergingNode->name()]; + double rescaling = mergingNodeRatio / maxRatio; std::shared_ptr<Node> scalingNode = getPreviousScalingNode(mergingNode); - float scaling_factor = getScalingFactor(scalingNode); + double scaling_factor = getScalingFactor(scalingNode); updateScalingFactor(scalingNode,scaling_factor / rescaling); accumulatedRatios[mergingNode->name()] /= rescaling; // optional ... } @@ -405,9 +416,9 @@ void normalizeParameters(std::shared_ptr<GraphView> graphView) } // XXX TODO : take care of the CUDA backend for this too !!! 
-std::map<std::string, float> computeRanges(std::shared_ptr<GraphView> graphView, std::shared_ptr<Tensor> inputTensor, bool scalingNodesOnly) +std::map<std::string, double> computeRanges(std::shared_ptr<GraphView> graphView, std::shared_ptr<Tensor> inputTensor, bool scalingNodesOnly) { - std::map<std::string, float> valueRanges; + std::map<std::string, double> valueRanges; SequentialScheduler scheduler(graphView); scheduler.resetScheduling(); @@ -425,7 +436,7 @@ std::map<std::string, float> computeRanges(std::shared_ptr<GraphView> graphView, { std::shared_ptr<Operator> nodeOperator = node->getOperator(); std::shared_ptr<Tensor> valueTensor = std::static_pointer_cast<Tensor> (nodeOperator->getRawOutput(0)); - float range = getTensorAbsoluteMax(valueTensor); + double range = getTensorAbsoluteMax(valueTensor); // Associate the value to the scaling node ... valueRanges.insert(std::make_pair(node->name(), range)); @@ -435,9 +446,9 @@ std::map<std::string, float> computeRanges(std::shared_ptr<GraphView> graphView, return valueRanges; } -std::map<std::string, float> computeRanges(std::shared_ptr<GraphView> graphView, std::vector<std::shared_ptr<Tensor>> inputDataSet, bool scalingNodesOnly, bool useCuda) +std::map<std::string, double> computeRanges(std::shared_ptr<GraphView> graphView, std::vector<std::shared_ptr<Tensor>> inputDataSet, bool scalingNodesOnly, bool useCuda) { - std::map<std::string, float> valueRanges; + std::map<std::string, double> valueRanges; std::set<std::shared_ptr<Node>> nodeSet = graphView->getNodes(); // std::shared_ptr<Node> inputNode = getFirstNode(graphView); @@ -467,7 +478,7 @@ std::map<std::string, float> computeRanges(std::shared_ptr<GraphView> graphView, // Gather the sample ranges ... 
- std::map<std::string, float> sampleRanges; + std::map<std::string, double> sampleRanges; for (std::shared_ptr<Node> node : nodeSet) { if ((scalingNodesOnly && (node->type() == "Scaling")) || (!scalingNodesOnly && (node->type() != "Producer"))) @@ -478,7 +489,7 @@ std::map<std::string, float> computeRanges(std::shared_ptr<GraphView> graphView, if (useCuda) valueTensor->setBackend("cpu"); - float range = getTensorAbsoluteMax(valueTensor); + double range = getTensorAbsoluteMax(valueTensor); // Associate the value to the scaling node ... sampleRanges.insert(std::make_pair(node->name(), range)); @@ -510,7 +521,7 @@ std::map<std::string, float> computeRanges(std::shared_ptr<GraphView> graphView, return valueRanges; } -void normalizeActivations(std::shared_ptr<GraphView> graphView, std::map<std::string, float> valueRanges) +void normalizeActivations(std::shared_ptr<GraphView> graphView, std::map<std::string, double> valueRanges) { std::shared_ptr<Node> firstNode = getFirstNode(graphView); @@ -518,7 +529,7 @@ void normalizeActivations(std::shared_ptr<GraphView> graphView, std::map<std::st std::vector<std::shared_ptr<Node>> nodeVector = retrieveNodeVector(graphView); - std::map<std::string, float> scalingFactors; + std::map<std::string, double> scalingFactors; for (std::shared_ptr<Node> node : nodeVector) scalingFactors.insert(std::make_pair(node->name(), 1.0)); @@ -549,12 +560,12 @@ void normalizeActivations(std::shared_ptr<GraphView> graphView, std::map<std::st { // retrieve the previous scaling factor ... std::shared_ptr<Node> prevNode = node->getParent(0); - float prevScalingFactor = scalingFactors[prevNode->name()]; + double prevScalingFactor = scalingFactors[prevNode->name()]; // ValueRanges must contains all the scaling nodes !!! 
- float scalingFactor = valueRanges[node->name()]; + double scalingFactor = valueRanges[node->name()]; - float scaling_factor = getScalingFactor(node); + double scaling_factor = getScalingFactor(node); updateScalingFactor(node, (scaling_factor) / (scalingFactor / prevScalingFactor)); scalingFactors[node->name()] = scalingFactor; @@ -579,10 +590,10 @@ void normalizeActivations(std::shared_ptr<GraphView> graphView, std::map<std::st std::vector<std::shared_ptr<Node>> mergingNodes = node->getParents(); // Compute the max scaling ... - float maxScaling = 0; + double maxScaling = 0; for (std::size_t i = 0; i < mergingNodes.size(); i++) { - float merginNodeScaling = scalingFactors[mergingNodes[i]->name()]; + double merginNodeScaling = scalingFactors[mergingNodes[i]->name()]; if (merginNodeScaling > maxScaling) { maxScaling = merginNodeScaling; } @@ -592,12 +603,12 @@ void normalizeActivations(std::shared_ptr<GraphView> graphView, std::map<std::st for (std::shared_ptr<Node> mergingNode : mergingNodes) { - float mergingNodeScaling = scalingFactors[mergingNode->name()]; - float rescaling = mergingNodeScaling / maxScaling; + double mergingNodeScaling = scalingFactors[mergingNode->name()]; + double rescaling = mergingNodeScaling / maxScaling; std::shared_ptr<Node> scalingNode = getPreviousScalingNode(mergingNode); //Log::info(" SCALING NODE : {} {}", scalingNode->type(), scalingNode->name()); - float scaling_factor = getScalingFactor(scalingNode); + double scaling_factor = getScalingFactor(scalingNode); updateScalingFactor(scalingNode, scaling_factor * rescaling); } } @@ -739,8 +750,8 @@ void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_ AIDGE_THROW_OR_ABORT(std::runtime_error,"Signs optimization can not be applied if network is not fully quantized ..."); } - float signedMax = (1 << (nbBits - 1)) - 1; - float unsignedMax = (1 << nbBits) - 1; + double signedMax = (1 << (nbBits - 1)) - 1; + double unsignedMax = (1 << nbBits) - 1; 
std::map<std::string, std::pair<bool, bool>> signMap; @@ -775,7 +786,7 @@ void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_ if (nodeHasBias(node)) { bool inputIsUnsigned = signMap[node->name()].first; - float rescaling = inputIsUnsigned ? unsignedMax * signedMax : signedMax * signedMax; + double rescaling = inputIsUnsigned ? unsignedMax * signedMax : signedMax * signedMax; std::shared_ptr<Tensor> biasTensor = getBiasTensor(node); @@ -787,7 +798,7 @@ void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_ // Compensate the rescaling using the next Scaling node - float rescaling = 1.0 / signedMax; + double rescaling = 1.0 / signedMax; bool inputIsUnsigned = signMap[node->name()].first; bool outputIsUnsigned = signMap[node->name()].second; @@ -796,13 +807,13 @@ void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_ rescaling *= outputIsUnsigned ? unsignedMax : signedMax; std::shared_ptr<Node> scalingNode = *(node->getChildren().begin()); // Assert if scalingNode is a Scaling ... - float scaling_factor = getScalingFactor(scalingNode); + double scaling_factor = getScalingFactor(scalingNode); updateScalingFactor(scalingNode, scaling_factor * rescaling); } if (isMerging(node)) { - float rescaling = 1.0; + double rescaling = 1.0; bool inputIsUnsigned = signMap[node->name()].first; bool outputIsUnsigned = signMap[node->name()].second; @@ -812,9 +823,8 @@ void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_ std::shared_ptr<Node> scalingNode = *(node->getChildren().begin()); // Assert if scalingNode is a Scaling ... - - float scaling_factor = getScalingFactor(scalingNode); - updateScalingFactor(scalingNode,scaling_factor * rescaling); + double scaling_factor = getScalingFactor(scalingNode); // XXX bad naming + updateScalingFactor(scalingNode, scaling_factor * rescaling); } // Handle the Scaling Nodes ... 
@@ -823,18 +833,19 @@ void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_ { if (!noQuant) { - //[!!] replacement of Scaling Node by Quantizer - float currentSF = getScalingFactor(node); + // Replacement of Scaling Node by Quantizer + double currentSF = getScalingFactor(node); // XXX bad naming ! + + std::shared_ptr<Node> quantizerNode = Quantizer(currentSF, -(signedMax + 1), signedMax, node->name()); - std::shared_ptr<Node> quantizerNode = Quantizer(currentSF, - (signedMax + 1), signedMax, node->name()); - quantizerNode->getOperator()->setDataType(DataType::Float32); + quantizerNode->getOperator()->setDataType(DataType::Float64); // getDataType(parentNode) quantizerNode->getOperator()->setBackend("cpu"); - graphView->replace({node}, {quantizerNode}); + graphView->replace({node}, {quantizerNode}); if (optimizeSigns) { - float rescaling = 1.0; + double rescaling = 1.0; bool inputIsUnsigned = signMap[node->name()].first; bool outputIsUnsigned = signMap[node->name()].second; @@ -842,7 +853,7 @@ void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_ rescaling /= inputIsUnsigned ? unsignedMax : signedMax; rescaling *= outputIsUnsigned ? unsignedMax : signedMax; - float scalingFactor = getScalingFactor(quantizerNode); + double scalingFactor = getScalingFactor(quantizerNode); updateScalingFactor(quantizerNode,scalingFactor * rescaling); if(outputIsUnsigned) @@ -858,7 +869,7 @@ void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_ static void insertCompensationNodes(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits) { // XXX Use the signMap to increase the resolution when possible ... 
- float signedMax = (1 << (nbBits - 1)) - 1; + double signedMax = (1 << (nbBits - 1)) - 1; std::vector<std::shared_ptr<Node>> nodeVector = retrieveNodeVector(graphView); @@ -878,7 +889,8 @@ static void insertCompensationNodes(std::shared_ptr<GraphView> graphView, std::u std::string mulNodeName = makeUniqueName(node->name() + "_Mul", graphView); std::shared_ptr<Node> mulNode = Mul(mulNodeName); - mulNode->getOperator()->setDataType(DataType::Float32); + + mulNode->getOperator()->setDataType(DataType::Float64); // getDataType(parentNode) mulNode->getOperator()->setBackend("cpu"); graphView->insertParent(node, mulNode, 0, 0, 0); @@ -886,10 +898,11 @@ static void insertCompensationNodes(std::shared_ptr<GraphView> graphView, std::u // create and insert the producer node std::shared_ptr<Tensor> inputTensor = std::static_pointer_cast<Tensor> (mulNode->getOperator()->getRawInput(0)); - std::shared_ptr<Tensor> coeffTensor = std::make_shared<Tensor>(); - coeffTensor->setDataType(DataType::Float32); - coeffTensor->setBackend("cpu"); + + coeffTensor->setDataType(DataType::Float64); // getDataType(parentNode) + coeffTensor->setBackend("cpu"); + coeffTensor->resize(inputTensor->dims()); fillTensor(coeffTensor, 1); @@ -900,8 +913,8 @@ static void insertCompensationNodes(std::shared_ptr<GraphView> graphView, std::u // rescale the coeffs and edit scaling factor fillTensor(coeffTensor, signedMax); - float sf = getScalingFactor(node); - updateScalingFactor(node,sf/signedMax); + double sf = getScalingFactor(node); // XXX bad naming ! + updateScalingFactor(node, sf/signedMax); // TODO : double check this !!! 
//std::cout << getTensorAbsoluteMax(coeffTensor) << std::endl; @@ -910,9 +923,7 @@ static void insertCompensationNodes(std::shared_ptr<GraphView> graphView, std::u } } -void - -performSingleShiftApproximation(std::shared_ptr<GraphView> graphView, bool noQuant) +void performSingleShiftApproximation(std::shared_ptr<GraphView> graphView, bool noQuant) { std::vector<std::shared_ptr<Node>> nodeVector = retrieveNodeVector(graphView); @@ -923,13 +934,13 @@ performSingleShiftApproximation(std::shared_ptr<GraphView> graphView, bool noQua { std::shared_ptr<Node> scalingNode = (*node->getChildren().begin()); - float base = getScalingFactor(scalingNode); + double base = getScalingFactor(scalingNode); - float approx = std::pow(2, std::ceil(std::log2(base))); + double approx = std::pow(2, std::ceil(std::log2(base))); updateScalingFactor(scalingNode,approx); - float ratio = base / approx; + double ratio = base / approx; std::shared_ptr<Tensor> weightTensor = getWeightTensor(node); rescaleTensor(weightTensor, ratio); @@ -953,17 +964,46 @@ static void printScalingFactors(std::shared_ptr<GraphView> graphView) for (auto node : retrieveNodeVector(graphView)) if (node->type() == "Scaling") { - float factor = getScalingFactor(node); + double factor = getScalingFactor(node); Log::info(" {:.6f} ({})", factor, node->name()); } } +/* +std::string deduceBackend(std::shared_ptr<GraphView> graphView) +{ + std::string rootNodeBackend = graphView->getRootNode()->backend(); + for (auto node : graphView->getNodes()) + if (node->backend() != rootNodeBackend) + log::warn(" Multiple backend detected, setting all nodes to {}") +} +*/ + +static void setupDataType(std::shared_ptr<GraphView> graphView, std::vector<std::shared_ptr<Tensor>> inputDataSet, DataType dataType) +{ + graphView->setDataType(dataType); + + for (auto inputNode : graphView->inputNodes()) { + auto op = std::static_pointer_cast<OperatorTensor>(inputNode->getOperator()); + auto inputTensor = op->getInput(0); + if (inputTensor) + 
inputTensor->setDataType(dataType); + } + + for (auto tensor : inputDataSet) + tensor->setDataType(dataType); +} + + void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits, std::vector<std::shared_ptr<Tensor>> inputDataSet, Clipping clippingMode, bool noQuant, bool optimizeSigns, bool singleShift, bool useCuda, bool verbose) { Log::info(" === QUANT PTQ 0.2.21 === "); graphView->setBackend("cpu"); + DataType initialDataType = (inputDataSet[0])->dataType(); + setupDataType(graphView, inputDataSet, DataType::Float64); + if (!checkArchitecture(graphView)) return; @@ -979,8 +1019,22 @@ void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits, normalizeParameters(graphView); Log::info(" Computing the value ranges ..."); - std::map<std::string, float> valueRanges = computeRanges(graphView, inputDataSet, true, useCuda); + std::map<std::string, double> valueRanges = computeRanges(graphView, inputDataSet, true, useCuda); + + // XXX +/* + SequentialScheduler scheduler(graphView); + scheduler.resetScheduling(); + scheduler.generateScheduling(); + + auto scheduling = scheduler.getStaticScheduling(); + for (auto node : scheduling) + if (node->type() == "Scaling") + std::cout << node->name() << " range = " << valueRanges[node->name()] << std::endl; + std::cout << " RETURN " << std::endl; + return; +*/ Log::info(" Optimizing the clipping values ..."); valueRanges = adjustRanges(clippingMode, valueRanges, nbBits, graphView, inputDataSet, useCuda, verbose); @@ -996,32 +1050,34 @@ void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits, insertCompensationNodes(graphView, nbBits); Log::info(" Performing the Single-Shift approximation ..."); - performSingleShiftApproximation(graphView,noQuant); + performSingleShiftApproximation(graphView, noQuant); } if (verbose) printScalingFactors(graphView); - Log::info(" Resetting the scheduler ..."); + Log::info(" Reseting the scheduler ..."); SequentialScheduler 
scheduler(graphView); scheduler.resetScheduling(); + setupDataType(graphView, inputDataSet, initialDataType); if (useCuda) graphView->setBackend("cuda"); Log::info(" Network is quantized !"); + } -std::map<std::string, float> getWeightRanges(std::shared_ptr<GraphView> graphView) +std::map<std::string, double> getWeightRanges(std::shared_ptr<GraphView> graphView) { - std::map<std::string, float> weightRanges; + std::map<std::string, double> weightRanges; for (std::shared_ptr<Node> node : graphView->getNodes()) { if (isAffine(node)) { std::shared_ptr<Tensor> weightTensor = getWeightTensor(node); - float range = getTensorAbsoluteMax(weightTensor); + double range = getTensorAbsoluteMax(weightTensor); weightRanges.insert(std::make_pair(node->name(), range)); } } diff --git a/src/PTQ/PTQMetaOps.cpp b/src/PTQ/PTQMetaOps.cpp index 89590cb..d2423d0 100644 --- a/src/PTQ/PTQMetaOps.cpp +++ b/src/PTQ/PTQMetaOps.cpp @@ -28,9 +28,9 @@ #include "aidge/utils/Types.h" #include "aidge/operator/Identity.hpp" #include "aidge/data/Tensor.hpp" -std::shared_ptr<Aidge::Node> Quantizer(float scalingFactor, float clip_min,float clip_max,const std::string& name) +std::shared_ptr<Aidge::Node> Quantizer(double scalingFactor, double clip_min,double clip_max,const std::string& name) { - std::shared_ptr<Aidge::Tensor> ScalingFactorTensorAttached = std::make_shared<Aidge::Tensor>(Aidge::Array1D<float, 1>{scalingFactor}); + std::shared_ptr<Aidge::Tensor> ScalingFactorTensorAttached = std::make_shared<Aidge::Tensor>(Aidge::Array1D<double, 1>{scalingFactor}); std::shared_ptr<Aidge::Node> mul_node = Aidge::Mul((!name.empty()) ? 
name + "_MulQuant" : ""); std::shared_ptr<Aidge::Node> producer_scaling_factor = addProducer(mul_node,1,{1},"ScalingFactor"); @@ -48,27 +48,30 @@ std::shared_ptr<Aidge::Node> Quantizer(float scalingFactor, float clip_min,float return metaopNode; } -std::shared_ptr<Aidge::Node> Scaling(float scalingFactor,const std::string& name) +std::shared_ptr<Aidge::Node> Scaling(double scalingFactor,const std::string& name) { - std::shared_ptr<Aidge::Tensor> ScalingFactorTensorAttached = std::make_shared<Aidge::Tensor>(Aidge::Array1D<float, 1>{scalingFactor}); - - std::shared_ptr<Aidge::Node> mul_node = Aidge::Mul((!name.empty()) ? name + "_Scaling" : ""); - - std::shared_ptr<Aidge::Node> producer_scaling_factor = addProducer(mul_node,1,{1},"ScalingFactor"); - producer_scaling_factor->getOperator()->setOutput(0, ScalingFactorTensorAttached); - std::shared_ptr<Aidge::GraphView> graph = Aidge::Sequential({mul_node}); - std::shared_ptr<Aidge::GraphView> connectedGV = getConnectedGraphView(mul_node); - Aidge::NodePtr metaopNode = MetaOperator("Scaling",connectedGV,{},name); + std::shared_ptr<Aidge::Tensor> scalingFactorTensor = std::make_shared<Aidge::Tensor>(Aidge::Array1D<double, 1> {scalingFactor}); + + std::shared_ptr<Aidge::Node> mulNode = Aidge::Mul((!name.empty()) ? 
name + "_Scaling" : ""); + + std::shared_ptr<Aidge::Node> scalingFactorProducer = addProducer(mulNode, 1, {1}, "ScalingFactor"); + scalingFactorProducer->getOperator()->setOutput(0, scalingFactorTensor); + + std::shared_ptr<Aidge::GraphView> graphView = Aidge::Sequential({mulNode}); + std::shared_ptr<Aidge::GraphView> connectedGraphView = getConnectedGraphView(mulNode); + + Aidge::NodePtr metaopNode = MetaOperator("Scaling", connectedGraphView, {}, name); + return metaopNode; } -bool updateScalingFactor(std::shared_ptr<Aidge::Node> MetaOpNode, float newScalingFactor) +bool updateScalingFactor(std::shared_ptr<Aidge::Node> MetaOpNode, double newScalingFactor) { if(MetaOpNode->type() != "Scaling" && MetaOpNode->type() != "Quantizer") { AIDGE_ASSERT("Cannot use updatePTQMetaOpsScalingFactor on Node of type {}", MetaOpNode->type()); } - std::shared_ptr<Aidge::Tensor> newScalingFactorTensorAttached = std::make_shared<Aidge::Tensor>(Aidge::Array1D<float, 1>{newScalingFactor}); + std::shared_ptr<Aidge::Tensor> newScalingFactorTensorAttached = std::make_shared<Aidge::Tensor>(Aidge::Array1D<double, 1>{newScalingFactor}); std::shared_ptr<Aidge::MetaOperator_Op> MetaOp = std::static_pointer_cast<Aidge::MetaOperator_Op>(MetaOpNode->getOperator()); std::set<Aidge::NodePtr> Meta_Op_Node_List = MetaOp->getMicroGraph()->getNodes(); //List of Nodes inside PTQ Metaop Node for(std::shared_ptr<Aidge::Node> node : Meta_Op_Node_List) @@ -82,7 +85,7 @@ bool updateScalingFactor(std::shared_ptr<Aidge::Node> MetaOpNode, float newScali AIDGE_ASSERT("Invalid PTQ MetaOperator, no Mul node found inside node of type {}",MetaOpNode->type()); return false; } -float getScalingFactor(std::shared_ptr<Aidge::Node> MetaOpNode) +double getScalingFactor(std::shared_ptr<Aidge::Node> MetaOpNode) { if(MetaOpNode->type() != "Scaling" && MetaOpNode->type() != "Quantizer") { @@ -97,13 +100,13 @@ float getScalingFactor(std::shared_ptr<Aidge::Node> MetaOpNode) { std::shared_ptr<Aidge::Data> MulInput1Data = 
node->input(1).first->getOperator()->getRawOutput(0); void* RawInputScalingFactor = std::static_pointer_cast<Aidge::Tensor>(MulInput1Data)->getImpl()->rawPtr(); - return (*(static_cast<float*>(RawInputScalingFactor))); + return (*(static_cast<double*>(RawInputScalingFactor))); } } AIDGE_ASSERT("Invalid PTQ MetaOperator, no Mul node found inside node of type {}",MetaOpNode->type()); return -1; } -bool setClipRange(std::shared_ptr<Aidge::Node> QuantizerNode,float min, float max) +bool setClipRange(std::shared_ptr<Aidge::Node> QuantizerNode,double min, double max) { if(QuantizerNode->type() != "Quantizer") { -- GitLab From 3427ee5c620ae825ac70cf1dd6d250c922173f96 Mon Sep 17 00:00:00 2001 From: Olivier BICHLER <olivier.bichler@cea.fr> Date: Wed, 18 Dec 2024 12:10:45 +0100 Subject: [PATCH 04/21] Switch back to float32 except for Producer --- src/PTQ/PTQ.cpp | 70 ++++++++++++++++++------------------------------- 1 file changed, 25 insertions(+), 45 deletions(-) diff --git a/src/PTQ/PTQ.cpp b/src/PTQ/PTQ.cpp index c3e02dc..ab71dae 100644 --- a/src/PTQ/PTQ.cpp +++ b/src/PTQ/PTQ.cpp @@ -52,7 +52,7 @@ bool isMerging(std::shared_ptr<Node> node) bool checkArchitecture(std::shared_ptr<GraphView> graphView) { - std::set<std::string> otherNodeTypes({"Flatten", "Softmax", "BatchNorm2D", "ReLU", "Producer"}); + const std::set<std::string> otherNodeTypes({"Flatten", "Softmax", "BatchNorm2D", "ReLU", "Producer"}); for (std::shared_ptr<Node> node : graphView->getNodes()) { @@ -76,10 +76,10 @@ static void fillTensor(std::shared_ptr<Tensor> tensor, double value) castedTensor[i] = value; } -static void rescaleTensor(std::shared_ptr<Tensor> tensor, double scaling) +static void rescaleTensor(std::shared_ptr<Tensor> tensor, float scaling) { // Get the tensor data pointer - double * castedTensor = static_cast <double *> (tensor->getImpl()->rawPtr()); + float * castedTensor = static_cast <float *> (tensor->getImpl()->rawPtr()); // Rescale the tensor for(std::size_t i = 0; i < tensor->size(); 
i++) @@ -89,20 +89,20 @@ static void rescaleTensor(std::shared_ptr<Tensor> tensor, double scaling) static void roundTensor(std::shared_ptr<Tensor> tensor) { // Get the tensor data pointer - double * castedTensor = static_cast <double *> (tensor->getImpl()->rawPtr()); + float * castedTensor = static_cast <float *> (tensor->getImpl()->rawPtr()); // Rescale the tensor for(std::size_t i = 0; i < tensor->size(); i++) castedTensor[i] = std::nearbyint(castedTensor[i]);//Round } -static double getTensorAbsoluteMax(std::shared_ptr <Tensor> tensor) +static float getTensorAbsoluteMax(std::shared_ptr <Tensor> tensor) { // Get the tensor data pointer and edit it - double * castedTensor = static_cast<double*>(tensor->getImpl()->rawPtr()); + float * castedTensor = static_cast<float*>(tensor->getImpl()->rawPtr()); // Get the tensor absolute max value - double maxValue = 0.0f; + float maxValue = 0.0f; for(std::size_t i = 0; i < tensor->size(); ++i) { if(std::fabs(castedTensor[i]) > maxValue) { maxValue = std::fabs(castedTensor[i]); @@ -186,7 +186,7 @@ void prepareNetwork(std::shared_ptr<GraphView> graphView) removeFlatten(graphView); bool containsBatchNorm = false; - std::vector<std::shared_ptr<Node>> nodeVector = retrieveNodeVector(graphView); + const std::vector<std::shared_ptr<Node>> nodeVector = retrieveNodeVector(graphView); for (std::shared_ptr<Node> node : nodeVector) if (node->type() == "BatchNorm") @@ -213,7 +213,7 @@ void insertResidualNodes(std::shared_ptr<GraphView> graphView) { // TODO: double check this ... 
- std::vector<std::shared_ptr<Node>> nodeVector = retrieveNodeVector(graphView); + const std::vector<std::shared_ptr<Node>> nodeVector = retrieveNodeVector(graphView); for (std::shared_ptr<Node> node : nodeVector) { @@ -234,7 +234,7 @@ void insertResidualNodes(std::shared_ptr<GraphView> graphView) std::string residualNodeName = makeUniqueName(parentNode->name() + "_Res", graphView); std::shared_ptr<Node> residualNode = Scaling(1.0, residualNodeName); - residualNode->getOperator()->setDataType(DataType::Float64); //getDataType(parentNode) + residualNode->getOperator()->setDataType(DataType::Float32); //getDataType(parentNode) residualNode->getOperator()->setBackend("cpu"); graphView->insertParent(node, residualNode, i, 0, 0); @@ -256,16 +256,16 @@ void insertScalingNodes(std::shared_ptr<GraphView> graphView) { insertResidualNodes(graphView); - std::set<std::shared_ptr<Node>> nodeSet = graphView->getNodes(); + const std::set<std::shared_ptr<Node>> nodeSet = graphView->getNodes(); for (std::shared_ptr<Node> parentNode : nodeSet) { if (isAffine(parentNode) || isMerging(parentNode)) { - std::string scalingNodeName = makeUniqueName(parentNode->name() + "_Scaling", graphView); + const std::string scalingNodeName = makeUniqueName(parentNode->name() + "_Scaling", graphView); std::shared_ptr<Node> scalingNode = Scaling(1.0, scalingNodeName); - scalingNode->getOperator()->setDataType(DataType::Float64); // getDataType(parentNode) + scalingNode->getOperator()->setDataType(DataType::Float32); // getDataType(parentNode) scalingNode->getOperator()->setBackend("cpu"); if (parentNode->getChildren().size() > 0) @@ -273,7 +273,7 @@ void insertScalingNodes(std::shared_ptr<GraphView> graphView) // SCALING NODE INSERTION // We always have one output from Affine and Add nodes, but possibly multiple childs - std::vector<std::shared_ptr<Node>> nextNodes = parentNode->getChildren(0); + const std::vector<std::shared_ptr<Node>> nextNodes = parentNode->getChildren(0); // For each node in 
nextNodes store the connexion index std::vector<int> inputIndices(nextNodes.size()); @@ -331,7 +331,7 @@ void normalizeParameters(std::shared_ptr<GraphView> graphView) { // CREATE THE ACCUMULATED RATIO MAP /////////////////////////////////////// - std::vector<std::shared_ptr<Node>> nodeVector = retrieveNodeVector(graphView); + const std::vector<std::shared_ptr<Node>> nodeVector = retrieveNodeVector(graphView); std::map<std::string, double> accumulatedRatios; for (std::shared_ptr<Node> node : nodeVector) @@ -429,7 +429,7 @@ std::map<std::string, double> computeRanges(std::shared_ptr<GraphView> graphView // Gather ranges ... - std::set<std::shared_ptr<Node>> nodeSet = graphView->getNodes(); + const std::set<std::shared_ptr<Node>> nodeSet = graphView->getNodes(); for (std::shared_ptr<Node> node : nodeSet) { if ((scalingNodesOnly && (node->type() == "Scaling")) || (!scalingNodesOnly && (node->type() != "Producer"))) @@ -449,7 +449,7 @@ std::map<std::string, double> computeRanges(std::shared_ptr<GraphView> graphView std::map<std::string, double> computeRanges(std::shared_ptr<GraphView> graphView, std::vector<std::shared_ptr<Tensor>> inputDataSet, bool scalingNodesOnly, bool useCuda) { std::map<std::string, double> valueRanges; - std::set<std::shared_ptr<Node>> nodeSet = graphView->getNodes(); + const std::set<std::shared_ptr<Node>> nodeSet = graphView->getNodes(); // std::shared_ptr<Node> inputNode = getFirstNode(graphView); @@ -527,7 +527,7 @@ void normalizeActivations(std::shared_ptr<GraphView> graphView, std::map<std::st // CREATE THE SCALING FACTOR MAP ////////////////////////////////////////// - std::vector<std::shared_ptr<Node>> nodeVector = retrieveNodeVector(graphView); + const std::vector<std::shared_ptr<Node>> nodeVector = retrieveNodeVector(graphView); std::map<std::string, double> scalingFactors; @@ -628,7 +628,7 @@ std::map<std::string, std::pair<bool, bool>> computeSignMap(std::shared_ptr<Grap // ITERATE OVER THE GRAPH - std::vector<std::shared_ptr<Node>> 
nodeVector = retrieveNodeVector(graphView); + const std::vector<std::shared_ptr<Node>> nodeVector = retrieveNodeVector(graphView); for (std::shared_ptr<Node> node : nodeVector) { @@ -672,7 +672,7 @@ std::map<std::string, std::pair<bool, bool>> computeSignMap(std::shared_ptr<Grap if (isMerging(node)) { - std::vector<std::shared_ptr<Node>> parentNodes = node->getParents(); + const std::vector<std::shared_ptr<Node>> parentNodes = node->getParents(); bool allParentAreSigned = true; bool allParentAreUnsigned = true; @@ -767,7 +767,7 @@ void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_ // ITERATE OVER THE GRAPH ///////////////////////////////////////////////// - std::vector<std::shared_ptr<Node>> nodeVector = retrieveNodeVector(graphView); + const std::vector<std::shared_ptr<Node>> nodeVector = retrieveNodeVector(graphView); for (std::shared_ptr<Node> node : nodeVector) { @@ -838,7 +838,7 @@ void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_ std::shared_ptr<Node> quantizerNode = Quantizer(currentSF, -(signedMax + 1), signedMax, node->name()); - quantizerNode->getOperator()->setDataType(DataType::Float64); // getDataType(parentNode) + quantizerNode->getOperator()->setDataType(DataType::Float32); // getDataType(parentNode) quantizerNode->getOperator()->setBackend("cpu"); graphView->replace({node}, {quantizerNode}); @@ -871,7 +871,7 @@ static void insertCompensationNodes(std::shared_ptr<GraphView> graphView, std::u // XXX Use the signMap to increase the resolution when possible ... 
double signedMax = (1 << (nbBits - 1)) - 1; - std::vector<std::shared_ptr<Node>> nodeVector = retrieveNodeVector(graphView); + const std::vector<std::shared_ptr<Node>> nodeVector = retrieveNodeVector(graphView); for (std::shared_ptr<Node> node : nodeVector) { @@ -890,7 +890,7 @@ static void insertCompensationNodes(std::shared_ptr<GraphView> graphView, std::u std::string mulNodeName = makeUniqueName(node->name() + "_Mul", graphView); std::shared_ptr<Node> mulNode = Mul(mulNodeName); - mulNode->getOperator()->setDataType(DataType::Float64); // getDataType(parentNode) + mulNode->getOperator()->setDataType(DataType::Float32); // getDataType(parentNode) mulNode->getOperator()->setBackend("cpu"); graphView->insertParent(node, mulNode, 0, 0, 0); @@ -925,7 +925,7 @@ static void insertCompensationNodes(std::shared_ptr<GraphView> graphView, std::u void performSingleShiftApproximation(std::shared_ptr<GraphView> graphView, bool noQuant) { - std::vector<std::shared_ptr<Node>> nodeVector = retrieveNodeVector(graphView); + const std::vector<std::shared_ptr<Node>> nodeVector = retrieveNodeVector(graphView); for (std::shared_ptr<Node> node : nodeVector) { @@ -979,31 +979,12 @@ std::string deduceBackend(std::shared_ptr<GraphView> graphView) } */ -static void setupDataType(std::shared_ptr<GraphView> graphView, std::vector<std::shared_ptr<Tensor>> inputDataSet, DataType dataType) -{ - graphView->setDataType(dataType); - - for (auto inputNode : graphView->inputNodes()) { - auto op = std::static_pointer_cast<OperatorTensor>(inputNode->getOperator()); - auto inputTensor = op->getInput(0); - if (inputTensor) - inputTensor->setDataType(dataType); - } - - for (auto tensor : inputDataSet) - tensor->setDataType(dataType); -} - - void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits, std::vector<std::shared_ptr<Tensor>> inputDataSet, Clipping clippingMode, bool noQuant, bool optimizeSigns, bool singleShift, bool useCuda, bool verbose) { Log::info(" === QUANT PTQ 0.2.21 
=== "); graphView->setBackend("cpu"); - DataType initialDataType = (inputDataSet[0])->dataType(); - setupDataType(graphView, inputDataSet, DataType::Float64); - if (!checkArchitecture(graphView)) return; @@ -1060,7 +1041,6 @@ void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits, SequentialScheduler scheduler(graphView); scheduler.resetScheduling(); - setupDataType(graphView, inputDataSet, initialDataType); if (useCuda) graphView->setBackend("cuda"); -- GitLab From 8cd6ed29c7bce21623fbc033794c4724ad50986e Mon Sep 17 00:00:00 2001 From: Olivier BICHLER <olivier.bichler@cea.fr> Date: Wed, 18 Dec 2024 14:12:21 +0000 Subject: [PATCH 05/21] Revert "Switch back to float32 except for Producer" This reverts commit 57239cf31424a7c5a8f0a5f5b6db2197d75655f4 --- src/PTQ/PTQ.cpp | 70 +++++++++++++++++++++++++++++++------------------ 1 file changed, 45 insertions(+), 25 deletions(-) diff --git a/src/PTQ/PTQ.cpp b/src/PTQ/PTQ.cpp index ab71dae..c3e02dc 100644 --- a/src/PTQ/PTQ.cpp +++ b/src/PTQ/PTQ.cpp @@ -52,7 +52,7 @@ bool isMerging(std::shared_ptr<Node> node) bool checkArchitecture(std::shared_ptr<GraphView> graphView) { - const std::set<std::string> otherNodeTypes({"Flatten", "Softmax", "BatchNorm2D", "ReLU", "Producer"}); + std::set<std::string> otherNodeTypes({"Flatten", "Softmax", "BatchNorm2D", "ReLU", "Producer"}); for (std::shared_ptr<Node> node : graphView->getNodes()) { @@ -76,10 +76,10 @@ static void fillTensor(std::shared_ptr<Tensor> tensor, double value) castedTensor[i] = value; } -static void rescaleTensor(std::shared_ptr<Tensor> tensor, float scaling) +static void rescaleTensor(std::shared_ptr<Tensor> tensor, double scaling) { // Get the tensor data pointer - float * castedTensor = static_cast <float *> (tensor->getImpl()->rawPtr()); + double * castedTensor = static_cast <double *> (tensor->getImpl()->rawPtr()); // Rescale the tensor for(std::size_t i = 0; i < tensor->size(); i++) @@ -89,20 +89,20 @@ static void 
rescaleTensor(std::shared_ptr<Tensor> tensor, float scaling) static void roundTensor(std::shared_ptr<Tensor> tensor) { // Get the tensor data pointer - float * castedTensor = static_cast <float *> (tensor->getImpl()->rawPtr()); + double * castedTensor = static_cast <double *> (tensor->getImpl()->rawPtr()); // Rescale the tensor for(std::size_t i = 0; i < tensor->size(); i++) castedTensor[i] = std::nearbyint(castedTensor[i]);//Round } -static float getTensorAbsoluteMax(std::shared_ptr <Tensor> tensor) +static double getTensorAbsoluteMax(std::shared_ptr <Tensor> tensor) { // Get the tensor data pointer and edit it - float * castedTensor = static_cast<float*>(tensor->getImpl()->rawPtr()); + double * castedTensor = static_cast<double*>(tensor->getImpl()->rawPtr()); // Get the tensor absolute max value - float maxValue = 0.0f; + double maxValue = 0.0f; for(std::size_t i = 0; i < tensor->size(); ++i) { if(std::fabs(castedTensor[i]) > maxValue) { maxValue = std::fabs(castedTensor[i]); @@ -186,7 +186,7 @@ void prepareNetwork(std::shared_ptr<GraphView> graphView) removeFlatten(graphView); bool containsBatchNorm = false; - const std::vector<std::shared_ptr<Node>> nodeVector = retrieveNodeVector(graphView); + std::vector<std::shared_ptr<Node>> nodeVector = retrieveNodeVector(graphView); for (std::shared_ptr<Node> node : nodeVector) if (node->type() == "BatchNorm") @@ -213,7 +213,7 @@ void insertResidualNodes(std::shared_ptr<GraphView> graphView) { // TODO: double check this ... 
- const std::vector<std::shared_ptr<Node>> nodeVector = retrieveNodeVector(graphView); + std::vector<std::shared_ptr<Node>> nodeVector = retrieveNodeVector(graphView); for (std::shared_ptr<Node> node : nodeVector) { @@ -234,7 +234,7 @@ void insertResidualNodes(std::shared_ptr<GraphView> graphView) std::string residualNodeName = makeUniqueName(parentNode->name() + "_Res", graphView); std::shared_ptr<Node> residualNode = Scaling(1.0, residualNodeName); - residualNode->getOperator()->setDataType(DataType::Float32); //getDataType(parentNode) + residualNode->getOperator()->setDataType(DataType::Float64); //getDataType(parentNode) residualNode->getOperator()->setBackend("cpu"); graphView->insertParent(node, residualNode, i, 0, 0); @@ -256,16 +256,16 @@ void insertScalingNodes(std::shared_ptr<GraphView> graphView) { insertResidualNodes(graphView); - const std::set<std::shared_ptr<Node>> nodeSet = graphView->getNodes(); + std::set<std::shared_ptr<Node>> nodeSet = graphView->getNodes(); for (std::shared_ptr<Node> parentNode : nodeSet) { if (isAffine(parentNode) || isMerging(parentNode)) { - const std::string scalingNodeName = makeUniqueName(parentNode->name() + "_Scaling", graphView); + std::string scalingNodeName = makeUniqueName(parentNode->name() + "_Scaling", graphView); std::shared_ptr<Node> scalingNode = Scaling(1.0, scalingNodeName); - scalingNode->getOperator()->setDataType(DataType::Float32); // getDataType(parentNode) + scalingNode->getOperator()->setDataType(DataType::Float64); // getDataType(parentNode) scalingNode->getOperator()->setBackend("cpu"); if (parentNode->getChildren().size() > 0) @@ -273,7 +273,7 @@ void insertScalingNodes(std::shared_ptr<GraphView> graphView) // SCALING NODE INSERTION // We always have one output from Affine and Add nodes, but possibly multiple childs - const std::vector<std::shared_ptr<Node>> nextNodes = parentNode->getChildren(0); + std::vector<std::shared_ptr<Node>> nextNodes = parentNode->getChildren(0); // For each node in 
nextNodes store the connexion index std::vector<int> inputIndices(nextNodes.size()); @@ -331,7 +331,7 @@ void normalizeParameters(std::shared_ptr<GraphView> graphView) { // CREATE THE ACCUMULATED RATIO MAP /////////////////////////////////////// - const std::vector<std::shared_ptr<Node>> nodeVector = retrieveNodeVector(graphView); + std::vector<std::shared_ptr<Node>> nodeVector = retrieveNodeVector(graphView); std::map<std::string, double> accumulatedRatios; for (std::shared_ptr<Node> node : nodeVector) @@ -429,7 +429,7 @@ std::map<std::string, double> computeRanges(std::shared_ptr<GraphView> graphView // Gather ranges ... - const std::set<std::shared_ptr<Node>> nodeSet = graphView->getNodes(); + std::set<std::shared_ptr<Node>> nodeSet = graphView->getNodes(); for (std::shared_ptr<Node> node : nodeSet) { if ((scalingNodesOnly && (node->type() == "Scaling")) || (!scalingNodesOnly && (node->type() != "Producer"))) @@ -449,7 +449,7 @@ std::map<std::string, double> computeRanges(std::shared_ptr<GraphView> graphView std::map<std::string, double> computeRanges(std::shared_ptr<GraphView> graphView, std::vector<std::shared_ptr<Tensor>> inputDataSet, bool scalingNodesOnly, bool useCuda) { std::map<std::string, double> valueRanges; - const std::set<std::shared_ptr<Node>> nodeSet = graphView->getNodes(); + std::set<std::shared_ptr<Node>> nodeSet = graphView->getNodes(); // std::shared_ptr<Node> inputNode = getFirstNode(graphView); @@ -527,7 +527,7 @@ void normalizeActivations(std::shared_ptr<GraphView> graphView, std::map<std::st // CREATE THE SCALING FACTOR MAP ////////////////////////////////////////// - const std::vector<std::shared_ptr<Node>> nodeVector = retrieveNodeVector(graphView); + std::vector<std::shared_ptr<Node>> nodeVector = retrieveNodeVector(graphView); std::map<std::string, double> scalingFactors; @@ -628,7 +628,7 @@ std::map<std::string, std::pair<bool, bool>> computeSignMap(std::shared_ptr<Grap // ITERATE OVER THE GRAPH - const 
std::vector<std::shared_ptr<Node>> nodeVector = retrieveNodeVector(graphView); + std::vector<std::shared_ptr<Node>> nodeVector = retrieveNodeVector(graphView); for (std::shared_ptr<Node> node : nodeVector) { @@ -672,7 +672,7 @@ std::map<std::string, std::pair<bool, bool>> computeSignMap(std::shared_ptr<Grap if (isMerging(node)) { - const std::vector<std::shared_ptr<Node>> parentNodes = node->getParents(); + std::vector<std::shared_ptr<Node>> parentNodes = node->getParents(); bool allParentAreSigned = true; bool allParentAreUnsigned = true; @@ -767,7 +767,7 @@ void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_ // ITERATE OVER THE GRAPH ///////////////////////////////////////////////// - const std::vector<std::shared_ptr<Node>> nodeVector = retrieveNodeVector(graphView); + std::vector<std::shared_ptr<Node>> nodeVector = retrieveNodeVector(graphView); for (std::shared_ptr<Node> node : nodeVector) { @@ -838,7 +838,7 @@ void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_ std::shared_ptr<Node> quantizerNode = Quantizer(currentSF, -(signedMax + 1), signedMax, node->name()); - quantizerNode->getOperator()->setDataType(DataType::Float32); // getDataType(parentNode) + quantizerNode->getOperator()->setDataType(DataType::Float64); // getDataType(parentNode) quantizerNode->getOperator()->setBackend("cpu"); graphView->replace({node}, {quantizerNode}); @@ -871,7 +871,7 @@ static void insertCompensationNodes(std::shared_ptr<GraphView> graphView, std::u // XXX Use the signMap to increase the resolution when possible ... 
double signedMax = (1 << (nbBits - 1)) - 1; - const std::vector<std::shared_ptr<Node>> nodeVector = retrieveNodeVector(graphView); + std::vector<std::shared_ptr<Node>> nodeVector = retrieveNodeVector(graphView); for (std::shared_ptr<Node> node : nodeVector) { @@ -890,7 +890,7 @@ static void insertCompensationNodes(std::shared_ptr<GraphView> graphView, std::u std::string mulNodeName = makeUniqueName(node->name() + "_Mul", graphView); std::shared_ptr<Node> mulNode = Mul(mulNodeName); - mulNode->getOperator()->setDataType(DataType::Float32); // getDataType(parentNode) + mulNode->getOperator()->setDataType(DataType::Float64); // getDataType(parentNode) mulNode->getOperator()->setBackend("cpu"); graphView->insertParent(node, mulNode, 0, 0, 0); @@ -925,7 +925,7 @@ static void insertCompensationNodes(std::shared_ptr<GraphView> graphView, std::u void performSingleShiftApproximation(std::shared_ptr<GraphView> graphView, bool noQuant) { - const std::vector<std::shared_ptr<Node>> nodeVector = retrieveNodeVector(graphView); + std::vector<std::shared_ptr<Node>> nodeVector = retrieveNodeVector(graphView); for (std::shared_ptr<Node> node : nodeVector) { @@ -979,12 +979,31 @@ std::string deduceBackend(std::shared_ptr<GraphView> graphView) } */ +static void setupDataType(std::shared_ptr<GraphView> graphView, std::vector<std::shared_ptr<Tensor>> inputDataSet, DataType dataType) +{ + graphView->setDataType(dataType); + + for (auto inputNode : graphView->inputNodes()) { + auto op = std::static_pointer_cast<OperatorTensor>(inputNode->getOperator()); + auto inputTensor = op->getInput(0); + if (inputTensor) + inputTensor->setDataType(dataType); + } + + for (auto tensor : inputDataSet) + tensor->setDataType(dataType); +} + + void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits, std::vector<std::shared_ptr<Tensor>> inputDataSet, Clipping clippingMode, bool noQuant, bool optimizeSigns, bool singleShift, bool useCuda, bool verbose) { Log::info(" === QUANT PTQ 0.2.21 
=== "); graphView->setBackend("cpu"); + DataType initialDataType = (inputDataSet[0])->dataType(); + setupDataType(graphView, inputDataSet, DataType::Float64); + if (!checkArchitecture(graphView)) return; @@ -1041,6 +1060,7 @@ void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits, SequentialScheduler scheduler(graphView); scheduler.resetScheduling(); + setupDataType(graphView, inputDataSet, initialDataType); if (useCuda) graphView->setBackend("cuda"); -- GitLab From 02b2a7fae21ae7120dbe3a9492afb499534a21c0 Mon Sep 17 00:00:00 2001 From: bhalimi <benjamin.halimi@cea.fr> Date: Fri, 20 Dec 2024 12:33:50 +0000 Subject: [PATCH 06/21] fix the scaling factor getter --- src/PTQ/PTQMetaOps.cpp | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/src/PTQ/PTQMetaOps.cpp b/src/PTQ/PTQMetaOps.cpp index d2423d0..69b5dd4 100644 --- a/src/PTQ/PTQMetaOps.cpp +++ b/src/PTQ/PTQMetaOps.cpp @@ -28,6 +28,9 @@ #include "aidge/utils/Types.h" #include "aidge/operator/Identity.hpp" #include "aidge/data/Tensor.hpp" +#include "aidge/operator/OperatorTensor.hpp" + + std::shared_ptr<Aidge::Node> Quantizer(double scalingFactor, double clip_min,double clip_max,const std::string& name) { std::shared_ptr<Aidge::Tensor> ScalingFactorTensorAttached = std::make_shared<Aidge::Tensor>(Aidge::Array1D<double, 1>{scalingFactor}); @@ -96,12 +99,17 @@ double getScalingFactor(std::shared_ptr<Aidge::Node> MetaOpNode) std::set<Aidge::NodePtr> Meta_Op_Node_List = MetaOp->getMicroGraph()->getNodes(); //List of Nodes inside PTQ Metaop Node for(std::shared_ptr<Aidge::Node> node : Meta_Op_Node_List) { - if(node->type() == "Mul") - { - std::shared_ptr<Aidge::Data> MulInput1Data = node->input(1).first->getOperator()->getRawOutput(0); - void* RawInputScalingFactor = std::static_pointer_cast<Aidge::Tensor>(MulInput1Data)->getImpl()->rawPtr(); - return (*(static_cast<double*>(RawInputScalingFactor))); - } + if(node->type() == "Mul") + { + //auto tensor = 
std::static_pointer_cast<Aidge::OperatorTensor>(node->getOperator())->getInput(1); + //bool useFloat = tensor->dataType() == Aidge::DataType::Float32; + //return useFloat ? tensor->get<float>(0) : tensor->get<double>(0); + + auto tensor = std::static_pointer_cast<Aidge::OperatorTensor>(node->getOperator())->getInput(1); + std::shared_ptr<Aidge::Tensor> fallback; + const Aidge::Tensor& scalingFactorTensor = tensor->refCastFrom(fallback, Aidge::DataType::Float64, "cpu"); + return scalingFactorTensor.get<double>(0); + } } AIDGE_ASSERT("Invalid PTQ MetaOperator, no Mul node found inside node of type {}",MetaOpNode->type()); return -1; -- GitLab From 3b594c4ceafa025aa935fbd04f2667eaf876df97 Mon Sep 17 00:00:00 2001 From: bhalimi <benjamin.halimi@cea.fr> Date: Fri, 20 Dec 2024 12:35:22 +0000 Subject: [PATCH 07/21] fix the histogram bin computation --- src/PTQ/Clipping.cpp | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/PTQ/Clipping.cpp b/src/PTQ/Clipping.cpp index f8765f3..57ad7a8 100644 --- a/src/PTQ/Clipping.cpp +++ b/src/PTQ/Clipping.cpp @@ -132,9 +132,10 @@ double computeMEClipping(std::vector<int> histogram, std::uint8_t nbBits, double clippingErrors.push_back(accumulatedError); } - std::vector<double>::iterator it = std::min_element(clippingErrors.begin(), clippingErrors.end()); - double bestClipping = static_cast<double> (std::distance(clippingErrors.begin(), it)) / static_cast<double> (nbIter); - + std::vector<double>::iterator it = std::min_element(clippingErrors.begin() + 1, clippingErrors.end()); + int bestBin = static_cast<int> (std::distance(clippingErrors.begin(), it)) + 1; + double bestClipping = static_cast<double> (bestBin) / static_cast<double> (nbIter); + return bestClipping; } @@ -199,7 +200,8 @@ double computeKLClipping(std::vector<int> refHistogram, std::uint8_t nbBits) } std::vector<double>::iterator it = std::min_element(clippingErrors.begin() + 1, clippingErrors.end()); - double bestClipping = static_cast<double> 
(std::distance(clippingErrors.begin(), it)) / static_cast<double> (nbIter); + int bestBin = static_cast<int> (std::distance(clippingErrors.begin(), it)) + 1; + double bestClipping = (static_cast<double> (bestBin)) / static_cast<double> (nbIter); return bestClipping; } -- GitLab From ee28b6fcef9d841e3636733f245894a37efbd4f1 Mon Sep 17 00:00:00 2001 From: bhalimi <benjamin.halimi@cea.fr> Date: Fri, 20 Dec 2024 12:39:48 +0000 Subject: [PATCH 08/21] remove commented code --- src/PTQ/PTQ.cpp | 58 ++++++++++++++++++++---------------------- src/PTQ/PTQMetaOps.cpp | 4 --- 2 files changed, 28 insertions(+), 34 deletions(-) diff --git a/src/PTQ/PTQ.cpp b/src/PTQ/PTQ.cpp index c3e02dc..3cec325 100644 --- a/src/PTQ/PTQ.cpp +++ b/src/PTQ/PTQ.cpp @@ -929,7 +929,7 @@ void performSingleShiftApproximation(std::shared_ptr<GraphView> graphView, bool for (std::shared_ptr<Node> node : nodeVector) { - //Use A meatoperator of type Scaling of MulCompensation instead + // Use A meatoperator of type Scaling of MulCompensation instead if (isAffine(node) || (node->type() == "Mul")) { std::shared_ptr<Node> scalingNode = (*node->getChildren().begin()); @@ -962,23 +962,13 @@ static void printScalingFactors(std::shared_ptr<GraphView> graphView) { Log::info(" === SCALING FACTORS === "); for (auto node : retrieveNodeVector(graphView)) - if (node->type() == "Scaling") + if (node->type() == "Scaling" || node->type() == "Quantizer") { double factor = getScalingFactor(node); Log::info(" {:.6f} ({})", factor, node->name()); } } -/* -std::string deduceBackend(std::shared_ptr<GraphView> graphView) -{ - std::string rootNodeBackend = graphView->getRootNode()->backend(); - for (auto node : graphView->getNodes()) - if (node->backend() != rootNodeBackend) - log::warn(" Multiple backend detected, setting all nodes to {}") -} -*/ - static void setupDataType(std::shared_ptr<GraphView> graphView, std::vector<std::shared_ptr<Tensor>> inputDataSet, DataType dataType) { graphView->setDataType(dataType); @@ -994,6 
+984,17 @@ static void setupDataType(std::shared_ptr<GraphView> graphView, std::vector<std: tensor->setDataType(dataType); } +static void printRanges(std::shared_ptr<GraphView> graphView, std::map<std::string, double> valueRanges) +{ + SequentialScheduler scheduler(graphView); + scheduler.resetScheduling(); + scheduler.generateScheduling(); + + auto scheduling = scheduler.getStaticScheduling(); + for (auto node : scheduling) + if (node->type() == "Scaling") + std::cout << node->name() << " range = " << valueRanges[node->name()] << std::endl; +} void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits, std::vector<std::shared_ptr<Tensor>> inputDataSet, Clipping clippingMode, bool noQuant, bool optimizeSigns, bool singleShift, bool useCuda, bool verbose) { @@ -1021,23 +1022,15 @@ void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits, Log::info(" Computing the value ranges ..."); std::map<std::string, double> valueRanges = computeRanges(graphView, inputDataSet, true, useCuda); - // XXX -/* - SequentialScheduler scheduler(graphView); - scheduler.resetScheduling(); - scheduler.generateScheduling(); - - auto scheduling = scheduler.getStaticScheduling(); - for (auto node : scheduling) - if (node->type() == "Scaling") - std::cout << node->name() << " range = " << valueRanges[node->name()] << std::endl; + //std::cout << " === RANGES (BEFORE ADJUST) ===" << std::endl; + //printRanges(graphView, valueRanges); - std::cout << " RETURN " << std::endl; - return; -*/ Log::info(" Optimizing the clipping values ..."); valueRanges = adjustRanges(clippingMode, valueRanges, nbBits, graphView, inputDataSet, useCuda, verbose); + //std::cout << " === RANGES (AFTER ADJUST) ===" << std::endl; + //printRanges(graphView, valueRanges); + Log::info(" Normalizing the activations ..."); normalizeActivations(graphView, valueRanges); @@ -1052,20 +1045,25 @@ void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits, Log::info(" 
Performing the Single-Shift approximation ..."); performSingleShiftApproximation(graphView, noQuant); } - + if (verbose) printScalingFactors(graphView); - Log::info(" Reseting the scheduler ..."); - SequentialScheduler scheduler(graphView); - scheduler.resetScheduling(); + //std::cout << " === SCALINGS (BEFORE CAST) ===" << std::endl; + //printScalingFactors(graphView); setupDataType(graphView, inputDataSet, initialDataType); if (useCuda) graphView->setBackend("cuda"); - Log::info(" Network is quantized !"); + //std::cout << " === SCALINGS (AFTER CAST) ===" << std::endl; + //printScalingFactors(graphView); + + Log::info(" Reseting the scheduler ..."); + SequentialScheduler scheduler(graphView); + scheduler.resetScheduling(); + Log::info(" Network is quantized !"); } std::map<std::string, double> getWeightRanges(std::shared_ptr<GraphView> graphView) diff --git a/src/PTQ/PTQMetaOps.cpp b/src/PTQ/PTQMetaOps.cpp index 69b5dd4..d2bc184 100644 --- a/src/PTQ/PTQMetaOps.cpp +++ b/src/PTQ/PTQMetaOps.cpp @@ -101,10 +101,6 @@ double getScalingFactor(std::shared_ptr<Aidge::Node> MetaOpNode) { if(node->type() == "Mul") { - //auto tensor = std::static_pointer_cast<Aidge::OperatorTensor>(node->getOperator())->getInput(1); - //bool useFloat = tensor->dataType() == Aidge::DataType::Float32; - //return useFloat ? 
tensor->get<float>(0) : tensor->get<double>(0); - auto tensor = std::static_pointer_cast<Aidge::OperatorTensor>(node->getOperator())->getInput(1); std::shared_ptr<Aidge::Tensor> fallback; const Aidge::Tensor& scalingFactorTensor = tensor->refCastFrom(fallback, Aidge::DataType::Float64, "cpu"); -- GitLab From 943e9cedd64daa5cee3fa2ddaf29eb51c90af03c Mon Sep 17 00:00:00 2001 From: bhalimi <benjamin.halimi@cea.fr> Date: Fri, 20 Dec 2024 16:03:48 +0000 Subject: [PATCH 09/21] improve code quality --- include/aidge/quantization/PTQ/PTQMetaOps.hpp | 12 +- python_binding/pybind_PTQ.cpp | 2 +- src/PTQ/PTQ.cpp | 42 ++--- src/PTQ/PTQMetaOps.cpp | 166 ++++++++++-------- 4 files changed, 123 insertions(+), 99 deletions(-) diff --git a/include/aidge/quantization/PTQ/PTQMetaOps.hpp b/include/aidge/quantization/PTQ/PTQMetaOps.hpp index 29bb7f2..62fac87 100644 --- a/include/aidge/quantization/PTQ/PTQMetaOps.hpp +++ b/include/aidge/quantization/PTQ/PTQMetaOps.hpp @@ -25,6 +25,8 @@ #include "aidge/graph/OpArgs.hpp" // Sequential #include "aidge/operator/MetaOperator.hpp" +namespace Aidge { + /// @brief Quantizer acts as a meta-operator to handle scaling operations in the PTQ, replacing the Scaling Operator. /// This operator is composed of a sequence of [Mul] -> [Clip] -> [Round] operations. /// @@ -33,7 +35,7 @@ /// @param clip_max The maximum value for the clip operation. /// @param name The name of the meta-operator node created. /// @return A shared pointer to an instance of the meta-operator node. -std::shared_ptr<Aidge::Node> Quantizer(double scalingFactor, double clip_min,double clip_max,const std::string& name); +std::shared_ptr<Aidge::Node> Quantizer(double scalingFactor, double clipMin, double clipMax, const std::string& name); /// @brief The purpose of Scaling is to encapsulate the Mul operator and tag it as a PTQ node rather than a regular Mul operator. /// Therefore, this meta-operator consists solely of a [Mul] operation. 
@@ -41,7 +43,7 @@ std::shared_ptr<Aidge::Node> Quantizer(double scalingFactor, double clip_min,dou /// @param scalingFactor The scaling factor to apply to the input (a scalar to multiply the input with). /// @param name The name of the meta-operator node created. /// @return A shared pointer to an instance of the scaling node. -std::shared_ptr<Aidge::Node> Scaling(double scalingFactor,const std::string& name = ""); +std::shared_ptr<Aidge::Node> Scaling(double scalingFactor, const std::string& name = ""); /// @brief Updates the scaling factor of a PTQ meta-operator node, allowing for dynamic adjustment of the scaling parameter. /// This function sets a new scaling factor for a specified meta-operator node, modifying the scalar applied in the [Mul] operation. @@ -50,7 +52,7 @@ std::shared_ptr<Aidge::Node> Scaling(double scalingFactor,const std::string& nam /// @param MetaOpNode A shared pointer to the PTQ meta-operator node whose scaling factor will be updated. /// @param newScalingFactor The new scaling factor to apply to the meta-operator node. /// @return True if the scaling factor was successfully updated, false if the operation failed (e.g., if MetaOpNode is null or incompatible). -bool updateScalingFactor(std::shared_ptr<Aidge::Node> MetaOpNode, double newScalingFactor); +void updateScalingFactor(std::shared_ptr<Aidge::Node> MetaOpNode, double newScalingFactor); /// @brief Retrieves the current scaling factor of a PTQ meta-operator node. /// This function returns the scaling factor associated with the specified PTQ meta-operator node, @@ -69,6 +71,8 @@ double getScalingFactor(std::shared_ptr<Aidge::Node> MetaOpNode); /// @param min The minimum value for the clip range. Values below this will be clipped to this minimum. /// @param max The maximum value for the clip range. Values above this will be clipped to this maximum. /// @return True if the clip range was successfully set, false if the operation failed (e.g., if QuantizerNode is null). 
-bool setClipRange(std::shared_ptr<Aidge::Node> QuantizerNode, double min, double max); +void setClipRange(std::shared_ptr<Aidge::Node> QuantizerNode, double min, double max); + +} #endif /* AIDGE_QUANTIZATION_PTQ_PTQMETAOPS_H_ */ diff --git a/python_binding/pybind_PTQ.cpp b/python_binding/pybind_PTQ.cpp index ed26325..b5193bd 100644 --- a/python_binding/pybind_PTQ.cpp +++ b/python_binding/pybind_PTQ.cpp @@ -220,7 +220,7 @@ void init_PTQ(py::module &m) { :type network: :py:class:`aidge_core.GraphView` )mydelimiter"); - m.def("prepare_network", &prepareNetwork, py::arg("network"), "prepare the network fo the PTQ"); + m.def("prepare_network", &prepareNetwork, py::arg("network"), "prepare the network for the PTQ"); } diff --git a/src/PTQ/PTQ.cpp b/src/PTQ/PTQ.cpp index 3cec325..54b95cb 100644 --- a/src/PTQ/PTQ.cpp +++ b/src/PTQ/PTQ.cpp @@ -407,8 +407,9 @@ void normalizeParameters(std::shared_ptr<GraphView> graphView) std::shared_ptr<Node> scalingNode = getPreviousScalingNode(mergingNode); - double scaling_factor = getScalingFactor(scalingNode); - updateScalingFactor(scalingNode,scaling_factor / rescaling); + double currScalingFactor = getScalingFactor(scalingNode); + updateScalingFactor(scalingNode, currScalingFactor / rescaling); + accumulatedRatios[mergingNode->name()] /= rescaling; // optional ... } } @@ -565,8 +566,8 @@ void normalizeActivations(std::shared_ptr<GraphView> graphView, std::map<std::st // ValueRanges must contains all the scaling nodes !!! 
double scalingFactor = valueRanges[node->name()]; - double scaling_factor = getScalingFactor(node); - updateScalingFactor(node, (scaling_factor) / (scalingFactor / prevScalingFactor)); + double currScalingFactor = getScalingFactor(node); + updateScalingFactor(node, currScalingFactor / (scalingFactor / prevScalingFactor)); scalingFactors[node->name()] = scalingFactor; @@ -608,8 +609,9 @@ void normalizeActivations(std::shared_ptr<GraphView> graphView, std::map<std::st std::shared_ptr<Node> scalingNode = getPreviousScalingNode(mergingNode); //Log::info(" SCALING NODE : {} {}", scalingNode->type(), scalingNode->name()); - double scaling_factor = getScalingFactor(scalingNode); - updateScalingFactor(scalingNode, scaling_factor * rescaling); + + double currScalingFactor = getScalingFactor(scalingNode); + updateScalingFactor(scalingNode, currScalingFactor * rescaling); } } } @@ -807,8 +809,9 @@ void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_ rescaling *= outputIsUnsigned ? unsignedMax : signedMax; std::shared_ptr<Node> scalingNode = *(node->getChildren().begin()); // Assert if scalingNode is a Scaling ... - double scaling_factor = getScalingFactor(scalingNode); - updateScalingFactor(scalingNode, scaling_factor * rescaling); + + double currScalingFactor = getScalingFactor(scalingNode); + updateScalingFactor(scalingNode, currScalingFactor * rescaling); } if (isMerging(node)) @@ -823,8 +826,8 @@ void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_ std::shared_ptr<Node> scalingNode = *(node->getChildren().begin()); // Assert if scalingNode is a Scaling ... - double scaling_factor = getScalingFactor(scalingNode); // XXX bad naming - updateScalingFactor(scalingNode, scaling_factor * rescaling); + double currScalingFactor = getScalingFactor(scalingNode); // XXX bad naming + updateScalingFactor(scalingNode, currScalingFactor * rescaling); } // Handle the Scaling Nodes ... 
@@ -833,11 +836,9 @@ void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_ { if (!noQuant) { - // Replacement of Scaling Node by Quantizer - double currentSF = getScalingFactor(node); // XXX bad naming ! - - std::shared_ptr<Node> quantizerNode = Quantizer(currentSF, -(signedMax + 1), signedMax, node->name()); + // Replace the Scaling Node by Quantizer + std::shared_ptr<Node> quantizerNode = Quantizer(getScalingFactor(node), -(signedMax + 1), signedMax, node->name()); quantizerNode->getOperator()->setDataType(DataType::Float64); // getDataType(parentNode) quantizerNode->getOperator()->setBackend("cpu"); @@ -853,8 +854,8 @@ void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_ rescaling /= inputIsUnsigned ? unsignedMax : signedMax; rescaling *= outputIsUnsigned ? unsignedMax : signedMax; - double scalingFactor = getScalingFactor(quantizerNode); - updateScalingFactor(quantizerNode,scalingFactor * rescaling); + double currScalingFactor = getScalingFactor(quantizerNode); + updateScalingFactor(quantizerNode, currScalingFactor * rescaling); if(outputIsUnsigned) { @@ -913,8 +914,9 @@ static void insertCompensationNodes(std::shared_ptr<GraphView> graphView, std::u // rescale the coeffs and edit scaling factor fillTensor(coeffTensor, signedMax); - double sf = getScalingFactor(node); // XXX bad naming ! - updateScalingFactor(node, sf/signedMax); + + double currScalingFactor = getScalingFactor(node); // XXX bad naming ! + updateScalingFactor(node, currScalingFactor / signedMax); // TODO : double check this !!! 
//std::cout << getTensorAbsoluteMax(coeffTensor) << std::endl; @@ -964,8 +966,8 @@ static void printScalingFactors(std::shared_ptr<GraphView> graphView) for (auto node : retrieveNodeVector(graphView)) if (node->type() == "Scaling" || node->type() == "Quantizer") { - double factor = getScalingFactor(node); - Log::info(" {:.6f} ({})", factor, node->name()); + double scalingFactor = getScalingFactor(node); + Log::info(" {:.6f} ({})", scalingFactor, node->name()); } } diff --git a/src/PTQ/PTQMetaOps.cpp b/src/PTQ/PTQMetaOps.cpp index d2bc184..152a3b0 100644 --- a/src/PTQ/PTQMetaOps.cpp +++ b/src/PTQ/PTQMetaOps.cpp @@ -29,106 +29,124 @@ #include "aidge/operator/Identity.hpp" #include "aidge/data/Tensor.hpp" #include "aidge/operator/OperatorTensor.hpp" +#include "aidge/utils/Log.hpp" -std::shared_ptr<Aidge::Node> Quantizer(double scalingFactor, double clip_min,double clip_max,const std::string& name) +namespace Aidge { - std::shared_ptr<Aidge::Tensor> ScalingFactorTensorAttached = std::make_shared<Aidge::Tensor>(Aidge::Array1D<double, 1>{scalingFactor}); - std::shared_ptr<Aidge::Node> mul_node = Aidge::Mul((!name.empty()) ? name + "_MulQuant" : ""); - std::shared_ptr<Aidge::Node> producer_scaling_factor = addProducer(mul_node,1,{1},"ScalingFactor"); - producer_scaling_factor ->getOperator()->setOutput(0,ScalingFactorTensorAttached); - - std::shared_ptr<Aidge::Node> clip_node = Aidge::Clip((!name.empty()) ? name + "_ClipQuant" : "",clip_min,clip_max); +std::shared_ptr<Node> Quantizer(double scalingFactor, double clipMin, double clipMax, const std::string& name) +{ + // create the nodes + + std::shared_ptr<Node> mulNode = Mul((!name.empty()) ? name + "_MulQuant" : ""); + std::shared_ptr<Node> roundNode = Round((!name.empty()) ? name + "_RoundQuant" : ""); + std::shared_ptr<Node> clipNode = Clip((!name.empty()) ? 
name + "_ClipQuant" : "", clipMin, clipMax); + + // connect the scaling factor producer + + std::shared_ptr<Tensor> scalingFactorTensor = std::make_shared<Tensor>(Array1D<double, 1> {scalingFactor}); + std::shared_ptr<Node> scalingFactorProducer = addProducer(mulNode, 1, {1}, "ScalingFactor"); + scalingFactorProducer->getOperator()->setOutput(0, scalingFactorTensor); - std::shared_ptr<Aidge::GraphView> graph = Aidge::Sequential({ - mul_node, - Aidge::Round((!name.empty()) ? name + "_RoundQuant" : ""), - clip_node}); + // create the metaop graph + + std::shared_ptr<GraphView> graphView = Sequential({mulNode, roundNode, clipNode}); + std::shared_ptr<GraphView> connectedGraphView = getConnectedGraphView(mulNode); // XXX why not use the graphView ??? + + // return the metaop + + std::shared_ptr<Node> metaopNode = MetaOperator("Quantizer", connectedGraphView, {}, name); // XXX alternative prototype - std::shared_ptr<Aidge::GraphView> connectedGV = getConnectedGraphView(mul_node); - std::shared_ptr<Aidge::Node> metaopNode = MetaOperator("Quantizer",connectedGV,{},name); return metaopNode; } -std::shared_ptr<Aidge::Node> Scaling(double scalingFactor,const std::string& name) +std::shared_ptr<Node> Scaling(double scalingFactor, const std::string& name) { - std::shared_ptr<Aidge::Tensor> scalingFactorTensor = std::make_shared<Aidge::Tensor>(Aidge::Array1D<double, 1> {scalingFactor}); + std::shared_ptr<Tensor> scalingFactorTensor = std::make_shared<Tensor>(Array1D<double, 1> {scalingFactor}); - std::shared_ptr<Aidge::Node> mulNode = Aidge::Mul((!name.empty()) ? name + "_Scaling" : ""); + std::shared_ptr<Node> mulNode = Mul((!name.empty()) ? 
name + "_Scaling" : ""); - std::shared_ptr<Aidge::Node> scalingFactorProducer = addProducer(mulNode, 1, {1}, "ScalingFactor"); + std::shared_ptr<Node> scalingFactorProducer = addProducer(mulNode, 1, {1}, "ScalingFactor"); scalingFactorProducer->getOperator()->setOutput(0, scalingFactorTensor); - std::shared_ptr<Aidge::GraphView> graphView = Aidge::Sequential({mulNode}); - std::shared_ptr<Aidge::GraphView> connectedGraphView = getConnectedGraphView(mulNode); + std::shared_ptr<GraphView> graphView = Sequential({mulNode}); + std::shared_ptr<GraphView> connectedGraphView = getConnectedGraphView(mulNode); - Aidge::NodePtr metaopNode = MetaOperator("Scaling", connectedGraphView, {}, name); + NodePtr metaopNode = MetaOperator("Scaling", connectedGraphView, {}, name); return metaopNode; } -bool updateScalingFactor(std::shared_ptr<Aidge::Node> MetaOpNode, double newScalingFactor) +static std::shared_ptr<Node> getSubNode(std::shared_ptr<GraphView> graphView, std::string nodeType) { - if(MetaOpNode->type() != "Scaling" && MetaOpNode->type() != "Quantizer") - { - AIDGE_ASSERT("Cannot use updatePTQMetaOpsScalingFactor on Node of type {}", MetaOpNode->type()); - } - std::shared_ptr<Aidge::Tensor> newScalingFactorTensorAttached = std::make_shared<Aidge::Tensor>(Aidge::Array1D<double, 1>{newScalingFactor}); - std::shared_ptr<Aidge::MetaOperator_Op> MetaOp = std::static_pointer_cast<Aidge::MetaOperator_Op>(MetaOpNode->getOperator()); - std::set<Aidge::NodePtr> Meta_Op_Node_List = MetaOp->getMicroGraph()->getNodes(); //List of Nodes inside PTQ Metaop Node - for(std::shared_ptr<Aidge::Node> node : Meta_Op_Node_List) - { - if(node->type() == "Mul") - { - node->input(1).first->getOperator()->setOutput(0, newScalingFactorTensorAttached); - return true; - } - } - AIDGE_ASSERT("Invalid PTQ MetaOperator, no Mul node found inside node of type {}",MetaOpNode->type()); - return false; + std::shared_ptr<Node> mulNode = nullptr; + for(std::shared_ptr<Node> node : graphView->getNodes()) + if 
(node->type() == nodeType) + mulNode = node; + + return mulNode; } -double getScalingFactor(std::shared_ptr<Aidge::Node> MetaOpNode) + +void updateScalingFactor(std::shared_ptr<Node> metaOpNode, double scalingFactor) { - if(MetaOpNode->type() != "Scaling" && MetaOpNode->type() != "Quantizer") - { - AIDGE_ASSERT("Cannot use getPTQMetaOpsScalingFactor on Node of type {}",MetaOpNode->type()); - return -1; + if(metaOpNode->type() != "Scaling" && metaOpNode->type() != "Quantizer") + Log::warn(" Cannot update the scaling factor on Node of type {}", metaOpNode->type()); + + std::shared_ptr<Tensor> scalingFactorTensor = std::make_shared<Tensor>(Array1D<double, 1> {scalingFactor}); + + std::shared_ptr<MetaOperator_Op> metaOp = std::static_pointer_cast<MetaOperator_Op>(metaOpNode->getOperator()); + + std::shared_ptr<Node> mulNode = getSubNode(metaOp->getMicroGraph(), "Mul"); + + if (!mulNode) + Log::warn(" Invalid PTQ MetaOperator, no Mul node found inside ! "); + + mulNode->input(1).first->getOperator()->setOutput(0, scalingFactorTensor); +} + +double getScalingFactor(std::shared_ptr<Node> MetaOpNode) +{ + if (MetaOpNode->type() != "Scaling" && MetaOpNode->type() != "Quantizer") { + Log::warn(" Cannot get the scaling factor on Node of type {}", MetaOpNode->type()); + return 0; } - std::shared_ptr<Aidge::MetaOperator_Op> MetaOp = std::static_pointer_cast<Aidge::MetaOperator_Op>(MetaOpNode->getOperator()); - std::set<Aidge::NodePtr> Meta_Op_Node_List = MetaOp->getMicroGraph()->getNodes(); //List of Nodes inside PTQ Metaop Node - for(std::shared_ptr<Aidge::Node> node : Meta_Op_Node_List) - { - if(node->type() == "Mul") - { - auto tensor = std::static_pointer_cast<Aidge::OperatorTensor>(node->getOperator())->getInput(1); - std::shared_ptr<Aidge::Tensor> fallback; - const Aidge::Tensor& scalingFactorTensor = tensor->refCastFrom(fallback, Aidge::DataType::Float64, "cpu"); - return scalingFactorTensor.get<double>(0); - } + + std::shared_ptr<MetaOperator_Op> metaOp = 
std::static_pointer_cast<MetaOperator_Op>(MetaOpNode->getOperator()); + + std::shared_ptr<Node> mulNode = getSubNode(metaOp->getMicroGraph(), "Mul"); + + if (!mulNode) { + Log::warn(" Invalid PTQ MetaOperator, no Mul found inside node of type {}", MetaOpNode->type()); + return 0; } - AIDGE_ASSERT("Invalid PTQ MetaOperator, no Mul node found inside node of type {}",MetaOpNode->type()); - return -1; + + auto scalingFactorTensor = std::static_pointer_cast<OperatorTensor>(mulNode->getOperator())->getInput(1); + std::shared_ptr<Tensor> fallback; + const Tensor& localTensor = scalingFactorTensor->refCastFrom(fallback, DataType::Float64, "cpu"); + + return localTensor.get<double>(0); } -bool setClipRange(std::shared_ptr<Aidge::Node> QuantizerNode,double min, double max) + + +void setClipRange(std::shared_ptr<Node> quantizerNode, double min, double max) { - if(QuantizerNode->type() != "Quantizer") - { - AIDGE_ASSERT("Cannot use setQuantizerClipRange on Node of type {}",QuantizerNode->type()); - return false; + if (quantizerNode->type() != "Quantizer") { + Log::warn(" Cannot set the clipping range on Node of type {}", quantizerNode->type()); + return; } - std::shared_ptr<Aidge::MetaOperator_Op> MetaOp = std::static_pointer_cast<Aidge::MetaOperator_Op>(QuantizerNode->getOperator()); - std::set<Aidge::NodePtr> Meta_Op_Node_List = MetaOp->getMicroGraph()->getNodes(); //List of Node inside - for(std::shared_ptr<Aidge::Node> node : Meta_Op_Node_List) - { - if(node->type() == "Clip") - { - std::shared_ptr<Aidge::Clip_Op> Clip_Node_Op = std::static_pointer_cast<Aidge::Clip_Op>(node->getOperator()); - Clip_Node_Op->max() = max; - Clip_Node_Op->min() = min; - return true; - } + + std::shared_ptr<MetaOperator_Op> metaOp = std::static_pointer_cast<MetaOperator_Op> (quantizerNode->getOperator()); + + std::shared_ptr<Node> clipNode = getSubNode(metaOp->getMicroGraph(), "Clip"); + + if (!clipNode) { + Log::warn(" Invalid PTQ MetaOperator, no Clip found inside node of type {}", 
quantizerNode->type()); + return; } - AIDGE_ASSERT("Invalid MetaOperator Quantizer, no clip node found inside Node of type {}",QuantizerNode->type()); - return false; + + std::shared_ptr<Clip_Op> clipOp = std::static_pointer_cast<Clip_Op>(clipNode->getOperator()); + clipOp->max() = max; + clipOp->min() = min; +} } \ No newline at end of file -- GitLab From c6516ff59ee7aaad2063dd5be14bbd5fa1916158 Mon Sep 17 00:00:00 2001 From: bhalimi <benjamin.halimi@cea.fr> Date: Fri, 3 Jan 2025 16:10:53 +0000 Subject: [PATCH 10/21] set the LSQ op backward kernels to gradient accumulation mode --- .../aidge/backend/cpu/operator/LSQImpl_kernels.hpp | 12 ++++++------ src/backend/cuda/operator/LSQImpl_CUDA_kernels.cu | 13 ++++++++----- 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/include/aidge/backend/cpu/operator/LSQImpl_kernels.hpp b/include/aidge/backend/cpu/operator/LSQImpl_kernels.hpp index ddb8209..1ed05e2 100644 --- a/include/aidge/backend/cpu/operator/LSQImpl_kernels.hpp +++ b/include/aidge/backend/cpu/operator/LSQImpl_kernels.hpp @@ -67,16 +67,16 @@ void LSQImpl_cpu_backward_kernel(const std::size_t inputLength, const GI fullPrecScale_4 = input[4*i+3] / stepSize[0]; /*****************Features Gradient Computation********************/ // STE method is simply applied - grad_input[4*i] = grad_output[4*i]*((fullPrecScale_1 <= static_cast<GI>(range.first)) ? GI(0.0) : + grad_input[4*i] += grad_output[4*i]*((fullPrecScale_1 <= static_cast<GI>(range.first)) ? GI(0.0) : (fullPrecScale_1 >= static_cast<GI>(range.second)) ? GI(0.0) : GI(1.0)); - grad_input[4*i+1] = grad_output[4*i+1]*((fullPrecScale_2 <= static_cast<GI>(range.first)) ? GI(0.0) : + grad_input[4*i+1] += grad_output[4*i+1]*((fullPrecScale_2 <= static_cast<GI>(range.first)) ? GI(0.0) : (fullPrecScale_2 >= static_cast<GI>(range.second)) ? GI(0.0) : GI(1.0)); - grad_input[4*i+2] = grad_output[4*i+2]*((fullPrecScale_3 <= static_cast<GI>(range.first)) ? 
GI(0.0) : + grad_input[4*i+2] += grad_output[4*i+2]*((fullPrecScale_3 <= static_cast<GI>(range.first)) ? GI(0.0) : (fullPrecScale_3 >= static_cast<GI>(range.second)) ? GI(0.0) : GI(1.0)); - grad_input[4*i+3] = grad_output[4*i+3]*((fullPrecScale_4 <= static_cast<GI>(range.first)) ? GI(0.0) : + grad_input[4*i+3] += grad_output[4*i+3]*((fullPrecScale_4 <= static_cast<GI>(range.first)) ? GI(0.0) : (fullPrecScale_4 >= static_cast<GI>(range.second)) ? GI(0.0) : GI(1.0)); @@ -105,7 +105,7 @@ void LSQImpl_cpu_backward_kernel(const std::size_t inputLength, // Process remaining for(unsigned int i=inputLength-inputLength%4; i<inputLength; ++i) { const GI fullPrecScale = input[i] / stepSize[0]; - grad_input[i] = grad_output[i]*((fullPrecScale <= static_cast<GI>(range.first)) ? GI(0.0) : + grad_input[i] += grad_output[i]*((fullPrecScale <= static_cast<GI>(range.first)) ? GI(0.0) : (fullPrecScale >= static_cast<GI>(range.second)) ? GI(0.0) : GI(1.0)); GI qData = fullPrecScale; @@ -117,7 +117,7 @@ void LSQImpl_cpu_backward_kernel(const std::size_t inputLength, const GI gradScaleFactor = static_cast<GI>(1.0f / std::sqrt(inputLength * range.second)); // 3rd: Multiply Step Size gradient with scale factor - grad_stepSize[0] = diffStepSize * gradScaleFactor; + grad_stepSize[0] += diffStepSize * gradScaleFactor; } diff --git a/src/backend/cuda/operator/LSQImpl_CUDA_kernels.cu b/src/backend/cuda/operator/LSQImpl_CUDA_kernels.cu index 0d54909..96065e4 100644 --- a/src/backend/cuda/operator/LSQImpl_CUDA_kernels.cu +++ b/src/backend/cuda/operator/LSQImpl_CUDA_kernels.cu @@ -84,10 +84,11 @@ __global__ void LSQImpl_cuda_backward_kernel_(const std::size_t inputLength, const GI fullPrecScale = input[i] / stepSize[0]; /*****************************Data/Weights Gradient Computation************************/ - // STE method is simply apply: - grad_input[i] = grad_output[i]*( (fullPrecScale <= static_cast<GI>(range.first)) ? GI(0.0) : - (fullPrecScale >= static_cast<GI>(range.second)) ? 
GI(0.0) : - GI(1.0)); + // STE method is simply applied : + // (we accumulate the gradient instead of replacing it) + grad_input[i] += grad_output[i] * ((fullPrecScale <= static_cast<GI>(range.first)) ? GI(0.0) : + (fullPrecScale >= static_cast<GI>(range.second)) ? GI(0.0) : + GI(1.0)); /*****************************Step Size Gradient Computation*************************/ GI qData = fullPrecScale; @@ -142,7 +143,9 @@ void Aidge::LSQImpl_cuda_backward_kernel(const std::size_t inputLength, // for simplicity and foolproof-ness thrust::device_ptr<GI> grad_workspacePtr(grad_workspace); thrust::device_ptr<GI> grad_stepSizePtr(grad_stepSize); - grad_stepSizePtr[0] = thrust::reduce(grad_workspacePtr, grad_workspacePtr + inputLength, GI(0.0)); + + // We accumulate the stepSize gradient instead of replacing it + grad_stepSizePtr[0] += thrust::reduce(grad_workspacePtr, grad_workspacePtr + inputLength, GI(0.0)); //printf(" step grad = %f \n", (float) grad_stepSizePtr[0]); -- GitLab From 2517cd728c84f4d771c9adc0d5663fe00db4ea1d Mon Sep 17 00:00:00 2001 From: bhalimi <benjamin.halimi@cea.fr> Date: Wed, 15 Jan 2025 13:18:27 +0000 Subject: [PATCH 11/21] set the CLE data types to double --- include/aidge/quantization/PTQ/CLE.hpp | 2 +- src/PTQ/CLE.cpp | 24 ++++++++++++------------ 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/include/aidge/quantization/PTQ/CLE.hpp b/include/aidge/quantization/PTQ/CLE.hpp index d94b6e9..77eaf7f 100644 --- a/include/aidge/quantization/PTQ/CLE.hpp +++ b/include/aidge/quantization/PTQ/CLE.hpp @@ -30,7 +30,7 @@ namespace Aidge * @param graphView The GraphView to process. 
* @param targetDelta the stopping criterion (typical value : 0.01) */ - void crossLayerEqualization(std::shared_ptr<GraphView> graphView, float targetDelta = 0.01); + void crossLayerEqualization(std::shared_ptr<GraphView> graphView, double targetDelta = 0.01); } diff --git a/src/PTQ/CLE.cpp b/src/PTQ/CLE.cpp index 1d5ccc7..2c81815 100644 --- a/src/PTQ/CLE.cpp +++ b/src/PTQ/CLE.cpp @@ -32,23 +32,23 @@ static std::shared_ptr<Tensor> getBiasTensor(std::shared_ptr<Node> node) return std::static_pointer_cast<OperatorTensor>(node->getOperator())->getInput(2); } -static void rescaleTensor(std::shared_ptr<Tensor> tensor, float scaling) +static void rescaleTensor(std::shared_ptr<Tensor> tensor, double scaling) { // Get the tensor data pointer - float * castedTensor = static_cast <float *> (tensor->getImpl()->rawPtr()); + double * castedTensor = static_cast<double *> (tensor->getImpl()->rawPtr()); // Rescale the tensor for(std::size_t i = 0; i < tensor->size(); i++) castedTensor[i] *= scaling; } -static float getTensorAbsoluteMax(std::shared_ptr <Tensor> tensor) +static double getTensorAbsoluteMax(std::shared_ptr <Tensor> tensor) { // Get the tensor data pointer and edit it - float * castedTensor = static_cast<float*>(tensor->getImpl()->rawPtr()); + double * castedTensor = static_cast<double*> (tensor->getImpl()->rawPtr()); // Get the tensor absolute max value - float maxValue = 0.0f; + double maxValue = 0.0f; for(std::size_t i = 0; i < tensor->size(); ++i) { if(std::fabs(castedTensor[i]) > maxValue) { maxValue = std::fabs(castedTensor[i]); @@ -57,7 +57,7 @@ static float getTensorAbsoluteMax(std::shared_ptr <Tensor> tensor) return maxValue; } -void crossLayerEqualization(std::shared_ptr<GraphView> graphView, float targetDelta) +void crossLayerEqualization(std::shared_ptr<GraphView> graphView, double targetDelta) { std::vector<std::shared_ptr<Node>> nodeVector = retrieveNodeVector(graphView); @@ -79,7 +79,7 @@ void crossLayerEqualization(std::shared_ptr<GraphView> graphView, 
float targetDe if (isAffine(node)) affineNodeVector.push_back(node); - float maxRangeDelta; + double maxRangeDelta; do { @@ -94,18 +94,18 @@ void crossLayerEqualization(std::shared_ptr<GraphView> graphView, float targetDe std::shared_ptr<Node> n1 = affineNodeVector[i]; std::shared_ptr<Node> n2 = affineNodeVector[i+1]; - float r1 = getTensorAbsoluteMax(getWeightTensor(n1)); - float r2 = getTensorAbsoluteMax(getWeightTensor(n2)); + double r1 = getTensorAbsoluteMax(getWeightTensor(n1)); + double r2 = getTensorAbsoluteMax(getWeightTensor(n2)); - float s1 = std::sqrt(r1 * r2) / r1; - float s2 = std::sqrt(r1 * r2) / r2; + double s1 = std::sqrt(r1 * r2) / r1; + double s2 = std::sqrt(r1 * r2) / r2; rescaleTensor(getWeightTensor(n1), s1); rescaleTensor(getWeightTensor(n2), s2); rescaleTensor(getBiasTensor(n1), s1); - float rangeDelta = std::abs(r1 - r2); + double rangeDelta = std::abs(r1 - r2); if (rangeDelta > maxRangeDelta) maxRangeDelta = rangeDelta; } -- GitLab From 353cb42c27f908f1e78fb100bd793f43bba77c47 Mon Sep 17 00:00:00 2001 From: Olivier BICHLER <olivier.bichler@cea.fr> Date: Fri, 17 Jan 2025 10:54:17 +0100 Subject: [PATCH 12/21] Hotfix --- include/aidge/operator/LSQ.hpp | 2 +- src/PTQ/PTQMetaOps.cpp | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/include/aidge/operator/LSQ.hpp b/include/aidge/operator/LSQ.hpp index 10ceb81..eb266bc 100644 --- a/include/aidge/operator/LSQ.hpp +++ b/include/aidge/operator/LSQ.hpp @@ -95,7 +95,7 @@ public: */ inline std::shared_ptr<Node> LSQ(const std::pair<int, int>& range = {0, 255}, const std::string& name = "") { auto lsq = std::make_shared<Node>(std::make_shared<LSQ_Op>(range), name); - addProducer(lsq, 1, {1}, "ss"); + addProducer<1>(lsq, 1, {1}, "ss"); return lsq; } } diff --git a/src/PTQ/PTQMetaOps.cpp b/src/PTQ/PTQMetaOps.cpp index 152a3b0..527d853 100644 --- a/src/PTQ/PTQMetaOps.cpp +++ b/src/PTQ/PTQMetaOps.cpp @@ -46,7 +46,7 @@ std::shared_ptr<Node> Quantizer(double scalingFactor, double clipMin, 
double cli // connect the scaling factor producer std::shared_ptr<Tensor> scalingFactorTensor = std::make_shared<Tensor>(Array1D<double, 1> {scalingFactor}); - std::shared_ptr<Node> scalingFactorProducer = addProducer(mulNode, 1, {1}, "ScalingFactor"); + std::shared_ptr<Node> scalingFactorProducer = addProducer<1>(mulNode, 1, {1}, "ScalingFactor"); scalingFactorProducer->getOperator()->setOutput(0, scalingFactorTensor); // create the metaop graph @@ -67,7 +67,7 @@ std::shared_ptr<Node> Scaling(double scalingFactor, const std::string& name) std::shared_ptr<Node> mulNode = Mul((!name.empty()) ? name + "_Scaling" : ""); - std::shared_ptr<Node> scalingFactorProducer = addProducer(mulNode, 1, {1}, "ScalingFactor"); + std::shared_ptr<Node> scalingFactorProducer = addProducer<1>(mulNode, 1, {1}, "ScalingFactor"); scalingFactorProducer->getOperator()->setOutput(0, scalingFactorTensor); std::shared_ptr<GraphView> graphView = Sequential({mulNode}); -- GitLab From b63d13b327ee317f885f20de12f4e313f807b19d Mon Sep 17 00:00:00 2001 From: Olivier BICHLER <olivier.bichler@cea.fr> Date: Sun, 19 Jan 2025 14:07:53 +0100 Subject: [PATCH 13/21] Hotfix: removed std::cout --- src/PTQ/PTQ.cpp | 4 ++-- src/QAT/QAT_FixedQ.cpp | 6 +++--- src/QAT/QAT_LSQ.cpp | 4 ++-- src/recipes/QuantRecipes.cpp | 2 +- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/PTQ/PTQ.cpp b/src/PTQ/PTQ.cpp index 54b95cb..0e26313 100644 --- a/src/PTQ/PTQ.cpp +++ b/src/PTQ/PTQ.cpp @@ -995,7 +995,7 @@ static void printRanges(std::shared_ptr<GraphView> graphView, std::map<std::stri auto scheduling = scheduler.getStaticScheduling(); for (auto node : scheduling) if (node->type() == "Scaling") - std::cout << node->name() << " range = " << valueRanges[node->name()] << std::endl; + fmt::println("{} range = {}", node->name(), valueRanges[node->name()]); } void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits, std::vector<std::shared_ptr<Tensor>> inputDataSet, Clipping clippingMode, bool 
noQuant, bool optimizeSigns, bool singleShift, bool useCuda, bool verbose) @@ -1098,7 +1098,7 @@ void clearBiases(std::shared_ptr<GraphView> graphView) void devPTQ(std::shared_ptr<GraphView> graphView) { for (std::shared_ptr<Node> node : graphView->getNodes()) - std::cout << " UUU : " << node->name() << std::endl; + fmt::println(" UUU : {}", node->name()); } } diff --git a/src/QAT/QAT_FixedQ.cpp b/src/QAT/QAT_FixedQ.cpp index d22074f..9160b4a 100644 --- a/src/QAT/QAT_FixedQ.cpp +++ b/src/QAT/QAT_FixedQ.cpp @@ -91,7 +91,7 @@ static std::map<std::string, float> collectInputStats(std::shared_ptr<GraphView> const auto op = std::static_pointer_cast<FixedQ_Op>(node->getOperator()); float inputStd = getTensorStd(op->getInput(0)); inputStats.insert(std::make_pair(node->name(), inputStd)); - std::cout << node->name() << " -> " << inputStd << std::endl; + fmt::println("{} -> {}", node->name(), inputStd); } } @@ -108,7 +108,7 @@ static std::map<std::string, float> collectParamStats(std::shared_ptr<GraphView> const auto op = std::static_pointer_cast<FixedQ_Op>(node->getOperator()); float paramStd = getTensorStd(op->getInput(1)); paramStats.insert(std::make_pair(node->name(), paramStd)); - std::cout << node->name() << " -> " << paramStd << std::endl; + fmt::println("{} -> {}", node->name(), paramStd); } } @@ -156,7 +156,7 @@ void QuantFixedQ::devQAT(std::shared_ptr<GraphView> graphView) scheduler.generateScheduling(); auto s = scheduler.getStaticScheduling(); for (std::shared_ptr<Node> node : s) - std::cout << " name : " << node->name() << std::endl; + fmt::println(" name : {}", node->name()); } } \ No newline at end of file diff --git a/src/QAT/QAT_LSQ.cpp b/src/QAT/QAT_LSQ.cpp index 38c8182..9b51e84 100644 --- a/src/QAT/QAT_LSQ.cpp +++ b/src/QAT/QAT_LSQ.cpp @@ -125,7 +125,7 @@ static std::map<std::string, float> collectInputStats(std::shared_ptr<GraphView> const auto op = std::static_pointer_cast<LSQ_Op>(node->getOperator()); float inputAbsMean = 
getTensorAbsMean(op->getInput(0)); inputStats.insert(std::make_pair(node->name(), inputAbsMean)); - std::cout << node->name() << " -> " << inputAbsMean << std::endl; + fmt::println("{} -> {}", node->name(), inputAbsMean); } } @@ -148,7 +148,7 @@ static std::map<std::string, float> collectParamStats(std::shared_ptr<GraphView> const auto op = std::static_pointer_cast<LSQ_Op>(node->getOperator()); float paramAbsMean = getTensorAbsMean(op->getInput(1)); paramStats.insert(std::make_pair(node->name(), paramAbsMean)); - std::cout << node->name() << " -> " << paramAbsMean << std::endl; + fmt::println("{} -> {}", node->name(), paramAbsMean); } } diff --git a/src/recipes/QuantRecipes.cpp b/src/recipes/QuantRecipes.cpp index 562948c..6e1dcdb 100644 --- a/src/recipes/QuantRecipes.cpp +++ b/src/recipes/QuantRecipes.cpp @@ -59,7 +59,7 @@ void insertBatchNormNodes(std::shared_ptr<GraphView> graphView) { std::shared_ptr<Conv_Op<2>> convOperator = std::static_pointer_cast<Conv_Op<2>> (parentNode->getOperator()); int nb_channels = convOperator->getInput(1)->dims()[0]; - std::cout << " NB CHANNELS = " << nb_channels << std::endl; // TODO : remove this ... + fmt::println(" NB CHANNELS = {}", nb_channels); // TODO : remove this ... 
std::string batchnormNodeName = makeUniqueName(parentNode->name() + "_BN", graphView); std::shared_ptr<Node> batchnormNode = BatchNorm<2>(nb_channels, 1e-5, 0.1, false, batchnormNodeName); -- GitLab From bfe31067cfca3fb8aca8f7dea01fe3e7a3f2bcff Mon Sep 17 00:00:00 2001 From: NAUD Maxence <maxence.naud@cea.fr> Date: Tue, 21 Jan 2025 11:04:56 +0000 Subject: [PATCH 14/21] ADD: fmt as private library --- CMakeLists.txt | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 7a2b168..80c5ae7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -85,17 +85,6 @@ endif() # ############################################## # Find system dependencies -Include(FetchContent) - -FetchContent_Declare( - fmt - GIT_REPOSITORY https://github.com/fmtlib/fmt.git - GIT_TAG 10.2.1 # or a later release -) - -set(FMT_SYSTEM_HEADERS ON) -FetchContent_MakeAvailable(fmt) -set_property(TARGET fmt PROPERTY POSITION_INDEPENDENT_CODE ON) if(CUDA) find_package(CUDAToolkit REQUIRED) @@ -169,7 +158,7 @@ if (PYBIND) endif() # XXX HERE !!! 
-target_link_libraries(${module_name} PUBLIC fmt::fmt) +target_link_libraries(${module_name} PRIVATE fmt::fmt) target_compile_features(${module_name} PRIVATE cxx_std_14) target_compile_options(${module_name} PRIVATE -- GitLab From 6d73c46e795f255aaa3c3f488c1dc43cc1424ff9 Mon Sep 17 00:00:00 2001 From: Maxence Naud <maxence.naud@cea.fr> Date: Wed, 29 Jan 2025 16:41:43 +0000 Subject: [PATCH 15/21] [Upd] standardization of some files --- aidge_quantization/unit_tests/test_ptq.py | 22 +++--- include/aidge/operator/FixedQ.hpp | 43 ++++++------ include/aidge/operator/LSQ.hpp | 6 +- include/aidge/operator/SAT/DoReFa.hpp | 67 ++++++++++++------- include/aidge/operator/SAT/TanhClamp.hpp | 33 +++------ include/aidge/quantization/PTQ/CLE.hpp | 30 +++++---- include/aidge/quantization/PTQ/Clipping.hpp | 22 +++--- include/aidge/quantization/PTQ/PTQ.hpp | 27 ++++---- include/aidge/quantization/PTQ/PTQMetaOps.hpp | 18 ++--- include/aidge/quantization/QAT/QAT_FixedQ.hpp | 10 +-- include/aidge/quantization/QAT/QAT_LSQ.hpp | 20 +++--- src/PTQ/CLE.cpp | 40 +++++++---- src/PTQ/PTQMetaOps.cpp | 22 +++--- src/operator/FixedQ.cpp | 20 ++++++ src/operator/SAT/DoReFa.cpp | 33 +++++++-- src/operator/SAT/TanhClamp.cpp | 22 +++++- 16 files changed, 259 insertions(+), 176 deletions(-) diff --git a/aidge_quantization/unit_tests/test_ptq.py b/aidge_quantization/unit_tests/test_ptq.py index dfdedd8..56080bf 100644 --- a/aidge_quantization/unit_tests/test_ptq.py +++ b/aidge_quantization/unit_tests/test_ptq.py @@ -21,7 +21,7 @@ ACCURACIES = (95.4, 94.4) # (97.9, 97.7) NB_BITS = 4 # -------------------------------------------------------------- -# UTILS +# UTILS # -------------------------------------------------------------- def propagate(model, scheduler, sample): @@ -50,7 +50,7 @@ def compute_accuracy(model, samples, labels): # -------------------------------------------------------------- class test_ptq(unittest.TestCase): - + def setUp(self): # load the samples / labels (numpy) @@ -70,19 +70,20 
@@ class test_ptq(unittest.TestCase): def tearDown(self): pass - + def test_model(self): Log.set_console_level(Level.Info) # compute the base accuracy accuracy = compute_accuracy(self.model, self.samples[0:NB_SAMPLES], self.labels) self.assertAlmostEqual(accuracy * 100, ACCURACIES[0], msg='base accuracy does not meet the baseline !', delta=0.1) - + def test_quant_model(self): - Log.set_console_level(Level.Info) + Log.set_console_level(Level.Debug) # create the calibration dataset + tensors = [] for sample in self.samples[0:NB_SAMPLES]: sample = prepare_sample(sample) @@ -91,14 +92,13 @@ class test_ptq(unittest.TestCase): # quantize the model - aidge_quantization.quantize_network( - self.model, - NB_BITS, - tensors, - clipping_mode=aidge_quantization.Clipping.MSE, + self.model, + NB_BITS, + tensors, + clipping_mode=aidge_quantization.Clipping.MSE, no_quantization=False, - optimize_signs=True, + optimize_signs=True, single_shift=False ) diff --git a/include/aidge/operator/FixedQ.hpp b/include/aidge/operator/FixedQ.hpp index 96a52b4..3d46dcf 100644 --- a/include/aidge/operator/FixedQ.hpp +++ b/include/aidge/operator/FixedQ.hpp @@ -9,11 +9,12 @@ * ********************************************************************************/ -#ifndef AIDGE_CORE_OPERATOR_FIXEDQ_H_ -#define AIDGE_CORE_OPERATOR_FIXEDQ_H_ +#ifndef AIDGE_QUANTIZATION_OPERATOR_FIXEDQ_H_ +#define AIDGE_QUANTIZATION_OPERATOR_FIXEDQ_H_ -#include <cassert> +#include <cstddef> // std::size_t #include <memory> +#include <string> #include <vector> #include "aidge/backend/OperatorImpl.hpp" @@ -21,8 +22,8 @@ #include "aidge/operator/OperatorTensor.hpp" #include "aidge/utils/ErrorHandling.hpp" #include "aidge/utils/Registrar.hpp" -#include "aidge/utils/Types.h" #include "aidge/utils/StaticAttributes.hpp" +#include "aidge/utils/Types.h" namespace Aidge { @@ -43,24 +44,20 @@ private: public: - FixedQ_Op(std::size_t nbBits, float span, bool isOutputUnsigned) : - OperatorTensor(Type, {InputCategory::Data}, 1), - 
mAttributes(std::make_shared<Attributes_>(attr<FixedQAttr::NbBits>(nbBits), attr<FixedQAttr::Span>(span), attr<FixedQAttr::IsOutputUnsigned>(isOutputUnsigned))) + FixedQ_Op(std::size_t nbBits = 8, float span = 4.0f, bool isOutputUnsigned = false) : + OperatorTensor(Type, {InputCategory::Data}, 1), + mAttributes(std::make_shared<Attributes_>( + attr<FixedQAttr::NbBits>(nbBits), + attr<FixedQAttr::Span>(span), + attr<FixedQAttr::IsOutputUnsigned>(isOutputUnsigned))) {} /** - * @brief Copy-constructor. Copy the operator attributes and its output tensor(s), but not its input tensors (the new operator has no input associated). + * @brief Copy-constructor. Copy the operator attributes and its output + * tensor(s), but not its input tensors (the new operator has no input associated). * @param op Operator to copy. */ - FixedQ_Op(const FixedQ_Op& op) - : OperatorTensor(op), mAttributes(op.mAttributes) - { - if (op.mImpl){ - SET_IMPL_MACRO(FixedQ_Op, *this, op.backend()); - }else{ - mImpl = nullptr; - } - } + FixedQ_Op(const FixedQ_Op& op); /** * @brief Clone the operator using its copy-constructor. 
@@ -88,14 +85,16 @@ public: }; -inline std::shared_ptr<Node> FixedQ(std::size_t nbBits = 8, float span = 4.0f, bool isOutputUnsigned = false, const std::string& name = "") { - return std::make_shared<Node>(std::make_shared<FixedQ_Op>(nbBits, span, isOutputUnsigned), name); -} -} +std::shared_ptr<Node> FixedQ(std::size_t nbBits = 8, + float span = 4.0f, + bool isOutputUnsigned = false, + const std::string& name = ""); + +} // namespace Aidge namespace { template <> const char* const EnumStrings<Aidge::FixedQAttr>::data[] = {"nb_bits", "span", "is_output_unsigned"}; } -#endif /* AIDGE_CORE_OPERATOR_FIXEDQ_H_ */ +#endif /* AIDGE_QUANTIZATION_OPERATOR_FIXEDQ_H_ */ diff --git a/include/aidge/operator/LSQ.hpp b/include/aidge/operator/LSQ.hpp index eb266bc..970c476 100644 --- a/include/aidge/operator/LSQ.hpp +++ b/include/aidge/operator/LSQ.hpp @@ -9,8 +9,8 @@ * ********************************************************************************/ -#ifndef AIDGE_CORE_OPERATOR_LSQ_H_ -#define AIDGE_CORE_OPERATOR_LSQ_H_ +#ifndef AIDGE_QUANTIZATION_OPERATOR_LSQ_H_ +#define AIDGE_QUANTIZATION_OPERATOR_LSQ_H_ #include <cassert> #include <memory> @@ -105,4 +105,4 @@ template <> const char *const EnumStrings<Aidge::LSQAttr>::data[] = {"range"}; } -#endif /* AIDGE_CORE_OPERATOR_LSQ_H_ */ +#endif /* AIDGE_QUANTIZATION_OPERATOR_LSQ_H_ */ diff --git a/include/aidge/operator/SAT/DoReFa.hpp b/include/aidge/operator/SAT/DoReFa.hpp index 92ce167..d168c38 100644 --- a/include/aidge/operator/SAT/DoReFa.hpp +++ b/include/aidge/operator/SAT/DoReFa.hpp @@ -9,17 +9,15 @@ * ********************************************************************************/ -#ifndef AIDGE_CORE_OPERATOR_DOREFA_H_ -#define AIDGE_CORE_OPERATOR_DOREFA_H_ +#ifndef AIDGE_QUANTIZATION_OPERATOR_SAT_DOREFA_H_ +#define AIDGE_QUANTIZATION_OPERATOR_SAT_DOREFA_H_ -#include <cassert> #include <memory> #include <vector> #include "aidge/backend/OperatorImpl.hpp" #include "aidge/graph/Node.hpp" #include 
"aidge/operator/OperatorTensor.hpp" -#include "aidge/utils/ErrorHandling.hpp" #include "aidge/utils/Registrar.hpp" #include "aidge/utils/StaticAttributes.hpp" #include "aidge/utils/Types.h" @@ -43,12 +41,17 @@ public: static const std::string Type; private: - using Attributes_ = StaticAttributes<DoReFaAttr, size_t, DoReFaMode>; + using Attributes_ = StaticAttributes<DoReFaAttr, std::size_t, DoReFaMode>; template <DoReFaAttr e> using attr = typename Attributes_::template attr<e>; const std::shared_ptr<Attributes_> mAttributes; public: - DoReFa_Op(size_t range = 255, DoReFaMode mode = DoReFaMode::Default) + /** + * @brief Constructor for DoReFa_Op + * @param range The quantization range (default: 255) + * @param mode The quantization mode (default: Default) + */ + DoReFa_Op(std::size_t range = 255, DoReFaMode mode = DoReFaMode::Default) : OperatorTensor(Type, {InputCategory::Param}, 1), mAttributes(std::make_shared<Attributes_>( attr<DoReFaAttr::Range>(range), @@ -59,30 +62,34 @@ public: * @brief Copy-constructor. Copy the operator attributes and its output tensor(s), but not its input tensors (the new operator has no input associated). * @param op Operator to copy. */ - DoReFa_Op(const DoReFa_Op& op) - : OperatorTensor(op), - mAttributes(op.mAttributes) - { - if (op.mImpl){ - SET_IMPL_MACRO(DoReFa_Op, *this, op.backend()); - }else{ - mImpl = nullptr; - } - } + DoReFa_Op(const DoReFa_Op& op); /** * @brief Clone the operator using its copy-constructor. 
* @see Operator::DoReFa_Op + * @return std::shared_ptr<Operator> A deep copy of the operator */ - std::shared_ptr<Operator> clone() const override { - return std::make_shared<DoReFa_Op>(*this); - } + std::shared_ptr<Operator> clone() const override; + /** + * @brief Get available backends for this operator + * @return std::set<std::string> Set of supported backend names + */ std::set<std::string> getAvailableBackends() const override final; + + /** + * @brief Set the backend for this operator + * @param name Backend name + * @param device Device index (default: 0) + */ void setBackend(const std::string& name, DeviceIdx_t device = 0) override final; + /** + * @brief Get operator attributes + * @return std::shared_ptr<Attributes> Shared pointer to operator attributes + */ inline std::shared_ptr<Attributes> attributes() const override { return mAttributes; } - inline size_t& range() const noexcept { return mAttributes->getAttr<DoReFaAttr::Range>(); } + inline std::size_t& range() const noexcept { return mAttributes->getAttr<DoReFaAttr::Range>(); } inline DoReFaMode& mode() const noexcept { return mAttributes->getAttr<DoReFaAttr::Mode>(); } static const std::vector<std::string> getInputsName(){ @@ -93,10 +100,20 @@ public: } }; -inline std::shared_ptr<Node> DoReFa(size_t range = 255, DoReFaMode mode = DoReFaMode::Default, const std::string& name = "") { - return std::make_shared<Node>(std::make_shared<DoReFa_Op>(range, mode), name); -} -} +/** + * @brief Factory function to create a DoReFa operator node + * + * @param range Quantization range (default: 255) + * @param mode Quantization mode (default: Default) + * @param name Node name (default: empty) + * + * @return std::shared_ptr<Node> Shared pointer to the created node + */ +std::shared_ptr<Node> DoReFa(std::size_t range = 255, + DoReFaMode mode = DoReFaMode::Default, + const std::string& name = ""); + +} // namespace Aidge namespace { template <> @@ -106,4 +123,4 @@ template <> const char *const 
EnumStrings<Aidge::DoReFaMode>::data[] = {"default", "symmetric", "asymmetric", "full_range"}; } -#endif /* AIDGE_CORE_OPERATOR_DOREFA_H_ */ +#endif /* AIDGE_QUANTIZATION_OPERATOR_SAT_DOREFA_H_ */ diff --git a/include/aidge/operator/SAT/TanhClamp.hpp b/include/aidge/operator/SAT/TanhClamp.hpp index def43b8..9d99d70 100644 --- a/include/aidge/operator/SAT/TanhClamp.hpp +++ b/include/aidge/operator/SAT/TanhClamp.hpp @@ -9,20 +9,18 @@ * ********************************************************************************/ -#ifndef AIDGE_CORE_OPERATOR_TANHCLAMP_H_ -#define AIDGE_CORE_OPERATOR_TANHCLAMP_H_ +#ifndef AIDGE_QUANTIZATION_OPERATOR_SAT_TANHCLAMP_H_ +#define AIDGE_QUANTIZATION_OPERATOR_SAT_TANHCLAMP_H_ -#include <cassert> #include <memory> +#include <set> +#include <string> #include <vector> #include "aidge/backend/OperatorImpl.hpp" #include "aidge/graph/Node.hpp" #include "aidge/operator/OperatorTensor.hpp" -#include "aidge/operator/Producer.hpp" -#include "aidge/utils/ErrorHandling.hpp" #include "aidge/utils/Registrar.hpp" -#include "aidge/utils/StaticAttributes.hpp" #include "aidge/utils/Types.h" namespace Aidge { @@ -44,23 +42,13 @@ public: * @brief Copy-constructor. Copy the operator attributes and its output tensor(s), but not its input tensors (the new operator has no input associated). * @param op Operator to copy. */ - TanhClamp_Op(const TanhClamp_Op& op) - : OperatorTensor(op) - { - if (op.mImpl){ - SET_IMPL_MACRO(TanhClamp_Op, *this, op.backend()); - }else{ - mImpl = nullptr; - } - } + TanhClamp_Op(const TanhClamp_Op& op); /** * @brief Clone the operator using its copy-constructor. 
* @see Operator::TanhClamp_Op */ - std::shared_ptr<Operator> clone() const override { - return std::make_shared<TanhClamp_Op>(*this); - } + std::shared_ptr<Operator> clone() const override; bool forwardDims(bool allowDataDependency = false) override final; std::set<std::string> getAvailableBackends() const override final; @@ -75,9 +63,8 @@ public: } }; -inline std::shared_ptr<Node> TanhClamp(const std::string& name = "") { - return std::make_shared<Node>(std::make_shared<TanhClamp_Op>(), name); -} -} +std::shared_ptr<Node> TanhClamp(const std::string& name = ""); + +} // namespace Aidge -#endif /* AIDGE_CORE_OPERATOR_TANHCLAMP_H_ */ +#endif /* AIDGE_QUANTIZATION_OPERATOR_SAT_TANHCLAMP_H_ */ diff --git a/include/aidge/quantization/PTQ/CLE.hpp b/include/aidge/quantization/PTQ/CLE.hpp index 77eaf7f..f4dc073 100644 --- a/include/aidge/quantization/PTQ/CLE.hpp +++ b/include/aidge/quantization/PTQ/CLE.hpp @@ -9,29 +9,33 @@ * ********************************************************************************/ -#ifndef AIDGE_QUANTIZATION_PTQ_CLE_H_ -#define AIDGE_QUANTIZATION_PTQ_CLE_H_ +#ifndef AIDGE_QUANTIZATION_QUANTIZATION_PTQ_CLE_H_ +#define AIDGE_QUANTIZATION_QUANTIZATION_PTQ_CLE_H_ -//#include <cstdint> -//#include <map> -//#include <memory> -//#include <string> -//#include <vector> +#include <memory> -#include "aidge/data/Tensor.hpp" #include "aidge/graph/GraphView.hpp" namespace Aidge { /** - * @brief Equalize the ranges of the nodes parameters by proceding iteratively. - * Can only be applied to single branch networks (otherwise does not edit the graphView). + * @brief Equalize the ranges of the nodes parameters by proceding iteratively. + * Can only be applied to single branch networks (otherwise does not edit the GraphView). + * + * Cross Layer Equalization (CLE) is used to balance the weights between consecutive + * layers to improve quantization performance. 
It works by iteratively scaling weights + * and biases of adjacent layers while preserving the overall function of the network. + * + * @note The operation modifies weights and biases in-place but preserves the mathematical + * function computed by the network. + * * @param graphView The GraphView to process. - * @param targetDelta the stopping criterion (typical value : 0.01) + * @param targetDelta the stopping criterion (typical value : 0.01). Smaller values lead + * to more precise equalization but may require more iterations. */ void crossLayerEqualization(std::shared_ptr<GraphView> graphView, double targetDelta = 0.01); -} +} // namespace Aidge -#endif /* AIDGE_QUANTIZATION_PTQ_CLE_H_ */ \ No newline at end of file +#endif /* AIDGE_QUANTIZATION_QUANTIZATION_PTQ_CLE_H_ */ diff --git a/include/aidge/quantization/PTQ/Clipping.hpp b/include/aidge/quantization/PTQ/Clipping.hpp index d0622f4..3f65c42 100644 --- a/include/aidge/quantization/PTQ/Clipping.hpp +++ b/include/aidge/quantization/PTQ/Clipping.hpp @@ -9,14 +9,14 @@ * ********************************************************************************/ -#ifndef AIDGE_QUANTIZATION_PTQ_CLIP_H_ -#define AIDGE_QUANTIZATION_PTQ_CLIP_H_ +#ifndef AIDGE_QUANTIZATION_QUANTIZATION_PTQ_CLIP_H_ +#define AIDGE_QUANTIZATION_QUANTIZATION_PTQ_CLIP_H_ -//#include <cstdint> -//#include <map> -//#include <memory> -//#include <string> -//#include <vector> +#include <cstdint> // std::uint8_t +#include <map> +#include <memory> +#include <string> +#include <vector> #include "aidge/data/Tensor.hpp" #include "aidge/graph/GraphView.hpp" @@ -56,9 +56,9 @@ namespace Aidge double computeKLClipping(std::vector<int> histogram, std::uint8_t nbBits); /** - * @brief Return a corrected map of the provided activation ranges. - * To do so compute the optimal clipping values for every node and multiply the input ranges by those values. - * The method used to compute the clippings can be eihter 'MSE', 'AA', 'KL' or 'MAX'. 
+ * @brief Return a corrected map of the provided activation ranges. + * To do so compute the optimal clipping values for every node and multiply the input ranges by those values. + * The method used to compute the clippings can be eihter 'MSE', 'AA', 'KL' or 'MAX'. * @param clippingMode The method used to compute the optimal clippings. * @param valueRanges The map associating each affine node to its output range. * @param nbBits The quantization number of bits. @@ -71,5 +71,5 @@ namespace Aidge } -#endif /* AIDGE_QUANTIZATION_PTQ_CLIP_H_ */ +#endif /* AIDGE_QUANTIZATION_QUANTIZATION_PTQ_CLIP_H_ */ diff --git a/include/aidge/quantization/PTQ/PTQ.hpp b/include/aidge/quantization/PTQ/PTQ.hpp index d2b8b7f..4fc38bc 100644 --- a/include/aidge/quantization/PTQ/PTQ.hpp +++ b/include/aidge/quantization/PTQ/PTQ.hpp @@ -9,16 +9,19 @@ * ********************************************************************************/ -#ifndef AIDGE_QUANTIZATION_PTQ_PTQ_H_ -#define AIDGE_QUANTIZATION_PTQ_PTQ_H_ +#ifndef AIDGE_QUANTIZATION_QUANTIZATION_PTQ_PTQ_H_ +#define AIDGE_QUANTIZATION_QUANTIZATION_PTQ_PTQ_H_ -//#include <cstdint> -//#include <map> -//#include <memory> -//#include <string> -//#include <vector> +#include <cstdint> // std::uint8_t +#include <map> +#include <memory> +#include <set> +#include <string> +#include <utility> // std::pair +#include <vector> #include "aidge/data/Tensor.hpp" +#include "aidge/quantization/PTQ/Clipping.hpp" #include "aidge/graph/GraphView.hpp" namespace Aidge { @@ -104,12 +107,12 @@ namespace Aidge { * @brief Normalize the activations of each affine node so that they fit in the [-1:1] range. * This is done by reconfiguring the scaling nodes, as well as rescaling the weights and biases tensors. * @param graphView The GraphView containing the affine nodes. - * @param valueRanges The node output value ranges computed over the calibration dataset. + * @param valueRanges The node output value ranges computed over the calibration dataset. 
*/ void normalizeActivations(std::shared_ptr<GraphView> graphView, std::map<std::string, double> valueRanges); /** - * @brief For each node, compute the sign of its input and output values. + * @brief For each node, compute the sign of its input and output values. * The goal of the routine is to maximize the number of unsigned IOs in order to double the value resolution when possible. * @param graphView The GraphView to analyze. * @param verbose Whether to print the sign map or not. @@ -135,7 +138,7 @@ namespace Aidge { * @param clippingMode: Type of the clipping optimization. Can be either 'MAX', 'MSE', 'AA' or 'KL'. * @param applyRounding Whether to apply the rounding operations or not. * @param optimizeSigns Whether to take account of the IO signs of the operators or not. - * @param singleShift Whether to convert the scaling factors into powers of two. If true the approximations are compensated using the previous nodes weights. + * @param singleShift Whether to convert the scaling factors into powers of two. If true the approximations are compensated using the previous nodes weights. * @param verbose Whether to print internal informations about the quantization process. */ void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits, std::vector<std::shared_ptr<Tensor>> inputDataSet, Clipping clippingMode, bool applyRounding, bool optimizeSigns, bool singleShift, bool useCuda, bool verbose); @@ -157,8 +160,8 @@ namespace Aidge { * @brief Developement and test routine. * @param graphView The GraphView under test. 
*/ - void devPTQ(std::shared_ptr<GraphView> graphView); + void devPTQ(std::shared_ptr<GraphView> graphView); } -#endif /* AIDGE_QUANTIZATION_PTQ_PTQ_H_ */ +#endif /* AIDGE_QUANTIZATION_QUANTIZATION_PTQ_PTQ_H_ */ diff --git a/include/aidge/quantization/PTQ/PTQMetaOps.hpp b/include/aidge/quantization/PTQ/PTQMetaOps.hpp index 62fac87..b9bad0d 100644 --- a/include/aidge/quantization/PTQ/PTQMetaOps.hpp +++ b/include/aidge/quantization/PTQ/PTQMetaOps.hpp @@ -8,22 +8,14 @@ * SPDX-License-Identifier: EPL-2.0 * ********************************************************************************/ -#ifndef AIDGE_QUANTIZATION_PTQ_PTQMETAOPS_H_ -#define AIDGE_QUANTIZATION_PTQ_PTQMETAOPS_H_ +#ifndef AIDGE_QUANTIZATION_QUANTIZATION_PTQ_PTQMETAOPS_H_ +#define AIDGE_QUANTIZATION_QUANTIZATION_PTQ_PTQMETAOPS_H_ -#include <array> #include <memory> #include <string> -#include <utility> - -#include "aidge/operator/Clip.hpp" -#include "aidge/operator/Mul.hpp" -#include "aidge/operator/Round.hpp" #include "aidge/graph/GraphView.hpp" #include "aidge/graph/Node.hpp" -#include "aidge/graph/OpArgs.hpp" // Sequential -#include "aidge/operator/MetaOperator.hpp" namespace Aidge { @@ -55,7 +47,7 @@ std::shared_ptr<Aidge::Node> Scaling(double scalingFactor, const std::string& na void updateScalingFactor(std::shared_ptr<Aidge::Node> MetaOpNode, double newScalingFactor); /// @brief Retrieves the current scaling factor of a PTQ meta-operator node. -/// This function returns the scaling factor associated with the specified PTQ meta-operator node, +/// This function returns the scaling factor associated with the specified PTQ meta-operator node, /// allowing inspection of the current scalar applied in the [Mul] operation. /// /// @param MetaOpNode A shared pointer to the PTQ meta-operator node whose scaling factor is being queried. 
@@ -66,7 +58,7 @@ double getScalingFactor(std::shared_ptr<Aidge::Node> MetaOpNode); /// This function modifies the clip range of a Quantizer node, allowing adjustment of the range within which values are clipped /// in the [Clip] operation of the Quantizer sequence. /// -/// @param QuantizerNode A shared pointer to the Quantizer node whose clip range is being set. +/// @param QuantizerNode A shared pointer to the Quantizer node whose clip range is being set. /// This node should have been created using the Quantizer function. /// @param min The minimum value for the clip range. Values below this will be clipped to this minimum. /// @param max The maximum value for the clip range. Values above this will be clipped to this maximum. @@ -75,4 +67,4 @@ void setClipRange(std::shared_ptr<Aidge::Node> QuantizerNode, double min, double } -#endif /* AIDGE_QUANTIZATION_PTQ_PTQMETAOPS_H_ */ +#endif /* AIDGE_QUANTIZATION_QUANTIZATION_PTQ_PTQMETAOPS_H_ */ diff --git a/include/aidge/quantization/QAT/QAT_FixedQ.hpp b/include/aidge/quantization/QAT/QAT_FixedQ.hpp index ecbe742..6a2aa24 100644 --- a/include/aidge/quantization/QAT/QAT_FixedQ.hpp +++ b/include/aidge/quantization/QAT/QAT_FixedQ.hpp @@ -9,8 +9,10 @@ * ********************************************************************************/ -#ifndef AIDGE_QUANTIZATION_QAT_FIXEDQ_H_ -#define AIDGE_QUANTIZATION_QAT_FIXEDQ_H_ +#ifndef AIDGE_QUANTIZATION_QUANTIZATION_QAT_FIXEDQ_H_ +#define AIDGE_QUANTIZATION_QUANTIZATION_QAT_FIXEDQ_H_ + +#include <memory> #include "aidge/graph/Node.hpp" #include "aidge/graph/GraphView.hpp" @@ -41,10 +43,10 @@ void insertAndInitQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits * @brief Developement and test routine. * @param graphView The GraphView under test. 
*/ -void devQAT(std::shared_ptr<GraphView> graphView); +void devQAT(std::shared_ptr<GraphView> graphView); } } -#endif /* AIDGE_QUANTIZATION_QAT_FIXEDQ_H_ */ +#endif /* AIDGE_QUANTIZATION_QUANTIZATION_QAT_FIXEDQ_H_ */ diff --git a/include/aidge/quantization/QAT/QAT_LSQ.hpp b/include/aidge/quantization/QAT/QAT_LSQ.hpp index 4970be0..a44c71b 100644 --- a/include/aidge/quantization/QAT/QAT_LSQ.hpp +++ b/include/aidge/quantization/QAT/QAT_LSQ.hpp @@ -9,12 +9,14 @@ * ********************************************************************************/ -#ifndef AIDGE_QUANTIZATION_QAT_LSQ_H_ -#define AIDGE_QUANTIZATION_QAT_LSQ_H_ +#ifndef AIDGE_QUANTIZATION_QUANTIZATION_QAT_LSQ_H_ +#define AIDGE_QUANTIZATION_QUANTIZATION_QAT_LSQ_H_ + +#include <cstddef> // std::size_t +#include <memory> -#include "aidge/graph/Node.hpp" -#include "aidge/graph/GraphView.hpp" #include "aidge/data/Tensor.hpp" +#include "aidge/graph/GraphView.hpp" namespace Aidge { namespace QuantLSQ { @@ -25,7 +27,7 @@ namespace QuantLSQ { * @param nbBits Number of quantization bits. * @param span Fixed output span of the quantizers. */ -void insertQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits, float step_size); +void insertQuantizers(std::shared_ptr<GraphView> graphView, std::size_t nbBits, float step_size); /** * @brief Given a GraphView with parameters properly initialized and some calibration data, @@ -35,10 +37,10 @@ void insertQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits, float * @param calibrationData Calibration data used to adjust the spans. * @param scale Multiplicative constant applied to the spans. 
*/ -void insertAndInitQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits, std::shared_ptr<Tensor> calibrationData); +void insertAndInitQuantizers(std::shared_ptr<GraphView> graphView, std::size_t nbBits, std::shared_ptr<Tensor> calibrationData); -} -} +} // namespace QuantLSQ +} // namespace Aidge -#endif /* AIDGE_QUANTIZATION_QAT_LSQ_H_ */ +#endif /* AIDGE_QUANTIZATION_QUANTIZATION_QAT_LSQ_H_ */ diff --git a/src/PTQ/CLE.cpp b/src/PTQ/CLE.cpp index 2c81815..5265d9c 100644 --- a/src/PTQ/CLE.cpp +++ b/src/PTQ/CLE.cpp @@ -10,14 +10,19 @@ ********************************************************************************/ #include "aidge/quantization/PTQ/CLE.hpp" + +#include <cmath> // std::abs, std::fabs, std::sqrt +#include <cstddef> // std::size_t +#include <memory> +#include <vector> + #include "aidge/quantization/PTQ/Clipping.hpp" -#include "aidge/quantization/PTQ/PTQ.hpp" +#include "aidge/quantization/PTQ/PTQ.hpp" // retrieveNodeVector #include "aidge/graph/GraphView.hpp" -#include "aidge/scheduler/SequentialScheduler.hpp" -#include "aidge/scheduler/Scheduler.hpp" -#include "aidge/utils/Log.hpp" +#include "aidge/graph/Node.hpp" #include "aidge/operator/OperatorTensor.hpp" +#include "aidge/utils/Log.hpp" namespace Aidge { @@ -42,13 +47,13 @@ static void rescaleTensor(std::shared_ptr<Tensor> tensor, double scaling) castedTensor[i] *= scaling; } -static double getTensorAbsoluteMax(std::shared_ptr <Tensor> tensor) +static double getTensorAbsoluteMax(std::shared_ptr<Tensor> tensor) { // Get the tensor data pointer and edit it double * castedTensor = static_cast<double*> (tensor->getImpl()->rawPtr()); // Get the tensor absolute max value - double maxValue = 0.0f; + double maxValue = 0.0; for(std::size_t i = 0; i < tensor->size(); ++i) { if(std::fabs(castedTensor[i]) > maxValue) { maxValue = std::fabs(castedTensor[i]); @@ -62,15 +67,14 @@ void crossLayerEqualization(std::shared_ptr<GraphView> graphView, double targetD std::vector<std::shared_ptr<Node>> 
nodeVector = retrieveNodeVector(graphView); // Check if the CLE can be applied ... - for (std::shared_ptr<Node> node : nodeVector) if (node->getChildren().size() > 1) { - Log::info(" Network have multiple branches, skipping the CLE ... "); + Log::notice("Network have multiple branches, skipping the CLE ... "); return; - } + } - Log::info(" Applying the Cross-Layer Equalization ... "); + Log::info("Applying the Cross-Layer Equalization ... "); // Get the vector of affine nodes @@ -79,17 +83,22 @@ void crossLayerEqualization(std::shared_ptr<GraphView> graphView, double targetD if (isAffine(node)) affineNodeVector.push_back(node); + if (affineNodeVector.empty()) { + Log::notice("No affine nodes found in the network. CLE cannot be applied."); + return; + } double maxRangeDelta; + int iteration = 0; - do + do { + ++iteration; maxRangeDelta = 0.0; - //std::cout << " ----- " << std::endl; //for (std::shared_ptr<Node> node : affineNodeVector) // std::cout << getTensorAbsoluteMax(getWeightTensor(node)) << std::endl; - - for (size_t i = 0; i < (affineNodeVector.size() - 1); i++) + + for (std::size_t i = 0; i < (affineNodeVector.size() - 1); i++) { std::shared_ptr<Node> n1 = affineNodeVector[i]; std::shared_ptr<Node> n2 = affineNodeVector[i+1]; @@ -111,6 +120,9 @@ void crossLayerEqualization(std::shared_ptr<GraphView> graphView, double targetD } } while (maxRangeDelta > targetDelta); + + Log::notice("CLE completed after {} iterations. 
Final max range delta: {:.6f}", + iteration, maxRangeDelta); } } \ No newline at end of file diff --git a/src/PTQ/PTQMetaOps.cpp b/src/PTQ/PTQMetaOps.cpp index 527d853..77018c2 100644 --- a/src/PTQ/PTQMetaOps.cpp +++ b/src/PTQ/PTQMetaOps.cpp @@ -11,8 +11,8 @@ #include "aidge/quantization/PTQ/PTQMetaOps.hpp" -#include <array> #include <memory> +#include <string> #include <utility> //Operator @@ -32,7 +32,7 @@ #include "aidge/utils/Log.hpp" -namespace Aidge +namespace Aidge { std::shared_ptr<Node> Quantizer(double scalingFactor, double clipMin, double clipMax, const std::string& name) @@ -46,19 +46,19 @@ std::shared_ptr<Node> Quantizer(double scalingFactor, double clipMin, double cli // connect the scaling factor producer std::shared_ptr<Tensor> scalingFactorTensor = std::make_shared<Tensor>(Array1D<double, 1> {scalingFactor}); - std::shared_ptr<Node> scalingFactorProducer = addProducer<1>(mulNode, 1, {1}, "ScalingFactor"); + std::shared_ptr<Node> scalingFactorProducer = addProducer<1>(mulNode, 1, {1}, "ScalingFactor"); scalingFactorProducer->getOperator()->setOutput(0, scalingFactorTensor); - + // create the metaop graph std::shared_ptr<GraphView> graphView = Sequential({mulNode, roundNode, clipNode}); std::shared_ptr<GraphView> connectedGraphView = getConnectedGraphView(mulNode); // XXX why not use the graphView ??? - // return the metaop + // return the metaop std::shared_ptr<Node> metaopNode = MetaOperator("Quantizer", connectedGraphView, {}, name); // XXX alternative prototype - return metaopNode; + return metaopNode; } std::shared_ptr<Node> Scaling(double scalingFactor, const std::string& name) @@ -67,7 +67,7 @@ std::shared_ptr<Node> Scaling(double scalingFactor, const std::string& name) std::shared_ptr<Node> mulNode = Mul((!name.empty()) ? 
name + "_Scaling" : ""); - std::shared_ptr<Node> scalingFactorProducer = addProducer<1>(mulNode, 1, {1}, "ScalingFactor"); + std::shared_ptr<Node> scalingFactorProducer = addProducer<1>(mulNode, 1, {1}, "ScalingFactor"); scalingFactorProducer->getOperator()->setOutput(0, scalingFactorTensor); std::shared_ptr<GraphView> graphView = Sequential({mulNode}); @@ -96,7 +96,7 @@ void updateScalingFactor(std::shared_ptr<Node> metaOpNode, double scalingFactor) std::shared_ptr<Tensor> scalingFactorTensor = std::make_shared<Tensor>(Array1D<double, 1> {scalingFactor}); std::shared_ptr<MetaOperator_Op> metaOp = std::static_pointer_cast<MetaOperator_Op>(metaOpNode->getOperator()); - + std::shared_ptr<Node> mulNode = getSubNode(metaOp->getMicroGraph(), "Mul"); if (!mulNode) @@ -113,7 +113,7 @@ double getScalingFactor(std::shared_ptr<Node> MetaOpNode) } std::shared_ptr<MetaOperator_Op> metaOp = std::static_pointer_cast<MetaOperator_Op>(MetaOpNode->getOperator()); - + std::shared_ptr<Node> mulNode = getSubNode(metaOp->getMicroGraph(), "Mul"); if (!mulNode) { @@ -123,8 +123,8 @@ double getScalingFactor(std::shared_ptr<Node> MetaOpNode) auto scalingFactorTensor = std::static_pointer_cast<OperatorTensor>(mulNode->getOperator())->getInput(1); std::shared_ptr<Tensor> fallback; - const Tensor& localTensor = scalingFactorTensor->refCastFrom(fallback, DataType::Float64, "cpu"); - + const Tensor& localTensor = scalingFactorTensor->refCastFrom(fallback, DataType::Float64, "cpu"); + return localTensor.get<double>(0); } diff --git a/src/operator/FixedQ.cpp b/src/operator/FixedQ.cpp index 8791740..9828ce9 100644 --- a/src/operator/FixedQ.cpp +++ b/src/operator/FixedQ.cpp @@ -20,6 +20,17 @@ const std::string Aidge::FixedQ_Op::Type = "FixedQ"; +Aidge::FixedQ_Op::FixedQ_Op(const Aidge::FixedQ_Op& op) + : OperatorTensor(op), + mAttributes(op.mAttributes) +{ + if (op.mImpl){ + SET_IMPL_MACRO(FixedQ_Op, *this, op.backend()); + }else{ + mImpl = nullptr; + } +} + std::set<std::string> 
Aidge::FixedQ_Op::getAvailableBackends() const { return Registrar<FixedQ_Op>::getKeys(); } @@ -28,3 +39,12 @@ void Aidge::FixedQ_Op::setBackend(const std::string& name, DeviceIdx_t device) { SET_IMPL_MACRO(FixedQ_Op, *this, name); mOutputs[0]->setBackend(name, device); } + +//////////////////////////////////////////////////////////////////////////////// + +std::shared_ptr<Aidge::Node> Aidge::FixedQ(std::size_t nbBits, + float span, + bool isOutputUnsigned, + const std::string& name) { + return std::make_shared<Node>(std::make_shared<FixedQ_Op>(nbBits, span, isOutputUnsigned), name); +} \ No newline at end of file diff --git a/src/operator/SAT/DoReFa.cpp b/src/operator/SAT/DoReFa.cpp index b6124ba..426e330 100644 --- a/src/operator/SAT/DoReFa.cpp +++ b/src/operator/SAT/DoReFa.cpp @@ -17,13 +17,38 @@ #include "aidge/data/Tensor.hpp" #include "aidge/utils/Types.h" -const std::string Aidge::DoReFa_Op::Type = "DoReFa"; +namespace Aidge { -std::set<std::string> Aidge::DoReFa_Op::getAvailableBackends() const { +const std::string DoReFa_Op::Type = "DoReFa"; + +DoReFa_Op::DoReFa_Op(const DoReFa_Op& op) + : OperatorTensor(op), + mAttributes(op.mAttributes) +{ + if (op.mImpl) { + SET_IMPL_MACRO(DoReFa_Op, *this, op.backend()); + } else { + mImpl = nullptr; + } +} + +std::shared_ptr<Operator> DoReFa_Op::clone() const { + return std::make_shared<DoReFa_Op>(*this); +} + +std::set<std::string> DoReFa_Op::getAvailableBackends() const { return Registrar<DoReFa_Op>::getKeys(); } -void Aidge::DoReFa_Op::setBackend(const std::string& name, DeviceIdx_t device) { +void DoReFa_Op::setBackend(const std::string& name, DeviceIdx_t device) { SET_IMPL_MACRO(DoReFa_Op, *this, name); mOutputs[0]->setBackend(name, device); -} \ No newline at end of file +} + +//////////////////////////////////////////////////////////////////////////////// + +std::shared_ptr<Node> DoReFa(size_t range, DoReFaMode mode, const std::string& name) { + return std::make_shared<Node>(std::make_shared<DoReFa_Op>(range, 
mode), name); +} + +} // namespace Aidge \ No newline at end of file diff --git a/src/operator/SAT/TanhClamp.cpp b/src/operator/SAT/TanhClamp.cpp index 2b8d63d..a03fc7d 100644 --- a/src/operator/SAT/TanhClamp.cpp +++ b/src/operator/SAT/TanhClamp.cpp @@ -20,6 +20,20 @@ const std::string Aidge::TanhClamp_Op::Type = "TanhClamp"; +Aidge::TanhClamp_Op::TanhClamp_Op(const Aidge::TanhClamp_Op& op) + : OperatorTensor(op) +{ + if (op.mImpl) { + SET_IMPL_MACRO(TanhClamp_Op, *this, op.backend()); + } else { + mImpl = nullptr; + } +} + +std::shared_ptr<Aidge::Operator> Aidge::TanhClamp_Op::clone() const { + return std::make_shared<TanhClamp_Op>(*this); +} + bool Aidge::TanhClamp_Op::forwardDims(bool /*allowDataDependency*/) { if (inputsAssociated()) { @@ -40,5 +54,11 @@ void Aidge::TanhClamp_Op::setBackend(const std::string& name, DeviceIdx_t device mOutputs[0]->setBackend(name, device); // Scale output is always on CPU for now - mOutputs[1]->setBackend("cpu"); // XXX why ? + mOutputs[1]->setBackend("cpu"); // XXX why ? +} + +//////////////////////////////////////////////////////////////////////////////// + +std::shared_ptr<Aidge::Node> Aidge::TanhClamp(const std::string& name) { + return std::make_shared<Node>(std::make_shared<TanhClamp_Op>(), name); } \ No newline at end of file -- GitLab From 93e06ced08e376e57a3c3cd0f36f10c91beeca06 Mon Sep 17 00:00:00 2001 From: NAUD Maxence <maxence.naud@cea.fr> Date: Wed, 29 Jan 2025 22:26:39 +0000 Subject: [PATCH 16/21] Change Python minimum version 3.7 -> 3.8 --- pyproject.toml | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index deb91c7..c7cd4c2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ description="Quantization algorithms to compress aidge networks." 
dependencies = [ "numpy>=1.21.6", ] -requires-python = ">= 3.7" +requires-python = ">= 3.8" readme = "README.md" license = { file = "LICENSE" } classifiers = [ @@ -56,6 +56,19 @@ test-command = "pytest {package}/aidge_quantization/unit_tests" # "cp39-win_amd64", # "cp310-win_amd64", # ] +# PYLINT +[tool.pylint.main] +# A comma-separated list of package or module names from where C extensions may +# be loaded. Extensions are loading into the active Python interpreter and may +# run arbitrary code. +extension-pkg-allow-list = ["aidge_core", "aidge_backend_cpu", "aidge_quantization", "onnx"] +# Files or directories to be skipped. They should be base names, not paths. +ignore = ["CVS"] +# List of module names for which member attributes should not be checked (useful +# for modules/projects where namespaces are manipulated during runtime and thus +# existing member attributes cannot be deduced by static analysis). It supports +# qualified module names, as well as Unix pattern matching. +ignored-modules = ["aidge_core", "aidge_backend_cpu", "aidge_quantization", "onnx"] ## AIDGE DEPENDENCIES DECLARATION [tool.cibuildwheel.environment] AIDGE_DEPENDENCIES = "aidge_core aidge_backend_cpu aidge_onnx" # format => "dep_1 dep_2 ... 
dep_n" -- GitLab From 94de7485a3f525f55ce3b8eeb8ce139ca032cd74 Mon Sep 17 00:00:00 2001 From: NAUD Maxence <maxence.naud@cea.fr> Date: Wed, 29 Jan 2025 22:27:21 +0000 Subject: [PATCH 17/21] UPD: 'setup.py' to access compilation options from environment variables set by 'setup.sh' --- setup.py | 55 ++++++++++++++++++++++++++++++++++++------------------- 1 file changed, 36 insertions(+), 19 deletions(-) diff --git a/setup.py b/setup.py index 8774d01..1bfc0ac 100644 --- a/setup.py +++ b/setup.py @@ -37,6 +37,7 @@ class AidgePkgBuild(build_ext): # This lists the number of processors available on the machine # The compilation will use half of them max_jobs = str(ceil(multiprocessing.cpu_count() / 2)) + max_jobs = os.environ.get("AIDGE_NB_PROC", max_jobs) cwd = pathlib.Path().absolute() @@ -51,14 +52,20 @@ class AidgePkgBuild(build_ext): package_prefix = build_lib if not self.editable_mode else SETUP_DIR pybind_install_prefix = (package_prefix / PROJECT_NAME).absolute() - os.chdir(str(build_temp)) - - compile_type = os.environ.get("AIDGE_PYTHON_BUILD_TYPE", "Release") install_path = ( os.path.join(sys.prefix, "lib", "libAidge") if "AIDGE_INSTALL" not in os.environ else os.environ["AIDGE_INSTALL"] ) + + # Read environment variables for CMake options + c_compiler = os.environ.get("AIDGE_C_COMPILER", "gcc") + cxx_compiler = os.environ.get("AIDGE_CXX_COMPILER", "g++") + build_type = os.environ.get("AIDGE_BUILD_TYPE", "Release") + asan = os.environ.get("AIDGE_ASAN", "OFF") + with_cuda = os.environ.get("AIDGE_WITH_CUDA", "OFF") + cmake_arch = os.environ.get("AIDGE_CMAKE_ARCH", "") + build_gen = os.environ.get("AIDGE_BUILD_GEN", "") build_gen_opts = ( ["-G", build_gen] @@ -67,26 +74,36 @@ class AidgePkgBuild(build_ext): ) test_onoff = os.environ.get("AIDGE_BUILD_TEST", "OFF") - self.spawn( - [ - "cmake", - *build_gen_opts, - str(cwd), - f"-DTEST={test_onoff}", - f"-DCMAKE_INSTALL_PREFIX:PATH={install_path}", - f"-DCMAKE_BUILD_TYPE={compile_type}", - "-DPYBIND=ON", - 
f"-DPYBIND_INSTALL_PREFIX:PATH={pybind_install_prefix}", - "-DCMAKE_EXPORT_COMPILE_COMMANDS=ON", - "-DCOVERAGE=OFF", - ] - ) + os.chdir(str(build_temp)) + + cmake_cmd = [ + "cmake", + *build_gen_opts, + str(cwd), + f"-DTEST={test_onoff}", + f"-DCMAKE_INSTALL_PREFIX:PATH={install_path}", + f"-DCMAKE_BUILD_TYPE={build_type}", + f"-DCMAKE_C_COMPILER={c_compiler}", + f"-DCMAKE_CXX_COMPILER={cxx_compiler}", + f"-DENABLE_ASAN={asan}", + f"-DCUDA={with_cuda}", + "-DPYBIND=ON", + f"-DPYBIND_INSTALL_PREFIX:PATH={pybind_install_prefix}", + "-DCMAKE_EXPORT_COMPILE_COMMANDS=1", + "-DCOVERAGE=OFF", + ] + + # Append architecture-specific arguments if provided + if cmake_arch: + cmake_cmd.append(cmake_arch) + + self.spawn(cmake_cmd) if not self.dry_run: self.spawn( - ["cmake", "--build", ".", "--config", compile_type, "-j", max_jobs] + ["cmake", "--build", ".", "--config", build_type, "-j", max_jobs] ) - self.spawn(["cmake", "--install", ".", "--config", compile_type]) + self.spawn(["cmake", "--install", ".", "--config", build_type]) os.chdir(str(cwd)) -- GitLab From 63cd594e6759eb362f20490bda58c0f16cff67bd Mon Sep 17 00:00:00 2001 From: NAUD Maxence <maxence.naud@cea.fr> Date: Wed, 29 Jan 2025 23:48:19 +0000 Subject: [PATCH 18/21] FEAT: unit-tests/CMakeLists.txt add minimum version for Catch2 --- unit_tests/CMakeLists.txt | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/unit_tests/CMakeLists.txt b/unit_tests/CMakeLists.txt index 9d9f815..cfdbf0a 100644 --- a/unit_tests/CMakeLists.txt +++ b/unit_tests/CMakeLists.txt @@ -1,12 +1,23 @@ -Include(FetchContent) +# Catch2 configuration +set(CATCH2_MIN_VERSION 3.3.0) -FetchContent_Declare( - Catch2 - GIT_REPOSITORY https://github.com/catchorg/Catch2.git - GIT_TAG v3.0.1 # or a later release -) +# Try to find system installed Catch2 +find_package(Catch2 ${CATCH2_MIN_VERSION} QUIET) -FetchContent_MakeAvailable(Catch2) +if(NOT Catch2_FOUND) + message(STATUS "Catch2 not found in system, retrieving 
from git") + Include(FetchContent) + + FetchContent_Declare( + Catch2 + GIT_REPOSITORY https://github.com/catchorg/Catch2.git + GIT_TAG devel # or a later release + ) + FetchContent_MakeAvailable(Catch2) + message(STATUS "Fetched Catch2 version ${Catch2_VERSION}") +else() + message(STATUS "Using system Catch2 version ${Catch2_VERSION}") +endif() file(GLOB_RECURSE src_files "*.cpp") -- GitLab From 6d7131c1c7427ca2afb3da3e16a54ebab1efd72c Mon Sep 17 00:00:00 2001 From: NAUD Maxence <maxence.naud@cea.fr> Date: Wed, 29 Jan 2025 23:50:48 +0000 Subject: [PATCH 19/21] UPD: CMakeLists.txt enforce C++14 and try to reorder sections --- CMakeLists.txt | 172 +++++++++++++++++++++++++------------------------ 1 file changed, 89 insertions(+), 83 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 80c5ae7..b3c6d45 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,15 +1,22 @@ # CMake >= 3.18 is required for good support of FindCUDAToolkit -cmake_minimum_required(VERSION 3.18) # XXX 3.18 -set(CXX_STANDARD 14) +cmake_minimum_required(VERSION 3.18) -file(STRINGS "${CMAKE_SOURCE_DIR}/version.txt" version) +set(CMAKE_CXX_STANDARD 14) +set(CMAKE_CXX_STANDARD_REQUIRED ON) +set(CMAKE_CXX_EXTENSIONS OFF) + +# Read project metadata file(STRINGS "${CMAKE_SOURCE_DIR}/project_name.txt" project) +message(STATUS "Project name: ${project}") +file(STRINGS "${CMAKE_SOURCE_DIR}/version.txt" version) # Parse version.txt to retrieve Major, Minor and Path string(REGEX MATCH "([0-9]+)\\.([0-9]+)\\.([0-9]+)" _ MATCHES ${version}) set(PROJECT_VERSION_MAJOR ${CMAKE_MATCH_1}) set(PROJECT_VERSION_MINOR ${CMAKE_MATCH_2}) set(PROJECT_VERSION_PATCH ${CMAKE_MATCH_3}) +message(STATUS "Project version: ${version}") + # Retrieve latest git commit execute_process( @@ -19,17 +26,25 @@ execute_process( OUTPUT_STRIP_TRAILING_WHITESPACE ERROR_QUIET ) - -message(STATUS "Project name: ${project}") -message(STATUS "Project version: ${version}") message(STATUS "Latest git commit: ${GIT_COMMIT_HASH}") 
-message(STATUS "Creating ${CMAKE_CURRENT_SOURCE_DIR}/include/aidge/quantization_version.h") project(${project} VERSION ${version} DESCRIPTION "Quantization methods for the Aidge framework." LANGUAGES CXX) + +if(NOT $ENV{AIDGE_INSTALL} STREQUAL "") + set(CMAKE_INSTALL_PREFIX $ENV{AIDGE_INSTALL}) + list(APPEND CMAKE_PREFIX_PATH $ENV{AIDGE_INSTALL}) + message(WARNING "Env var AIDGE_INSTALL detected : $ENV{AIDGE_INSTALL}. Set CMAKE_INSTALL_PREFIX to AIDGE_INSTALL & added to CMAKE_PREFIX_PATH" + "\n\tCMAKE_INSTALL_PREFIX = ${CMAKE_INSTALL_PREFIX}" + "\n\tCMAKE_PREFIX_PATH = ${CMAKE_PREFIX_PATH}") +endif() + +message(STATUS "Creating ${CMAKE_CURRENT_SOURCE_DIR}/include/aidge/quantization_version.h") + + # Note: Using configure_file later in the code make so that version variables are lost... # I tried to set in internal cache but it failed. # Current code is working, but there might be a scope issue. @@ -39,21 +54,12 @@ configure_file( "${CMAKE_CURRENT_SOURCE_DIR}/include/aidge/quantization_version.h" ) -# Note : project name is {project} and python module name is also {project} -set(module_name _${project}) # target name -set(pybind_module_name ${CMAKE_PROJECT_NAME}) # name of submodule for python bindings - -set(CXX_STANDARD 14) - -############################################## -# Import utils CMakeLists -set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake") ############################################## # Define options -option(PYBIND "python binding" ON) +option(PYBIND "python binding" OFF) option(WERROR "Warning as error" OFF) -option(TEST "Enable tests" ON) +option(TEST "Enable tests" OFF) option(COVERAGE "Enable coverage" OFF) option(CUDA "Enable CUDA backend" OFF) # XXX OFF option(ENABLE_ASAN "Enable ASan (AddressSanitizer) for runtime analysis of memory use (over/underflow, memory leak, ...)" OFF) @@ -61,74 +67,55 @@ option(ENABLE_ASAN "Enable ASan (AddressSanitizer) for runtime analysis of memor ############################################## 
# Import utils CMakeLists set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake") -include(PybindModuleCreation) if(CMAKE_COMPILER_IS_GNUCXX AND COVERAGE) Include(CodeCoverage) endif() +# Set variables if(CUDA) enable_language(CUDA) - message(STATUS "Cuda compiler version = ${CMAKE_CUDA_COMPILER_VERSION}") # Define a preprocessor macro with the Cuda compiler version add_definitions(-DCUDA_COMPILER_VERSION="${CMAKE_CUDA_COMPILER_VERSION}") endif() -if(NOT $ENV{AIDGE_INSTALL} STREQUAL "") - set(CMAKE_INSTALL_PREFIX $ENV{AIDGE_INSTALL}) - list(APPEND CMAKE_PREFIX_PATH $ENV{AIDGE_INSTALL}) - message(WARNING "Env var AIDGE_INSTALL detected : $ENV{AIDGE_INSTALL}. Set CMAKE_INSTALL_PREFIX to AIDGE_INSTALL & added to CMAKE_PREFIX_PATH" - "\n\tCMAKE_INSTALL_PREFIX = ${CMAKE_INSTALL_PREFIX}" - "\n\tCMAKE_PREFIX_PATH = ${CMAKE_PREFIX_PATH}") +# Source files +if(CUDA) + file(GLOB_RECURSE src_files "src/*.cpp" "src/*.cu") +else() + file(GLOB_RECURSE src_files "src/*.cpp") endif() -# ############################################## -# Find system dependencies +# Header files +file(GLOB_RECURSE inc_files "include/*.hpp") -if(CUDA) - find_package(CUDAToolkit REQUIRED) -endif() +# Note: cxx project name is {CMAKE_PROJECT_NAME} and python module name is also {CMAKE_PROJECT_NAME} +set(module_name _${CMAKE_PROJECT_NAME}) # target name +add_library(${module_name} ${src_files} ${inc_files}) +set(pybind_module_name ${CMAKE_PROJECT_NAME}) # name of submodule for python bindings -############################################## -# Find system dependencies +# Dependencies and linking find_package(aidge_core REQUIRED) find_package(aidge_backend_cpu REQUIRED) +target_link_libraries(${module_name} + PUBLIC + _aidge_core + _aidge_backend_cpu +) if(CUDA) + find_package(CUDAToolkit REQUIRED) find_package(aidge_backend_cuda REQUIRED) -endif() - -############################################## -# Create target and set properties - -if(CUDA) - file(GLOB_RECURSE src_files "src/*.cpp" 
"src/*.cu") - file(GLOB_RECURSE inc_files "include/*.hpp") - - add_library(${module_name} ${src_files} ${inc_files}) target_link_libraries(${module_name} PUBLIC - _aidge_core # _ is added because we link the target not the project - _aidge_backend_cpu - # _aidge_backend_cuda # XXX CUDA::cudart CUDA::cublas cudnn ) -else() - file(GLOB_RECURSE src_files "src/*.cpp") - file(GLOB_RECURSE inc_files "include/*.hpp") - - add_library(${module_name} ${src_files} ${inc_files}) - target_link_libraries(${module_name} - PUBLIC - _aidge_core # _ is added because we link the target not the project - _aidge_backend_cpu - ) endif() -#Set target properties +# Include directories target_include_directories(${module_name} PUBLIC $<INSTALL_INTERFACE:include> @@ -137,6 +124,7 @@ target_include_directories(${module_name} ${CMAKE_CURRENT_SOURCE_DIR}/src ) +# Compilation settings if(CUDA) if(NOT DEFINED CMAKE_CUDA_STANDARD) set(CMAKE_CUDA_STANDARD 14) @@ -157,23 +145,44 @@ if (PYBIND) generate_python_binding(${pybind_module_name} ${module_name}) endif() -# XXX HERE !!! 
-target_link_libraries(${module_name} PRIVATE fmt::fmt) target_compile_features(${module_name} PRIVATE cxx_std_14) +target_link_libraries(${module_name} PRIVATE fmt::fmt) +#################################### +# Compilation options and warnings target_compile_options(${module_name} PRIVATE + # Options for Clang, AppleClang, and GCC compilers $<$<COMPILE_LANGUAGE:CPP>:$<$<OR:$<CXX_COMPILER_ID:Clang>,$<CXX_COMPILER_ID:AppleClang>,$<CXX_COMPILER_ID:GNU>>: - -Wall -Wextra -Wold-style-cast -Winline -pedantic -Werror=narrowing -Wshadow $<$<BOOL:${WERROR}>:-Werror>>>) + -Wall # Enable all warnings + -Wextra # Enable extra warnings + -Wold-style-cast # Warn about C-style casts + -Winline # Warn if inline expansion fails + -pedantic # Enforce strict ISO C++ standards + -Werror=narrowing # Treat narrowing conversions as errors + -Wshadow # Warn about variable shadowing + $<$<BOOL:${WERROR}>:-Werror> # Optionally treat warnings as errors + >> +) + +# Additional MSVC-specific warning level +target_compile_options(${module_name} PRIVATE + $<$<CXX_COMPILER_ID:MSVC>: + /W4 # Warning level 4 (highest for MSVC) + > +) + +# CUDA-specific compile options if(CUDA) target_compile_options(${module_name} PRIVATE $<$<COMPILE_LANGUAGE:CUDA>: - -Wall>) + -Wall # Enable all warnings for CUDA + > + ) endif() -target_compile_options(${module_name} PRIVATE - $<$<CXX_COMPILER_ID:MSVC>: - /W4>) +# Coverage flags for GCC if(CMAKE_COMPILER_IS_GNUCXX AND COVERAGE) + include(CodeCoverage) append_coverage_compiler_flags() endif() @@ -183,29 +192,31 @@ endif() include(GNUInstallDirs) set(INSTALL_CONFIGDIR ${CMAKE_INSTALL_LIBDIR}/cmake/${project}) +# Install the library target install(TARGETS ${module_name} EXPORT ${project}-targets - LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} - ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} - RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} + RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} 
) +# Install header files install(DIRECTORY include/ DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) -#Export the targets to a script - +# Export targets for other projects to use install(EXPORT ${project}-targets - FILE "${project}-targets.cmake" - DESTINATION ${INSTALL_CONFIGDIR} - COMPONENT ${module_name} + FILE "${project}-targets.cmake" + DESTINATION ${INSTALL_CONFIGDIR} + COMPONENT ${module_name} ) -if (PYBIND) +# Python binding installation +if(PYBIND) install(TARGETS ${pybind_module_name} DESTINATION ${PYBIND_INSTALL_PREFIX} ) endif() -#Create a ConfigVersion.cmake file +# Create and install CMake configuration files include(CMakePackageConfigHelpers) write_basic_package_version_file( "${CMAKE_CURRENT_BINARY_DIR}/${project}-config-version.cmake" @@ -218,15 +229,14 @@ configure_package_config_file("${project}-config.cmake.in" INSTALL_DESTINATION ${INSTALL_CONFIGDIR} ) -#Install the config, configversion and custom find modules +# Install CMake configuration files install(FILES "${CMAKE_CURRENT_BINARY_DIR}/${project}-config.cmake" "${CMAKE_CURRENT_BINARY_DIR}/${project}-config-version.cmake" DESTINATION ${INSTALL_CONFIGDIR} ) -############################################## -## Exporting from the build tree +# Export from build tree export(EXPORT ${project}-targets FILE "${CMAKE_CURRENT_BINARY_DIR}/${project}-targets.cmake") @@ -234,10 +244,6 @@ export(EXPORT ${project}-targets ############################################## ## Add test if(TEST) - if (AIDGE_REQUIRES_PYTHON AND NOT AIDGE_PYTHON_HAS_EMBED) - message(WARNING "Skipping compilation of tests: missing Python embedded interpreter") - else() - enable_testing() - add_subdirectory(unit_tests) - endif() + enable_testing() + add_subdirectory(unit_tests) endif() -- GitLab From 7b58718a49e21c9c94e4eefdb585486fc9e32b99 Mon Sep 17 00:00:00 2001 From: NAUD Maxence <maxence.naud@cea.fr> Date: Wed, 29 Jan 2025 23:52:56 +0000 Subject: [PATCH 20/21] ADD: basic test --- unit_tests/Test_QuantPTQ.cpp | 50 
+++++++++++++++++------------------- 1 file changed, 24 insertions(+), 26 deletions(-) diff --git a/unit_tests/Test_QuantPTQ.cpp b/unit_tests/Test_QuantPTQ.cpp index 36377e8..e7211ce 100644 --- a/unit_tests/Test_QuantPTQ.cpp +++ b/unit_tests/Test_QuantPTQ.cpp @@ -1,21 +1,19 @@ -// #include <catch2/catch_test_macros.hpp> - -// #include "aidge/data/Tensor.hpp" -// #include "aidge/backend/TensorImpl.hpp" -// #include "aidge/backend/cpu.hpp" -// #include "aidge/operator/Conv.hpp" -// #include "aidge/operator/Scaling.hpp" -// #include "aidge/operator/GenericOperator.hpp" -// #include "aidge/graph/GraphView.hpp" -// #include "aidge/QuantPTQ.hpp" -// #include "aidge/scheduler/Scheduler.hpp" -// #include "aidge/hook/OutputRange.hpp" -// #include "aidge/operator/Producer.hpp" - -// #include <unordered_map> - -// using namespace Aidge; -// //using namespace Aidge_HELPER; +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. 
+ * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#include <catch2/catch_test_macros.hpp> + +TEST_CASE("[tmp] basic test") { + REQUIRE(true == true); +} // TEST_CASE("[aidge_module_template/ref_cpp/quantization] PTQ : Quantize Graph") { @@ -79,7 +77,7 @@ // std::shared_ptr<Tensor> myInput = // std::make_shared<Tensor>( -// Array4D<float,2,3,5,5> { +// Array4D<float,2,3,5,5> { // { // { // {{ 0., 1., 2., 3., 4.}, @@ -124,7 +122,7 @@ // ); // auto dataProvider = Producer(myInput, "dataProvider"); -// Tensor myOutput = Array4D<float,2,4,3,3> { +// Tensor myOutput = Array4D<float,2,4,3,3> { // { // { // {{ 15226., 15577., 15928.}, @@ -188,9 +186,9 @@ // "%f" // "\n", // max_output_conv); - + // } - + // float max_output_relu = std::static_pointer_cast<OutputRange>(myReLU1->getOperator()->getHook("output_range"))->getOutput(0); // if(verbose) { // printf("[hook] OutputRange(forward) :: ReLU output max: " @@ -222,10 +220,10 @@ // "\n", // (nodePtr->type()).c_str(), (nodePtr->name()).c_str()); // } -// } - +// } + // SequentialScheduler scheduler_v2(g1); - + // scheduler_v2.forward(); // scheduler_v2.generateScheduling(false); // std::vector<std::shared_ptr<Node>> ordered_graph_view_v2 = scheduler_v2.getStaticScheduling(); @@ -242,7 +240,7 @@ // "\n", // (nodePtr->type()).c_str(), (nodePtr->name()).c_str()); // } -// } +// } // } \ No newline at end of file -- GitLab From 579cda77cf402adbeacae680cee1d03d40fabcff Mon Sep 17 00:00:00 2001 From: NAUD Maxence <maxence.naud@cea.fr> Date: Fri, 31 Jan 2025 22:59:05 +0000 Subject: [PATCH 21/21] UPD: version 0.3.0 -> 0.3.1 --- pyproject.toml | 20 ++++++++++++++++++-- version.txt | 3 +-- 2 files changed, 19 insertions(+), 4 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index c7cd4c2..088200e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -9,8 +9,24 @@ readme = "README.md" license = { file = "LICENSE" } classifiers = [ "Development 
Status :: 2 - Pre-Alpha", - "Programming Language :: Python :: 3" - ] + "Intended Audience :: Developers", + "Intended Audience :: Education", + "Intended Audience :: Science/Research", + "License :: OSI Approved :: Eclipse Public License 2.0 (EPL-2.0)", + "Programming Language :: C++", + "Programming Language :: Python", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", + "Programming Language :: Python :: 3 :: Only", + "Topic :: Scientific/Engineering", + "Topic :: Scientific/Engineering :: Artificial Intelligence", + "Topic :: Software Development" +] dynamic = ["version"] #Â defined by pbr [build-system] diff --git a/version.txt b/version.txt index 69367fd..9e11b32 100644 --- a/version.txt +++ b/version.txt @@ -1,2 +1 @@ -0.3.0 - +0.3.1 -- GitLab