diff --git a/.gitignore b/.gitignore
index 18f1583283b848051e2915895eae6a62751a9d0b..ba5c59398b68083c6c1c5fe820fb9070d999c18e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,17 +4,16 @@
 # C++ Build
 build*/
 install*/
+include/aidge/backend/quantization_version.h
 
 # VSCode
 .vscode
 
 # Python
-aidge_quantization/_version.py
 *.so
 __pycache__
 *.pyc
 *.egg-info
-aidge_quantization/_version.py
 wheelhouse/*
 
 # Mermaid
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 905a2a25c639b2186ddd6ab4b8737ff00c7ed4aa..b3c6d459dfaf29f5accbc0be4565a3709e9ffd3b 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,33 +1,65 @@
 # CMake >= 3.18 is required for good support of FindCUDAToolkit
-cmake_minimum_required(VERSION 3.18) # XXX 3.18
-set(CXX_STANDARD 14)
+cmake_minimum_required(VERSION 3.18)
 
-file(STRINGS "${CMAKE_SOURCE_DIR}/version.txt" version)
-file(STRINGS "${CMAKE_SOURCE_DIR}/project_name.txt" project)
+set(CMAKE_CXX_STANDARD 14)
+set(CMAKE_CXX_STANDARD_REQUIRED ON)
+set(CMAKE_CXX_EXTENSIONS OFF)
 
+# Read project metadata
+file(STRINGS "${CMAKE_SOURCE_DIR}/project_name.txt" project)
 message(STATUS "Project name: ${project}")
+
+file(STRINGS "${CMAKE_SOURCE_DIR}/version.txt" version)
+# Parse version.txt to retrieve Major, Minor and Patch
+string(REGEX MATCH "([0-9]+)\\.([0-9]+)\\.([0-9]+)" _ MATCHES ${version})
+set(PROJECT_VERSION_MAJOR ${CMAKE_MATCH_1})
+set(PROJECT_VERSION_MINOR ${CMAKE_MATCH_2})
+set(PROJECT_VERSION_PATCH ${CMAKE_MATCH_3})
 message(STATUS "Project version: ${version}")
+
+# Retrieve latest git commit
+execute_process(
+    COMMAND git rev-parse --short HEAD
+    WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}
+    OUTPUT_VARIABLE GIT_COMMIT_HASH
+    OUTPUT_STRIP_TRAILING_WHITESPACE
+    ERROR_QUIET
+)
+message(STATUS "Latest git commit: ${GIT_COMMIT_HASH}")
+
+
 project(${project}
         VERSION ${version}
         DESCRIPTION "Quantization methods for the Aidge framework."
         LANGUAGES CXX)
 
-# Note : project name is {project} and python module name is also {project}
-set(module_name _${project}) # target name
-set(pybind_module_name ${CMAKE_PROJECT_NAME}) # name of submodule for python bindings
+if(NOT $ENV{AIDGE_INSTALL} STREQUAL "")
+    set(CMAKE_INSTALL_PREFIX $ENV{AIDGE_INSTALL})
+    list(APPEND CMAKE_PREFIX_PATH $ENV{AIDGE_INSTALL})
+    message(WARNING "Env var AIDGE_INSTALL detected : $ENV{AIDGE_INSTALL}. Set CMAKE_INSTALL_PREFIX to AIDGE_INSTALL & added to CMAKE_PREFIX_PATH"
+                    "\n\tCMAKE_INSTALL_PREFIX = ${CMAKE_INSTALL_PREFIX}"
+                    "\n\tCMAKE_PREFIX_PATH = ${CMAKE_PREFIX_PATH}")
+endif()
 
-set(CXX_STANDARD 14)
+message(STATUS "Creating ${CMAKE_CURRENT_SOURCE_DIR}/include/aidge/quantization_version.h")
+
+
+# Note: Using configure_file later in the code makes the version variables get lost...
+# I tried to set them in the internal cache but it failed.
+# The current code works, but there might be a scope issue.
+# Generate version.h file from config file version.h.in +configure_file( + "${CMAKE_CURRENT_SOURCE_DIR}/include/aidge/version.h.in" + "${CMAKE_CURRENT_SOURCE_DIR}/include/aidge/quantization_version.h" +) -############################################## -# Import utils CMakeLists -set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake") ############################################## # Define options -option(PYBIND "python binding" ON) +option(PYBIND "python binding" OFF) option(WERROR "Warning as error" OFF) -option(TEST "Enable tests" ON) +option(TEST "Enable tests" OFF) option(COVERAGE "Enable coverage" OFF) option(CUDA "Enable CUDA backend" OFF) # XXX OFF option(ENABLE_ASAN "Enable ASan (AddressSanitizer) for runtime analysis of memory use (over/underflow, memory leak, ...)" OFF) @@ -35,85 +67,55 @@ option(ENABLE_ASAN "Enable ASan (AddressSanitizer) for runtime analysis of memor ############################################## # Import utils CMakeLists set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake") -include(PybindModuleCreation) if(CMAKE_COMPILER_IS_GNUCXX AND COVERAGE) Include(CodeCoverage) endif() +# Set variables if(CUDA) enable_language(CUDA) - message(STATUS "Cuda compiler version = ${CMAKE_CUDA_COMPILER_VERSION}") # Define a preprocessor macro with the Cuda compiler version add_definitions(-DCUDA_COMPILER_VERSION="${CMAKE_CUDA_COMPILER_VERSION}") endif() -if(NOT $ENV{AIDGE_INSTALL} STREQUAL "") - set(CMAKE_INSTALL_PREFIX $ENV{AIDGE_INSTALL}) - list(APPEND CMAKE_PREFIX_PATH $ENV{AIDGE_INSTALL}) - message(WARNING "Env var AIDGE_INSTALL detected : $ENV{AIDGE_INSTALL}. Set CMAKE_INSTALL_PREFIX to AIDGE_INSTALL & added to CMAKE_PREFIX_PATH" - "\n\tCMAKE_INSTALL_PREFIX = ${CMAKE_INSTALL_PREFIX}" - "\n\tCMAKE_PREFIX_PATH = ${CMAKE_PREFIX_PATH}") +# Source files +if(CUDA) + file(GLOB_RECURSE src_files "src/*.cpp" "src/*.cu") +else() + file(GLOB_RECURSE src_files "src/*.cpp") endif() -# ############################################## -# Find system dependencies -Include(FetchContent) - -FetchContent_Declare( - fmt - GIT_REPOSITORY https://github.com/fmtlib/fmt.git - GIT_TAG 10.2.1 # or a later release -) - -set(FMT_SYSTEM_HEADERS ON) -FetchContent_MakeAvailable(fmt) -set_property(TARGET fmt PROPERTY POSITION_INDEPENDENT_CODE ON) +# Header files +file(GLOB_RECURSE inc_files "include/*.hpp") -if(CUDA) - find_package(CUDAToolkit REQUIRED) -endif() +# Note: cxx project name is {CMAKE_PROJECT_NAME} and python module name is also {CMAKE_PROJECT_NAME} +set(module_name _${CMAKE_PROJECT_NAME}) # target name +add_library(${module_name} ${src_files} ${inc_files}) +set(pybind_module_name ${CMAKE_PROJECT_NAME}) # name of submodule for python bindings -############################################## -# Find system dependencies +# Dependencies and linking find_package(aidge_core REQUIRED) find_package(aidge_backend_cpu REQUIRED) +target_link_libraries(${module_name} + PUBLIC + _aidge_core + _aidge_backend_cpu +) if(CUDA) + find_package(CUDAToolkit REQUIRED) find_package(aidge_backend_cuda REQUIRED) -endif() - -############################################## -# Create target and set properties - -if(CUDA) - file(GLOB_RECURSE src_files "src/*.cpp" "src/*.cu") - file(GLOB_RECURSE inc_files "include/*.hpp") - - add_library(${module_name} ${src_files} ${inc_files}) target_link_libraries(${module_name} PUBLIC - _aidge_core # _ is added because we link the target not the project - _aidge_backend_cpu - # _aidge_backend_cuda # XXX CUDA::cudart CUDA::cublas cudnn ) -else() - 
file(GLOB_RECURSE src_files "src/*.cpp") - file(GLOB_RECURSE inc_files "include/*.hpp") - - add_library(${module_name} ${src_files} ${inc_files}) - target_link_libraries(${module_name} - PUBLIC - _aidge_core # _ is added because we link the target not the project - _aidge_backend_cpu - ) endif() -#Set target properties +# Include directories target_include_directories(${module_name} PUBLIC $<INSTALL_INTERFACE:include> @@ -122,6 +124,7 @@ target_include_directories(${module_name} ${CMAKE_CURRENT_SOURCE_DIR}/src ) +# Compilation settings if(CUDA) if(NOT DEFINED CMAKE_CUDA_STANDARD) set(CMAKE_CUDA_STANDARD 14) @@ -142,23 +145,44 @@ if (PYBIND) generate_python_binding(${pybind_module_name} ${module_name}) endif() -# XXX HERE !!! -target_link_libraries(${module_name} PUBLIC fmt::fmt) target_compile_features(${module_name} PRIVATE cxx_std_14) +target_link_libraries(${module_name} PRIVATE fmt::fmt) +#################################### +# Compilation options and warnings target_compile_options(${module_name} PRIVATE + # Options for Clang, AppleClang, and GCC compilers $<$<COMPILE_LANGUAGE:CPP>:$<$<OR:$<CXX_COMPILER_ID:Clang>,$<CXX_COMPILER_ID:AppleClang>,$<CXX_COMPILER_ID:GNU>>: - -Wall -Wextra -Wold-style-cast -Winline -pedantic -Werror=narrowing -Wshadow $<$<BOOL:${WERROR}>:-Werror>>>) + -Wall # Enable all warnings + -Wextra # Enable extra warnings + -Wold-style-cast # Warn about C-style casts + -Winline # Warn if inline expansion fails + -pedantic # Enforce strict ISO C++ standards + -Werror=narrowing # Treat narrowing conversions as errors + -Wshadow # Warn about variable shadowing + $<$<BOOL:${WERROR}>:-Werror> # Optionally treat warnings as errors + >> +) + +# Additional MSVC-specific warning level +target_compile_options(${module_name} PRIVATE + $<$<CXX_COMPILER_ID:MSVC>: + /W4 # Warning level 4 (highest for MSVC) + > +) + +# CUDA-specific compile options if(CUDA) target_compile_options(${module_name} PRIVATE $<$<COMPILE_LANGUAGE:CUDA>: - -Wall>) + -Wall # Enable all warnings for CUDA + > + ) endif() -target_compile_options(${module_name} PRIVATE - $<$<CXX_COMPILER_ID:MSVC>: - /W4>) +# Coverage flags for GCC if(CMAKE_COMPILER_IS_GNUCXX AND COVERAGE) + include(CodeCoverage) append_coverage_compiler_flags() endif() @@ -168,29 +192,31 @@ endif() include(GNUInstallDirs) set(INSTALL_CONFIGDIR ${CMAKE_INSTALL_LIBDIR}/cmake/${project}) +# Install the library target install(TARGETS ${module_name} EXPORT ${project}-targets - LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} - ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} - RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} + RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} ) +# Install header files install(DIRECTORY include/ DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) -#Export the targets to a script - +# Export targets for other projects to use install(EXPORT ${project}-targets - FILE "${project}-targets.cmake" - DESTINATION ${INSTALL_CONFIGDIR} - COMPONENT ${module_name} + FILE "${project}-targets.cmake" + DESTINATION ${INSTALL_CONFIGDIR} + COMPONENT ${module_name} ) -if (PYBIND) +# Python binding installation +if(PYBIND) install(TARGETS ${pybind_module_name} DESTINATION ${PYBIND_INSTALL_PREFIX} ) endif() -#Create a ConfigVersion.cmake file +# Create and install CMake configuration files include(CMakePackageConfigHelpers) write_basic_package_version_file( "${CMAKE_CURRENT_BINARY_DIR}/${project}-config-version.cmake" @@ -203,15 +229,14 @@ 
configure_package_config_file("${project}-config.cmake.in" INSTALL_DESTINATION ${INSTALL_CONFIGDIR} ) -#Install the config, configversion and custom find modules +# Install CMake configuration files install(FILES "${CMAKE_CURRENT_BINARY_DIR}/${project}-config.cmake" "${CMAKE_CURRENT_BINARY_DIR}/${project}-config-version.cmake" DESTINATION ${INSTALL_CONFIGDIR} ) -############################################## -## Exporting from the build tree +# Export from build tree export(EXPORT ${project}-targets FILE "${CMAKE_CURRENT_BINARY_DIR}/${project}-targets.cmake") @@ -219,10 +244,6 @@ export(EXPORT ${project}-targets ############################################## ## Add test if(TEST) - if (AIDGE_REQUIRES_PYTHON AND NOT AIDGE_PYTHON_HAS_EMBED) - message(WARNING "Skipping compilation of tests: missing Python embedded interpreter") - else() - enable_testing() - add_subdirectory(unit_tests) - endif() + enable_testing() + add_subdirectory(unit_tests) endif() diff --git a/aidge_quantization/unit_tests/test_ptq.py b/aidge_quantization/unit_tests/test_ptq.py index dfdedd8394913c0b205bbb1084b4dfb3c95b24a3..56080bff0d1f4a95248fa983316dbafd35565501 100644 --- a/aidge_quantization/unit_tests/test_ptq.py +++ b/aidge_quantization/unit_tests/test_ptq.py @@ -21,7 +21,7 @@ ACCURACIES = (95.4, 94.4) # (97.9, 97.7) NB_BITS = 4 # -------------------------------------------------------------- -# UTILS +# UTILS # -------------------------------------------------------------- def propagate(model, scheduler, sample): @@ -50,7 +50,7 @@ def compute_accuracy(model, samples, labels): # -------------------------------------------------------------- class test_ptq(unittest.TestCase): - + def setUp(self): # load the samples / labels (numpy) @@ -70,19 +70,20 @@ class test_ptq(unittest.TestCase): def tearDown(self): pass - + def test_model(self): Log.set_console_level(Level.Info) # compute the base accuracy accuracy = compute_accuracy(self.model, self.samples[0:NB_SAMPLES], self.labels) self.assertAlmostEqual(accuracy * 100, ACCURACIES[0], msg='base accuracy does not meet the baseline !', delta=0.1) - + def test_quant_model(self): - Log.set_console_level(Level.Info) + Log.set_console_level(Level.Debug) # create the calibration dataset + tensors = [] for sample in self.samples[0:NB_SAMPLES]: sample = prepare_sample(sample) @@ -91,14 +92,13 @@ class test_ptq(unittest.TestCase): # quantize the model - aidge_quantization.quantize_network( - self.model, - NB_BITS, - tensors, - clipping_mode=aidge_quantization.Clipping.MSE, + self.model, + NB_BITS, + tensors, + clipping_mode=aidge_quantization.Clipping.MSE, no_quantization=False, - optimize_signs=True, + optimize_signs=True, single_shift=False ) diff --git a/include/aidge/backend/cpu/operator/LSQImpl_kernels.hpp b/include/aidge/backend/cpu/operator/LSQImpl_kernels.hpp index ddb820997837ec9b3603c6007497c8161145d587..1ed05e232ba9f8332c372a9524edd26fc7d9c45a 100644 --- a/include/aidge/backend/cpu/operator/LSQImpl_kernels.hpp +++ b/include/aidge/backend/cpu/operator/LSQImpl_kernels.hpp @@ -67,16 +67,16 @@ void LSQImpl_cpu_backward_kernel(const std::size_t inputLength, const GI fullPrecScale_4 = input[4*i+3] / stepSize[0]; /*****************Features Gradient Computation********************/ // STE method is simply applied - grad_input[4*i] = grad_output[4*i]*((fullPrecScale_1 <= static_cast<GI>(range.first)) ? GI(0.0) : + grad_input[4*i] += grad_output[4*i]*((fullPrecScale_1 <= static_cast<GI>(range.first)) ? GI(0.0) : (fullPrecScale_1 >= static_cast<GI>(range.second)) ? 
GI(0.0) : GI(1.0)); - grad_input[4*i+1] = grad_output[4*i+1]*((fullPrecScale_2 <= static_cast<GI>(range.first)) ? GI(0.0) : + grad_input[4*i+1] += grad_output[4*i+1]*((fullPrecScale_2 <= static_cast<GI>(range.first)) ? GI(0.0) : (fullPrecScale_2 >= static_cast<GI>(range.second)) ? GI(0.0) : GI(1.0)); - grad_input[4*i+2] = grad_output[4*i+2]*((fullPrecScale_3 <= static_cast<GI>(range.first)) ? GI(0.0) : + grad_input[4*i+2] += grad_output[4*i+2]*((fullPrecScale_3 <= static_cast<GI>(range.first)) ? GI(0.0) : (fullPrecScale_3 >= static_cast<GI>(range.second)) ? GI(0.0) : GI(1.0)); - grad_input[4*i+3] = grad_output[4*i+3]*((fullPrecScale_4 <= static_cast<GI>(range.first)) ? GI(0.0) : + grad_input[4*i+3] += grad_output[4*i+3]*((fullPrecScale_4 <= static_cast<GI>(range.first)) ? GI(0.0) : (fullPrecScale_4 >= static_cast<GI>(range.second)) ? GI(0.0) : GI(1.0)); @@ -105,7 +105,7 @@ void LSQImpl_cpu_backward_kernel(const std::size_t inputLength, // Process remaining for(unsigned int i=inputLength-inputLength%4; i<inputLength; ++i) { const GI fullPrecScale = input[i] / stepSize[0]; - grad_input[i] = grad_output[i]*((fullPrecScale <= static_cast<GI>(range.first)) ? GI(0.0) : + grad_input[i] += grad_output[i]*((fullPrecScale <= static_cast<GI>(range.first)) ? GI(0.0) : (fullPrecScale >= static_cast<GI>(range.second)) ? GI(0.0) : GI(1.0)); GI qData = fullPrecScale; @@ -117,7 +117,7 @@ void LSQImpl_cpu_backward_kernel(const std::size_t inputLength, const GI gradScaleFactor = static_cast<GI>(1.0f / std::sqrt(inputLength * range.second)); // 3rd: Multiply Step Size gradient with scale factor - grad_stepSize[0] = diffStepSize * gradScaleFactor; + grad_stepSize[0] += diffStepSize * gradScaleFactor; } diff --git a/include/aidge/operator/FixedQ.hpp b/include/aidge/operator/FixedQ.hpp index 96a52b4592bc05f34a47e04e664df27847a48e85..3d46dcfacc59e98ae193a9238a9474c6df015b7d 100644 --- a/include/aidge/operator/FixedQ.hpp +++ b/include/aidge/operator/FixedQ.hpp @@ -9,11 +9,12 @@ * ********************************************************************************/ -#ifndef AIDGE_CORE_OPERATOR_FIXEDQ_H_ -#define AIDGE_CORE_OPERATOR_FIXEDQ_H_ +#ifndef AIDGE_QUANTIZATION_OPERATOR_FIXEDQ_H_ +#define AIDGE_QUANTIZATION_OPERATOR_FIXEDQ_H_ -#include <cassert> +#include <cstddef> // std::size_t #include <memory> +#include <string> #include <vector> #include "aidge/backend/OperatorImpl.hpp" @@ -21,8 +22,8 @@ #include "aidge/operator/OperatorTensor.hpp" #include "aidge/utils/ErrorHandling.hpp" #include "aidge/utils/Registrar.hpp" -#include "aidge/utils/Types.h" #include "aidge/utils/StaticAttributes.hpp" +#include "aidge/utils/Types.h" namespace Aidge { @@ -43,24 +44,20 @@ private: public: - FixedQ_Op(std::size_t nbBits, float span, bool isOutputUnsigned) : - OperatorTensor(Type, {InputCategory::Data}, 1), - mAttributes(std::make_shared<Attributes_>(attr<FixedQAttr::NbBits>(nbBits), attr<FixedQAttr::Span>(span), attr<FixedQAttr::IsOutputUnsigned>(isOutputUnsigned))) + FixedQ_Op(std::size_t nbBits = 8, float span = 4.0f, bool isOutputUnsigned = false) : + OperatorTensor(Type, {InputCategory::Data}, 1), + mAttributes(std::make_shared<Attributes_>( + attr<FixedQAttr::NbBits>(nbBits), + attr<FixedQAttr::Span>(span), + attr<FixedQAttr::IsOutputUnsigned>(isOutputUnsigned))) {} /** - * @brief Copy-constructor. Copy the operator attributes and its output tensor(s), but not its input tensors (the new operator has no input associated). + * @brief Copy-constructor. 
Copy the operator attributes and its output + * tensor(s), but not its input tensors (the new operator has no input associated). * @param op Operator to copy. */ - FixedQ_Op(const FixedQ_Op& op) - : OperatorTensor(op), mAttributes(op.mAttributes) - { - if (op.mImpl){ - SET_IMPL_MACRO(FixedQ_Op, *this, op.backend()); - }else{ - mImpl = nullptr; - } - } + FixedQ_Op(const FixedQ_Op& op); /** * @brief Clone the operator using its copy-constructor. @@ -88,14 +85,16 @@ public: }; -inline std::shared_ptr<Node> FixedQ(std::size_t nbBits = 8, float span = 4.0f, bool isOutputUnsigned = false, const std::string& name = "") { - return std::make_shared<Node>(std::make_shared<FixedQ_Op>(nbBits, span, isOutputUnsigned), name); -} -} +std::shared_ptr<Node> FixedQ(std::size_t nbBits = 8, + float span = 4.0f, + bool isOutputUnsigned = false, + const std::string& name = ""); + +} // namespace Aidge namespace { template <> const char* const EnumStrings<Aidge::FixedQAttr>::data[] = {"nb_bits", "span", "is_output_unsigned"}; } -#endif /* AIDGE_CORE_OPERATOR_FIXEDQ_H_ */ +#endif /* AIDGE_QUANTIZATION_OPERATOR_FIXEDQ_H_ */ diff --git a/include/aidge/operator/LSQ.hpp b/include/aidge/operator/LSQ.hpp index 10ceb81b0346cd5c15e19460cd44923d6e062f76..970c476cb7be18b8d001edb27d60079de85b9349 100644 --- a/include/aidge/operator/LSQ.hpp +++ b/include/aidge/operator/LSQ.hpp @@ -9,8 +9,8 @@ * ********************************************************************************/ -#ifndef AIDGE_CORE_OPERATOR_LSQ_H_ -#define AIDGE_CORE_OPERATOR_LSQ_H_ +#ifndef AIDGE_QUANTIZATION_OPERATOR_LSQ_H_ +#define AIDGE_QUANTIZATION_OPERATOR_LSQ_H_ #include <cassert> #include <memory> @@ -95,7 +95,7 @@ public: */ inline std::shared_ptr<Node> LSQ(const std::pair<int, int>& range = {0, 255}, const std::string& name = "") { auto lsq = std::make_shared<Node>(std::make_shared<LSQ_Op>(range), name); - addProducer(lsq, 1, {1}, "ss"); + addProducer<1>(lsq, 1, {1}, "ss"); return lsq; } } @@ -105,4 +105,4 @@ template <> const char *const EnumStrings<Aidge::LSQAttr>::data[] = {"range"}; } -#endif /* AIDGE_CORE_OPERATOR_LSQ_H_ */ +#endif /* AIDGE_QUANTIZATION_OPERATOR_LSQ_H_ */ diff --git a/include/aidge/operator/SAT/DoReFa.hpp b/include/aidge/operator/SAT/DoReFa.hpp index 92ce1677b1b28e303c8488b55dd00cfafb519457..d168c38bf4f21a64f0007f2f65b0dfc4820d8297 100644 --- a/include/aidge/operator/SAT/DoReFa.hpp +++ b/include/aidge/operator/SAT/DoReFa.hpp @@ -9,17 +9,15 @@ * ********************************************************************************/ -#ifndef AIDGE_CORE_OPERATOR_DOREFA_H_ -#define AIDGE_CORE_OPERATOR_DOREFA_H_ +#ifndef AIDGE_QUANTIZATION_OPERATOR_SAT_DOREFA_H_ +#define AIDGE_QUANTIZATION_OPERATOR_SAT_DOREFA_H_ -#include <cassert> #include <memory> #include <vector> #include "aidge/backend/OperatorImpl.hpp" #include "aidge/graph/Node.hpp" #include "aidge/operator/OperatorTensor.hpp" -#include "aidge/utils/ErrorHandling.hpp" #include "aidge/utils/Registrar.hpp" #include "aidge/utils/StaticAttributes.hpp" #include "aidge/utils/Types.h" @@ -43,12 +41,17 @@ public: static const std::string Type; private: - using Attributes_ = StaticAttributes<DoReFaAttr, size_t, DoReFaMode>; + using Attributes_ = StaticAttributes<DoReFaAttr, std::size_t, DoReFaMode>; template <DoReFaAttr e> using attr = typename Attributes_::template attr<e>; const std::shared_ptr<Attributes_> mAttributes; public: - DoReFa_Op(size_t range = 255, DoReFaMode mode = DoReFaMode::Default) + /** + * @brief Constructor for DoReFa_Op + * @param range The quantization range (default: 
255) + * @param mode The quantization mode (default: Default) + */ + DoReFa_Op(std::size_t range = 255, DoReFaMode mode = DoReFaMode::Default) : OperatorTensor(Type, {InputCategory::Param}, 1), mAttributes(std::make_shared<Attributes_>( attr<DoReFaAttr::Range>(range), @@ -59,30 +62,34 @@ public: * @brief Copy-constructor. Copy the operator attributes and its output tensor(s), but not its input tensors (the new operator has no input associated). * @param op Operator to copy. */ - DoReFa_Op(const DoReFa_Op& op) - : OperatorTensor(op), - mAttributes(op.mAttributes) - { - if (op.mImpl){ - SET_IMPL_MACRO(DoReFa_Op, *this, op.backend()); - }else{ - mImpl = nullptr; - } - } + DoReFa_Op(const DoReFa_Op& op); /** * @brief Clone the operator using its copy-constructor. * @see Operator::DoReFa_Op + * @return std::shared_ptr<Operator> A deep copy of the operator */ - std::shared_ptr<Operator> clone() const override { - return std::make_shared<DoReFa_Op>(*this); - } + std::shared_ptr<Operator> clone() const override; + /** + * @brief Get available backends for this operator + * @return std::set<std::string> Set of supported backend names + */ std::set<std::string> getAvailableBackends() const override final; + + /** + * @brief Set the backend for this operator + * @param name Backend name + * @param device Device index (default: 0) + */ void setBackend(const std::string& name, DeviceIdx_t device = 0) override final; + /** + * @brief Get operator attributes + * @return std::shared_ptr<Attributes> Shared pointer to operator attributes + */ inline std::shared_ptr<Attributes> attributes() const override { return mAttributes; } - inline size_t& range() const noexcept { return mAttributes->getAttr<DoReFaAttr::Range>(); } + inline std::size_t& range() const noexcept { return mAttributes->getAttr<DoReFaAttr::Range>(); } inline DoReFaMode& mode() const noexcept { return mAttributes->getAttr<DoReFaAttr::Mode>(); } static const std::vector<std::string> getInputsName(){ @@ -93,10 +100,20 @@ public: } }; -inline std::shared_ptr<Node> DoReFa(size_t range = 255, DoReFaMode mode = DoReFaMode::Default, const std::string& name = "") { - return std::make_shared<Node>(std::make_shared<DoReFa_Op>(range, mode), name); -} -} +/** + * @brief Factory function to create a DoReFa operator node + * + * @param range Quantization range (default: 255) + * @param mode Quantization mode (default: Default) + * @param name Node name (default: empty) + * + * @return std::shared_ptr<Node> Shared pointer to the created node + */ +std::shared_ptr<Node> DoReFa(std::size_t range = 255, + DoReFaMode mode = DoReFaMode::Default, + const std::string& name = ""); + +} // namespace Aidge namespace { template <> @@ -106,4 +123,4 @@ template <> const char *const EnumStrings<Aidge::DoReFaMode>::data[] = {"default", "symmetric", "asymmetric", "full_range"}; } -#endif /* AIDGE_CORE_OPERATOR_DOREFA_H_ */ +#endif /* AIDGE_QUANTIZATION_OPERATOR_SAT_DOREFA_H_ */ diff --git a/include/aidge/operator/SAT/TanhClamp.hpp b/include/aidge/operator/SAT/TanhClamp.hpp index def43b872c021e539efe5658b592ceec9b3b5d4d..9d99d7024905332ff7336c62aaaa14d09c51e6d1 100644 --- a/include/aidge/operator/SAT/TanhClamp.hpp +++ b/include/aidge/operator/SAT/TanhClamp.hpp @@ -9,20 +9,18 @@ * ********************************************************************************/ -#ifndef AIDGE_CORE_OPERATOR_TANHCLAMP_H_ -#define AIDGE_CORE_OPERATOR_TANHCLAMP_H_ +#ifndef AIDGE_QUANTIZATION_OPERATOR_SAT_TANHCLAMP_H_ +#define AIDGE_QUANTIZATION_OPERATOR_SAT_TANHCLAMP_H_ -#include <cassert> 
 #include <memory>
+#include <set>
+#include <string>
 #include <vector>
 
 #include "aidge/backend/OperatorImpl.hpp"
 #include "aidge/graph/Node.hpp"
 #include "aidge/operator/OperatorTensor.hpp"
-#include "aidge/operator/Producer.hpp"
-#include "aidge/utils/ErrorHandling.hpp"
 #include "aidge/utils/Registrar.hpp"
-#include "aidge/utils/StaticAttributes.hpp"
 #include "aidge/utils/Types.h"
 
 namespace Aidge {
@@ -44,23 +42,13 @@ public:
      * @brief Copy-constructor. Copy the operator attributes and its output tensor(s), but not its input tensors (the new operator has no input associated).
      * @param op Operator to copy.
      */
-    TanhClamp_Op(const TanhClamp_Op& op)
-        : OperatorTensor(op)
-    {
-        if (op.mImpl){
-            SET_IMPL_MACRO(TanhClamp_Op, *this, op.backend());
-        }else{
-            mImpl = nullptr;
-        }
-    }
+    TanhClamp_Op(const TanhClamp_Op& op);
 
     /**
      * @brief Clone the operator using its copy-constructor.
      * @see Operator::TanhClamp_Op
      */
-    std::shared_ptr<Operator> clone() const override {
-        return std::make_shared<TanhClamp_Op>(*this);
-    }
+    std::shared_ptr<Operator> clone() const override;
 
     bool forwardDims(bool allowDataDependency = false) override final;
     std::set<std::string> getAvailableBackends() const override final;
@@ -75,9 +63,8 @@ public:
     }
 };
 
-inline std::shared_ptr<Node> TanhClamp(const std::string& name = "") {
-    return std::make_shared<Node>(std::make_shared<TanhClamp_Op>(), name);
-}
-}
+std::shared_ptr<Node> TanhClamp(const std::string& name = "");
+
+} // namespace Aidge
 
-#endif /* AIDGE_CORE_OPERATOR_TANHCLAMP_H_ */
+#endif /* AIDGE_QUANTIZATION_OPERATOR_SAT_TANHCLAMP_H_ */
diff --git a/include/aidge/quantization/PTQ/CLE.hpp b/include/aidge/quantization/PTQ/CLE.hpp
index d94b6e930209450bc5d33331832b81a2623c56a7..f4dc073ee5ed02799a75505a2dc0a3a519e66548 100644
--- a/include/aidge/quantization/PTQ/CLE.hpp
+++ b/include/aidge/quantization/PTQ/CLE.hpp
@@ -9,29 +9,33 @@
  *
  ********************************************************************************/
 
-#ifndef AIDGE_QUANTIZATION_PTQ_CLE_H_
-#define AIDGE_QUANTIZATION_PTQ_CLE_H_
+#ifndef AIDGE_QUANTIZATION_QUANTIZATION_PTQ_CLE_H_
+#define AIDGE_QUANTIZATION_QUANTIZATION_PTQ_CLE_H_
 
-//#include <cstdint>
-//#include <map>
-//#include <memory>
-//#include <string>
-//#include <vector>
+#include <memory>
 
-#include "aidge/data/Tensor.hpp"
 #include "aidge/graph/GraphView.hpp"
 
 namespace Aidge {
 
 /**
- * @brief Equalize the ranges of the nodes parameters by proceding iteratively.
- * Can only be applied to single branch networks (otherwise does not edit the graphView).
+ * @brief Equalize the ranges of the nodes' parameters by proceeding iteratively.
+ * Can only be applied to single branch networks (otherwise does not edit the GraphView).
+ *
+ * Cross Layer Equalization (CLE) is used to balance the weights between consecutive
+ * layers to improve quantization performance. It works by iteratively scaling weights
+ * and biases of adjacent layers while preserving the overall function of the network.
+ *
+ * @note The operation modifies weights and biases in-place but preserves the mathematical
+ * function computed by the network.
+ *
  * @param graphView The GraphView to process.
- * @param targetDelta the stopping criterion (typical value : 0.01)
+ * @param targetDelta the stopping criterion (typical value: 0.01). Smaller values lead
+ * to more precise equalization but may require more iterations.
  */
-    void crossLayerEqualization(std::shared_ptr<GraphView> graphView, float targetDelta = 0.01);
+    void crossLayerEqualization(std::shared_ptr<GraphView> graphView, double targetDelta = 0.01);
 
-}
+} // namespace Aidge
 
-#endif /* AIDGE_QUANTIZATION_PTQ_CLE_H_ */
\ No newline at end of file
+#endif /* AIDGE_QUANTIZATION_QUANTIZATION_PTQ_CLE_H_ */
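The CLE rescaling rule documented above is compact enough to sketch. The following NumPy snippet is an illustrative sketch only (equalize_pair is a hypothetical helper, not part of the Aidge API); it mirrors the arithmetic implemented in src/PTQ/CLE.cpp further down in this diff:

    import numpy as np

    def equalize_pair(w1, b1, w2):
        # Hypothetical helper: one CLE step over two consecutive affine layers,
        # mirroring the loop body of crossLayerEqualization in src/PTQ/CLE.cpp.
        r1 = np.abs(w1).max()         # absolute max of the first weight tensor
        r2 = np.abs(w2).max()         # absolute max of the second weight tensor
        s1 = np.sqrt(r1 * r2) / r1    # scale applied to layer 1 (weights and bias)
        s2 = np.sqrt(r1 * r2) / r2    # scale applied to layer 2 (weights)
        # s1 * s2 == 1, so the composed function is preserved (for positively
        # homogeneous activations such as ReLU) while both ranges become sqrt(r1*r2).
        return w1 * s1, b1 * s1, w2 * s2, abs(r1 - r2)

The implementation repeats this step over all adjacent affine pairs until the largest range delta falls below targetDelta.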
diff --git a/include/aidge/quantization/PTQ/Clipping.hpp b/include/aidge/quantization/PTQ/Clipping.hpp
index 08a0b0ade5fdec76dea5b222884fbbe6f206c138..3f65c42eb2032da10c4d337b53fb1bdd08a7aa55 100644
--- a/include/aidge/quantization/PTQ/Clipping.hpp
+++ b/include/aidge/quantization/PTQ/Clipping.hpp
@@ -9,14 +9,14 @@
  *
  ********************************************************************************/
 
-#ifndef AIDGE_QUANTIZATION_PTQ_CLIP_H_
-#define AIDGE_QUANTIZATION_PTQ_CLIP_H_
+#ifndef AIDGE_QUANTIZATION_QUANTIZATION_PTQ_CLIP_H_
+#define AIDGE_QUANTIZATION_QUANTIZATION_PTQ_CLIP_H_
 
-//#include <cstdint>
-//#include <map>
-//#include <memory>
-//#include <string>
-//#include <vector>
+#include <cstdint>  // std::uint8_t
+#include <map>
+#include <memory>
+#include <string>
+#include <vector>
 
 #include "aidge/data/Tensor.hpp"
 #include "aidge/graph/GraphView.hpp"
@@ -36,7 +36,7 @@ namespace Aidge
      * @param inputDataSet The input dataset, consisting of a vector of input samples.
      * @return A map associating each node name to it's corresponding activation histogram.
      */
-    std::map<std::string, std::vector<int>> computeHistograms(std::map<std::string, float> valueRanges, int nbBins, std::shared_ptr<GraphView> graphView, std::vector<std::shared_ptr<Tensor>> inputDataSet, bool useCuda);
+    std::map<std::string, std::vector<int>> computeHistograms(std::map<std::string, double> valueRanges, int nbBins, std::shared_ptr<GraphView> graphView, std::vector<std::shared_ptr<Tensor>> inputDataSet, bool useCuda);
 
     /**
      * @brief Given an input activation histogram, compute the optimal clipping value in the sense of the Lp norm.
     * @param histogram: The provided activation histogram.
     * @param exponent: The exponent of the Lp norm (e.g. 2 for the MSE).
     * @return The optimal clipping value.
     */
-    float computeMEClipping(std::vector<int> histogram, std::uint8_t nbBits, float exponent);
+    double computeMEClipping(std::vector<int> histogram, std::uint8_t nbBits, double exponent);
 
     /**
     * @brief Given an input activation histogram, compute the optimal clipping value in the sense of the KL divergence.
     * @param histogram: The provided activation histogram.
     * @param nbBits: The quantization number of bits.
     * @return The optimal clipping value.
     */
-    float computeKLClipping(std::vector<int> histogram, std::uint8_t nbBits);
+    double computeKLClipping(std::vector<int> histogram, std::uint8_t nbBits);
 
     /**
-     * @brief Return a corrected map of the provided activation ranges. 
-     * To do so compute the optimal clipping values for every node and multiply the input ranges by those values. 
-     * The method used to compute the clippings can be eihter 'MSE', 'AA', 'KL' or 'MAX'. 
+     * @brief Return a corrected map of the provided activation ranges.
+     * To do so compute the optimal clipping values for every node and multiply the input ranges by those values.
+     * The method used to compute the clippings can be either 'MSE', 'AA', 'KL' or 'MAX'.
      * @param clippingMode The method used to compute the optimal clippings.
     * @param valueRanges The map associating each affine node to its output range.
     * @param nbBits The quantization number of bits.
     * @param graphView The GraphView under test.
     * @param inputDataSet The input dataset, consisting of a vector of input samples.
     * @param verbose Whether to print the clipping values or not.
* @return The corrected map associating each provided node to its clipped range. */ - std::map<std::string, float> adjustRanges(Clipping clippingMode, std::map<std::string, float> valueRanges, std::uint8_t nbBits, std::shared_ptr<GraphView> graphView, std::vector<std::shared_ptr<Tensor>> inputDataSet, bool useCuda, bool verbose); + std::map<std::string, double> adjustRanges(Clipping clippingMode, std::map<std::string, double> valueRanges, std::uint8_t nbBits, std::shared_ptr<GraphView> graphView, std::vector<std::shared_ptr<Tensor>> inputDataSet, bool useCuda, bool verbose); } -#endif /* AIDGE_QUANTIZATION_PTQ_CLIP_H_ */ +#endif /* AIDGE_QUANTIZATION_QUANTIZATION_PTQ_CLIP_H_ */ diff --git a/include/aidge/quantization/PTQ/PTQ.hpp b/include/aidge/quantization/PTQ/PTQ.hpp index 52d83d60b9a6f31ca99204daf213b3abd28fae3c..4fc38bc3b959ec8264ddaddbd4673fbe1f75e4ab 100644 --- a/include/aidge/quantization/PTQ/PTQ.hpp +++ b/include/aidge/quantization/PTQ/PTQ.hpp @@ -9,16 +9,19 @@ * ********************************************************************************/ -#ifndef AIDGE_QUANTIZATION_PTQ_PTQ_H_ -#define AIDGE_QUANTIZATION_PTQ_PTQ_H_ +#ifndef AIDGE_QUANTIZATION_QUANTIZATION_PTQ_PTQ_H_ +#define AIDGE_QUANTIZATION_QUANTIZATION_PTQ_PTQ_H_ -//#include <cstdint> -//#include <map> -//#include <memory> -//#include <string> -//#include <vector> +#include <cstdint> // std::uint8_t +#include <map> +#include <memory> +#include <set> +#include <string> +#include <utility> // std::pair +#include <vector> #include "aidge/data/Tensor.hpp" +#include "aidge/quantization/PTQ/Clipping.hpp" #include "aidge/graph/GraphView.hpp" namespace Aidge { @@ -31,7 +34,7 @@ namespace Aidge { /** * @brief Set of the types of the nodes which does not affect the PTQ process */ - static const std::set<std::string> seamlessNodeTypes({"LeakyReLU", "Pad2D", "MaxPooling2D", "AvgPooling2D", "PaddedMaxPooling2D", "PaddedAvgPooling2D", "GlobalAveragePooling", "Reshape", "Transpose", "Gather"}); + static const std::set<std::string> seamlessNodeTypes({"LeakyReLU", "Pad2D", "MaxPooling2D", "AvgPooling2D", "PaddedMaxPooling2D", "PaddedAvgPooling2D", "GlobalAveragePooling", "Reshape", "Transpose", "Gather", "Resize"}); /** * @brief Set of the types of the nodes that merge multiple branches into one @@ -98,18 +101,18 @@ namespace Aidge { * @param scalingNodesOnly Whether to restrain the retreival of the ranges to scaling nodes only or not. * @return A map associating each affine node name to it's corresponding output range. */ - std::map<std::string, float> computeRanges(std::shared_ptr<GraphView> graphView, std::vector<std::shared_ptr<Tensor>> inputDataSet, bool scalingNodesOnly, bool useCuda); + std::map<std::string, double> computeRanges(std::shared_ptr<GraphView> graphView, std::vector<std::shared_ptr<Tensor>> inputDataSet, bool scalingNodesOnly, bool useCuda); /** * @brief Normalize the activations of each affine node so that they fit in the [-1:1] range. * This is done by reconfiguring the scaling nodes, as well as rescaling the weights and biases tensors. * @param graphView The GraphView containing the affine nodes. - * @param valueRanges The node output value ranges computed over the calibration dataset. + * @param valueRanges The node output value ranges computed over the calibration dataset. 
*/ - void normalizeActivations(std::shared_ptr<GraphView> graphView, std::map<std::string, float> valueRanges); + void normalizeActivations(std::shared_ptr<GraphView> graphView, std::map<std::string, double> valueRanges); /** - * @brief For each node, compute the sign of its input and output values. + * @brief For each node, compute the sign of its input and output values. * The goal of the routine is to maximize the number of unsigned IOs in order to double the value resolution when possible. * @param graphView The GraphView to analyze. * @param verbose Whether to print the sign map or not. @@ -135,7 +138,7 @@ namespace Aidge { * @param clippingMode: Type of the clipping optimization. Can be either 'MAX', 'MSE', 'AA' or 'KL'. * @param applyRounding Whether to apply the rounding operations or not. * @param optimizeSigns Whether to take account of the IO signs of the operators or not. - * @param singleShift Whether to convert the scaling factors into powers of two. If true the approximations are compensated using the previous nodes weights. + * @param singleShift Whether to convert the scaling factors into powers of two. If true the approximations are compensated using the previous nodes weights. * @param verbose Whether to print internal informations about the quantization process. */ void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits, std::vector<std::shared_ptr<Tensor>> inputDataSet, Clipping clippingMode, bool applyRounding, bool optimizeSigns, bool singleShift, bool useCuda, bool verbose); @@ -145,7 +148,7 @@ namespace Aidge { * @param graphView The GraphView containing the affine nodes. * @return A map associating each affine node name to it's corresponding weight range. */ - std::map<std::string, float> getWeightRanges(std::shared_ptr<GraphView> graphView); + std::map<std::string, double> getWeightRanges(std::shared_ptr<GraphView> graphView); /** * @brief Clear the affine nodes biases. Provided form debugging purposes. @@ -157,8 +160,8 @@ namespace Aidge { * @brief Developement and test routine. * @param graphView The GraphView under test. */ - void devPTQ(std::shared_ptr<GraphView> graphView); + void devPTQ(std::shared_ptr<GraphView> graphView); } -#endif /* AIDGE_QUANTIZATION_PTQ_PTQ_H_ */ +#endif /* AIDGE_QUANTIZATION_QUANTIZATION_PTQ_PTQ_H_ */ diff --git a/include/aidge/quantization/PTQ/PTQMetaOps.hpp b/include/aidge/quantization/PTQ/PTQMetaOps.hpp index c4f2ac7262257ff5384302e3929700d8ed8e4fca..b9bad0d18f099e94d4c52254b08629c7f947db6a 100644 --- a/include/aidge/quantization/PTQ/PTQMetaOps.hpp +++ b/include/aidge/quantization/PTQ/PTQMetaOps.hpp @@ -8,22 +8,16 @@ * SPDX-License-Identifier: EPL-2.0 * ********************************************************************************/ -#ifndef AIDGE_QUANTIZATION_PTQ_PTQMETAOPS_H_ -#define AIDGE_QUANTIZATION_PTQ_PTQMETAOPS_H_ +#ifndef AIDGE_QUANTIZATION_QUANTIZATION_PTQ_PTQMETAOPS_H_ +#define AIDGE_QUANTIZATION_QUANTIZATION_PTQ_PTQMETAOPS_H_ -#include <array> #include <memory> #include <string> -#include <utility> - -#include "aidge/operator/Clip.hpp" -#include "aidge/operator/Mul.hpp" -#include "aidge/operator/Round.hpp" #include "aidge/graph/GraphView.hpp" #include "aidge/graph/Node.hpp" -#include "aidge/graph/OpArgs.hpp" // Sequential -#include "aidge/operator/MetaOperator.hpp" + +namespace Aidge { /// @brief Quantizer acts as a meta-operator to handle scaling operations in the PTQ, replacing the Scaling Operator. /// This operator is composed of a sequence of [Mul] -> [Clip] -> [Round] operations. 
@@ -33,7 +27,7 @@ /// @param clip_max The maximum value for the clip operation. /// @param name The name of the meta-operator node created. /// @return A shared pointer to an instance of the meta-operator node. -std::shared_ptr<Aidge::Node> Quantizer(float scalingFactor, float clip_min,float clip_max,const std::string& name); +std::shared_ptr<Aidge::Node> Quantizer(double scalingFactor, double clipMin, double clipMax, const std::string& name); /// @brief The purpose of Scaling is to encapsulate the Mul operator and tag it as a PTQ node rather than a regular Mul operator. /// Therefore, this meta-operator consists solely of a [Mul] operation. @@ -41,7 +35,7 @@ std::shared_ptr<Aidge::Node> Quantizer(float scalingFactor, float clip_min,float /// @param scalingFactor The scaling factor to apply to the input (a scalar to multiply the input with). /// @param name The name of the meta-operator node created. /// @return A shared pointer to an instance of the scaling node. -std::shared_ptr<Aidge::Node> Scaling(float scalingFactor,const std::string& name = ""); +std::shared_ptr<Aidge::Node> Scaling(double scalingFactor, const std::string& name = ""); /// @brief Updates the scaling factor of a PTQ meta-operator node, allowing for dynamic adjustment of the scaling parameter. /// This function sets a new scaling factor for a specified meta-operator node, modifying the scalar applied in the [Mul] operation. @@ -50,25 +44,27 @@ std::shared_ptr<Aidge::Node> Scaling(float scalingFactor,const std::string& name /// @param MetaOpNode A shared pointer to the PTQ meta-operator node whose scaling factor will be updated. /// @param newScalingFactor The new scaling factor to apply to the meta-operator node. /// @return True if the scaling factor was successfully updated, false if the operation failed (e.g., if MetaOpNode is null or incompatible). -bool updateScalingFactor(std::shared_ptr<Aidge::Node> MetaOpNode, float newScalingFactor); +void updateScalingFactor(std::shared_ptr<Aidge::Node> MetaOpNode, double newScalingFactor); /// @brief Retrieves the current scaling factor of a PTQ meta-operator node. -/// This function returns the scaling factor associated with the specified PTQ meta-operator node, +/// This function returns the scaling factor associated with the specified PTQ meta-operator node, /// allowing inspection of the current scalar applied in the [Mul] operation. /// /// @param MetaOpNode A shared pointer to the PTQ meta-operator node whose scaling factor is being queried. /// @return The scaling factor currently applied to the meta-operator node, or -1 if the operation fails (e.g., if MetaOpNode is null or incompatible). -float getScalingFactor(std::shared_ptr<Aidge::Node> MetaOpNode); +double getScalingFactor(std::shared_ptr<Aidge::Node> MetaOpNode); /// @brief Sets the clip range for an existing Quantizer node by specifying minimum and maximum clipping values. /// This function modifies the clip range of a Quantizer node, allowing adjustment of the range within which values are clipped /// in the [Clip] operation of the Quantizer sequence. /// -/// @param QuantizerNode A shared pointer to the Quantizer node whose clip range is being set. +/// @param QuantizerNode A shared pointer to the Quantizer node whose clip range is being set. /// This node should have been created using the Quantizer function. /// @param min The minimum value for the clip range. Values below this will be clipped to this minimum. /// @param max The maximum value for the clip range. 
Values above this will be clipped to this maximum. /// @return True if the clip range was successfully set, false if the operation failed (e.g., if QuantizerNode is null). -bool setClipRange(std::shared_ptr<Aidge::Node> QuantizerNode, float min, float max); +void setClipRange(std::shared_ptr<Aidge::Node> QuantizerNode, double min, double max); + +} -#endif /* AIDGE_QUANTIZATION_PTQ_PTQMETAOPS_H_ */ +#endif /* AIDGE_QUANTIZATION_QUANTIZATION_PTQ_PTQMETAOPS_H_ */ diff --git a/include/aidge/quantization/QAT/QAT_FixedQ.hpp b/include/aidge/quantization/QAT/QAT_FixedQ.hpp index ecbe7422ea85db1771d91e161c93740993ebbe2b..6a2aa249892d58fcbd5a45a8d7bb8de67effabaf 100644 --- a/include/aidge/quantization/QAT/QAT_FixedQ.hpp +++ b/include/aidge/quantization/QAT/QAT_FixedQ.hpp @@ -9,8 +9,10 @@ * ********************************************************************************/ -#ifndef AIDGE_QUANTIZATION_QAT_FIXEDQ_H_ -#define AIDGE_QUANTIZATION_QAT_FIXEDQ_H_ +#ifndef AIDGE_QUANTIZATION_QUANTIZATION_QAT_FIXEDQ_H_ +#define AIDGE_QUANTIZATION_QUANTIZATION_QAT_FIXEDQ_H_ + +#include <memory> #include "aidge/graph/Node.hpp" #include "aidge/graph/GraphView.hpp" @@ -41,10 +43,10 @@ void insertAndInitQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits * @brief Developement and test routine. * @param graphView The GraphView under test. */ -void devQAT(std::shared_ptr<GraphView> graphView); +void devQAT(std::shared_ptr<GraphView> graphView); } } -#endif /* AIDGE_QUANTIZATION_QAT_FIXEDQ_H_ */ +#endif /* AIDGE_QUANTIZATION_QUANTIZATION_QAT_FIXEDQ_H_ */ diff --git a/include/aidge/quantization/QAT/QAT_LSQ.hpp b/include/aidge/quantization/QAT/QAT_LSQ.hpp index 4970be07fae8737a1c2863600757bb81ff3a65f9..a44c71b04ca9e9c6a8fba27c615c99b4893d3d8c 100644 --- a/include/aidge/quantization/QAT/QAT_LSQ.hpp +++ b/include/aidge/quantization/QAT/QAT_LSQ.hpp @@ -9,12 +9,14 @@ * ********************************************************************************/ -#ifndef AIDGE_QUANTIZATION_QAT_LSQ_H_ -#define AIDGE_QUANTIZATION_QAT_LSQ_H_ +#ifndef AIDGE_QUANTIZATION_QUANTIZATION_QAT_LSQ_H_ +#define AIDGE_QUANTIZATION_QUANTIZATION_QAT_LSQ_H_ + +#include <cstddef> // std::size_t +#include <memory> -#include "aidge/graph/Node.hpp" -#include "aidge/graph/GraphView.hpp" #include "aidge/data/Tensor.hpp" +#include "aidge/graph/GraphView.hpp" namespace Aidge { namespace QuantLSQ { @@ -25,7 +27,7 @@ namespace QuantLSQ { * @param nbBits Number of quantization bits. * @param span Fixed output span of the quantizers. */ -void insertQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits, float step_size); +void insertQuantizers(std::shared_ptr<GraphView> graphView, std::size_t nbBits, float step_size); /** * @brief Given a GraphView with parameters properly initialized and some calibration data, @@ -35,10 +37,10 @@ void insertQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits, float * @param calibrationData Calibration data used to adjust the spans. * @param scale Multiplicative constant applied to the spans. 
 */
-void insertAndInitQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits, std::shared_ptr<Tensor> calibrationData);
+void insertAndInitQuantizers(std::shared_ptr<GraphView> graphView, std::size_t nbBits, std::shared_ptr<Tensor> calibrationData);
 
-}
-}
+} // namespace QuantLSQ
+} // namespace Aidge
 
-#endif /* AIDGE_QUANTIZATION_QAT_LSQ_H_ */
+#endif /* AIDGE_QUANTIZATION_QUANTIZATION_QAT_LSQ_H_ */
diff --git a/include/aidge/quantization_version.h b/include/aidge/quantization_version.h
new file mode 100644
index 0000000000000000000000000000000000000000..546263af3a7e8b7a73991173f48d0b095c7d9501
--- /dev/null
+++ b/include/aidge/quantization_version.h
@@ -0,0 +1,11 @@
+#ifndef VERSION_H
+#define VERSION_H
+
+namespace Aidge {
+static constexpr const int PROJECT_VERSION_MAJOR = 0;
+static constexpr const int PROJECT_VERSION_MINOR = 2;
+static constexpr const int PROJECT_VERSION_PATCH = 0;
+static constexpr const char * PROJECT_VERSION = "0.2.0";
+static constexpr const char * PROJECT_GIT_HASH = "f50c860";
+}
+#endif // VERSION_H
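Once the Python bindings are built, the constants generated above can be checked at runtime. A minimal sketch, assuming the wheel is installed (the three helpers are bound in python_binding/utils/sys_info/pybind_QuantizationVersionInfo.cpp later in this diff):

    import aidge_quantization

    print(aidge_quantization.get_project_version())  # "0.2.0"
    print(aidge_quantization.get_git_hash())         # "f50c860"
    aidge_quantization.show_version()                # logs version, git hash and compiler info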
diff --git a/include/aidge/utils/sys_info/QuantizationVersionInfo.hpp b/include/aidge/utils/sys_info/QuantizationVersionInfo.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..6b4deb81aa77924a813f0704221620e5f0f7fe04
--- /dev/null
+++ b/include/aidge/utils/sys_info/QuantizationVersionInfo.hpp
@@ -0,0 +1,38 @@
+#ifndef AIDGE_UTILS_SYS_INFO_QUANTIZATION_VERSION_INFO_H
+#define AIDGE_UTILS_SYS_INFO_QUANTIZATION_VERSION_INFO_H
+
+#include "aidge/utils/Log.hpp"
+#include "aidge/quantization_version.h"
+
+namespace Aidge {
+
+constexpr inline const char * getQuantizationProjectVersion(){
+    return PROJECT_VERSION;
+}
+
+constexpr inline const char * getQuantizationGitHash(){
+    return PROJECT_GIT_HASH;
+}
+
+void showQuantizationVersion() {
+    Log::info("Aidge quantization: {} ({}), {} {}", getQuantizationProjectVersion(), getQuantizationGitHash(), __DATE__, __TIME__);
+    // Compiler version
+    #if defined(__clang__)
+    /* Clang/LLVM. ---------------------------------------------- */
+    Log::info("Clang/LLVM compiler version: {}.{}.{}\n", __clang_major__ , __clang_minor__, __clang_patchlevel__);
+    #elif defined(__ICC) || defined(__INTEL_COMPILER)
+    /* Intel ICC/ICPC. ------------------------------------------ */
+    Log::info("Intel ICC/ICPC compiler version: {}\n", __INTEL_COMPILER);
+    #elif defined(__GNUC__) || defined(__GNUG__)
+    /* GNU GCC/G++. --------------------------------------------- */
+    Log::info("GNU GCC/G++ compiler version: {}.{}.{}", __GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__);
+    #elif defined(_MSC_VER)
+    /* Microsoft Visual Studio. --------------------------------- */
+    Log::info("Microsoft Visual Studio compiler version: {}\n", _MSC_VER);
+    #else
+    Log::info("Unknown compiler\n");
+    #endif
+
+}
+} // namespace Aidge
+#endif // AIDGE_UTILS_SYS_INFO_QUANTIZATION_VERSION_INFO_H
diff --git a/include/aidge/version.h.in b/include/aidge/version.h.in
new file mode 100644
index 0000000000000000000000000000000000000000..4b876f63002972c1f8f1340b70cdecdace911012
--- /dev/null
+++ b/include/aidge/version.h.in
@@ -0,0 +1,11 @@
+#ifndef VERSION_H
+#define VERSION_H
+
+namespace Aidge {
+static constexpr const int PROJECT_VERSION_MAJOR = @PROJECT_VERSION_MAJOR@;
+static constexpr const int PROJECT_VERSION_MINOR = @PROJECT_VERSION_MINOR@;
+static constexpr const int PROJECT_VERSION_PATCH = @PROJECT_VERSION_PATCH@;
+static constexpr const char * PROJECT_VERSION = "@PROJECT_VERSION_MAJOR@.@PROJECT_VERSION_MINOR@.@PROJECT_VERSION_PATCH@";
+static constexpr const char * PROJECT_GIT_HASH = "@GIT_COMMIT_HASH@";
+}
+#endif // VERSION_H
diff --git a/pyproject.toml b/pyproject.toml
index fc745eb1b584a097e17dddbdd6feeb1565ae3b96..088200e44f589e221982ddaab825986c4224243d 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,25 +4,47 @@ description="Quantization algorithms to compress aidge networks."
 dependencies = [
     "numpy>=1.21.6",
 ]
-requires-python = ">= 3.7"
+requires-python = ">= 3.8"
 readme = "README.md"
 license = { file = "LICENSE" }
 classifiers = [
     "Development Status :: 2 - Pre-Alpha",
-    "Programming Language :: Python :: 3"
-    ]
-dynamic = ["version"] # defined in tool.setuptools_scm
-# version="1"
+    "Intended Audience :: Developers",
+    "Intended Audience :: Education",
+    "Intended Audience :: Science/Research",
+    "License :: OSI Approved :: Eclipse Public License 2.0 (EPL-2.0)",
+    "Programming Language :: C++",
+    "Programming Language :: Python",
+    "Programming Language :: Python :: 3",
+    "Programming Language :: Python :: 3.8",
+    "Programming Language :: Python :: 3.9",
+    "Programming Language :: Python :: 3.10",
+    "Programming Language :: Python :: 3.11",
+    "Programming Language :: Python :: 3.12",
+    "Programming Language :: Python :: 3.13",
+    "Programming Language :: Python :: 3 :: Only",
+    "Topic :: Scientific/Engineering",
+    "Topic :: Scientific/Engineering :: Artificial Intelligence",
+    "Topic :: Software Development"
+]
+dynamic = ["version"] # defined by pbr
 
 [build-system]
 requires = [
     "setuptools>=64",
-    "setuptools_scm[toml]==7.1.0",
     "cmake>=3.15.3.post1",
-    "toml"
+    "toml",
+    "pbr"
 ]
 build-backend = "setuptools.build_meta"
 
+[project.urls]
+Homepage = "https://www.deepgreen.ai/en/platform"
+Documentation = "https://eclipse-aidge.readthedocs.io/en/latest/"
+Repository = "https://gitlab.eclipse.org/eclipse/aidge/aidge_quantization"
+Issues = "https://gitlab.eclipse.org/eclipse/aidge/aidge_quantization/-/issues/"
+Changelog = "https://gitlab.eclipse.org/eclipse/aidge/aidge_quantization/-/releases"
+
 #####################################################
 # SETUPTOOLS
 [tool.setuptools]
@@ -35,10 +57,6 @@ exclude = [
     "aidge_quantization.unit_tests.assets"
 ]  # exclude packages matching these glob patterns (empty by default)
 
-# SETUPTOOLS_SCM
-[tool.setuptools_scm]
-write_to = "aidge_quantization/_version.py"
-
 #####################################################
 # CIBUILDWHEEL
 [tool.cibuildwheel]
@@ -54,6 +72,19 @@ test-command = "pytest {package}/aidge_quantization/unit_tests"
 #     "cp39-win_amd64",
 #     "cp310-win_amd64",
 # ]
+# PYLINT
+[tool.pylint.main]
+# A comma-separated list of package or module names from where C extensions may
+# be loaded. Extensions are loading into the active Python interpreter and may +# run arbitrary code. +extension-pkg-allow-list = ["aidge_core", "aidge_backend_cpu", "aidge_quantization", "onnx"] +# Files or directories to be skipped. They should be base names, not paths. +ignore = ["CVS"] +# List of module names for which member attributes should not be checked (useful +# for modules/projects where namespaces are manipulated during runtime and thus +# existing member attributes cannot be deduced by static analysis). It supports +# qualified module names, as well as Unix pattern matching. +ignored-modules = ["aidge_core", "aidge_backend_cpu", "aidge_quantization", "onnx"] ## AIDGE DEPENDENCIES DECLARATION [tool.cibuildwheel.environment] AIDGE_DEPENDENCIES = "aidge_core aidge_backend_cpu aidge_onnx" # format => "dep_1 dep_2 ... dep_n" diff --git a/python_binding/pybind_PTQ.cpp b/python_binding/pybind_PTQ.cpp index ed2632566f4535e89d1e78314256f4c9b2b84623..b5193bddcfe345a1702f02fcc139a4cf5b94a1ce 100644 --- a/python_binding/pybind_PTQ.cpp +++ b/python_binding/pybind_PTQ.cpp @@ -220,7 +220,7 @@ void init_PTQ(py::module &m) { :type network: :py:class:`aidge_core.GraphView` )mydelimiter"); - m.def("prepare_network", &prepareNetwork, py::arg("network"), "prepare the network fo the PTQ"); + m.def("prepare_network", &prepareNetwork, py::arg("network"), "prepare the network for the PTQ"); } diff --git a/python_binding/pybind_Quantization.cpp b/python_binding/pybind_Quantization.cpp index cd18cf8ebdd165f85284e397fe75e2a3eaf988bc..7ac344dcfcd4fc93e3bba1dcd19c1413f5a29d0c 100644 --- a/python_binding/pybind_Quantization.cpp +++ b/python_binding/pybind_Quantization.cpp @@ -20,7 +20,7 @@ namespace py = pybind11; -namespace Aidge +namespace Aidge { // operators @@ -35,8 +35,9 @@ void init_QAT_FixedQ(py::module &m); void init_QAT_LSQ(py::module &m); void init_QuantRecipes(py::module &m); +void init_QuantizationVersionInfo(py::module &m); -PYBIND11_MODULE(aidge_quantization, m) +PYBIND11_MODULE(aidge_quantization, m) { init_FixedQ(m); init_LSQ(m); @@ -47,6 +48,7 @@ PYBIND11_MODULE(aidge_quantization, m) init_QAT_FixedQ(m); init_QAT_LSQ(m); init_QuantRecipes(m); + init_QuantizationVersionInfo(m); } } // namespace Aidge diff --git a/python_binding/utils/sys_info/pybind_QuantizationVersionInfo.cpp b/python_binding/utils/sys_info/pybind_QuantizationVersionInfo.cpp new file mode 100644 index 0000000000000000000000000000000000000000..abed12b38df75471ddb57c505146ab12bc833ed8 --- /dev/null +++ b/python_binding/utils/sys_info/pybind_QuantizationVersionInfo.cpp @@ -0,0 +1,11 @@ +#include <pybind11/pybind11.h> +#include "aidge/utils/sys_info/QuantizationVersionInfo.hpp" + +namespace py = pybind11; +namespace Aidge { +void init_QuantizationVersionInfo(py::module& m){ + m.def("show_version", &showQuantizationVersion); + m.def("get_project_version", &getQuantizationProjectVersion); + m.def("get_git_hash", &getQuantizationGitHash); +} +} diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 0000000000000000000000000000000000000000..aa0f227f6688468a5ab93384f7b1670086000035 --- /dev/null +++ b/setup.cfg @@ -0,0 +1,3 @@ +# pbr file +[metadata] +version = file: version.txt diff --git a/setup.py b/setup.py index 8774d01a4abd69c76319c71b610e98061153d4c2..1bfc0ac515fd8cceeec4cba666addc1e7666fd25 100644 --- a/setup.py +++ b/setup.py @@ -37,6 +37,7 @@ class AidgePkgBuild(build_ext): # This lists the number of processors available on the machine # The compilation will use half of them max_jobs = 
str(ceil(multiprocessing.cpu_count() / 2))
+        max_jobs = os.environ.get("AIDGE_NB_PROC", max_jobs)
 
         cwd = pathlib.Path().absolute()
 
@@ -51,14 +52,20 @@ class AidgePkgBuild(build_ext):
         package_prefix = build_lib if not self.editable_mode else SETUP_DIR
         pybind_install_prefix = (package_prefix / PROJECT_NAME).absolute()
 
-        os.chdir(str(build_temp))
-
-        compile_type = os.environ.get("AIDGE_PYTHON_BUILD_TYPE", "Release")
         install_path = (
             os.path.join(sys.prefix, "lib", "libAidge")
             if "AIDGE_INSTALL" not in os.environ
             else os.environ["AIDGE_INSTALL"]
         )
+
+        # Read environment variables for CMake options
+        c_compiler = os.environ.get("AIDGE_C_COMPILER", "gcc")
+        cxx_compiler = os.environ.get("AIDGE_CXX_COMPILER", "g++")
+        build_type = os.environ.get("AIDGE_BUILD_TYPE", "Release")
+        asan = os.environ.get("AIDGE_ASAN", "OFF")
+        with_cuda = os.environ.get("AIDGE_WITH_CUDA", "OFF")
+        cmake_arch = os.environ.get("AIDGE_CMAKE_ARCH", "")
+
         build_gen = os.environ.get("AIDGE_BUILD_GEN", "")
         build_gen_opts = (
             ["-G", build_gen]
@@ -67,26 +74,36 @@ class AidgePkgBuild(build_ext):
         )
         test_onoff = os.environ.get("AIDGE_BUILD_TEST", "OFF")
 
-        self.spawn(
-            [
-                "cmake",
-                *build_gen_opts,
-                str(cwd),
-                f"-DTEST={test_onoff}",
-                f"-DCMAKE_INSTALL_PREFIX:PATH={install_path}",
-                f"-DCMAKE_BUILD_TYPE={compile_type}",
-                "-DPYBIND=ON",
-                f"-DPYBIND_INSTALL_PREFIX:PATH={pybind_install_prefix}",
-                "-DCMAKE_EXPORT_COMPILE_COMMANDS=ON",
-                "-DCOVERAGE=OFF",
-            ]
-        )
+        os.chdir(str(build_temp))
+
+        cmake_cmd = [
+            "cmake",
+            *build_gen_opts,
+            str(cwd),
+            f"-DTEST={test_onoff}",
+            f"-DCMAKE_INSTALL_PREFIX:PATH={install_path}",
+            f"-DCMAKE_BUILD_TYPE={build_type}",
+            f"-DCMAKE_C_COMPILER={c_compiler}",
+            f"-DCMAKE_CXX_COMPILER={cxx_compiler}",
+            f"-DENABLE_ASAN={asan}",
+            f"-DCUDA={with_cuda}",
+            "-DPYBIND=ON",
+            f"-DPYBIND_INSTALL_PREFIX:PATH={pybind_install_prefix}",
+            "-DCMAKE_EXPORT_COMPILE_COMMANDS=1",
+            "-DCOVERAGE=OFF",
+        ]
+
+        # Append architecture-specific arguments if provided
+        if cmake_arch:
+            cmake_cmd.append(cmake_arch)
+
+        self.spawn(cmake_cmd)
 
         if not self.dry_run:
             self.spawn(
-                ["cmake", "--build", ".", "--config", compile_type, "-j", max_jobs]
+                ["cmake", "--build", ".", "--config", build_type, "-j", max_jobs]
             )
-            self.spawn(["cmake", "--install", ".", "--config", compile_type])
+            self.spawn(["cmake", "--install", ".", "--config", build_type])
         os.chdir(str(cwd))
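The AIDGE_* environment variables read above make the CMake configuration tunable without editing setup.py. A hypothetical invocation (the variable names are the ones read by setup.py; the values are only examples):

    import os
    import subprocess

    os.environ["AIDGE_NB_PROC"] = "8"          # cap the number of parallel compile jobs
    os.environ["AIDGE_BUILD_TYPE"] = "Debug"   # forwarded as -DCMAKE_BUILD_TYPE
    os.environ["AIDGE_WITH_CUDA"] = "OFF"      # forwarded as -DCUDA
    subprocess.run(["pip", "install", ".", "-v"], check=True)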
rescaleTensor(std::shared_ptr<Tensor> tensor, double scaling) { // Get the tensor data pointer - float * castedTensor = static_cast <float *> (tensor->getImpl()->rawPtr()); + double * castedTensor = static_cast<double *> (tensor->getImpl()->rawPtr()); // Rescale the tensor for(std::size_t i = 0; i < tensor->size(); i++) castedTensor[i] *= scaling; } -static float getTensorAbsoluteMax(std::shared_ptr <Tensor> tensor) +static double getTensorAbsoluteMax(std::shared_ptr<Tensor> tensor) { // Get the tensor data pointer and edit it - float * castedTensor = static_cast<float*>(tensor->getImpl()->rawPtr()); + double * castedTensor = static_cast<double*> (tensor->getImpl()->rawPtr()); // Get the tensor absolute max value - float maxValue = 0.0f; + double maxValue = 0.0; for(std::size_t i = 0; i < tensor->size(); ++i) { if(std::fabs(castedTensor[i]) > maxValue) { maxValue = std::fabs(castedTensor[i]); @@ -57,20 +62,19 @@ static float getTensorAbsoluteMax(std::shared_ptr <Tensor> tensor) return maxValue; } -void crossLayerEqualization(std::shared_ptr<GraphView> graphView, float targetDelta) +void crossLayerEqualization(std::shared_ptr<GraphView> graphView, double targetDelta) { std::vector<std::shared_ptr<Node>> nodeVector = retrieveNodeVector(graphView); // Check if the CLE can be applied ... - for (std::shared_ptr<Node> node : nodeVector) if (node->getChildren().size() > 1) { - Log::info(" Network have multiple branches, skipping the CLE ... "); + Log::notice("Network has multiple branches, skipping the CLE ... "); return; - } + } - Log::info(" Applying the Cross-Layer Equalization ... "); + Log::info("Applying the Cross-Layer Equalization ... "); // Get the vector of affine nodes @@ -79,38 +83,46 @@ void crossLayerEqualization(std::shared_ptr<GraphView> graphView, float targetDe if (isAffine(node)) affineNodeVector.push_back(node); - float maxRangeDelta; + if (affineNodeVector.empty()) { + Log::notice("No affine nodes found in the network. CLE cannot be applied."); + return; + } + double maxRangeDelta; + int iteration = 0; - do + do { + ++iteration; maxRangeDelta = 0.0; - //std::cout << " ----- " << std::endl; //for (std::shared_ptr<Node> node : affineNodeVector) // std::cout << getTensorAbsoluteMax(getWeightTensor(node)) << std::endl; - - for (size_t i = 0; i < (affineNodeVector.size() - 1); i++) + + for (std::size_t i = 0; i < (affineNodeVector.size() - 1); i++) { std::shared_ptr<Node> n1 = affineNodeVector[i]; std::shared_ptr<Node> n2 = affineNodeVector[i+1]; - float r1 = getTensorAbsoluteMax(getWeightTensor(n1)); - float r2 = getTensorAbsoluteMax(getWeightTensor(n2)); + double r1 = getTensorAbsoluteMax(getWeightTensor(n1)); + double r2 = getTensorAbsoluteMax(getWeightTensor(n2)); - float s1 = std::sqrt(r1 * r2) / r1; - float s2 = std::sqrt(r1 * r2) / r2; + double s1 = std::sqrt(r1 * r2) / r1; + double s2 = std::sqrt(r1 * r2) / r2; rescaleTensor(getWeightTensor(n1), s1); rescaleTensor(getWeightTensor(n2), s2); rescaleTensor(getBiasTensor(n1), s1); - float rangeDelta = std::abs(r1 - r2); + double rangeDelta = std::abs(r1 - r2); if (rangeDelta > maxRangeDelta) maxRangeDelta = rangeDelta; } } while (maxRangeDelta > targetDelta); + + Log::notice("CLE completed after {} iterations. 
Final max range delta: {:.6f}", + iteration, maxRangeDelta); } } \ No newline at end of file diff --git a/src/PTQ/Clipping.cpp b/src/PTQ/Clipping.cpp index e00140800e25b210aeabd27a877a6fdfa08708f9..57ad7a836bbb6251a8eeb6da87e3647b4f54afe2 100644 --- a/src/PTQ/Clipping.cpp +++ b/src/PTQ/Clipping.cpp @@ -19,7 +19,7 @@ namespace Aidge { -std::map<std::string, std::vector<int>> computeHistograms(std::map<std::string, float> valueRanges, int nbBins, std::shared_ptr<GraphView> graphView, std::vector<std::shared_ptr<Tensor>> inputDataSet, bool useCuda) +std::map<std::string, std::vector<int>> computeHistograms(std::map<std::string, double> valueRanges, int nbBins, std::shared_ptr<GraphView> graphView, std::vector<std::shared_ptr<Tensor>> inputDataSet, bool useCuda) { if (useCuda) graphView->setBackend("cuda"); @@ -72,7 +72,7 @@ std::map<std::string, std::vector<int>> computeHistograms(std::map<std::string, bool isInsideRanges = (valueRanges.find(node->name()) != valueRanges.end()); if (isInsideRanges) { - float valueRange = valueRanges[node->name()]; + double valueRange = valueRanges[node->name()]; std::shared_ptr<Operator> nodeOperator = node->getOperator(); std::shared_ptr<Tensor> valueTensor = std::static_pointer_cast<Tensor> (nodeOperator->getRawOutput(0)); @@ -80,15 +80,17 @@ std::map<std::string, std::vector<int>> computeHistograms(std::map<std::string, if (useCuda) valueTensor->setBackend("cpu"); - float * castedTensor = static_cast<float *> (valueTensor->getImpl()->rawPtr()); + double * castedTensor = static_cast<double *> (valueTensor->getImpl()->rawPtr()); std::vector<int> nodeHistogram = histograms[node->name()]; for(std::size_t i = 0; i < valueTensor->size(); i++) { - int bin = std::round(std::abs(castedTensor[i] / valueRange * nbBins)); + std::size_t bin = std::round(std::abs(castedTensor[i] / valueRange * nbBins)); + bin = std::min(bin, nodeHistogram.size() - 1); nodeHistogram[bin]++; } - histograms[node->name()] = nodeHistogram; + + histograms[node->name()] = nodeHistogram; if (useCuda) valueTensor->setBackend("cuda"); @@ -105,52 +107,53 @@ std::map<std::string, std::vector<int>> computeHistograms(std::map<std::string, return histograms; } -float computeMEClipping(std::vector<int> histogram, std::uint8_t nbBits, float exponent) +double computeMEClipping(std::vector<int> histogram, std::uint8_t nbBits, double exponent) { int nbBins = histogram.size(); int nbIter = 100; int signedMax = (1 << (nbBits - 1)) - 1; - std::vector<float> clippingErrors; + std::vector<double> clippingErrors; for (int it = 1; it < nbIter; it++) { // Compute the rounding cost of this particular clipping ... 
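+ // For each candidate clipping value (it / nbIter), the loop below scales, rounds and clips every bin midpoint, accumulating |clipped - value|^exponent weighted by the bin population.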
- float accumulatedError = 0.0; - float clipping = it / static_cast<float> (nbIter); + double accumulatedError = 0.0; + double clipping = it / static_cast<double> (nbIter); for (int bin = 0; bin < nbBins; bin++) { - float value = (bin + 0.5) / nbBins; - float scaling = signedMax / clipping; - float rounded = std::round(value * scaling) / scaling; - float clipped = std::min(clipping, rounded); + double value = (bin + 0.5) / nbBins; + double scaling = signedMax / clipping; + double rounded = std::round(value * scaling) / scaling; + double clipped = std::min(clipping, rounded); - float approxError = std::abs(clipped - value); + double approxError = std::abs(clipped - value); accumulatedError += std::pow(approxError, exponent) * histogram[bin]; } clippingErrors.push_back(accumulatedError); } - std::vector<float>::iterator it = std::min_element(clippingErrors.begin(), clippingErrors.end()); - float bestClipping = static_cast<float> (std::distance(clippingErrors.begin(), it)) / static_cast<float> (nbIter); - + std::vector<double>::iterator it = std::min_element(clippingErrors.begin() + 1, clippingErrors.end()); + int bestBin = static_cast<int> (std::distance(clippingErrors.begin(), it)) + 1; + double bestClipping = static_cast<double> (bestBin) / static_cast<double> (nbIter); + return bestClipping; } -float computeKLClipping(std::vector<int> refHistogram, std::uint8_t nbBits) +double computeKLClipping(std::vector<int> refHistogram, std::uint8_t nbBits) { // KL Clipping int nbIter = 100; int signedMax = (1 << (nbBits - 1)) - 1; - float refNorm = 0; + double refNorm = 0; for (int n : refHistogram) - refNorm += static_cast<float> (n); + refNorm += static_cast<double> (n); - std::vector<float> clippingErrors; + std::vector<double> clippingErrors; for (int it = 1; it < nbIter; it++) { - float clipping = it / static_cast<float> (nbIter); + double clipping = it / static_cast<double> (nbIter); // Create the histogram for this particular clipping ... 
@@ -160,7 +163,7 @@ float computeKLClipping(std::vector<int> refHistogram, std::uint8_t nbBits) for (std::size_t refBin = 0; refBin < refHistogram.size(); refBin++) { - float value = (static_cast<float> (refBin) + 0.5f) / static_cast<float> (refHistogram.size()); + double value = (static_cast<double> (refBin) + 0.5) / static_cast<double> (refHistogram.size()); int quantBin = std::floor(value / clipping * signedMax); quantBin = std::min(quantBin, signedMax-1); quantHistogram[quantBin] += refHistogram[refBin]; @@ -168,10 +171,10 @@ float computeKLClipping(std::vector<int> refHistogram, std::uint8_t nbBits) // Compute the mass of the histogram - float quantNorm = 0; + double quantNorm = 0; for (std::size_t refBin = 0; refBin < refHistogram.size(); refBin++) { - float value = (static_cast<float> (refBin) + 0.5f) / static_cast<float> (refHistogram.size()); + double value = (static_cast<double> (refBin) + 0.5) / static_cast<double> (refHistogram.size()); int quantBin = std::floor(value / clipping * signedMax); if (quantBin < static_cast<int> (quantHistogram.size())) quantNorm += quantHistogram[quantBin]; @@ -179,15 +182,15 @@ float computeKLClipping(std::vector<int> refHistogram, std::uint8_t nbBits) // Compute the KL divergence - float accumulatedError = 0.0; + double accumulatedError = 0.0; for (std::size_t refBin = 0; refBin < refHistogram.size(); refBin++) { - float value = (static_cast<float> (refBin) + 0.5f) / static_cast<float> (refHistogram.size()); + double value = (static_cast<double> (refBin) + 0.5) / static_cast<double> (refHistogram.size()); int quantBin = std::floor(value / clipping * signedMax); - float p = static_cast<float> (refHistogram[refBin]) / refNorm; - float q = (quantBin < static_cast<int> (quantHistogram.size())) ? - static_cast<float> (quantHistogram[quantBin]) / quantNorm : 0; + double p = static_cast<double> (refHistogram[refBin]) / refNorm; + double q = (quantBin < static_cast<int> (quantHistogram.size())) ? + static_cast<double> (quantHistogram[quantBin]) / quantNorm : 0; if (p != 0 && q != 0) accumulatedError += q * std::log(q / p); @@ -196,16 +199,17 @@ float computeKLClipping(std::vector<int> refHistogram, std::uint8_t nbBits) clippingErrors.push_back(accumulatedError); } - std::vector<float>::iterator it = std::min_element(clippingErrors.begin() + 1, clippingErrors.end()); - float bestClipping = static_cast<float> (std::distance(clippingErrors.begin(), it)) / static_cast<float> (nbIter); + std::vector<double>::iterator it = std::min_element(clippingErrors.begin() + 1, clippingErrors.end()); + int bestBin = static_cast<int> (std::distance(clippingErrors.begin(), it)) + 1; + double bestClipping = (static_cast<double> (bestBin)) / static_cast<double> (nbIter); return bestClipping; } -std::map<std::string, float> adjustRanges(Clipping clippingMode, std::map<std::string, float> valueRanges, std::uint8_t nbBits, std::shared_ptr<GraphView> graphView, std::vector<std::shared_ptr<Tensor>> inputDataSet, bool useCuda, bool verbose) +std::map<std::string, double> adjustRanges(Clipping clippingMode, std::map<std::string, double> valueRanges, std::uint8_t nbBits, std::shared_ptr<GraphView> graphView, std::vector<std::shared_ptr<Tensor>> inputDataSet, bool useCuda, bool verbose) { - float clipping = 1.0f; + double clipping = 1.0; int nbBins = (1 << (nbBits + 4)) ; // XXX Enhance this !!! 
@@ -213,6 +217,7 @@ std::map<std::string, float> adjustRanges(Clipping clippingMode, std::map<std::s { if (verbose) Log::info(" === CLIPPING VALUES === "); + std::map<std::string, std::vector<int>> histograms = computeHistograms(valueRanges, nbBins, graphView, inputDataSet, useCuda); for (std::shared_ptr<Node> node : graphView->getNodes()) diff --git a/src/PTQ/PTQ.cpp b/src/PTQ/PTQ.cpp index 76fe8f24b3492a9488f808d54c2ce6b491a9b79a..0e26313475bbbda23a56dcdda52d55a0a5af8204 100644 --- a/src/PTQ/PTQ.cpp +++ b/src/PTQ/PTQ.cpp @@ -66,20 +66,20 @@ bool checkArchitecture(std::shared_ptr<GraphView> graphView) return true; } -static void fillTensor(std::shared_ptr<Tensor> tensor, float value) +static void fillTensor(std::shared_ptr<Tensor> tensor, double value) { // Get the tensor data pointer - float * castedTensor = static_cast <float *> (tensor->getImpl()->rawPtr()); + double * castedTensor = static_cast <double *> (tensor->getImpl()->rawPtr()); // Fill the tensor for(std::size_t i = 0; i < tensor->size(); i++) castedTensor[i] = value; } -static void rescaleTensor(std::shared_ptr<Tensor> tensor, float scaling) +static void rescaleTensor(std::shared_ptr<Tensor> tensor, double scaling) { // Get the tensor data pointer - float * castedTensor = static_cast <float *> (tensor->getImpl()->rawPtr()); + double * castedTensor = static_cast <double *> (tensor->getImpl()->rawPtr()); // Rescale the tensor for(std::size_t i = 0; i < tensor->size(); i++) @@ -89,20 +89,20 @@ static void rescaleTensor(std::shared_ptr<Tensor> tensor, float scaling) static void roundTensor(std::shared_ptr<Tensor> tensor) { // Get the tensor data pointer - float * castedTensor = static_cast <float *> (tensor->getImpl()->rawPtr()); + double * castedTensor = static_cast <double *> (tensor->getImpl()->rawPtr()); // Rescale the tensor for(std::size_t i = 0; i < tensor->size(); i++) castedTensor[i] = std::nearbyint(castedTensor[i]);//Round } -static float getTensorAbsoluteMax(std::shared_ptr <Tensor> tensor) +static double getTensorAbsoluteMax(std::shared_ptr <Tensor> tensor) { // Get the tensor data pointer and edit it - float * castedTensor = static_cast<float*>(tensor->getImpl()->rawPtr()); + double * castedTensor = static_cast<double*>(tensor->getImpl()->rawPtr()); // Get the tensor absolute max value - float maxValue = 0.0f; + double maxValue = 0.0; for(std::size_t i = 0; i < tensor->size(); ++i) { if(std::fabs(castedTensor[i]) > maxValue) { maxValue = std::fabs(castedTensor[i]); @@ -187,6 +187,7 @@ void prepareNetwork(std::shared_ptr<GraphView> graphView) bool containsBatchNorm = false; std::vector<std::shared_ptr<Node>> nodeVector = retrieveNodeVector(graphView); + for (std::shared_ptr<Node> node : nodeVector) if (node->type() == "BatchNorm") { @@ -200,6 +201,12 @@ void prepareNetwork(std::shared_ptr<GraphView> graphView) popSoftMax(graphView); } +// TODO : enhance this by modifying OperatorImpl in "core" ... +static DataType getDataType(std::shared_ptr<Node> node) +{ + auto op = std::static_pointer_cast<OperatorTensor>(node->getOperator()); + return op->getOutput(0)->dataType(); +} // XXX HERE : Branches containing only Seamless nodes should be considered as residual too !!! void insertResidualNodes(std::shared_ptr<GraphView> graphView) @@ -217,6 +224,7 @@ void insertResidualNodes(std::shared_ptr<GraphView> graphView) { std::shared_ptr<Node> parentNode = node->getParent(i); bool parentIsForking = (parentNode->getChildren().size() > 1); + if (parentIsForking) { // temporary verbose ... 
@@ -224,8 +232,9 @@ void insertResidualNodes(std::shared_ptr<GraphView> graphView) Log::info(" ### inserting multiplicative node ..."); std::string residualNodeName = makeUniqueName(parentNode->name() + "_Res", graphView); - std::shared_ptr<Node> residualNode = Scaling(1.0,residualNodeName); - residualNode->getOperator()->setDataType(DataType::Float32); + std::shared_ptr<Node> residualNode = Scaling(1.0, residualNodeName); + + residualNode->getOperator()->setDataType(DataType::Float64); //getDataType(parentNode) residualNode->getOperator()->setBackend("cpu"); graphView->insertParent(node, residualNode, i, 0, 0); @@ -255,7 +264,8 @@ void insertScalingNodes(std::shared_ptr<GraphView> graphView) { std::string scalingNodeName = makeUniqueName(parentNode->name() + "_Scaling", graphView); std::shared_ptr<Node> scalingNode = Scaling(1.0, scalingNodeName); - scalingNode->getOperator()->setDataType(DataType::Float32); + + scalingNode->getOperator()->setDataType(DataType::Float64); // getDataType(parentNode) scalingNode->getOperator()->setBackend("cpu"); if (parentNode->getChildren().size() > 0) @@ -283,7 +293,8 @@ void insertScalingNodes(std::shared_ptr<GraphView> graphView) else { // Log::info(" last node reached ! "); - graphView->addChild(scalingNode); + parentNode->addChild(scalingNode, 0, 0); + graphView->add(scalingNode); } } } @@ -322,7 +333,7 @@ void normalizeParameters(std::shared_ptr<GraphView> graphView) std::vector<std::shared_ptr<Node>> nodeVector = retrieveNodeVector(graphView); - std::map<std::string, float> accumulatedRatios; + std::map<std::string, double> accumulatedRatios; for (std::shared_ptr<Node> node : nodeVector) { accumulatedRatios.insert(std::make_pair(node->name(), 1.0)); @@ -349,8 +360,8 @@ void normalizeParameters(std::shared_ptr<GraphView> graphView) { // Rescale the weight tensor std::shared_ptr<Tensor> weightTensor = getWeightTensor(node); - float scaling = getTensorAbsoluteMax(weightTensor); - float ratio = 1.0 / scaling; + double scaling = getTensorAbsoluteMax(weightTensor); + double ratio = 1.0 / scaling; rescaleTensor(weightTensor, ratio); // Accumulate the ratio @@ -378,10 +389,10 @@ void normalizeParameters(std::shared_ptr<GraphView> graphView) std::vector<std::shared_ptr<Node>> mergingNodes = node->getParents(); // Compute the max ratio ... - float maxRatio = 0; + double maxRatio = 0; for (std::shared_ptr<Node> mergingNode : mergingNodes) { - float merginNodeRatio = accumulatedRatios[mergingNode->name()]; + double merginNodeRatio = accumulatedRatios[mergingNode->name()]; if (merginNodeRatio > maxRatio) maxRatio = merginNodeRatio; } @@ -391,13 +402,14 @@ void normalizeParameters(std::shared_ptr<GraphView> graphView) // Rescale the previous scaling Nodes for (std::shared_ptr<Node> mergingNode : mergingNodes) { - float mergingNodeRatio = accumulatedRatios[mergingNode->name()]; - float rescaling = mergingNodeRatio / maxRatio; + double mergingNodeRatio = accumulatedRatios[mergingNode->name()]; + double rescaling = mergingNodeRatio / maxRatio; std::shared_ptr<Node> scalingNode = getPreviousScalingNode(mergingNode); - float scaling_factor = getScalingFactor(scalingNode); - updateScalingFactor(scalingNode,scaling_factor / rescaling); + double currScalingFactor = getScalingFactor(scalingNode); + updateScalingFactor(scalingNode, currScalingFactor / rescaling); + accumulatedRatios[mergingNode->name()] /= rescaling; // optional ... 
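+ // (the update above only matters if this branch feeds yet another merging node downstream)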
} } @@ -405,9 +417,9 @@ void normalizeParameters(std::shared_ptr<GraphView> graphView) } // XXX TODO : take care of the CUDA backend for this too !!! -std::map<std::string, float> computeRanges(std::shared_ptr<GraphView> graphView, std::shared_ptr<Tensor> inputTensor, bool scalingNodesOnly) +std::map<std::string, double> computeRanges(std::shared_ptr<GraphView> graphView, std::shared_ptr<Tensor> inputTensor, bool scalingNodesOnly) { - std::map<std::string, float> valueRanges; + std::map<std::string, double> valueRanges; SequentialScheduler scheduler(graphView); scheduler.resetScheduling(); @@ -425,7 +437,7 @@ std::map<std::string, float> computeRanges(std::shared_ptr<GraphView> graphView, { std::shared_ptr<Operator> nodeOperator = node->getOperator(); std::shared_ptr<Tensor> valueTensor = std::static_pointer_cast<Tensor> (nodeOperator->getRawOutput(0)); - float range = getTensorAbsoluteMax(valueTensor); + double range = getTensorAbsoluteMax(valueTensor); // Associate the value to the scaling node ... valueRanges.insert(std::make_pair(node->name(), range)); @@ -435,9 +447,9 @@ std::map<std::string, float> computeRanges(std::shared_ptr<GraphView> graphView, return valueRanges; } -std::map<std::string, float> computeRanges(std::shared_ptr<GraphView> graphView, std::vector<std::shared_ptr<Tensor>> inputDataSet, bool scalingNodesOnly, bool useCuda) +std::map<std::string, double> computeRanges(std::shared_ptr<GraphView> graphView, std::vector<std::shared_ptr<Tensor>> inputDataSet, bool scalingNodesOnly, bool useCuda) { - std::map<std::string, float> valueRanges; + std::map<std::string, double> valueRanges; std::set<std::shared_ptr<Node>> nodeSet = graphView->getNodes(); // std::shared_ptr<Node> inputNode = getFirstNode(graphView); @@ -467,7 +479,7 @@ std::map<std::string, float> computeRanges(std::shared_ptr<GraphView> graphView, // Gather the sample ranges ... - std::map<std::string, float> sampleRanges; + std::map<std::string, double> sampleRanges; for (std::shared_ptr<Node> node : nodeSet) { if ((scalingNodesOnly && (node->type() == "Scaling")) || (!scalingNodesOnly && (node->type() != "Producer"))) @@ -478,7 +490,7 @@ std::map<std::string, float> computeRanges(std::shared_ptr<GraphView> graphView, if (useCuda) valueTensor->setBackend("cpu"); - float range = getTensorAbsoluteMax(valueTensor); + double range = getTensorAbsoluteMax(valueTensor); // Associate the value to the scaling node ... sampleRanges.insert(std::make_pair(node->name(), range)); @@ -510,7 +522,7 @@ std::map<std::string, float> computeRanges(std::shared_ptr<GraphView> graphView, return valueRanges; } -void normalizeActivations(std::shared_ptr<GraphView> graphView, std::map<std::string, float> valueRanges) +void normalizeActivations(std::shared_ptr<GraphView> graphView, std::map<std::string, double> valueRanges) { std::shared_ptr<Node> firstNode = getFirstNode(graphView); @@ -518,7 +530,7 @@ void normalizeActivations(std::shared_ptr<GraphView> graphView, std::map<std::st std::vector<std::shared_ptr<Node>> nodeVector = retrieveNodeVector(graphView); - std::map<std::string, float> scalingFactors; + std::map<std::string, double> scalingFactors; for (std::shared_ptr<Node> node : nodeVector) scalingFactors.insert(std::make_pair(node->name(), 1.0)); @@ -549,13 +561,13 @@ void normalizeActivations(std::shared_ptr<GraphView> graphView, std::map<std::st { // retrieve the previous scaling factor ... 
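+ // The node's factor is divided by its observed range, normalized by the range already absorbed upstream, which brings every activation range to 1.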
std::shared_ptr<Node> prevNode = node->getParent(0); - float prevScalingFactor = scalingFactors[prevNode->name()]; + double prevScalingFactor = scalingFactors[prevNode->name()]; // ValueRanges must contains all the scaling nodes !!! - float scalingFactor = valueRanges[node->name()]; + double scalingFactor = valueRanges[node->name()]; - float scaling_factor = getScalingFactor(node); - updateScalingFactor(node, (scaling_factor) / (scalingFactor / prevScalingFactor)); + double currScalingFactor = getScalingFactor(node); + updateScalingFactor(node, currScalingFactor / (scalingFactor / prevScalingFactor)); scalingFactors[node->name()] = scalingFactor; @@ -579,10 +591,10 @@ void normalizeActivations(std::shared_ptr<GraphView> graphView, std::map<std::st std::vector<std::shared_ptr<Node>> mergingNodes = node->getParents(); // Compute the max scaling ... - float maxScaling = 0; + double maxScaling = 0; for (std::size_t i = 0; i < mergingNodes.size(); i++) { - float merginNodeScaling = scalingFactors[mergingNodes[i]->name()]; + double merginNodeScaling = scalingFactors[mergingNodes[i]->name()]; if (merginNodeScaling > maxScaling) { maxScaling = merginNodeScaling; } @@ -592,13 +604,14 @@ void normalizeActivations(std::shared_ptr<GraphView> graphView, std::map<std::st for (std::shared_ptr<Node> mergingNode : mergingNodes) { - float mergingNodeScaling = scalingFactors[mergingNode->name()]; - float rescaling = mergingNodeScaling / maxScaling; + double mergingNodeScaling = scalingFactors[mergingNode->name()]; + double rescaling = mergingNodeScaling / maxScaling; std::shared_ptr<Node> scalingNode = getPreviousScalingNode(mergingNode); //Log::info(" SCALING NODE : {} {}", scalingNode->type(), scalingNode->name()); - float scaling_factor = getScalingFactor(scalingNode); - updateScalingFactor(scalingNode, scaling_factor * rescaling); + + double currScalingFactor = getScalingFactor(scalingNode); + updateScalingFactor(scalingNode, currScalingFactor * rescaling); } } } @@ -699,8 +712,12 @@ std::map<std::string, std::pair<bool, bool>> computeSignMap(std::shared_ptr<Grap { // Thoses nodes always have a single parent std::shared_ptr<Node> parent = node->getParent(0); - signMap[node->name()].first = signMap[parent->name()].second; - signMap[node->name()].second = signMap[node->name()].first; + if (parent) + { + signMap[node->name()].first = signMap[parent->name()].second; + signMap[node->name()].second = signMap[node->name()].first; + } + } } @@ -735,8 +752,8 @@ void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_ AIDGE_THROW_OR_ABORT(std::runtime_error,"Signs optimization can not be applied if network is not fully quantized ..."); } - float signedMax = (1 << (nbBits - 1)) - 1; - float unsignedMax = (1 << nbBits) - 1; + double signedMax = (1 << (nbBits - 1)) - 1; + double unsignedMax = (1 << nbBits) - 1; std::map<std::string, std::pair<bool, bool>> signMap; @@ -771,7 +788,7 @@ void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_ if (nodeHasBias(node)) { bool inputIsUnsigned = signMap[node->name()].first; - float rescaling = inputIsUnsigned ? unsignedMax * signedMax : signedMax * signedMax; + double rescaling = inputIsUnsigned ? 
unsignedMax * signedMax : signedMax * signedMax; std::shared_ptr<Tensor> biasTensor = getBiasTensor(node); @@ -783,7 +800,7 @@ void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_ // Compensate the rescaling using the next Scaling node - float rescaling = 1.0 / signedMax; + double rescaling = 1.0 / signedMax; bool inputIsUnsigned = signMap[node->name()].first; bool outputIsUnsigned = signMap[node->name()].second; @@ -792,13 +809,14 @@ void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_ rescaling *= outputIsUnsigned ? unsignedMax : signedMax; std::shared_ptr<Node> scalingNode = *(node->getChildren().begin()); // Assert if scalingNode is a Scaling ... - float scaling_factor = getScalingFactor(scalingNode); - updateScalingFactor(scalingNode, scaling_factor * rescaling); + + double currScalingFactor = getScalingFactor(scalingNode); + updateScalingFactor(scalingNode, currScalingFactor * rescaling); } if (isMerging(node)) { - float rescaling = 1.0; + double rescaling = 1.0; bool inputIsUnsigned = signMap[node->name()].first; bool outputIsUnsigned = signMap[node->name()].second; @@ -808,9 +826,8 @@ void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_ std::shared_ptr<Node> scalingNode = *(node->getChildren().begin()); // Assert if scalingNode is a Scaling ... - - float scaling_factor = getScalingFactor(scalingNode); - updateScalingFactor(scalingNode,scaling_factor * rescaling); + double currScalingFactor = getScalingFactor(scalingNode); // XXX bad naming + updateScalingFactor(scalingNode, currScalingFactor * rescaling); } // Handle the Scaling Nodes ... @@ -819,18 +836,17 @@ void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_ { if (!noQuant) { - //[!!] replacement of Scaling Node by Quantizer - float currentSF = getScalingFactor(node); + // Replace the Scaling Node by Quantizer - std::shared_ptr<Node> quantizerNode = Quantizer(currentSF, - (signedMax + 1), signedMax, node->name()); - quantizerNode->getOperator()->setDataType(DataType::Float32); + std::shared_ptr<Node> quantizerNode = Quantizer(getScalingFactor(node), -(signedMax + 1), signedMax, node->name()); + quantizerNode->getOperator()->setDataType(DataType::Float64); // getDataType(parentNode) quantizerNode->getOperator()->setBackend("cpu"); - graphView->replace({node}, {quantizerNode}); + graphView->replace({node}, {quantizerNode}); if (optimizeSigns) { - float rescaling = 1.0; + double rescaling = 1.0; bool inputIsUnsigned = signMap[node->name()].first; bool outputIsUnsigned = signMap[node->name()].second; @@ -838,8 +854,8 @@ void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_ rescaling /= inputIsUnsigned ? unsignedMax : signedMax; rescaling *= outputIsUnsigned ? unsignedMax : signedMax; - float scalingFactor = getScalingFactor(quantizerNode); - updateScalingFactor(quantizerNode,scalingFactor * rescaling); + double currScalingFactor = getScalingFactor(quantizerNode); + updateScalingFactor(quantizerNode, currScalingFactor * rescaling); if(outputIsUnsigned) { @@ -854,7 +870,7 @@ void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_ static void insertCompensationNodes(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits) { // XXX Use the signMap to increase the resolution when possible ... 
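+ // The inserted Mul multiplies the data by signedMax while the node's scaling factor is divided by the same amount : the output is unchanged, but the coefficient tensor can later absorb the non power-of-two part of the scaling in the single-shift pass.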
- float signedMax = (1 << (nbBits - 1)) - 1; + double signedMax = (1 << (nbBits - 1)) - 1; std::vector<std::shared_ptr<Node>> nodeVector = retrieveNodeVector(graphView); @@ -874,7 +890,8 @@ static void insertCompensationNodes(std::shared_ptr<GraphView> graphView, std::u std::string mulNodeName = makeUniqueName(node->name() + "_Mul", graphView); std::shared_ptr<Node> mulNode = Mul(mulNodeName); - mulNode->getOperator()->setDataType(DataType::Float32); + + mulNode->getOperator()->setDataType(DataType::Float64); // getDataType(parentNode) mulNode->getOperator()->setBackend("cpu"); graphView->insertParent(node, mulNode, 0, 0, 0); @@ -882,10 +899,11 @@ static void insertCompensationNodes(std::shared_ptr<GraphView> graphView, std::u // create and insert the producer node std::shared_ptr<Tensor> inputTensor = std::static_pointer_cast<Tensor> (mulNode->getOperator()->getRawInput(0)); - std::shared_ptr<Tensor> coeffTensor = std::make_shared<Tensor>(); - coeffTensor->setDataType(DataType::Float32); - coeffTensor->setBackend("cpu"); + + coeffTensor->setDataType(DataType::Float64); // getDataType(parentNode) + coeffTensor->setBackend("cpu"); + coeffTensor->resize(inputTensor->dims()); fillTensor(coeffTensor, 1); @@ -896,8 +914,9 @@ static void insertCompensationNodes(std::shared_ptr<GraphView> graphView, std::u // rescale the coeffs and edit scaling factor fillTensor(coeffTensor, signedMax); - float sf = getScalingFactor(node); - updateScalingFactor(node,sf/signedMax); + + double currScalingFactor = getScalingFactor(node); // XXX bad naming ! + updateScalingFactor(node, currScalingFactor / signedMax); // TODO : double check this !!! //std::cout << getTensorAbsoluteMax(coeffTensor) << std::endl; @@ -906,26 +925,24 @@ static void insertCompensationNodes(std::shared_ptr<GraphView> graphView, std::u } } -void - -performSingleShiftApproximation(std::shared_ptr<GraphView> graphView, bool noQuant) +void performSingleShiftApproximation(std::shared_ptr<GraphView> graphView, bool noQuant) { std::vector<std::shared_ptr<Node>> nodeVector = retrieveNodeVector(graphView); for (std::shared_ptr<Node> node : nodeVector) { - //Use A meatoperator of type Scaling of MulCompensation instead + // Use a metaoperator of type Scaling or MulCompensation instead if (isAffine(node) || (node->type() == "Mul")) { std::shared_ptr<Node> scalingNode = (*node->getChildren().begin()); - float base = getScalingFactor(scalingNode); + double base = getScalingFactor(scalingNode); - float approx = std::pow(2, std::ceil(std::log2(base))); + double approx = std::pow(2, std::ceil(std::log2(base))); updateScalingFactor(scalingNode,approx); - float ratio = base / approx; + double ratio = base / approx; std::shared_ptr<Tensor> weightTensor = getWeightTensor(node); rescaleTensor(weightTensor, ratio); @@ -947,19 +964,49 @@ static void printScalingFactors(std::shared_ptr<GraphView> graphView) { Log::info(" === SCALING FACTORS === "); for (auto node : retrieveNodeVector(graphView)) - if (node->type() == "Scaling") + if (node->type() == "Scaling" || node->type() == "Quantizer") { - float factor = getScalingFactor(node); - Log::info(" {:.6f} ({})", factor, node->name()); + double scalingFactor = getScalingFactor(node); + Log::info(" {:.6f} ({})", scalingFactor, node->name()); } } +static void setupDataType(std::shared_ptr<GraphView> graphView, std::vector<std::shared_ptr<Tensor>> inputDataSet, DataType dataType) +{ + graphView->setDataType(dataType); + + for (auto inputNode : graphView->inputNodes()) { + auto op = 
std::static_pointer_cast<OperatorTensor>(inputNode->getOperator()); + auto inputTensor = op->getInput(0); + if (inputTensor) + inputTensor->setDataType(dataType); + } + + for (auto tensor : inputDataSet) + tensor->setDataType(dataType); +} + +static void printRanges(std::shared_ptr<GraphView> graphView, std::map<std::string, double> valueRanges) +{ + SequentialScheduler scheduler(graphView); + scheduler.resetScheduling(); + scheduler.generateScheduling(); + + auto scheduling = scheduler.getStaticScheduling(); + for (auto node : scheduling) + if (node->type() == "Scaling") + fmt::println("{} range = {}", node->name(), valueRanges[node->name()]); +} + void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits, std::vector<std::shared_ptr<Tensor>> inputDataSet, Clipping clippingMode, bool noQuant, bool optimizeSigns, bool singleShift, bool useCuda, bool verbose) { Log::info(" === QUANT PTQ 0.2.21 === "); graphView->setBackend("cpu"); + DataType initialDataType = (inputDataSet[0])->dataType(); + setupDataType(graphView, inputDataSet, DataType::Float64); + if (!checkArchitecture(graphView)) return; @@ -975,11 +1022,17 @@ void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits, normalizeParameters(graphView); Log::info(" Computing the value ranges ..."); - std::map<std::string, float> valueRanges = computeRanges(graphView, inputDataSet, true, useCuda); + std::map<std::string, double> valueRanges = computeRanges(graphView, inputDataSet, true, useCuda); + + //std::cout << " === RANGES (BEFORE ADJUST) ===" << std::endl; + //printRanges(graphView, valueRanges); Log::info(" Optimizing the clipping values ..."); valueRanges = adjustRanges(clippingMode, valueRanges, nbBits, graphView, inputDataSet, useCuda, verbose); + //std::cout << " === RANGES (AFTER ADJUST) ===" << std::endl; + //printRanges(graphView, valueRanges); + Log::info(" Normalizing the activations ..."); normalizeActivations(graphView, valueRanges); @@ -992,32 +1045,39 @@ void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::uint8_t nbBits, insertCompensationNodes(graphView, nbBits); Log::info(" Performing the Single-Shift approximation ..."); - performSingleShiftApproximation(graphView,noQuant); + performSingleShiftApproximation(graphView, noQuant); } - + if (verbose) printScalingFactors(graphView); - Log::info(" Resetting the scheduler ..."); - SequentialScheduler scheduler(graphView); - scheduler.resetScheduling(); + //std::cout << " === SCALINGS (BEFORE CAST) ===" << std::endl; + //printScalingFactors(graphView); + setupDataType(graphView, inputDataSet, initialDataType); if (useCuda) graphView->setBackend("cuda"); + //std::cout << " === SCALINGS (AFTER CAST) ===" << std::endl; + //printScalingFactors(graphView); + + Log::info(" Resetting the scheduler ..."); + SequentialScheduler scheduler(graphView); + scheduler.resetScheduling(); + Log::info(" Network is quantized !"); } -std::map<std::string, float> getWeightRanges(std::shared_ptr<GraphView> graphView) +std::map<std::string, double> getWeightRanges(std::shared_ptr<GraphView> graphView) { - std::map<std::string, float> weightRanges; + std::map<std::string, double> weightRanges; for (std::shared_ptr<Node> node : graphView->getNodes()) { if (isAffine(node)) { std::shared_ptr<Tensor> weightTensor = getWeightTensor(node); - float range = getTensorAbsoluteMax(weightTensor); + double range = getTensorAbsoluteMax(weightTensor); weightRanges.insert(std::make_pair(node->name(), range)); } } @@ -1038,7 +1098,7 @@ void 
clearBiases(std::shared_ptr<GraphView> graphView) void devPTQ(std::shared_ptr<GraphView> graphView) { for (std::shared_ptr<Node> node : graphView->getNodes()) - std::cout << " UUU : " << node->name() << std::endl; + fmt::println(" UUU : {}", node->name()); } } diff --git a/src/PTQ/PTQMetaOps.cpp b/src/PTQ/PTQMetaOps.cpp index 89590cbe6aeaf036a50902d5b63c1b46044d2c7f..77018c23aee2f1ef6f430389393fd35e97baa0f6 100644 --- a/src/PTQ/PTQMetaOps.cpp +++ b/src/PTQ/PTQMetaOps.cpp @@ -11,8 +11,8 @@ #include "aidge/quantization/PTQ/PTQMetaOps.hpp" -#include <array> #include <memory> +#include <string> #include <utility> //Operator @@ -28,100 +28,125 @@ #include "aidge/utils/Types.h" #include "aidge/operator/Identity.hpp" #include "aidge/data/Tensor.hpp" -std::shared_ptr<Aidge::Node> Quantizer(float scalingFactor, float clip_min,float clip_max,const std::string& name) +#include "aidge/operator/OperatorTensor.hpp" +#include "aidge/utils/Log.hpp" + + +namespace Aidge +{ + +std::shared_ptr<Node> Quantizer(double scalingFactor, double clipMin, double clipMax, const std::string& name) { - std::shared_ptr<Aidge::Tensor> ScalingFactorTensorAttached = std::make_shared<Aidge::Tensor>(Aidge::Array1D<float, 1>{scalingFactor}); - std::shared_ptr<Aidge::Node> mul_node = Aidge::Mul((!name.empty()) ? name + "_MulQuant" : ""); - - std::shared_ptr<Aidge::Node> producer_scaling_factor = addProducer(mul_node,1,{1},"ScalingFactor"); - producer_scaling_factor ->getOperator()->setOutput(0,ScalingFactorTensorAttached); - - std::shared_ptr<Aidge::Node> clip_node = Aidge::Clip((!name.empty()) ? name + "_ClipQuant" : "",clip_min,clip_max); - - std::shared_ptr<Aidge::GraphView> graph = Aidge::Sequential({ - mul_node, - Aidge::Round((!name.empty()) ? name + "_RoundQuant" : ""), - clip_node}); - - std::shared_ptr<Aidge::GraphView> connectedGV = getConnectedGraphView(mul_node); - std::shared_ptr<Aidge::Node> metaopNode = MetaOperator("Quantizer",connectedGV,{},name); - return metaopNode; + // create the nodes + + std::shared_ptr<Node> mulNode = Mul((!name.empty()) ? name + "_MulQuant" : ""); + std::shared_ptr<Node> roundNode = Round((!name.empty()) ? name + "_RoundQuant" : ""); + std::shared_ptr<Node> clipNode = Clip((!name.empty()) ? name + "_ClipQuant" : "", clipMin, clipMax); + + // connect the scaling factor producer + + std::shared_ptr<Tensor> scalingFactorTensor = std::make_shared<Tensor>(Array1D<double, 1> {scalingFactor}); + std::shared_ptr<Node> scalingFactorProducer = addProducer<1>(mulNode, 1, {1}, "ScalingFactor"); + scalingFactorProducer->getOperator()->setOutput(0, scalingFactorTensor); + + // create the metaop graph + + std::shared_ptr<GraphView> graphView = Sequential({mulNode, roundNode, clipNode}); + std::shared_ptr<GraphView> connectedGraphView = getConnectedGraphView(mulNode); // XXX why not use the graphView ??? + + // return the metaop + + std::shared_ptr<Node> metaopNode = MetaOperator("Quantizer", connectedGraphView, {}, name); // XXX alternative prototype + + return metaopNode; } -std::shared_ptr<Aidge::Node> Scaling(float scalingFactor,const std::string& name) +std::shared_ptr<Node> Scaling(double scalingFactor, const std::string& name) { - std::shared_ptr<Aidge::Tensor> ScalingFactorTensorAttached = std::make_shared<Aidge::Tensor>(Aidge::Array1D<float, 1>{scalingFactor}); - - std::shared_ptr<Aidge::Node> mul_node = Aidge::Mul((!name.empty()) ? 
name + "_Scaling" : ""); - - std::shared_ptr<Aidge::Node> producer_scaling_factor = addProducer(mul_node,1,{1},"ScalingFactor"); - producer_scaling_factor->getOperator()->setOutput(0, ScalingFactorTensorAttached); - std::shared_ptr<Aidge::GraphView> graph = Aidge::Sequential({mul_node}); - std::shared_ptr<Aidge::GraphView> connectedGV = getConnectedGraphView(mul_node); - Aidge::NodePtr metaopNode = MetaOperator("Scaling",connectedGV,{},name); + std::shared_ptr<Tensor> scalingFactorTensor = std::make_shared<Tensor>(Array1D<double, 1> {scalingFactor}); + + std::shared_ptr<Node> mulNode = Mul((!name.empty()) ? name + "_Scaling" : ""); + + std::shared_ptr<Node> scalingFactorProducer = addProducer<1>(mulNode, 1, {1}, "ScalingFactor"); + scalingFactorProducer->getOperator()->setOutput(0, scalingFactorTensor); + + std::shared_ptr<GraphView> graphView = Sequential({mulNode}); + std::shared_ptr<GraphView> connectedGraphView = getConnectedGraphView(mulNode); + + NodePtr metaopNode = MetaOperator("Scaling", connectedGraphView, {}, name); + return metaopNode; } -bool updateScalingFactor(std::shared_ptr<Aidge::Node> MetaOpNode, float newScalingFactor) +static std::shared_ptr<Node> getSubNode(std::shared_ptr<GraphView> graphView, std::string nodeType) { - if(MetaOpNode->type() != "Scaling" && MetaOpNode->type() != "Quantizer") - { - AIDGE_ASSERT("Cannot use updatePTQMetaOpsScalingFactor on Node of type {}", MetaOpNode->type()); - } - std::shared_ptr<Aidge::Tensor> newScalingFactorTensorAttached = std::make_shared<Aidge::Tensor>(Aidge::Array1D<float, 1>{newScalingFactor}); - std::shared_ptr<Aidge::MetaOperator_Op> MetaOp = std::static_pointer_cast<Aidge::MetaOperator_Op>(MetaOpNode->getOperator()); - std::set<Aidge::NodePtr> Meta_Op_Node_List = MetaOp->getMicroGraph()->getNodes(); //List of Nodes inside PTQ Metaop Node - for(std::shared_ptr<Aidge::Node> node : Meta_Op_Node_List) - { - if(node->type() == "Mul") - { - node->input(1).first->getOperator()->setOutput(0, newScalingFactorTensorAttached); - return true; - } - } - AIDGE_ASSERT("Invalid PTQ MetaOperator, no Mul node found inside node of type {}",MetaOpNode->type()); - return false; + std::shared_ptr<Node> mulNode = nullptr; + for(std::shared_ptr<Node> node : graphView->getNodes()) + if (node->type() == nodeType) + mulNode = node; + + return mulNode; } -float getScalingFactor(std::shared_ptr<Aidge::Node> MetaOpNode) + +void updateScalingFactor(std::shared_ptr<Node> metaOpNode, double scalingFactor) { - if(MetaOpNode->type() != "Scaling" && MetaOpNode->type() != "Quantizer") - { - AIDGE_ASSERT("Cannot use getPTQMetaOpsScalingFactor on Node of type {}",MetaOpNode->type()); - return -1; + if(metaOpNode->type() != "Scaling" && metaOpNode->type() != "Quantizer") + Log::warn(" Cannot update the scaling factor on Node of type {}", metaOpNode->type()); + + std::shared_ptr<Tensor> scalingFactorTensor = std::make_shared<Tensor>(Array1D<double, 1> {scalingFactor}); + + std::shared_ptr<MetaOperator_Op> metaOp = std::static_pointer_cast<MetaOperator_Op>(metaOpNode->getOperator()); + + std::shared_ptr<Node> mulNode = getSubNode(metaOp->getMicroGraph(), "Mul"); + + if (!mulNode) + Log::warn(" Invalid PTQ MetaOperator, no Mul node found inside ! 
"); + + mulNode->input(1).first->getOperator()->setOutput(0, scalingFactorTensor); +} + +double getScalingFactor(std::shared_ptr<Node> MetaOpNode) +{ + if (MetaOpNode->type() != "Scaling" && MetaOpNode->type() != "Quantizer") { + Log::warn(" Cannot get the scaling factor on Node of type {}", MetaOpNode->type()); + return 0; } - std::shared_ptr<Aidge::MetaOperator_Op> MetaOp = std::static_pointer_cast<Aidge::MetaOperator_Op>(MetaOpNode->getOperator()); - std::set<Aidge::NodePtr> Meta_Op_Node_List = MetaOp->getMicroGraph()->getNodes(); //List of Nodes inside PTQ Metaop Node - for(std::shared_ptr<Aidge::Node> node : Meta_Op_Node_List) - { - if(node->type() == "Mul") - { - std::shared_ptr<Aidge::Data> MulInput1Data = node->input(1).first->getOperator()->getRawOutput(0); - void* RawInputScalingFactor = std::static_pointer_cast<Aidge::Tensor>(MulInput1Data)->getImpl()->rawPtr(); - return (*(static_cast<float*>(RawInputScalingFactor))); - } + + std::shared_ptr<MetaOperator_Op> metaOp = std::static_pointer_cast<MetaOperator_Op>(MetaOpNode->getOperator()); + + std::shared_ptr<Node> mulNode = getSubNode(metaOp->getMicroGraph(), "Mul"); + + if (!mulNode) { + Log::warn(" Invalid PTQ MetaOperator, no Mul found inside node of type {}", MetaOpNode->type()); + return 0; } - AIDGE_ASSERT("Invalid PTQ MetaOperator, no Mul node found inside node of type {}",MetaOpNode->type()); - return -1; + + auto scalingFactorTensor = std::static_pointer_cast<OperatorTensor>(mulNode->getOperator())->getInput(1); + std::shared_ptr<Tensor> fallback; + const Tensor& localTensor = scalingFactorTensor->refCastFrom(fallback, DataType::Float64, "cpu"); + + return localTensor.get<double>(0); } -bool setClipRange(std::shared_ptr<Aidge::Node> QuantizerNode,float min, float max) + + +void setClipRange(std::shared_ptr<Node> quantizerNode, double min, double max) { - if(QuantizerNode->type() != "Quantizer") - { - AIDGE_ASSERT("Cannot use setQuantizerClipRange on Node of type {}",QuantizerNode->type()); - return false; + if (quantizerNode->type() != "Quantizer") { + Log::warn(" Cannot set the clipping range on Node of type {}", quantizerNode->type()); + return; } - std::shared_ptr<Aidge::MetaOperator_Op> MetaOp = std::static_pointer_cast<Aidge::MetaOperator_Op>(QuantizerNode->getOperator()); - std::set<Aidge::NodePtr> Meta_Op_Node_List = MetaOp->getMicroGraph()->getNodes(); //List of Node inside - for(std::shared_ptr<Aidge::Node> node : Meta_Op_Node_List) - { - if(node->type() == "Clip") - { - std::shared_ptr<Aidge::Clip_Op> Clip_Node_Op = std::static_pointer_cast<Aidge::Clip_Op>(node->getOperator()); - Clip_Node_Op->max() = max; - Clip_Node_Op->min() = min; - return true; - } + + std::shared_ptr<MetaOperator_Op> metaOp = std::static_pointer_cast<MetaOperator_Op> (quantizerNode->getOperator()); + + std::shared_ptr<Node> clipNode = getSubNode(metaOp->getMicroGraph(), "Clip"); + + if (!clipNode) { + Log::warn(" Invalid PTQ MetaOperator, no Clip found inside node of type {}", quantizerNode->type()); + return; } - AIDGE_ASSERT("Invalid MetaOperator Quantizer, no clip node found inside Node of type {}",QuantizerNode->type()); - return false; + + std::shared_ptr<Clip_Op> clipOp = std::static_pointer_cast<Clip_Op>(clipNode->getOperator()); + clipOp->max() = max; + clipOp->min() = min; +} } \ No newline at end of file diff --git a/src/QAT/QAT_FixedQ.cpp b/src/QAT/QAT_FixedQ.cpp index d22074f15982f9fa0e92cfc4425af32e584db8cd..9160b4ae6add5ae0347e008962956dc90c3a36fd 100644 --- a/src/QAT/QAT_FixedQ.cpp +++ b/src/QAT/QAT_FixedQ.cpp @@ -91,7 +91,7 
@@ static std::map<std::string, float> collectInputStats(std::shared_ptr<GraphView> const auto op = std::static_pointer_cast<FixedQ_Op>(node->getOperator()); float inputStd = getTensorStd(op->getInput(0)); inputStats.insert(std::make_pair(node->name(), inputStd)); - std::cout << node->name() << " -> " << inputStd << std::endl; + fmt::println("{} -> {}", node->name(), inputStd); } } @@ -108,7 +108,7 @@ static std::map<std::string, float> collectParamStats(std::shared_ptr<GraphView> const auto op = std::static_pointer_cast<FixedQ_Op>(node->getOperator()); float paramStd = getTensorStd(op->getInput(1)); paramStats.insert(std::make_pair(node->name(), paramStd)); - std::cout << node->name() << " -> " << paramStd << std::endl; + fmt::println("{} -> {}", node->name(), paramStd); } } @@ -156,7 +156,7 @@ void QuantFixedQ::devQAT(std::shared_ptr<GraphView> graphView) scheduler.generateScheduling(); auto s = scheduler.getStaticScheduling(); for (std::shared_ptr<Node> node : s) - std::cout << " name : " << node->name() << std::endl; + fmt::println(" name : {}", node->name()); } } \ No newline at end of file diff --git a/src/QAT/QAT_LSQ.cpp b/src/QAT/QAT_LSQ.cpp index 38c818214361d007218b1300b73d60b3124e2b7d..9b51e846df498a9303b7373ae1c86d4b007a96f0 100644 --- a/src/QAT/QAT_LSQ.cpp +++ b/src/QAT/QAT_LSQ.cpp @@ -125,7 +125,7 @@ static std::map<std::string, float> collectInputStats(std::shared_ptr<GraphView> const auto op = std::static_pointer_cast<LSQ_Op>(node->getOperator()); float inputAbsMean = getTensorAbsMean(op->getInput(0)); inputStats.insert(std::make_pair(node->name(), inputAbsMean)); - std::cout << node->name() << " -> " << inputAbsMean << std::endl; + fmt::println("{} -> {}", node->name(), inputAbsMean); } } @@ -148,7 +148,7 @@ static std::map<std::string, float> collectParamStats(std::shared_ptr<GraphView> const auto op = std::static_pointer_cast<LSQ_Op>(node->getOperator()); float paramAbsMean = getTensorAbsMean(op->getInput(1)); paramStats.insert(std::make_pair(node->name(), paramAbsMean)); - std::cout << node->name() << " -> " << paramAbsMean << std::endl; + fmt::println("{} -> {}", node->name(), paramAbsMean); } } diff --git a/src/backend/cuda/operator/LSQImpl_CUDA_kernels.cu b/src/backend/cuda/operator/LSQImpl_CUDA_kernels.cu index 0d5490946af3a4ab172bafc13d9af8c191695b84..96065e41376a1facee8a05260f33a1ce68ceb92a 100644 --- a/src/backend/cuda/operator/LSQImpl_CUDA_kernels.cu +++ b/src/backend/cuda/operator/LSQImpl_CUDA_kernels.cu @@ -84,10 +84,11 @@ __global__ void LSQImpl_cuda_backward_kernel_(const std::size_t inputLength, const GI fullPrecScale = input[i] / stepSize[0]; /*****************************Data/Weights Gradient Computation************************/ - // STE method is simply apply: - grad_input[i] = grad_output[i]*( (fullPrecScale <= static_cast<GI>(range.first)) ? GI(0.0) : - (fullPrecScale >= static_cast<GI>(range.second)) ? GI(0.0) : - GI(1.0)); + // STE method is simply applied : + // (we accumulate the gradient instead of replacing it) + grad_input[i] += grad_output[i] * ((fullPrecScale <= static_cast<GI>(range.first)) ? GI(0.0) : + (fullPrecScale >= static_cast<GI>(range.second)) ? 
GI(0.0) : + GI(1.0)); /*****************************Step Size Gradient Computation*************************/ GI qData = fullPrecScale; @@ -142,7 +143,9 @@ void Aidge::LSQImpl_cuda_backward_kernel(const std::size_t inputLength, // for simplicity and foolproof-ness thrust::device_ptr<GI> grad_workspacePtr(grad_workspace); thrust::device_ptr<GI> grad_stepSizePtr(grad_stepSize); - grad_stepSizePtr[0] = thrust::reduce(grad_workspacePtr, grad_workspacePtr + inputLength, GI(0.0)); + + // We accumulate the stepSize gradient instead of replacing it + grad_stepSizePtr[0] += thrust::reduce(grad_workspacePtr, grad_workspacePtr + inputLength, GI(0.0)); //printf(" step grad = %f \n", (float) grad_stepSizePtr[0]); diff --git a/src/operator/FixedQ.cpp b/src/operator/FixedQ.cpp index 879174032bfcf5b2958b0950d0ed7410ba83331c..9828ce98f4918b3d2336c57fe018c9129804cf01 100644 --- a/src/operator/FixedQ.cpp +++ b/src/operator/FixedQ.cpp @@ -20,6 +20,17 @@ const std::string Aidge::FixedQ_Op::Type = "FixedQ"; +Aidge::FixedQ_Op::FixedQ_Op(const Aidge::FixedQ_Op& op) + : OperatorTensor(op), + mAttributes(op.mAttributes) +{ + if (op.mImpl){ + SET_IMPL_MACRO(FixedQ_Op, *this, op.backend()); + }else{ + mImpl = nullptr; + } +} + std::set<std::string> Aidge::FixedQ_Op::getAvailableBackends() const { return Registrar<FixedQ_Op>::getKeys(); } @@ -28,3 +39,12 @@ void Aidge::FixedQ_Op::setBackend(const std::string& name, DeviceIdx_t device) { SET_IMPL_MACRO(FixedQ_Op, *this, name); mOutputs[0]->setBackend(name, device); } + +//////////////////////////////////////////////////////////////////////////////// + +std::shared_ptr<Aidge::Node> Aidge::FixedQ(std::size_t nbBits, + float span, + bool isOutputUnsigned, + const std::string& name) { + return std::make_shared<Node>(std::make_shared<FixedQ_Op>(nbBits, span, isOutputUnsigned), name); +} \ No newline at end of file diff --git a/src/operator/SAT/DoReFa.cpp b/src/operator/SAT/DoReFa.cpp index b6124bad0e5f04c8e22e2d16c48dd4fe5de7945a..426e330e7f8426d256ca76a843548a91a62b036a 100644 --- a/src/operator/SAT/DoReFa.cpp +++ b/src/operator/SAT/DoReFa.cpp @@ -17,13 +17,38 @@ #include "aidge/data/Tensor.hpp" #include "aidge/utils/Types.h" -const std::string Aidge::DoReFa_Op::Type = "DoReFa"; +namespace Aidge { -std::set<std::string> Aidge::DoReFa_Op::getAvailableBackends() const { +const std::string DoReFa_Op::Type = "DoReFa"; + +DoReFa_Op::DoReFa_Op(const DoReFa_Op& op) + : OperatorTensor(op), + mAttributes(op.mAttributes) +{ + if (op.mImpl) { + SET_IMPL_MACRO(DoReFa_Op, *this, op.backend()); + } else { + mImpl = nullptr; + } +} + +std::shared_ptr<Operator> DoReFa_Op::clone() const { + return std::make_shared<DoReFa_Op>(*this); +} + +std::set<std::string> DoReFa_Op::getAvailableBackends() const { return Registrar<DoReFa_Op>::getKeys(); } -void Aidge::DoReFa_Op::setBackend(const std::string& name, DeviceIdx_t device) { +void DoReFa_Op::setBackend(const std::string& name, DeviceIdx_t device) { SET_IMPL_MACRO(DoReFa_Op, *this, name); mOutputs[0]->setBackend(name, device); -} \ No newline at end of file +} + +//////////////////////////////////////////////////////////////////////////////// + +std::shared_ptr<Node> DoReFa(size_t range, DoReFaMode mode, const std::string& name) { + return std::make_shared<Node>(std::make_shared<DoReFa_Op>(range, mode), name); +} + +} // namespace Aidge \ No newline at end of file diff --git a/src/operator/SAT/TanhClamp.cpp b/src/operator/SAT/TanhClamp.cpp index 2b8d63d7136c45589cba92018d2ecafe17d54e4e..a03fc7d3c602c3ff86551da19defe083a5cc6e3a 100644 --- 
a/src/operator/SAT/TanhClamp.cpp +++ b/src/operator/SAT/TanhClamp.cpp @@ -20,6 +20,20 @@ const std::string Aidge::TanhClamp_Op::Type = "TanhClamp"; +Aidge::TanhClamp_Op::TanhClamp_Op(const Aidge::TanhClamp_Op& op) + : OperatorTensor(op) +{ + if (op.mImpl) { + SET_IMPL_MACRO(TanhClamp_Op, *this, op.backend()); + } else { + mImpl = nullptr; + } +} + +std::shared_ptr<Aidge::Operator> Aidge::TanhClamp_Op::clone() const { + return std::make_shared<TanhClamp_Op>(*this); +} + bool Aidge::TanhClamp_Op::forwardDims(bool /*allowDataDependency*/) { if (inputsAssociated()) { @@ -40,5 +54,11 @@ void Aidge::TanhClamp_Op::setBackend(const std::string& name, DeviceIdx_t device mOutputs[0]->setBackend(name, device); // Scale output is always on CPU for now - mOutputs[1]->setBackend("cpu"); // XXX why ? + mOutputs[1]->setBackend("cpu"); // XXX why ? +} + +//////////////////////////////////////////////////////////////////////////////// + +std::shared_ptr<Aidge::Node> Aidge::TanhClamp(const std::string& name) { + return std::make_shared<Node>(std::make_shared<TanhClamp_Op>(), name); } \ No newline at end of file diff --git a/src/recipes/QuantRecipes.cpp b/src/recipes/QuantRecipes.cpp index 562948ca6d145278b1b6e564af5a13f3271ca3b4..6e1dcdb1b64c0a1e94c74ce66cb71f1a458bca35 100644 --- a/src/recipes/QuantRecipes.cpp +++ b/src/recipes/QuantRecipes.cpp @@ -59,7 +59,7 @@ void insertBatchNormNodes(std::shared_ptr<GraphView> graphView) { std::shared_ptr<Conv_Op<2>> convOperator = std::static_pointer_cast<Conv_Op<2>> (parentNode->getOperator()); int nb_channels = convOperator->getInput(1)->dims()[0]; - std::cout << " NB CHANNELS = " << nb_channels << std::endl; // TODO : remove this ... + fmt::println(" NB CHANNELS = {}", nb_channels); // TODO : remove this ... std::string batchnormNodeName = makeUniqueName(parentNode->name() + "_BN", graphView); std::shared_ptr<Node> batchnormNode = BatchNorm<2>(nb_channels, 1e-5, 0.1, false, batchnormNodeName); diff --git a/unit_tests/CMakeLists.txt b/unit_tests/CMakeLists.txt index 9d9f81516b0cd2611484ee9e3e06e838833200db..cfdbf0ad393cca9daa300cddd807a7667523d2a0 100644 --- a/unit_tests/CMakeLists.txt +++ b/unit_tests/CMakeLists.txt @@ -1,12 +1,23 @@ -Include(FetchContent) +# Catch2 configuration +set(CATCH2_MIN_VERSION 3.3.0) -FetchContent_Declare( - Catch2 - GIT_REPOSITORY https://github.com/catchorg/Catch2.git - GIT_TAG v3.0.1 # or a later release -) +# Try to find system installed Catch2 +find_package(Catch2 ${CATCH2_MIN_VERSION} QUIET) -FetchContent_MakeAvailable(Catch2) +if(NOT Catch2_FOUND) + message(STATUS "Catch2 not found in system, retrieving from git") + Include(FetchContent) + + FetchContent_Declare( + Catch2 + GIT_REPOSITORY https://github.com/catchorg/Catch2.git + GIT_TAG devel # development branch, used only when no system Catch2 is found + ) + FetchContent_MakeAvailable(Catch2) + message(STATUS "Fetched Catch2 version ${Catch2_VERSION}") +else() + message(STATUS "Using system Catch2 version ${Catch2_VERSION}") +endif() file(GLOB_RECURSE src_files "*.cpp") diff --git a/unit_tests/Test_QuantPTQ.cpp b/unit_tests/Test_QuantPTQ.cpp index 36377e84e27e9de9cc28de323cfba5a44cb80904..e7211ce4092f789c8c6263671ad236b97934ffbb 100644 --- a/unit_tests/Test_QuantPTQ.cpp +++ b/unit_tests/Test_QuantPTQ.cpp @@ -1,21 +1,19 @@ -// #include <catch2/catch_test_macros.hpp> - -// #include "aidge/data/Tensor.hpp" -// #include "aidge/backend/TensorImpl.hpp" -// #include "aidge/backend/cpu.hpp" -// #include "aidge/operator/Conv.hpp" -// #include "aidge/operator/Scaling.hpp" -// #include "aidge/operator/GenericOperator.hpp" -// 
#include "aidge/graph/GraphView.hpp" -// #include "aidge/QuantPTQ.hpp" -// #include "aidge/scheduler/Scheduler.hpp" -// #include "aidge/hook/OutputRange.hpp" -// #include "aidge/operator/Producer.hpp" - -// #include <unordered_map> - -// using namespace Aidge; -// //using namespace Aidge_HELPER; +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. + * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#include <catch2/catch_test_macros.hpp> + +TEST_CASE("[tmp] basic test") { + REQUIRE(true == true); +} // TEST_CASE("[aidge_module_template/ref_cpp/quantization] PTQ : Quantize Graph") { @@ -79,7 +77,7 @@ // std::shared_ptr<Tensor> myInput = // std::make_shared<Tensor>( -// Array4D<float,2,3,5,5> { +// Array4D<float,2,3,5,5> { // { // { // {{ 0., 1., 2., 3., 4.}, @@ -124,7 +122,7 @@ // ); // auto dataProvider = Producer(myInput, "dataProvider"); -// Tensor myOutput = Array4D<float,2,4,3,3> { +// Tensor myOutput = Array4D<float,2,4,3,3> { // { // { // {{ 15226., 15577., 15928.}, @@ -188,9 +186,9 @@ // "%f" // "\n", // max_output_conv); - + // } - + // float max_output_relu = std::static_pointer_cast<OutputRange>(myReLU1->getOperator()->getHook("output_range"))->getOutput(0); // if(verbose) { // printf("[hook] OutputRange(forward) :: ReLU output max: " @@ -222,10 +220,10 @@ // "\n", // (nodePtr->type()).c_str(), (nodePtr->name()).c_str()); // } -// } - +// } + // SequentialScheduler scheduler_v2(g1); - + // scheduler_v2.forward(); // scheduler_v2.generateScheduling(false); // std::vector<std::shared_ptr<Node>> ordered_graph_view_v2 = scheduler_v2.getStaticScheduling(); @@ -242,7 +240,7 @@ // "\n", // (nodePtr->type()).c_str(), (nodePtr->name()).c_str()); // } -// } +// } // } \ No newline at end of file diff --git a/version.txt b/version.txt index 69367fd08f3ce302151ebc9779193d517dfa32de..9e11b32fcaa96816319e5d0dcff9fb2873f04061 100644 --- a/version.txt +++ b/version.txt @@ -1,2 +1 @@ -0.3.0 - +0.3.1