diff --git a/.gitignore b/.gitignore index 9fbfccca6dfda997d8a0dbfc4b373590feeecad8..18f1583283b848051e2915895eae6a62751a9d0b 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,6 @@ +# common +.cache + # C++ Build build*/ install*/ @@ -6,10 +9,13 @@ install*/ .vscode # Python +aidge_quantization/_version.py *.so __pycache__ *.pyc *.egg-info +aidge_quantization/_version.py +wheelhouse/* # Mermaid *.mmd @@ -18,4 +24,4 @@ __pycache__ xml*/ # ONNX -*.onnx \ No newline at end of file +*.onnx diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml new file mode 100644 index 0000000000000000000000000000000000000000..973721488ec36598930899770a6885aeef0ae8c0 --- /dev/null +++ b/.gitlab-ci.yml @@ -0,0 +1,76 @@ +############################################################################### +# Aidge Continuous Integration and Deployment # +# # +############################################################################### + +stages: + - static_analysis + - build + - test + - coverage + - release + - deploy + +include: + - project: 'eclipse/aidge/gitlab_shared_files' + ref: 'main' + file: + #Â choose which jobs to run by including the corresponding files. 
+ - '.gitlab/ci/ubuntu_cpp.gitlab-ci.yml' + + - '.gitlab/ci/ubuntu_python.gitlab-ci.yml' + - '.gitlab/ci/release/cibuildwheel_ubuntu.gitlab-ci.yml' + + - '.gitlab/ci/windows_cpp.gitlab-ci.yml' + + - '.gitlab/ci/windows_python.gitlab-ci.yml' + - '.gitlab/ci/release/cibuildwheel_windows.gitlab-ci.yml' + + +test:ubuntu_python: + before_script: + - !reference [.retrieve_deps:apt, script] + - source venv/bin/activate + - python -m pip install numpy unittest-xml-reporting + - python -m pip list + - DEPS_NAMES=("aidge_onnx") + - DEPENDENCY_JOB="build:ubuntu_python" + - !reference [.ubuntu:download:artifacts, script] + +coverage:ubuntu_python: + before_script: + - !reference [.retrieve_deps:apt, script] + - source venv/bin/activate + - python -m pip install numpy coverage + - DEPS_NAMES=("aidge_onnx") + - DEPENDENCY_JOB="build:ubuntu_python" + - !reference [.ubuntu:download:artifacts, script] + +release:pip:ubuntu: + before_script: + - !reference [.retrieve_deps:apt, script] + - DEPS_NAMES=("aidge_core" "aidge_backend_cpu" "aidge_onnx") + - DEPENDENCY_JOB="build:ubuntu_python" + - !reference [.ubuntu:download:repositories, script] # located in common.gitlab-ci.yml + - curl -sSL https://get.docker.com/ | sh + + +release:pip:windows: + before_script: + - (Get-WmiObject -Class Win32_OperatingSystem).OSArchitecture + - (Get-WmiObject -Class Win32_OperatingSystem).Caption + - $PSVersionTable.PSVersion + # Install Chocolatey + - Set-ExecutionPolicy Bypass -Scope Process -Force; [System.Net.ServicePointManager]::SecurityProtocol = [System.Net.ServicePointManager]::SecurityProtocol -bor 3072; iex ((New-Object System.Net.WebClient).DownloadString('https://community.chocolatey.org/install.ps1')) + # Install dependencies + - choco install cmake.install --installargs '"ADD_CMAKE_TO_PATH=System"' -Y + - choco install git -Y + - choco install python --version=$python_version -Y + # Update PATH + - $env:Path = [System.Environment]::GetEnvironmentVariable("Path","Machine") + ";" + 
[System.Environment]::GetEnvironmentVariable("Path","User") + - python -m pip install cibuildwheel==2.17.0 + # Download repositories + - $DEPS_NAMES = "aidge_core","aidge_backend_cpu","aidge_onnx" + - $DEPENDENCY_JOB="build:windows_python" + - !reference [.windows:download:repositories, script] + diff --git a/.gitlab/ci/cibuildwheel_build_deps_before_build_wheel.ps1 b/.gitlab/ci/cibuildwheel_build_deps_before_build_wheel.ps1 new file mode 100644 index 0000000000000000000000000000000000000000..c2715ea5550432838d3cc8692e97204b278d2c85 --- /dev/null +++ b/.gitlab/ci/cibuildwheel_build_deps_before_build_wheel.ps1 @@ -0,0 +1,23 @@ +$ErrorActionPreference = "Stop" + +# Retrieve and clean the dependencies string from the environment variable +$AIDGE_DEPENDENCIES = $env:AIDGE_DEPENDENCIES -split ' ' +Write-Host "Aidge dependencies : $AIDGE_DEPENDENCIES" +if ( $($AIDGE_DEPENDENCIES.Length) -eq 0) { + Write-Host "- No dependencies provided for current repsitory" + New-Item -ItemType Directory -Force -Path ".\build" | Out-Null + Remove-Item -Path ".\build\*" -Recurse -Force + } else { + Write-Host "Retrieving given dependencies to build current package : $AIDGE_DEPENDENCIES" + foreach ($dep in $($AIDGE_DEPENDENCIES -split " ")) { + Write-Host "Retrieving : $dep" + $curr_loc=$(Get-Location) + Set-Location ../$dep + Get-Location + Get-ChildItem . + New-Item -Path ".\build" -ItemType Directory -Force | Out-Null + Get-ChildItem -Path ".\build" -File | Remove-Item -Force + python -m pip install . -v + Set-Location $curr_loc + } +} diff --git a/.gitlab/ci/cibuildwheel_build_deps_before_build_wheel.sh b/.gitlab/ci/cibuildwheel_build_deps_before_build_wheel.sh new file mode 100755 index 0000000000000000000000000000000000000000..4f74488ae41714a4ce03ba7514bf93842768c5ae --- /dev/null +++ b/.gitlab/ci/cibuildwheel_build_deps_before_build_wheel.sh @@ -0,0 +1,40 @@ +#!/bin/bash +set -e +if [[ "$1" == "" ]]; then + echo "build aidge deps in cibuildwheel container before building wheel." 
+ echo "search path defines where the dependencies will be searched." + echo "Hint : In wheel containers, files are mounted on /host by default." + echo "\nusage : ./cibuildwheel_build_deps_before_build_wheel.sh $search_path" +fi +set -x +if [[ $AIDGE_DEPENDENCIES == "" ]]; then # case for aidge_ core + mkdir -p build # creating build if its not already there to hold the build of cpp files + rm -rf build/* # build from scratch +else + for repo in $AIDGE_DEPENDENCIES ; do # case for other projects + search_path=$1 + REPO_PATH=$(find $search_path ! -writable -prune -o -type d \ + -name "$repo" \ + -not -path "*/install/*" \ + -not -path "*/.git/*" \ + -not -path "*/miniconda/*" \ + -not -path "*/conda/*" \ + -not -path "*/.local/*" \ + -not -path "*/lib/*" \ + -not -path "*/$repo/$repo/*" \ + -not -path "*/proc/*" \ + -print -quit) + if [[ -z "$REPO_PATH" ]]; then + echo "ERROR : dependency $repo not found in search_path \"$search_path\". ABORTING." + exit -1 + fi + + cd $REPO_PATH + mkdir -p build # creating build if its not already there to hold the build of cpp files + rm -rf build/* # build from scratch + pip install . -v + cd - + done +fi +set +x +set +e diff --git a/CMakeLists.txt b/CMakeLists.txt index f3651d02eb897f27ade07ca421e20431dba70a37..fe4b894fcd3657cc17226376b64aa60c0ec6b873 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -15,11 +15,10 @@ project(${project}) ############################################## # Import utils CMakeLists set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake") -include(PybindModuleCreation) ############################################## # Define options -option(PYBIND "python binding" ON) +option(PYBIND "python binding" OFF) option(WERROR "Warning as error" OFF) option(TEST "Enable tests" ON) option(COVERAGE "Enable coverage" OFF) @@ -33,8 +32,6 @@ if(CMAKE_COMPILER_IS_GNUCXX AND COVERAGE) Include(CodeCoverage) endif() -# XXX HERE !!! 
- # ############################################## # Find system dependencies Include(FetchContent) @@ -49,8 +46,6 @@ set(FMT_SYSTEM_HEADERS ON) FetchContent_MakeAvailable(fmt) set_property(TARGET fmt PROPERTY POSITION_INDEPENDENT_CODE ON) - - ############################################## # Find system dependencies find_package(aidge_core REQUIRED) @@ -81,14 +76,15 @@ target_include_directories(${module_name} # PYTHON BINDING if (PYBIND) - generate_python_binding(${project} ${module_name}) - # Handles Python + pybind11 headers dependencies + include(PybindModuleCreation) + generate_python_binding(${CMAKE_PROJECT_NAME} ${module_name}) + target_link_libraries(${module_name} - PUBLIC + PUBLIC pybind11::pybind11 PRIVATE - Python::Python + Python::Module ) endif() @@ -109,6 +105,10 @@ endif() ############################################## # Installation instructions +if(NOT $ENV{AIDGE_INSTALL} STREQUAL "") + set(CMAKE_INSTALL_PREFIX $ENV{AIDGE_INSTALL}) + message(WARNING "CMAKE_INSTALL_PREFIX set to env variable AIDGE_INSTALL by default = ${CMAKE_INSTALL_PREFIX}") +endif() include(GNUInstallDirs) set(INSTALL_CONFIGDIR ${CMAKE_INSTALL_LIBDIR}/cmake/${project}) @@ -158,6 +158,9 @@ export(EXPORT ${project}-targets ############################################## ## Add test if(TEST) + if(PYBIND) + message(FATAL_ERROR "PYBIND and TEST are both enabled. 
But cannot compile with catch_2.\nChoose between pybind and Catch2 for compilation.") + endif() enable_testing() add_subdirectory(unit_tests) endif() diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000000000000000000000000000000000000..593d2ba4b8d8abb5a1054140c70b56299d3b0442 --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,8 @@ +include README.md LICENCE +recursive-include aidge_quantization *.py +recursive-exclude aidge_quantization/unit_tests *.py + +recursive-include include *.hpp +recursive-include src *.cpp +recursive-include python_binding *.cpp +include CMakeLists.txt diff --git a/README.md b/README.md index e4440f17094c7cfda668f5161c3c44adc9081780..f62b1166341d6be5ee28972f92da92048b925d49 100644 --- a/README.md +++ b/README.md @@ -1,22 +1,32 @@ # Aidge Quantization Module - You can find in this folder the library that implements the quantization algorithms. For the moment only Post Training Quantization (PTQ) is available. Its implementation does support multiple branch architectures. +[TOC] +## Installation +### Dependencies +- `GCC` +- `Make`/`Ninja` +- `CMake` +- `Python` (optional, if you have no intend to use this library in python with pybind) + +#### Aidge dependencies + - `aidge_core` The requirements for installing the library are the followings: - GCC, Make and CMake for the compilation pipeline - The AIDGE modules aidge_core, aidge_onnx and aidge_backend_cpu - Python (> 3.7) if you intend to use the pybind wrapper -## Pip installation - -In an environment which satisfies the previous requirements, run : - +### Pip installation ``` bash pip install . -v ``` +> **TIPS :** Use environment variables to change compilation options : +> - `AIDGE_INSTALL` : to set the installation folder. Defaults to /usr/local/lib. :warning: This path must be identical to aidge_core install path. 
+> - `AIDGE_PYTHON_BUILD_TYPE` : to set the compilation mode to **Debug** or **Release** +> - `AIDGE_BUILD_GEN` : to set the build backend with ## User guide diff --git a/aidge_quantization/unit_tests/test_ptq.py b/aidge_quantization/unit_tests/test_ptq.py index 91f6d82ff7f844bc235cd7dd3391d8fe8d82bf88..2f9d5f058cfbc8bf66a6f4184e89c4a60866a2ee 100644 --- a/aidge_quantization/unit_tests/test_ptq.py +++ b/aidge_quantization/unit_tests/test_ptq.py @@ -1,12 +1,15 @@ import unittest import gzip import numpy as np +from pathlib import Path import aidge_core import aidge_backend_cpu import aidge_onnx import aidge_quantization +from aidge_core import Log, Level + # -------------------------------------------------------------- # CONFIGS # -------------------------------------------------------------- @@ -52,12 +55,13 @@ class test_ptq(unittest.TestCase): # load the samples / labels (numpy) - self.samples = np.load(gzip.GzipFile('assets/mnist_samples.npy.gz', "r")) - self.labels = np.load(gzip.GzipFile('assets/mnist_labels.npy.gz', "r")) + curr_file_dir = Path(__file__).parent.resolve() + self.samples = np.load(gzip.GzipFile(curr_file_dir / 'assets/mnist_samples.npy.gz', "r")) + self.labels = np.load(gzip.GzipFile(curr_file_dir / 'assets/mnist_labels.npy.gz', "r")) # load the model in AIDGE - self.model = aidge_onnx.load_onnx("assets/" + MODEL_NAME, verbose=False) + self.model = aidge_onnx.load_onnx(curr_file_dir / "assets/" / MODEL_NAME, verbose=False) aidge_core.remove_flatten(self.model) self.model.set_datatype(aidge_core.dtype.float32) @@ -69,15 +73,16 @@ class test_ptq(unittest.TestCase): def test_model(self): + Log.set_console_level(Level.Info) # compute the base accuracy - accuracy = compute_accuracy(self.model, self.samples[0:NB_SAMPLES], self.labels) self.assertAlmostEqual(accuracy * 100, ACCURACIES[0], msg='base accuracy does not meet the baseline !', delta=0.1) def test_quant_model(self): - # create the calibration dataset + Log.set_console_level(Level.Info) + # 
create the calibration dataset tensors = [] for sample in self.samples[0:NB_SAMPLES]: sample = prepare_sample(sample) @@ -109,4 +114,4 @@ class test_ptq(unittest.TestCase): if __name__ == '__main__': - unittest.main() \ No newline at end of file + unittest.main() diff --git a/cmake/PybindModuleCreation.cmake b/cmake/PybindModuleCreation.cmake index 87e70fc38c9e4ec4ddb44cbe5d7fb2a31c2e94d6..8f386bef59ed86dfa366eca5d4fccae24b28d24e 100644 --- a/cmake/PybindModuleCreation.cmake +++ b/cmake/PybindModuleCreation.cmake @@ -1,21 +1,25 @@ -function(generate_python_binding name target_to_bind) +function(generate_python_binding pybind_module_name target_to_bind) add_definitions(-DPYBIND) Include(FetchContent) + set(PYBIND_VERSION v2.10.4) + set(PYBIND11_FINDPYTHON ON) + message(STATUS "Retrieving pybind ${PYBIND_VERSION} from git") + FetchContent_Declare( - PyBind11 - GIT_REPOSITORY https://github.com/pybind/pybind11.git - GIT_TAG v2.10.4 # or a later release + PyBind11 + GIT_REPOSITORY https://github.com/pybind/pybind11.git + GIT_TAG ${PYBIND_VERSION} # or a later release ) # Use the New FindPython mode, recommanded. 
Requires CMake 3.15+ - find_package(Python COMPONENTS Interpreter Development) + find_package(Python COMPONENTS Interpreter Development.Module) FetchContent_MakeAvailable(PyBind11) - message(STATUS "Creating binding for module ${name}") + message(STATUS "Creating binding for module ${pybind_module_name}") file(GLOB_RECURSE pybind_src_files "python_binding/*.cpp") - pybind11_add_module(${name} MODULE ${pybind_src_files} "NO_EXTRAS") # NO EXTRA recquired for pip install - target_include_directories(${name} PUBLIC "python_binding") - target_link_libraries(${name} PUBLIC ${target_to_bind}) + pybind11_add_module(${pybind_module_name} MODULE ${pybind_src_files} "NO_EXTRAS") # NO EXTRA recquired for pip install + target_include_directories(${pybind_module_name} PUBLIC "python_binding") + target_link_libraries(${pybind_module_name} PUBLIC ${target_to_bind}) endfunction() diff --git a/include/aidge/backend/QuantizationCPU.hpp b/include/aidge/backend/QuantizationCPU.hpp new file mode 100644 index 0000000000000000000000000000000000000000..4338e04b421ccef1c990840cbc417a2fe7a6d4aa --- /dev/null +++ b/include/aidge/backend/QuantizationCPU.hpp @@ -0,0 +1,22 @@ +// /******************************************************************************** +// * Copyright (c) 2023 CEA-List +// * +// * This program and the accompanying materials are made available under the +// * terms of the Eclipse Public License 2.0 which is available at +// * http://www.eclipse.org/legal/epl-2.0. +// * +// * SPDX-License-Identifier: EPL-2.0 +// * +// ********************************************************************************/ +#ifndef AIDGE_QUANTIZATION_CPU_IMPORTS_H_ +#define AIDGE_QUANTIZATION_CPU_IMPORTS_H_ + +#include "aidge/backend/cpu/operator/FixedQImpl.hpp" +#include "aidge/backend/cpu/operator/LSQImpl.hpp" + +#include "aidge/backend/cpu/operator/SAT/TanhClampImpl.hpp" +#include "aidge/backend/cpu/operator/SAT/DoReFaImpl.hpp" + +// ... 
+ +#endif /* AIDGE_QUANTIZATION_CPU_IMPORTS_H_ */ \ No newline at end of file diff --git a/include/aidge/backend/cpu/operator/FixedQImpl.hpp b/include/aidge/backend/cpu/operator/FixedQImpl.hpp new file mode 100644 index 0000000000000000000000000000000000000000..def38482cb958387f7504e946fff54b378508ed8 --- /dev/null +++ b/include/aidge/backend/cpu/operator/FixedQImpl.hpp @@ -0,0 +1,37 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. + * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#ifndef AIDGE_CPU_OPERATOR_FIXEDQIMPL_H_ +#define AIDGE_CPU_OPERATOR_FIXEDQIMPL_H_ + +#include <cstddef> // std::size_t +#include <memory> +#include <tuple> // std::tuple +#include <vector> + +#include "aidge/backend/cpu/operator/OperatorImpl.hpp" +#include "aidge/operator/FixedQ.hpp" +#include "aidge/utils/Registrar.hpp" +#include "aidge/utils/Types.h" + +namespace Aidge { + +// Operator implementation entry point for the backend +using FixedQImpl_cpu = OperatorImpl_cpu<FixedQ_Op, + void(const std::size_t, const float, const bool, const std::size_t, const void*, void*), + void(const std::size_t, const float, const bool, const std::size_t, const void*, const void*, void*)>; + +// Implementation entry point registration to Operator +REGISTRAR(FixedQ_Op, "cpu", Aidge::FixedQImpl_cpu::create); + +} // namespace Aidge + +#endif /* AIDGE_CPU_OPERATOR_FIXEDQIMPL_H_ */ diff --git a/include/aidge/backend/cpu/operator/FixedQImpl_kernels.hpp b/include/aidge/backend/cpu/operator/FixedQImpl_kernels.hpp new file mode 100644 index 0000000000000000000000000000000000000000..3e1f9d239fa31d29512a40674ef914486dfbd85a --- /dev/null +++ 
b/include/aidge/backend/cpu/operator/FixedQImpl_kernels.hpp @@ -0,0 +1,91 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. + * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#ifndef AIDGE_CPU_OPERATOR_FIXEDQIMPL_FORWARD_KERNEL_H_ +#define AIDGE_CPU_OPERATOR_FIXEDQIMPL_FORWARD_KERNEL_H_ + +#include "aidge/utils/Registrar.hpp" +#include "aidge/backend/cpu/operator/FixedQImpl.hpp" + +//#include <iostream> + +namespace Aidge { +template <class I, class O> +void FixedQImpl_cpu_forward_kernel( + std::size_t nbBits, + float span_, + bool isOutputUnsigned, + std::size_t inputLenght, + const void* input_, + void* output_) { + + I span = static_cast<I> (span_); + I stepSize = span / static_cast<I> (1 << (nbBits - 1)); + if (isOutputUnsigned) { + stepSize /= 2; + } + + const I upper = span - stepSize; + const I lower = isOutputUnsigned ? 0 : -span; + + const I* input = static_cast<const I*>(input_); + O* output = static_cast<O*>(output_); + + for (std::size_t i = 0; i < inputLenght; ++i) { + I clipped = std::max(lower, std::min(input[i], upper)); + output[i] = std::round(clipped / stepSize) * stepSize; + } + +} + +template <class I, class GI, class GO> +void FixedQImpl_cpu_backward_kernel( + std::size_t nbBits, + float span_, + bool isOutputUnsigned, + const std::size_t inputLenght, + const void* input_, + const void* grad_output_, + void* grad_input_) { + + I span = static_cast<I> (span_); + I stepSize = span / static_cast<I> (1 << (nbBits - 1)); + if (isOutputUnsigned) { + stepSize /= 2; + } + + const I upper = span - stepSize; + const I lower = isOutputUnsigned ? 
0 : -span; + + const I* input = static_cast<const I*>(input_); + const GO* grad_output = static_cast<const GO*>(grad_output_); + GI* grad_input = static_cast<GI*>(grad_input_); + + for (std::size_t i = 0; i < inputLenght; ++i) { + // Clipped Straight Through Estimator + grad_input[i] = ((input[i] > lower) && (input[i] < upper)) ? grad_output[i] : 0; + } +} + +// Kernels registration to implementation entry point +REGISTRAR(FixedQImpl_cpu, + {{DataType::Int32, DataFormat::NCHW}, {DataType::Int32, DataFormat::NCHW}}, + {ProdConso::inPlaceModel, Aidge::FixedQImpl_cpu_forward_kernel<int, int>, Aidge::FixedQImpl_cpu_backward_kernel<int, int, int>}); +REGISTRAR(FixedQImpl_cpu, + {{DataType::Float32, DataFormat::NCHW}, {DataType::Float32, DataFormat::NCHW}}, + {ProdConso::inPlaceModel, Aidge::FixedQImpl_cpu_forward_kernel<float, float>, Aidge::FixedQImpl_cpu_backward_kernel<float, float, float>}); +REGISTRAR(FixedQImpl_cpu, + {{DataType::Float64, DataFormat::NCHW}, {DataType::Float64, DataFormat::NCHW}}, + {ProdConso::inPlaceModel, Aidge::FixedQImpl_cpu_forward_kernel<double, double>, Aidge::FixedQImpl_cpu_backward_kernel<double, double, double>}); + +} // namespace Aidge + +#endif /* AIDGE_CPU_OPERATOR_FIXEDQIMPL_FORWARD_KERNEL_H_ */ diff --git a/include/aidge/backend/cpu/operator/LSQImpl.hpp b/include/aidge/backend/cpu/operator/LSQImpl.hpp new file mode 100644 index 0000000000000000000000000000000000000000..f2a7d927e3aa6841ef500763f882c5aff2d0261a --- /dev/null +++ b/include/aidge/backend/cpu/operator/LSQImpl.hpp @@ -0,0 +1,48 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. 
+ * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#ifndef AIDGE_CPU_OPERATOR_LSQIMPL_H_ +#define AIDGE_CPU_OPERATOR_LSQIMPL_H_ + +#include <cstddef> // std::size_t +#include <memory> +#include <tuple> // std::tuple +#include <vector> + +#include "aidge/backend/cpu/operator/OperatorImpl.hpp" +#include "aidge/operator/LSQ.hpp" +#include "aidge/utils/Registrar.hpp" +#include "aidge/utils/Types.h" + +namespace Aidge { +// compute kernel registry for forward and backward + +using LSQImpl_cpu = OperatorImpl_cpu<LSQ_Op, + void(const std::size_t, + std::pair<int, int>&, + const void*, + const void*, + void*), + void(const std::size_t, + std::pair<int, int>&, + const void*, + const void*, + const void*, + void*, + void*)>; + + +// Implementation entry point registration to Operator +REGISTRAR(LSQ_Op, "cpu", Aidge::LSQImpl_cpu::create); +} // namespace Aidge + +#endif /* AIDGE_CPU_OPERATOR_LSQIMPL_H_ */ + diff --git a/include/aidge/backend/cpu/operator/LSQImpl_kernels.hpp b/include/aidge/backend/cpu/operator/LSQImpl_kernels.hpp new file mode 100644 index 0000000000000000000000000000000000000000..ddb820997837ec9b3603c6007497c8161145d587 --- /dev/null +++ b/include/aidge/backend/cpu/operator/LSQImpl_kernels.hpp @@ -0,0 +1,137 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. 
+ * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#ifndef AIDGE_CPU_OPERATOR_LSQIMPL_FORWARD_KERNEL_H_ +#define AIDGE_CPU_OPERATOR_LSQIMPL_FORWARD_KERNEL_H_ + +#include "aidge/utils/Registrar.hpp" + +#include "aidge/backend/cpu/operator/LSQImpl.hpp" + +namespace Aidge { +template <class I, class O> +void LSQImpl_cpu_forward_kernel(std::size_t inputLength, + const std::pair<int, int>& range, + const void* input_, + const void* stepSize_, + void* output_) +{ + const I* input = static_cast<const I*>(input_); + const I* stepSize = static_cast<const I*>(stepSize_); + O* output = static_cast<O*>(output_); + + const O bitRangesLowerBound = static_cast<O>(range.first * stepSize[0]); + const O bitRangesUpperBound = static_cast<O>(range.second * stepSize[0]); + +//#pragma omp parallel for if (inputLength > 16) + for (unsigned int i = 0; i < inputLength; i++) { + const O qData = input[i] / stepSize[0]; + + output[i] = + (qData <= static_cast<O>(range.first)) ? bitRangesLowerBound : + (qData >= static_cast<O>(range.second)) ? 
bitRangesUpperBound : + std::round(qData) * stepSize[0]; + } +} + +template <class I, class GI, class GO> +void LSQImpl_cpu_backward_kernel(const std::size_t inputLength, + const std::pair<int, int>& range, + const void* input_, + const void* stepSize_, + const void* grad_output_, + void* grad_input_, + void* grad_stepSize_) +{ + const I* input = static_cast<const I*>(input_); + const I* stepSize = static_cast<const I*>(stepSize_); + const GO* grad_output = static_cast<const GO*>(grad_output_); + GI* grad_input = static_cast<GI*>(grad_input_); + GI* grad_stepSize = static_cast<GI*>(grad_stepSize_); + + GI diffStepSize = GI(0.0); + +#pragma omp parallel for schedule(static, 256) reduction(+:diffStepSize) if(inputLength > 16) + for(unsigned int i=0; i < inputLength / 4; i++) { + const GI fullPrecScale_1 = input[4*i] / stepSize[0]; + const GI fullPrecScale_2 = input[4*i+1] / stepSize[0]; + const GI fullPrecScale_3 = input[4*i+2] / stepSize[0]; + const GI fullPrecScale_4 = input[4*i+3] / stepSize[0]; + /*****************Features Gradient Computation********************/ + // STE method is simply applied + grad_input[4*i] = grad_output[4*i]*((fullPrecScale_1 <= static_cast<GI>(range.first)) ? GI(0.0) : + (fullPrecScale_1 >= static_cast<GI>(range.second)) ? GI(0.0) : + GI(1.0)); + grad_input[4*i+1] = grad_output[4*i+1]*((fullPrecScale_2 <= static_cast<GI>(range.first)) ? GI(0.0) : + (fullPrecScale_2 >= static_cast<GI>(range.second)) ? GI(0.0) : + GI(1.0)); + grad_input[4*i+2] = grad_output[4*i+2]*((fullPrecScale_3 <= static_cast<GI>(range.first)) ? GI(0.0) : + (fullPrecScale_3 >= static_cast<GI>(range.second)) ? GI(0.0) : + GI(1.0)); + grad_input[4*i+3] = grad_output[4*i+3]*((fullPrecScale_4 <= static_cast<GI>(range.first)) ? GI(0.0) : + (fullPrecScale_4 >= static_cast<GI>(range.second)) ? 
GI(0.0) : + GI(1.0)); + + /*****************Step Size Gradient Computation******************/ + //1st: clip the gradient in interval [rangeMin, rangeMax] and take account of qError + GI qData_1 = fullPrecScale_1; + qData_1 = ((qData_1 <= static_cast<GI>(range.first)) ? static_cast<GI>(range.first) : + (qData_1 >= static_cast<GI>(range.second)) ? static_cast<GI>(range.second) : + round(qData_1) - qData_1); + GI qData_2 = fullPrecScale_2; + qData_2 = ((qData_2 <= static_cast<GI>(range.first)) ? static_cast<GI>(range.first) : + (qData_2 >= static_cast<GI>(range.second)) ? static_cast<GI>(range.second) : + round(qData_2) - qData_2); + GI qData_3 = fullPrecScale_3; + qData_3 = ((qData_3 <= static_cast<GI>(range.first)) ? static_cast<GI>(range.first) : + (qData_3 >= static_cast<GI>(range.second)) ? static_cast<GI>(range.second) : + round(qData_3) - qData_3); + GI qData_4 = fullPrecScale_4; + qData_4 = ((qData_4 <= static_cast<GI>(range.first)) ? static_cast<GI>(range.first) : + (qData_4 >= static_cast<GI>(range.second)) ? static_cast<GI>(range.second) : + round(qData_4) - qData_4); + //2nd: Multiplie backward data with clipped grad + diffStepSize += ((qData_1*grad_output[4*i] + qData_2*grad_output[4*i+1])+(qData_3*grad_output[4*i+2] + qData_4*grad_output[4*i+3])); + } + + // Process remaining + for(unsigned int i=inputLength-inputLength%4; i<inputLength; ++i) { + const GI fullPrecScale = input[i] / stepSize[0]; + grad_input[i] = grad_output[i]*((fullPrecScale <= static_cast<GI>(range.first)) ? GI(0.0) : + (fullPrecScale >= static_cast<GI>(range.second)) ? GI(0.0) : + GI(1.0)); + GI qData = fullPrecScale; + qData = ((qData <= static_cast<GI>(range.first)) ? static_cast<GI>(range.first) : + (qData >= static_cast<GI>(range.second)) ? 
static_cast<GI>(range.second) : + round(qData) - qData); + diffStepSize += qData*grad_output[i]; + } + + const GI gradScaleFactor = static_cast<GI>(1.0f / std::sqrt(inputLength * range.second)); + // 3rd: Multiply Step Size gradient with scale factor + grad_stepSize[0] = diffStepSize * gradScaleFactor; +} + + +// Kernels registration to implementation entry point +REGISTRAR(LSQImpl_cpu, + {{DataType::Float16, DataFormat::NCHW}, {DataType::Float16, DataFormat::NCHW}}, + {ProdConso::inPlaceModel, Aidge::LSQImpl_cpu_forward_kernel<half_float::half, half_float::half>, Aidge::LSQImpl_cpu_backward_kernel<half_float::half, half_float::half, half_float::half>}); +REGISTRAR(LSQImpl_cpu, + {{DataType::Float32, DataFormat::NCHW}, {DataType::Float32, DataFormat::NCHW}}, + {ProdConso::inPlaceModel, Aidge::LSQImpl_cpu_forward_kernel<float, float>, Aidge::LSQImpl_cpu_backward_kernel<float, float, float>}); +REGISTRAR(LSQImpl_cpu, + {{DataType::Float64, DataFormat::NCHW}, {DataType::Float64, DataFormat::NCHW}}, + {ProdConso::inPlaceModel, Aidge::LSQImpl_cpu_forward_kernel<double, double>, Aidge::LSQImpl_cpu_backward_kernel<double, double, double>}); + +} // namespace Aidge + +#endif /* AIDGE_CPU_OPERATOR_LSQIMPL_FORWARD_KERNEL_H_ */ diff --git a/include/aidge/backend/cpu/operator/SAT/DoReFaImpl.hpp b/include/aidge/backend/cpu/operator/SAT/DoReFaImpl.hpp new file mode 100644 index 0000000000000000000000000000000000000000..36bf6f9afffa926dc91814c654aec9515294c297 --- /dev/null +++ b/include/aidge/backend/cpu/operator/SAT/DoReFaImpl.hpp @@ -0,0 +1,37 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. 
+ * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#ifndef AIDGE_CPU_OPERATOR_DOREFAIMPL_H_ +#define AIDGE_CPU_OPERATOR_DOREFAIMPL_H_ + +#include <cstddef> // std::size_t +#include <memory> +#include <tuple> // std::tuple +#include <vector> + +#include "aidge/backend/cpu/operator/OperatorImpl.hpp" +#include "aidge/operator/SAT/DoReFa.hpp" +#include "aidge/utils/Registrar.hpp" +#include "aidge/utils/Types.h" + +namespace Aidge { + +// Operator implementation entry point for the backend +using DoReFaImpl_cpu = OperatorImpl_cpu<DoReFa_Op, + void(const std::size_t, float, DoReFaMode, const void*, void*), + void(const std::size_t, float, DoReFaMode, const void*, const void*, void*)>; + +// Implementation entry point registration to Operator +REGISTRAR(DoReFa_Op, "cpu", Aidge::DoReFaImpl_cpu::create); + +} // namespace Aidge + +#endif /* AIDGE_CPU_OPERATOR_DOREFAIMPL_H_ */ diff --git a/include/aidge/backend/cpu/operator/SAT/DoReFaImpl_kernels.hpp b/include/aidge/backend/cpu/operator/SAT/DoReFaImpl_kernels.hpp new file mode 100644 index 0000000000000000000000000000000000000000..6c30947ae89dee7b75f8b53e2a24752d6b5d8ab5 --- /dev/null +++ b/include/aidge/backend/cpu/operator/SAT/DoReFaImpl_kernels.hpp @@ -0,0 +1,75 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. 
+ * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#ifndef AIDGE_CPU_OPERATOR_DOREFAIMPL_FORWARD_KERNEL_H_ +#define AIDGE_CPU_OPERATOR_DOREFAIMPL_FORWARD_KERNEL_H_ + +#include "aidge/utils/Registrar.hpp" + +#include "aidge/backend/cpu/operator/SAT/DoReFaImpl.hpp" + +namespace Aidge { + +template <class I, class O> +void DoReFaImpl_cpu_forward_kernel(std::size_t inputLength, + float range, + DoReFaMode mode, + const void* input_, + void* output_) +{ + const I* input = static_cast<const I*>(input_); + O* output = static_cast<O*>(output_); + + // Dorefa Quantization + //#pragma omp parallel for if (inputLength > 1024) + for (unsigned int i = 0; i < inputLength; ++i) { + if (mode == DoReFaMode::Default) { + auto q = I(0.5) * (input[i] + I(1.0)); + q = O(1.0f / range) * O(std::rintf(q * range)); + output[i] = q * O(2.0) - O(1.0); + } + else { + output[i] = O(1.0f / range) * O(std::rintf(input[i] * range)); + } + } +} + +template <class I, class GI, class GO> +void DoReFaImpl_cpu_backward_kernel(const std::size_t inputLength, + float /*range*/, + DoReFaMode /*mode*/, + const void* /*input_*/, + const void* grad_output_, + void* grad_input_) +{ + const GO* grad_output = static_cast<const GO*>(grad_output_); + GI* grad_input = static_cast<GI*>(grad_input_); + + //#pragma omp parallel for if (inputLength > 1024) + for (unsigned int i = 0; i < inputLength; ++i) { + grad_input[i] = grad_output[i]; + } +} + +// Kernels registration to implementation entry point +REGISTRAR(DoReFaImpl_cpu, + {{DataType::Int32, DataFormat::NCHW}, {DataType::Int32, DataFormat::NCHW}}, + {ProdConso::inPlaceModel, Aidge::DoReFaImpl_cpu_forward_kernel<int, int>, Aidge::DoReFaImpl_cpu_backward_kernel<int, int, int>}); +REGISTRAR(DoReFaImpl_cpu, + {{DataType::Float32, DataFormat::NCHW}, {DataType::Float32, DataFormat::NCHW}}, + {ProdConso::inPlaceModel, Aidge::DoReFaImpl_cpu_forward_kernel<float, float>, 
Aidge::DoReFaImpl_cpu_backward_kernel<float, float, float>}); +REGISTRAR(DoReFaImpl_cpu, + {{DataType::Float64, DataFormat::NCHW}, {DataType::Float64, DataFormat::NCHW}}, + {ProdConso::inPlaceModel, Aidge::DoReFaImpl_cpu_forward_kernel<double, double>, Aidge::DoReFaImpl_cpu_backward_kernel<double, double, double>}); + +} // namespace Aidge + +#endif /* AIDGE_CPU_OPERATOR_DOREFAIMPL_FORWARD_KERNEL_H_ */ diff --git a/include/aidge/backend/cpu/operator/SAT/TanhClampImpl.hpp b/include/aidge/backend/cpu/operator/SAT/TanhClampImpl.hpp new file mode 100644 index 0000000000000000000000000000000000000000..972ed411f2bdb85dc88c5209f3c6dc947c5401bf --- /dev/null +++ b/include/aidge/backend/cpu/operator/SAT/TanhClampImpl.hpp @@ -0,0 +1,35 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. 
+ * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#ifndef AIDGE_CPU_OPERATOR_TANHCLAMPIMPL_H_ +#define AIDGE_CPU_OPERATOR_TANHCLAMPIMPL_H_ + +#include <cstddef> // std::size_t +#include <memory> +#include <tuple> // std::tuple +#include <vector> + +#include "aidge/backend/cpu/operator/OperatorImpl.hpp" +#include "aidge/operator/SAT/TanhClamp.hpp" +#include "aidge/utils/Registrar.hpp" +#include "aidge/utils/Types.h" + +namespace Aidge { +// Operator implementation entry point for the backend +using TanhClampImpl_cpu = OperatorImpl_cpu<TanhClamp_Op, + void(const std::size_t, const void*, void*, void*), + void(const std::size_t, const void*, const void*, void*, void*)>; + +// Implementation entry point registration to Operator +REGISTRAR(TanhClamp_Op, "cpu", Aidge::TanhClampImpl_cpu::create); +} // namespace Aidge + +#endif /* AIDGE_CPU_OPERATOR_TANHCLAMPIMPL_H_ */ diff --git a/include/aidge/backend/cpu/operator/SAT/TanhClampImpl_kernels.hpp b/include/aidge/backend/cpu/operator/SAT/TanhClampImpl_kernels.hpp new file mode 100644 index 0000000000000000000000000000000000000000..5c9602de67c48a661d57aef4c8c381199bae2c29 --- /dev/null +++ b/include/aidge/backend/cpu/operator/SAT/TanhClampImpl_kernels.hpp @@ -0,0 +1,84 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. 
+ * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#ifndef AIDGE_CPU_OPERATOR_TANHCLAMPIMPL_KERNELS_H_ +#define AIDGE_CPU_OPERATOR_TANHCLAMPIMPL_KERNELS_H_ + +#include <cstddef> // std::size_t +#include <memory> +#include <tuple> // std::tuple +#include <vector> + +#include "aidge/backend/cpu/operator/OperatorImpl.hpp" +#include "aidge/backend/cpu/operator/SAT/TanhClampImpl.hpp" +#include "aidge/utils/Registrar.hpp" +#include "aidge/utils/Types.h" + +namespace Aidge { +// Kernels +template <class I, class O> +void TanhClampImpl_cpu_forward_kernel(std::size_t inputLength, + const void* input_, + void* scaling_, + void* output_) +{ + const I* input = static_cast<const I*>(input_); + I scaling = *static_cast<I*>(scaling_); + O* output = static_cast<O*>(output_); + + const auto minMax = std::minmax_element(input, input + inputLength); + const auto absMax = std::max(std::abs(*(minMax.first)), std::abs(*(minMax.second))); + scaling = std::tanh(absMax); + + //#pragma omp parallel for if (inputLength > 1024) + for (unsigned int i = 0; i < inputLength; ++i) { + output[i] = std::tanh(input[i]) / scaling; + } + + // Set the scaling output ... 
+ *(static_cast<I*> (scaling_)) = scaling; +} + +template <class I, class GI, class GO> +void TanhClampImpl_cpu_backward_kernel(const std::size_t inputLength, + const void* input_, + const void* scaling_, + const void* grad_output_, + void* grad_input_) +{ + const I* input = static_cast<const I*>(input_); + const I scaling = *static_cast<const I*>(scaling_); + const GO* grad_output = static_cast<const GO*>(grad_output_); + GI* grad_input = static_cast<GI*>(grad_input_); + + //#pragma omp parallel for if (inputLength > 1024) + for (unsigned int i = 0; i < inputLength; ++i) { + const auto inv_cosh = GO(1 / std::cosh(input[i])); + const auto grad = inv_cosh * inv_cosh * GO(1 / scaling); + grad_input[i] = grad_output[i] * grad; + } +} + + +// Kernels registration to implementation entry point +REGISTRAR(TanhClampImpl_cpu, + {{DataType::Int32, DataFormat::NCHW}, {DataType::Int32, DataFormat::NCHW}}, + {ProdConso::inPlaceModel, Aidge::TanhClampImpl_cpu_forward_kernel<int, int>, Aidge::TanhClampImpl_cpu_backward_kernel<int, int, int>}); +REGISTRAR(TanhClampImpl_cpu, + {{DataType::Float32, DataFormat::NCHW}, {DataType::Float32, DataFormat::NCHW}}, + {ProdConso::inPlaceModel, Aidge::TanhClampImpl_cpu_forward_kernel<float, float>, Aidge::TanhClampImpl_cpu_backward_kernel<float, float, float>}); +REGISTRAR(TanhClampImpl_cpu, + {{DataType::Float64, DataFormat::NCHW}, {DataType::Float64, DataFormat::NCHW}}, + {ProdConso::inPlaceModel, Aidge::TanhClampImpl_cpu_forward_kernel<double, double>, Aidge::TanhClampImpl_cpu_backward_kernel<double, double, double>}); + +} // namespace Aidge + +#endif /* AIDGE_CPU_OPERATOR_TANHCLAMPIMPL_KERNELS_H_ */ diff --git a/include/aidge/operator/FixedQ.hpp b/include/aidge/operator/FixedQ.hpp new file mode 100644 index 0000000000000000000000000000000000000000..96a52b4592bc05f34a47e04e664df27847a48e85 --- /dev/null +++ b/include/aidge/operator/FixedQ.hpp @@ -0,0 +1,101 @@ 
+/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. + * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#ifndef AIDGE_CORE_OPERATOR_FIXEDQ_H_ +#define AIDGE_CORE_OPERATOR_FIXEDQ_H_ + +#include <cassert> +#include <memory> +#include <vector> + +#include "aidge/backend/OperatorImpl.hpp" +#include "aidge/graph/Node.hpp" +#include "aidge/operator/OperatorTensor.hpp" +#include "aidge/utils/ErrorHandling.hpp" +#include "aidge/utils/Registrar.hpp" +#include "aidge/utils/Types.h" +#include "aidge/utils/StaticAttributes.hpp" + + +namespace Aidge { + +enum class FixedQAttr { NbBits, Span, IsOutputUnsigned }; + +class FixedQ_Op : public OperatorTensor, + public Registrable<FixedQ_Op, std::string, + std::function<std::shared_ptr<OperatorImpl>(const FixedQ_Op&)>> { + +public: + static const std::string Type; + +private: + using Attributes_ = StaticAttributes<FixedQAttr, std::size_t, float, bool>; + template <FixedQAttr e> using attr = typename Attributes_::template attr<e>; + const std::shared_ptr<Attributes_> mAttributes; + +public: + + FixedQ_Op(std::size_t nbBits, float span, bool isOutputUnsigned) : + OperatorTensor(Type, {InputCategory::Data}, 1), + mAttributes(std::make_shared<Attributes_>(attr<FixedQAttr::NbBits>(nbBits), attr<FixedQAttr::Span>(span), attr<FixedQAttr::IsOutputUnsigned>(isOutputUnsigned))) + {} + + /** + * @brief Copy-constructor. Copy the operator attributes and its output tensor(s), but not its input tensors (the new operator has no input associated). + * @param op Operator to copy. 
+ */ + FixedQ_Op(const FixedQ_Op& op) + : OperatorTensor(op), mAttributes(op.mAttributes) + { + if (op.mImpl){ + SET_IMPL_MACRO(FixedQ_Op, *this, op.backend()); + }else{ + mImpl = nullptr; + } + } + + /** + * @brief Clone the operator using its copy-constructor. + * @see Operator::FixedQ_Op + */ + std::shared_ptr<Operator> clone() const override { + return std::make_shared<FixedQ_Op>(*this); + } + std::set<std::string> getAvailableBackends() const override final; + void setBackend(const std::string& name, DeviceIdx_t device = 0) override final; + + inline std::shared_ptr<Attributes> attributes() const override { return mAttributes; } + inline std::size_t& nbBits() const noexcept { return mAttributes->getAttr<FixedQAttr::NbBits>(); } + inline float& span() const noexcept { return mAttributes->getAttr<FixedQAttr::Span>(); } + inline bool& isOutputUnsigned() const noexcept { return mAttributes->getAttr<FixedQAttr::IsOutputUnsigned>(); } + + + static const std::vector<std::string> getInputsName(){ + return {"data_input"}; + } + static const std::vector<std::string> getOutputsName(){ + return {"data_output"}; + } + + +}; + +inline std::shared_ptr<Node> FixedQ(std::size_t nbBits = 8, float span = 4.0f, bool isOutputUnsigned = false, const std::string& name = "") { + return std::make_shared<Node>(std::make_shared<FixedQ_Op>(nbBits, span, isOutputUnsigned), name); +} +} + +namespace { +template <> +const char* const EnumStrings<Aidge::FixedQAttr>::data[] = {"nb_bits", "span", "is_output_unsigned"}; +} + +#endif /* AIDGE_CORE_OPERATOR_FIXEDQ_H_ */ diff --git a/include/aidge/operator/LSQ.hpp b/include/aidge/operator/LSQ.hpp new file mode 100644 index 0000000000000000000000000000000000000000..7ce23f01ea94ccfa3ffc7ec6b49a66e8c3048756 --- /dev/null +++ b/include/aidge/operator/LSQ.hpp @@ -0,0 +1,107 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made 
available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. + * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#ifndef AIDGE_CORE_OPERATOR_LSQ_H_ +#define AIDGE_CORE_OPERATOR_LSQ_H_ + +#include <cassert> +#include <memory> +#include <vector> + +#include "aidge/backend/OperatorImpl.hpp" +#include "aidge/graph/Node.hpp" +#include "aidge/operator/OperatorTensor.hpp" +#include "aidge/operator/Producer.hpp" +#include "aidge/utils/ErrorHandling.hpp" +#include "aidge/utils/Registrar.hpp" +#include "aidge/utils/StaticAttributes.hpp" +#include "aidge/utils/Types.h" + +namespace Aidge { + +enum class LSQAttr { Range }; + +/** + * LSQ is the weights AND activations quantizer for the LSQ method. + */ +class LSQ_Op : public OperatorTensor, + public Registrable<LSQ_Op, std::string, std::function<std::shared_ptr<OperatorImpl>(const LSQ_Op &)>> { +public: + static const std::string Type; + +private: + using Attributes_ = StaticAttributes<LSQAttr, std::pair<int, int>>; + template <LSQAttr e> using attr = typename Attributes_::template attr<e>; + const std::shared_ptr<Attributes_> mAttributes; + +public: + LSQ_Op(const std::pair<int, int>& range = {0, 255}) + : OperatorTensor(Type, {InputCategory::Data, InputCategory::Param}, 1), + mAttributes(std::make_shared<Attributes_>( + attr<LSQAttr::Range>(range))) + {} + + /** + * @brief Copy-constructor. Copy the operator attributes and its output tensor(s), but not its input tensors (the new operator has no input associated). + * @param op Operator to copy. + */ + LSQ_Op(const LSQ_Op& op) + : OperatorTensor(op), + mAttributes(op.mAttributes) + { + if (op.mImpl){ + SET_IMPL_MACRO(LSQ_Op, *this, op.backend()); + }else{ + mImpl = nullptr; + } + } + + /** + * @brief Clone the operator using its copy-constructor. 
+ * @see Operator::LSQ_Op
+ */
+    std::shared_ptr<Operator> clone() const override {
+        return std::make_shared<LSQ_Op>(*this);
+    }
+
+    bool forwardDims(bool allowDataDependency = false) override final;
+    std::set<std::string> getAvailableBackends() const override final;
+    void setBackend(const std::string& name, DeviceIdx_t device = 0) override final;
+
+    inline std::shared_ptr<Attributes> attributes() const override { return mAttributes; }
+    inline std::pair<int, int>& range() const noexcept { return mAttributes->getAttr<LSQAttr::Range>(); }
+
+    static const std::vector<std::string> getInputsName(){
+        return {"data_input", "step_size"};
+    }
+    static const std::vector<std::string> getOutputsName(){
+        return {"data_output"};
+    }
+};
+
+/**
+ * Range should be (with N the number of bits):
+ * - {0, 2^N - 1} in place of ReLU activations
+ * - {-2^(N-1), 2^(N-1) - 1} for weights quantization
+ */
+inline std::shared_ptr<Node> LSQ(const std::pair<int, int>& range = {0, 255}, const std::string& name = "") {
+    auto lsq = std::make_shared<Node>(std::make_shared<LSQ_Op>(range), name);
+    addProducer(lsq, 1, {1}, "ss");
+    return lsq;
+}
+}
+
+namespace {
+template <>
+const char *const EnumStrings<Aidge::LSQAttr>::data[] = {"range"};
+}
+
+#endif /* AIDGE_CORE_OPERATOR_LSQ_H_ */
diff --git a/include/aidge/operator/SAT/DoReFa.hpp b/include/aidge/operator/SAT/DoReFa.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..92ce1677b1b28e303c8488b55dd00cfafb519457
--- /dev/null
+++ b/include/aidge/operator/SAT/DoReFa.hpp
@@ -0,0 +1,109 @@
+/********************************************************************************
+ * Copyright (c) 2023 CEA-List
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License 2.0 which is available at
+ * http://www.eclipse.org/legal/epl-2.0.
+ * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#ifndef AIDGE_CORE_OPERATOR_DOREFA_H_ +#define AIDGE_CORE_OPERATOR_DOREFA_H_ + +#include <cassert> +#include <memory> +#include <vector> + +#include "aidge/backend/OperatorImpl.hpp" +#include "aidge/graph/Node.hpp" +#include "aidge/operator/OperatorTensor.hpp" +#include "aidge/utils/ErrorHandling.hpp" +#include "aidge/utils/Registrar.hpp" +#include "aidge/utils/StaticAttributes.hpp" +#include "aidge/utils/Types.h" + +namespace Aidge { + +enum class DoReFaAttr { Range, Mode }; +enum class DoReFaMode { + Default, // Original SAT paper (not including 0) + Symmetric, // Symmetric range including 0 + //Asymmetric, + //FullRange +}; + +/** + * DoReFa is the weights quantizer for the 2nd training phase (quantization) of the SAT method. + */ +class DoReFa_Op : public OperatorTensor, + public Registrable<DoReFa_Op, std::string, std::function<std::shared_ptr<OperatorImpl>(const DoReFa_Op&)>> { +public: + static const std::string Type; + +private: + using Attributes_ = StaticAttributes<DoReFaAttr, size_t, DoReFaMode>; + template <DoReFaAttr e> using attr = typename Attributes_::template attr<e>; + const std::shared_ptr<Attributes_> mAttributes; + +public: + DoReFa_Op(size_t range = 255, DoReFaMode mode = DoReFaMode::Default) + : OperatorTensor(Type, {InputCategory::Param}, 1), + mAttributes(std::make_shared<Attributes_>( + attr<DoReFaAttr::Range>(range), + attr<DoReFaAttr::Mode>(mode))) + {} + + /** + * @brief Copy-constructor. Copy the operator attributes and its output tensor(s), but not its input tensors (the new operator has no input associated). + * @param op Operator to copy. + */ + DoReFa_Op(const DoReFa_Op& op) + : OperatorTensor(op), + mAttributes(op.mAttributes) + { + if (op.mImpl){ + SET_IMPL_MACRO(DoReFa_Op, *this, op.backend()); + }else{ + mImpl = nullptr; + } + } + + /** + * @brief Clone the operator using its copy-constructor. 
+ * @see Operator::DoReFa_Op + */ + std::shared_ptr<Operator> clone() const override { + return std::make_shared<DoReFa_Op>(*this); + } + + std::set<std::string> getAvailableBackends() const override final; + void setBackend(const std::string& name, DeviceIdx_t device = 0) override final; + + inline std::shared_ptr<Attributes> attributes() const override { return mAttributes; } + inline size_t& range() const noexcept { return mAttributes->getAttr<DoReFaAttr::Range>(); } + inline DoReFaMode& mode() const noexcept { return mAttributes->getAttr<DoReFaAttr::Mode>(); } + + static const std::vector<std::string> getInputsName(){ + return {"data_input"}; + } + static const std::vector<std::string> getOutputsName(){ + return {"data_output"}; + } +}; + +inline std::shared_ptr<Node> DoReFa(size_t range = 255, DoReFaMode mode = DoReFaMode::Default, const std::string& name = "") { + return std::make_shared<Node>(std::make_shared<DoReFa_Op>(range, mode), name); +} +} + +namespace { +template <> +const char *const EnumStrings<Aidge::DoReFaAttr>::data[] = {"range", "mode"}; + +template <> +const char *const EnumStrings<Aidge::DoReFaMode>::data[] = {"default", "symmetric", "asymmetric", "full_range"}; +} + +#endif /* AIDGE_CORE_OPERATOR_DOREFA_H_ */ diff --git a/include/aidge/operator/SAT/TanhClamp.hpp b/include/aidge/operator/SAT/TanhClamp.hpp new file mode 100644 index 0000000000000000000000000000000000000000..def43b872c021e539efe5658b592ceec9b3b5d4d --- /dev/null +++ b/include/aidge/operator/SAT/TanhClamp.hpp @@ -0,0 +1,83 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. 
+ * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#ifndef AIDGE_CORE_OPERATOR_TANHCLAMP_H_ +#define AIDGE_CORE_OPERATOR_TANHCLAMP_H_ + +#include <cassert> +#include <memory> +#include <vector> + +#include "aidge/backend/OperatorImpl.hpp" +#include "aidge/graph/Node.hpp" +#include "aidge/operator/OperatorTensor.hpp" +#include "aidge/operator/Producer.hpp" +#include "aidge/utils/ErrorHandling.hpp" +#include "aidge/utils/Registrar.hpp" +#include "aidge/utils/StaticAttributes.hpp" +#include "aidge/utils/Types.h" + +namespace Aidge { + +/** + * TanhClamp is the weights clamping for the 1st training phase (clamping) of the SAT method. + */ +class TanhClamp_Op : public OperatorTensor, + public Registrable<TanhClamp_Op, std::string, std::function<std::shared_ptr<OperatorImpl>(const TanhClamp_Op&)>> { + +public: + static const std::string Type; + + TanhClamp_Op() + : OperatorTensor(Type, {InputCategory::Data}, 2) + {} + + /** + * @brief Copy-constructor. Copy the operator attributes and its output tensor(s), but not its input tensors (the new operator has no input associated). + * @param op Operator to copy. + */ + TanhClamp_Op(const TanhClamp_Op& op) + : OperatorTensor(op) + { + if (op.mImpl){ + SET_IMPL_MACRO(TanhClamp_Op, *this, op.backend()); + }else{ + mImpl = nullptr; + } + } + + /** + * @brief Clone the operator using its copy-constructor. 
+ * @see Operator::TanhClamp_Op + */ + std::shared_ptr<Operator> clone() const override { + return std::make_shared<TanhClamp_Op>(*this); + } + + bool forwardDims(bool allowDataDependency = false) override final; + std::set<std::string> getAvailableBackends() const override final; + + void setBackend(const std::string& name, DeviceIdx_t device = 0) override final; + + static const std::vector<std::string> getInputsName(){ + return {"data_input"}; + } + static const std::vector<std::string> getOutputsName(){ + return {"data_output", "scaling"}; + } +}; + +inline std::shared_ptr<Node> TanhClamp(const std::string& name = "") { + return std::make_shared<Node>(std::make_shared<TanhClamp_Op>(), name); +} +} + +#endif /* AIDGE_CORE_OPERATOR_TANHCLAMP_H_ */ diff --git a/include/aidge/quantization/PTQ/Clip.hpp b/include/aidge/quantization/PTQ/Clip.hpp index 740f044faa9be15ab093d471bda0cabb04f3ce53..6fb100979f272b7c7ee0bc64cddd73b374d4ee4a 100644 --- a/include/aidge/quantization/PTQ/Clip.hpp +++ b/include/aidge/quantization/PTQ/Clip.hpp @@ -26,7 +26,7 @@ namespace Aidge /** * @brief Kind of clipping policy to apply during the activation quantization */ - enum Clipping {MAX = 1, MSE, AA, KL}; + enum class Clipping {MAX = 1, MSE, AA, KL}; /** * @brief Compute the histograms of the activations of each node contained in the map of the ranges (passed as argument). 
diff --git a/include/aidge/quantization/PTQ/PTQ.hpp b/include/aidge/quantization/PTQ/PTQ.hpp index d24831c3460aac037091636450a7290c47e1775b..b3e25d5c405368b8ebcbd5220831da7e7d3446d1 100644 --- a/include/aidge/quantization/PTQ/PTQ.hpp +++ b/include/aidge/quantization/PTQ/PTQ.hpp @@ -26,12 +26,12 @@ namespace Aidge { /** * @brief Set of the types of the nodes which contain affine transforms (that is Y = A.X + B) */ - static const std::set<std::string> affineNodeTypes({"FC", "Conv", "ConvDepthWise", "PaddedConv", "PaddedConvDepthWise"}); + static const std::set<std::string> affineNodeTypes({"FC", "Conv2D", "ConvDepthWise2D", "PaddedConv2D", "PaddedConvDepthWise2D"}); /** * @brief Set of the types of the nodes which does not affect the PTQ process */ - static const std::set<std::string> seamlessNodeTypes({"Pad", "MaxPooling", "AvgPooling", "PaddedMaxPooling", "PaddedAvgPooling", "GlobalAveragePooling", "Reshape", "Transpose", "Gather"}); + static const std::set<std::string> seamlessNodeTypes({"Pad2D", "MaxPooling2D", "AvgPooling2D", "PaddedMaxPooling2D", "PaddedAvgPooling2D", "GlobalAveragePooling", "Reshape", "Transpose", "Gather"}); /** * @brief Set of the types of the nodes that merge multiple branches into one @@ -74,6 +74,10 @@ namespace Aidge { */ bool checkArchitecture(std::shared_ptr<GraphView> graphView); + + void prepareNetwork(std::shared_ptr<GraphView> graphView); + + /** * @brief Insert a scaling node after each affine node of the GraphView. * Also insert a scaling node in every purely residual branches. @@ -154,7 +158,6 @@ namespace Aidge { * @param graphView The GraphView under test. 
*/ void devPTQ(std::shared_ptr<GraphView> graphView); - } #endif /* AIDGE_QUANTIZATION_PTQ_PTQ_H_ */ diff --git a/include/aidge/quantization/QAT/QAT_FixedQ.hpp b/include/aidge/quantization/QAT/QAT_FixedQ.hpp new file mode 100644 index 0000000000000000000000000000000000000000..ecbe7422ea85db1771d91e161c93740993ebbe2b --- /dev/null +++ b/include/aidge/quantization/QAT/QAT_FixedQ.hpp @@ -0,0 +1,50 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. + * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#ifndef AIDGE_QUANTIZATION_QAT_FIXEDQ_H_ +#define AIDGE_QUANTIZATION_QAT_FIXEDQ_H_ + +#include "aidge/graph/Node.hpp" +#include "aidge/graph/GraphView.hpp" +#include "aidge/data/Tensor.hpp" + +namespace Aidge { +namespace QuantFixedQ { + +/** + * @brief Insert the FixedQ quantizer nodes in a given GraphView + * @param graphView The GraphView containing the graph to quantize. + * @param nbBits Number of quantization bits. + * @param span Fixed output span of the quantizers. + */ +void insertQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits, float span); + +/** + * @brief Given a GraphView with parameters properly initialized and some calibration data, + * insert the FixedQ quantizer nodes, and adjust their output spans. + * @param graphView The GraphView containing the graph to quantize. + * @param nbBits Number of quantization bits. + * @param calibrationData Calibration data used to adjust the spans. + * @param scale Multiplicative constant applied to the spans. 
+ */
+void insertAndInitQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits, std::shared_ptr<Tensor> calibrationData, float scale);
+
+/**
+ * @brief Development and test routine.
+ * @param graphView The GraphView under test.
+ */
+void devQAT(std::shared_ptr<GraphView> graphView);
+
+}
+}
+
+#endif /* AIDGE_QUANTIZATION_QAT_FIXEDQ_H_ */
+
diff --git a/include/aidge/quantization/QAT/QAT_LSQ.hpp b/include/aidge/quantization/QAT/QAT_LSQ.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..4970be07fae8737a1c2863600757bb81ff3a65f9
--- /dev/null
+++ b/include/aidge/quantization/QAT/QAT_LSQ.hpp
@@ -0,0 +1,44 @@
+/********************************************************************************
+ * Copyright (c) 2023 CEA-List
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License 2.0 which is available at
+ * http://www.eclipse.org/legal/epl-2.0.
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ ********************************************************************************/
+
+#ifndef AIDGE_QUANTIZATION_QAT_LSQ_H_
+#define AIDGE_QUANTIZATION_QAT_LSQ_H_
+
+#include "aidge/graph/Node.hpp"
+#include "aidge/graph/GraphView.hpp"
+#include "aidge/data/Tensor.hpp"
+
+namespace Aidge {
+namespace QuantLSQ {
+
+/**
+ * @brief Insert the LSQ quantizer nodes in a given GraphView
+ * @param graphView The GraphView containing the graph to quantize.
+ * @param nbBits Number of quantization bits.
+ * @param step_size Fixed step size of the quantizers.
+ */
+void insertQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits, float step_size);
+
+/**
+ * @brief Given a GraphView with parameters properly initialized and some calibration data,
+ * insert the LSQ quantizer nodes, and adjust their step-sizes.
+ * @param graphView The GraphView containing the graph to quantize.
+ * @param nbBits Number of quantization bits.
+ * @param calibrationData Calibration data used to adjust the spans.
+ * @param scale Multiplicative constant applied to the spans. + */ +void insertAndInitQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits, std::shared_ptr<Tensor> calibrationData); + +} +} + +#endif /* AIDGE_QUANTIZATION_QAT_LSQ_H_ */ + diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000000000000000000000000000000000000..86501fe29f48c77353abe5ddf7e312204c13cf88 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,71 @@ +[project] +name = "aidge_quantization" +description="Quantization algorithms to compress aidge networks." +dependencies = [ + "numpy>=1.21.6", +] +requires-python = ">= 3.7" +readme = "README.md" +license = { file = "LICENSE" } +classifiers = [ + "Development Status :: 2 - Pre-Alpha", + "Programming Language :: Python :: 3" + ] +dynamic = ["version"] #Â defined in tool.setuptools_scm +# version="1" + +[build-system] +requires = [ + "setuptools>=64", + "setuptools_scm[toml]==7.1.0", + "cmake>=3.15.3.post1", + "toml" +] +build-backend = "setuptools.build_meta" + +##################################################### +# SETUPTOOLS +[tool.setuptools] +include-package-data = true +[tool.setuptools.packages.find] +where = ["."] # list of folders that contain the packages (["."] by default) +include = ["aidge_quantization*"] # package names should match these glob patterns (["*"] by default) +exclude = ["aidge_quantization.unit_tests*"] # exclude packages matching these glob patterns (empty by default) + +# SETUPTOOLS_SCM +[tool.setuptools_scm] +write_to = "aidge_quantization/_version.py" + +##################################################### +# CIBUILDWHEEL +[tool.cibuildwheel] +build-frontend = "build" +test-requires = "pytest" +test-command = "pytest {package}/aidge_quantization/unit_tests" +# uncomment to run cibuildwheel locally on selected distros +# build=[ +# "cp38-manylinux_x86_64", +# "cp39-manylinux_x86_64", +# "cp310-manylinux_x86_64", +# "cp38-win_amd64", +# "cp39-win_amd64", +# "cp310-win_amd64", +# ] +## 
AIDGE DEPENDENCIES DECLARATION +[tool.cibuildwheel.environment] +AIDGE_DEPENDENCIES = "aidge_core aidge_backend_cpu aidge_onnx" # format => "dep_1 dep_2 ... dep_n" +AIDGE_INSTALL="/AIDGE_INSTALL_CIBUILDWHEEL" +[tool.cibuildwheel.linux] +before-test = [ + "bash .gitlab/ci/cibuildwheel_build_deps_before_build_wheel.sh /host" +] +before-build = [ + "bash .gitlab/ci/cibuildwheel_build_deps_before_build_wheel.sh /host" +] +[tool.cibuildwheel.windows] +before-build = [ + "powershell -File .\\.gitlab\\ci\\cibuildwheel_build_deps_before_build_wheel.ps1" +] +before-test = [ + "powershell -File .\\.gitlab\\ci\\cibuildwheel_build_deps_before_build_wheel.ps1" +] diff --git a/python_binding/operator/SAT/pybind_DoReFa.cpp b/python_binding/operator/SAT/pybind_DoReFa.cpp new file mode 100644 index 0000000000000000000000000000000000000000..4e66665f71cf22ee48dd814692e7645c515e4dc1 --- /dev/null +++ b/python_binding/operator/SAT/pybind_DoReFa.cpp @@ -0,0 +1,34 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. 
+ * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#include <pybind11/pybind11.h> + +#include "aidge/data/Tensor.hpp" +#include "aidge/operator/SAT/DoReFa.hpp" +#include "aidge/operator/OperatorTensor.hpp" + +namespace py = pybind11; +namespace Aidge { + +void init_DoReFa(py::module& m) { + py::enum_<DoReFaMode>(m, "DoReFaMode") + .value("Default", DoReFaMode::Default) + .value("Symmetric", DoReFaMode::Symmetric) + .export_values(); + + py::class_<DoReFa_Op, std::shared_ptr<DoReFa_Op>, OperatorTensor>(m, "DoReFaOp", py::multiple_inheritance()) + .def(py::init<size_t, DoReFaMode>(), py::arg("range") = 255, py::arg("mode") = DoReFaMode::Default) + .def_static("get_inputs_name", &DoReFa_Op::getInputsName) + .def_static("get_outputs_name", &DoReFa_Op::getOutputsName); + declare_registrable<DoReFa_Op>(m, "DoReFaOp"); + m.def("DoReFa", &DoReFa, py::arg("range") = 255, py::arg("mode") = DoReFaMode::Default, py::arg("name") = ""); +} +} // namespace Aidge diff --git a/python_binding/operator/SAT/pybind_TanhClamp.cpp b/python_binding/operator/SAT/pybind_TanhClamp.cpp new file mode 100644 index 0000000000000000000000000000000000000000..ed019b20e73bf916cf26da5a443fb23c41370d09 --- /dev/null +++ b/python_binding/operator/SAT/pybind_TanhClamp.cpp @@ -0,0 +1,29 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. 
+ * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#include <pybind11/pybind11.h> + +#include "aidge/data/Tensor.hpp" +#include "aidge/operator/SAT/TanhClamp.hpp" +#include "aidge/operator/OperatorTensor.hpp" + +namespace py = pybind11; +namespace Aidge { + +void init_TanhClamp(py::module& m) { + py::class_<TanhClamp_Op, std::shared_ptr<TanhClamp_Op>, OperatorTensor>(m, "TanhClampOp", py::multiple_inheritance()) + .def(py::init<>()) + .def_static("get_inputs_name", &TanhClamp_Op::getInputsName) + .def_static("get_outputs_name", &TanhClamp_Op::getOutputsName); + declare_registrable<TanhClamp_Op>(m, "TanhClampOp"); + m.def("TanhClamp", &TanhClamp, py::arg("name") = ""); +} +} // namespace Aidge diff --git a/python_binding/operator/pybind_FixedQ.cpp b/python_binding/operator/pybind_FixedQ.cpp new file mode 100644 index 0000000000000000000000000000000000000000..a85fde261e3b3651446e031abb15518a960fac68 --- /dev/null +++ b/python_binding/operator/pybind_FixedQ.cpp @@ -0,0 +1,29 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. 
+ * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#include <pybind11/pybind11.h> + +#include "aidge/data/Tensor.hpp" +#include "aidge/operator/FixedQ.hpp" +#include "aidge/operator/OperatorTensor.hpp" + +namespace py = pybind11; +namespace Aidge { + +void init_FixedQ(py::module& m) { + py::class_<FixedQ_Op, std::shared_ptr<FixedQ_Op>, OperatorTensor>(m, "FixedQOp", py::multiple_inheritance()) + .def(py::init<std::size_t, float, bool>(), py::arg("nb_bits"), py::arg("span"), py::arg("is_output_unsigned")) + .def_static("get_inputs_name", &FixedQ_Op::getInputsName) + .def_static("get_outputs_name", &FixedQ_Op::getOutputsName); + declare_registrable<FixedQ_Op>(m, "FixedQOp"); + m.def("FixedQ", &FixedQ, py::arg("nb_bits") = 8, py::arg("span") = 4.0f, py::arg("is_output_unsigned") = false, py::arg("name") = ""); +} +} // namespace Aidge \ No newline at end of file diff --git a/python_binding/operator/pybind_LSQ.cpp b/python_binding/operator/pybind_LSQ.cpp new file mode 100644 index 0000000000000000000000000000000000000000..9f5fe467c484067caed951aad873196c7fc8eef4 --- /dev/null +++ b/python_binding/operator/pybind_LSQ.cpp @@ -0,0 +1,29 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. 
+ * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#include <pybind11/pybind11.h> + +#include "aidge/data/Tensor.hpp" +#include "aidge/operator/LSQ.hpp" +#include "aidge/operator/OperatorTensor.hpp" + +namespace py = pybind11; +namespace Aidge { + +void init_LSQ(py::module& m) { + py::class_<LSQ_Op, std::shared_ptr<LSQ_Op>, OperatorTensor>(m, "LSQOp", py::multiple_inheritance()) + .def(py::init<const std::pair<int, int>&>(), py::arg("range") = std::pair<int, int>{0, 255}) + .def_static("get_inputs_name", &LSQ_Op::getInputsName) + .def_static("get_outputs_name", &LSQ_Op::getOutputsName); + declare_registrable<LSQ_Op>(m, "LSQOp"); + m.def("LSQ", &LSQ, py::arg("range") = std::pair<int, int>{0, 255}, py::arg("name") = ""); +} +} // namespace Aidge diff --git a/python_binding/pybind_PTQ.cpp b/python_binding/pybind_PTQ.cpp index 1d9359c20b74943943104223a283edda8d0d74b5..3f9a470dec0b89f76dbcb4803972ad5fa9a1aa0a 100644 --- a/python_binding/pybind_PTQ.cpp +++ b/python_binding/pybind_PTQ.cpp @@ -14,9 +14,9 @@ #include <string> -#include "aidge/PTQ/Clip.hpp" -#include "aidge/PTQ/CLE.hpp" -#include "aidge/PTQ/PTQ.hpp" +#include "aidge/quantization/PTQ/Clip.hpp" +#include "aidge/quantization/PTQ/CLE.hpp" +#include "aidge/quantization/PTQ/PTQ.hpp" #include "aidge/hook/Hook.hpp" #include "aidge/graph/GraphView.hpp" @@ -24,7 +24,7 @@ namespace py = pybind11; namespace Aidge { -void init_QuantPTQ(py::module &m) { +void init_PTQ(py::module &m) { py::enum_<Clipping>(m, "Clipping", "Kind of clipping policy to apply during the activation quantization") .value("MAX", Clipping::MAX) @@ -32,7 +32,7 @@ void init_QuantPTQ(py::module &m) { .value("AA" , Clipping::AA) .value("KL" , Clipping::KL); - m.def("check_architecture", &checkArchitecture, py::arg("network"), + m.def("check_architecture", &checkArchitecture, py::arg("network"), R"mydelimiter( Determine whether an input GraphView can be quantized or not. 
:param network: The GraphView to be checked. @@ -41,15 +41,15 @@ void init_QuantPTQ(py::module &m) { :rtype: bool )mydelimiter"); - m.def("insert_scaling_nodes", &insertScalingNodes, py::arg("network"), + m.def("insert_scaling_nodes", &insertScalingNodes, py::arg("network"), R"mydelimiter( - Insert a scaling node after each affine node of the GraphView. + Insert a scaling node after each affine node of the GraphView. Also insert a scaling node in every purely residual branches. :param network: The GraphView containing the affine nodes. :type network: :py:class:`aidge_core.GraphView` )mydelimiter"); - m.def("normalize_parameters", &normalizeParameters, py::arg("network"), + m.def("normalize_parameters", &normalizeParameters, py::arg("network"), R"mydelimiter( Normalize the parameters of each parametrized node, so that they fit in the [-1:1] range. :param network: The GraphView containing the parametrized nodes. @@ -70,17 +70,17 @@ void init_QuantPTQ(py::module &m) { )mydelimiter"); m.def("normalize_activations", &normalizeActivations, py::arg("network"), py::arg("value_ranges"), - R"mydelimiter( + R"mydelimiter( Normalize the activations of each affine node so that they fit in the [-1:1] range. This is done by reconfiguring the scaling nodes, as well as rescaling the weights and biases tensors. :param network: The GraphView containing the affine nodes. :type network: :py:class:`aidge_core.GraphView` - :param value_ranges: The node output value ranges computed over the calibration dataset. + :param value_ranges: The node output value ranges computed over the calibration dataset. :type value_ranges: list of float. 
)mydelimiter"); - m.def("quantize_normalized_network", &quantizeNormalizedNetwork, py::arg("network"), py::arg("nb_bits"), py::arg("apply_rounding"), py::arg("optimize_signs"), py::arg("verbose"), - R"mydelimiter( + m.def("quantize_normalized_network", &quantizeNormalizedNetwork, py::arg("network"), py::arg("nb_bits"), py::arg("apply_rounding"), py::arg("optimize_signs"), py::arg("verbose") = false, + R"mydelimiter( Quantize an already normalized (in term of parameters and activations) network. :param network: The GraphView to be quantized. :type network: :py:class:`aidge_core.GraphView` @@ -94,14 +94,14 @@ void init_QuantPTQ(py::module &m) { :type verbose: bool )mydelimiter"); - m.def("quantize_network", &quantizeNetwork ,py::arg("network"), py::arg("nb_bits"), py::arg("input_dataset"), py::arg("clipping_mode") = "MAX", py::arg("apply_rounding") = true, py::arg("optimize_signs") = false, py::arg("single_shift") = false, py::arg("verbose") = false, - R"mydelimiter( + m.def("quantize_network", &quantizeNetwork ,py::arg("network"), py::arg("nb_bits"), py::arg("input_dataset"), py::arg("clipping_mode") = Clipping::MAX, py::arg("apply_rounding") = true, py::arg("optimize_signs") = false, py::arg("single_shift") = false, py::arg("verbose") = false, + R"mydelimiter( Main quantization routine. Performs every step of the quantization pipeline. :param network: The GraphView to be quantized. :type network: :py:class:`aidge_core.GraphView` :param nb_bits: The desired number of bits of the quantization. :type nb_bits: int - :param input_dataset: The input dataset on which the value ranges are computed. + :param input_dataset: The input dataset on which the value ranges are computed. :type input_dataset: list of :py:class:`aidge_core.Tensor` :param clipping_mode: Type of the clipping optimization. Can be either 'MAX', 'MSE', 'AA' or 'KL'. 
:type clipping_mode: string @@ -109,13 +109,13 @@ void init_QuantPTQ(py::module &m) { :type apply_rounding: bool :param optimize_signs: Whether to take account of the IO signs of the operators or not. :type optimize_signs: bool - :param single_shift: Whether to convert the scaling factors into powers of two. If true the approximations are compensated using the previous nodes weights. + :param single_shift: Whether to convert the scaling factors into powers of two. If true the approximations are compensated using the previous nodes weights. :type single_shift: bool :param verbose: Whether to print internal informations about the quantization process. :type verbose: bool )mydelimiter"); - m.def("compute_histograms", &computeHistograms, py::arg("value_ranges"), py::arg("nb_bins"), py::arg("network"), py::arg("input_dataset"), + m.def("compute_histograms", &computeHistograms, py::arg("value_ranges"), py::arg("nb_bins"), py::arg("network"), py::arg("input_dataset"), R"mydelimiter( Compute the histograms of the activations of each node contained in the map of the ranges (passed as argument). :param value_ranges: A map associating each considered node name to its corresponding output range. @@ -154,11 +154,11 @@ void init_QuantPTQ(py::module &m) { :rtype: float )mydelimiter"); - m.def("adjust_ranges", &adjustRanges, py::arg("clipping_mode"), py::arg("value_ranges"), py::arg("nb_bits"), py::arg("network"), py::arg("input_dataset"), py::arg("verbose"), + m.def("adjust_ranges", &adjustRanges, py::arg("clipping_mode"), py::arg("value_ranges"), py::arg("nb_bits"), py::arg("network"), py::arg("input_dataset"), py::arg("verbose") = false, R"mydelimiter( - Return a corrected map of the provided activation ranges. - To do so compute the optimal clipping values for every node and multiply the input ranges by those values. - The method used to compute the clippings can be eihter 'MSE', 'AA', 'KL' or 'MAX'. + Return a corrected map of the provided activation ranges. 
+ To do so compute the optimal clipping values for every node and multiply the input ranges by those values. + The method used to compute the clippings can be either 'MSE', 'AA', 'KL' or 'MAX'. :param clipping_mode: The method used to compute the optimal clippings. :type clipping_mode: enum :param value_ranges: The map associating each affine node to its output range. @@ -176,30 +176,30 @@ void init_QuantPTQ(py::module &m) { )mydelimiter"); - m.def("compute_sign_map", &computeSignMap, py::arg("network"), py::arg("verbose"), + m.def("compute_sign_map", &computeSignMap, py::arg("network"), py::arg("verbose") = false, R"mydelimiter( - For each node, compute the sign of its input and output values. + For each node, compute the sign of its input and output values. The goal of the routine is to maximize the number of unsigned IOs in order to double the value resolution when possible. :param network: The GraphView to analyze. - :type network: :py:class:`aidge_core.GraphView` + :type network: :py:class:`aidge_core.GraphView` :param verbose: Whether to print the sign map or not. - :type verbose: bool + :type verbose: bool :return: A map associating a pair of signs to each node of the GraphView (a sign for the input and one for the output). - :rtype: dict + :rtype: dict )mydelimiter"); m.def("cross_layer_equalization", &crossLayerEqualization, py::arg("network"), py::arg("target_delta"), R"mydelimiter( - Equalize the ranges of the nodes parameters by proceding iteratively. + Equalize the ranges of the nodes parameters by proceeding iteratively. Can only be applied to single branch networks (otherwise does not edit the graphView). :param network: The GraphView to process. 
- :type network: :py:class:`aidge_core.GraphView` + :type network: :py:class:`aidge_core.GraphView` :param target_delta: the stopping criterion (typical value : 0.01) - :type target_delta: float + :type target_delta: float )mydelimiter"); m.def("get_weight_ranges", &getWeightRanges, py::arg("network"), - R"mydelimiter( + R"mydelimiter( Compute the weight ranges of every affine nodes. Provided for debugging purposes. :param network: The GraphView containing the affine nodes. :type network: :py:class:`aidge_core.GraphView` @@ -207,23 +207,22 @@ void init_QuantPTQ(py::module &m) { :rtype: dict )mydelimiter"); - m.def("clear_biases", &clearBiases, py::arg("network"), + m.def("clear_biases", &clearBiases, py::arg("network"), R"mydelimiter( Clear the affine nodes biases. Provided form debugging purposes. :param network: The GraphView containing the affine nodes. :type network: :py:class:`aidge_core.GraphView` )mydelimiter"); - m.def("dev_ptq", &devPTQ, py::arg("network"), + m.def("dev_ptq", &devPTQ, py::arg("network"), R"mydelimiter( Developement and test routine. :param network: The GraphView under test. - :type network: :py:class:`aidge_core.GraphView` + :type network: :py:class:`aidge_core.GraphView` )mydelimiter"); -} -PYBIND11_MODULE(aidge_quantization, m) { - init_QuantPTQ(m); + m.def("prepare_network", &prepareNetwork, py::arg("network"), "prepare the network for the PTQ"); + } } // namespace Aidge diff --git a/python_binding/pybind_QAT_FixedQ.cpp b/python_binding/pybind_QAT_FixedQ.cpp new file mode 100644 index 0000000000000000000000000000000000000000..b247b6f52ef1c357131ef1e1803f57bda7bbc5de --- /dev/null +++ b/python_binding/pybind_QAT_FixedQ.cpp @@ -0,0 +1,33 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. 
+ * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#include <pybind11/pybind11.h> +#include <pybind11/stl.h> + +#include "aidge/quantization/QAT/QAT_FixedQ.hpp" +#include "aidge/hook/Hook.hpp" +#include "aidge/graph/GraphView.hpp" + +namespace py = pybind11; + +namespace Aidge { + +void init_QAT_FixedQ(py::module &m) { + + auto mQuantFixedQ = m.def_submodule("fixedq"); + + mQuantFixedQ.def("insert_quantizers", &QuantFixedQ::insertQuantizers, py::arg("network"), py::arg("nb_bits"), py::arg("span")); + + mQuantFixedQ.def("insert_and_init_quantizers", &QuantFixedQ::insertAndInitQuantizers, py::arg("network"), py::arg("nb_bits"), py::arg("calibration_data"), py::arg("scale")); + + mQuantFixedQ.def("dev_qat", &QuantFixedQ::devQAT, py::arg("network")); +} +} // namespace Aidge diff --git a/python_binding/pybind_QAT_LSQ.cpp b/python_binding/pybind_QAT_LSQ.cpp new file mode 100644 index 0000000000000000000000000000000000000000..6b392da633ae16ad668072360628f54aa3997f07 --- /dev/null +++ b/python_binding/pybind_QAT_LSQ.cpp @@ -0,0 +1,31 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. 
+ * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#include <pybind11/pybind11.h> +#include <pybind11/stl.h> + +#include "aidge/quantization/QAT/QAT_LSQ.hpp" +#include "aidge/hook/Hook.hpp" +#include "aidge/graph/GraphView.hpp" + +namespace py = pybind11; + +namespace Aidge { + +void init_QAT_LSQ(py::module &m) { + + auto mQuantLSQ = m.def_submodule("lsq"); + + mQuantLSQ.def("insert_quantizers", &QuantLSQ::insertQuantizers, py::arg("network"), py::arg("nb_bits"), py::arg("step_size")); + + mQuantLSQ.def("insert_and_init_quantizers", &QuantLSQ::insertAndInitQuantizers, py::arg("network"), py::arg("nb_bits"), py::arg("calibration_data")); +} +} // namespace Aidge diff --git a/python_binding/pybind_Quantization.cpp b/python_binding/pybind_Quantization.cpp new file mode 100644 index 0000000000000000000000000000000000000000..f998d8c242aa0d8f3215b1577ca4cfeba73fa265 --- /dev/null +++ b/python_binding/pybind_Quantization.cpp @@ -0,0 +1,48 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. 
+ * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#include <pybind11/pybind11.h> +#include <pybind11/stl.h> + +#include "aidge/backend/QuantizationCPU.hpp" +//#include "aidge/backend/QuantizationCUDA.hpp" + +namespace py = pybind11; + +namespace Aidge +{ + +// operators +void init_FixedQ(py::module& m); +void init_LSQ(py::module& m); +void init_TanhClamp(py::module& m); +void init_DoReFa(py::module& m); + + +// quantization routines +void init_PTQ(py::module &m); +void init_QAT_FixedQ(py::module &m); +void init_QAT_LSQ(py::module &m); + + +PYBIND11_MODULE(aidge_quantization, m) +{ + init_FixedQ(m); + init_LSQ(m); + init_TanhClamp(m); + init_DoReFa(m); + + init_PTQ(m); + init_QAT_FixedQ(m); + init_QAT_LSQ(m); +} + +} // namespace Aidge diff --git a/setup.py b/setup.py index 16305afdfdfa5de2e328460d9e96c77eb96a9d98..7c93d282641ce59a2ba611f9abdcb28be8987269 100644 --- a/setup.py +++ b/setup.py @@ -1,37 +1,24 @@ #!/usr/bin/env python3 -""" Aidge - -#TODO To change -POC of the next framework named Aidge -""" - -DOCLINES = (__doc__ or '').split("\n") - import sys import os -# Python supported version checks -if sys.version_info[:2] < (3, 7): - raise RuntimeError("Python version >= 3.7 required.") - - -CLASSIFIERS = """\ -Development Status :: 2 - Pre-Alpha -""" - import shutil import pathlib -import subprocess import multiprocessing from math import ceil +import toml + from setuptools import setup, Extension -from setuptools import find_packages from setuptools.command.build_ext import build_ext + def get_project_name() -> str: - return open(pathlib.Path().absolute() / "project_name.txt", "r").read() + with open(pathlib.Path().absolute() / "pyproject.toml", "r") as file: + project_toml = toml.load(file) + return project_toml["project"]["name"] + def get_project_version() -> str: aidge_root = pathlib.Path().absolute() @@ -43,8 +30,8 @@ class CMakeExtension(Extension): def __init__(self, 
name): super().__init__(name, sources=[]) -class CMakeBuild(build_ext): +class CMakeBuild(build_ext): def run(self): # This lists the number of processors available on the machine # The compilation will use half of them @@ -62,17 +49,48 @@ class CMakeBuild(build_ext): os.chdir(str(build_temp)) - # Impose to use the executable of the python - # used to launch setup.py to setup PythonInterp - param_py = "-DPYTHON_EXECUTABLE=" + sys.executable - - install_path = os.path.join(sys.prefix, "lib", "libAidge") if "AIDGE_INSTALL" not in os.environ else os.environ["AIDGE_INSTALL"] + python_executable = sys.executable + print(f"python executable : {python_executable}") + + compile_type = ( + "Release" + if "AIDGE_PYTHON_BUILD_TYPE" not in os.environ + else os.environ["AIDGE_PYTHON_BUILD_TYPE"] + ) + + install_path = ( + os.path.join(sys.prefix, "lib", "libAidge") + if "AIDGE_INSTALL" not in os.environ + else os.environ["AIDGE_INSTALL"] + ) + + build_gen = ( + ["-G", os.environ["AIDGE_BUILD_GEN"]] + if "AIDGE_BUILD_GEN" in os.environ + else [] + ) + + self.spawn( + [ + "cmake", + *build_gen, + str(cwd), + "-DTEST=OFF", + f"-DCMAKE_INSTALL_PREFIX:PATH={install_path}", + f"-DCMAKE_BUILD_TYPE={compile_type}", + "-DPYBIND=ON", + "-DCMAKE_EXPORT_COMPILE_COMMANDS=ON", + "-DCOVERAGE=OFF", + ] + ) - self.spawn(['cmake', str(cwd), param_py, '-DTEST=OFF', f'-DCMAKE_INSTALL_PREFIX:PATH={install_path}']) if not self.dry_run: - self.spawn(['make', 'all', 'install', '-j', max_jobs]) + self.spawn( + ["cmake", "--build", ".", "--config", compile_type, "-j", max_jobs] + ) + self.spawn(["cmake", "--install", ".", "--config", compile_type]) os.chdir(str(cwd)) - + aidge_package = build_lib / (get_project_name()) # Get "aidge core" package @@ -81,8 +99,8 @@ class CMakeBuild(build_ext): # Copy all shared object files from build_temp/lib to aidge_package for root, _, files in os.walk(build_temp.absolute()): for file in files: - if file.endswith('.so') and (root != str(aidge_package.absolute())): - 
currentFile=os.path.join(root, file) + if file.endswith(".so") and (root != str(aidge_package.absolute())): + currentFile = os.path.join(root, file) shutil.copy(currentFile, str(aidge_package.absolute())) # Copy version.txt in aidge_package @@ -90,23 +108,12 @@ class CMakeBuild(build_ext): shutil.copy("version.txt", str(aidge_package.absolute())) -if __name__ == '__main__': - +if __name__ == "__main__": setup( - name=get_project_name(), - version=get_project_version(), - python_requires='>=3.7', - description=DOCLINES[0], - long_description_content_type="text/markdown", - long_description="\n".join(DOCLINES[2:]), - classifiers=[c for c in CLASSIFIERS.split('\n') if c], - platforms=["Linux"], - packages=find_packages(where="."), include_package_data=True, ext_modules=[CMakeExtension(get_project_name())], cmdclass={ - 'build_ext': CMakeBuild, + "build_ext": CMakeBuild, }, zip_safe=False, - ) diff --git a/src/PTQ/CLE.cpp b/src/PTQ/CLE.cpp index df9b31edc614a8d220747d36832aff710f663bff..45b34fbad408a81785baefe578cdfac5f49a9627 100644 --- a/src/PTQ/CLE.cpp +++ b/src/PTQ/CLE.cpp @@ -84,11 +84,11 @@ void crossLayerEqualization(std::shared_ptr<GraphView> graphView, float targetDe do { maxRangeDelta = 0.0; - /* - std::cout << " ----- " << std::endl; - for (std::shared_ptr<Node> node : affineNodeVector) - std::cout << getTensorAbsoluteMax(getWeightTensor(node)) << std::endl; - */ + + //std::cout << " ----- " << std::endl; + //for (std::shared_ptr<Node> node : affineNodeVector) + // std::cout << getTensorAbsoluteMax(getWeightTensor(node)) << std::endl; + for (size_t i = 0; i < (affineNodeVector.size() - 1); i++) { std::shared_ptr<Node> n1 = affineNodeVector[i]; diff --git a/src/PTQ/Clip.cpp b/src/PTQ/Clip.cpp index 502685b12105c4746b0a509525a14cabce3a6eff..5ec5f869e2d48ec228040ab8de59142a374ddb09 100644 --- a/src/PTQ/Clip.cpp +++ b/src/PTQ/Clip.cpp @@ -53,7 +53,7 @@ std::map<std::string, std::vector<int>> computeHistograms(std::map<std::string, for (std::shared_ptr<Tensor> 
inputTensor : inputDataSet) { - Log::info(" IT (BIS) : {}", it++); + Log::debug(" IT (BIS) : {}", it++); // Inference ... @@ -225,4 +225,4 @@ std::map<std::string, float> adjustRanges(Clipping clippingMode, std::map<std::s return valueRanges; } -} \ No newline at end of file +} diff --git a/src/PTQ/PTQ.cpp b/src/PTQ/PTQ.cpp index 6409b7bc2a783b62d74abef63e7aade33ac771df..05a5b8cbac4433dc7c44ce2c9ad3f31b75c67b1d 100644 --- a/src/PTQ/PTQ.cpp +++ b/src/PTQ/PTQ.cpp @@ -69,7 +69,7 @@ bool isMerging(std::shared_ptr<Node> node) bool checkArchitecture(std::shared_ptr<GraphView> graphView) { - std::set<std::string> otherNodeTypes({"Flatten", "Softmax", "ReLU", "Producer"}); + std::set<std::string> otherNodeTypes({"Flatten", "Softmax", "BatchNorm2D", "ReLU", "Producer"}); for (std::shared_ptr<Node> node : graphView->getNodes()) { @@ -128,8 +128,7 @@ static float getTensorAbsoluteMax(std::shared_ptr <Tensor> tensor) return maxValue; } -static void removeMatchingNodes(std::vector<std::shared_ptr<Node>>& nodeVector, std::string nodeType) -{ +/* std::vector<std::shared_ptr<Node>>::iterator iter = nodeVector.begin(); while (iter != nodeVector.end()) { @@ -138,6 +137,17 @@ static void removeMatchingNodes(std::vector<std::shared_ptr<Node>>& nodeVector, else ++iter; } +*/ + +// TODO : pass nodeVector by reference ... 
+static std::vector<std::shared_ptr<Node>> removeMatchingNodes(std::vector<std::shared_ptr<Node>> nodeVector, std::string nodeType) +{ + std::vector<std::shared_ptr<Node>> remainingNodes; + for (std::shared_ptr<Node> node : nodeVector) + if (node->type() != nodeType) + remainingNodes.push_back(node); + + return remainingNodes; } static void fixScheduling(std::vector<std::shared_ptr<Node>>& nodeVector) { @@ -181,10 +191,14 @@ std::vector<std::shared_ptr<Node>> retrieveNodeVector(std::shared_ptr<GraphView> nodeVector = scheduler.getStaticScheduling(); + //std::cout << " RNV : NB OF NODES = " << nodeVector.size() << std::endl; + //for (auto node : nodeVector) + // std::cout << node->type() << std::endl; + fixScheduling(nodeVector); - removeMatchingNodes(nodeVector, "Producer"); + nodeVector = removeMatchingNodes(nodeVector, "Producer"); - if (verbose) + if (verbose) { Log::info("NB OF NODES = {}", nodeVector.size()); for (std::shared_ptr<Node> node : nodeVector) @@ -216,7 +230,7 @@ static void popSoftMax(std::shared_ptr<GraphView> graphView) } } -static void prepareNetwork(std::shared_ptr<GraphView> graphView) +void prepareNetwork(std::shared_ptr<GraphView> graphView) { removeFlatten(graphView); @@ -339,6 +353,18 @@ static std::shared_ptr<Node> getPreviousScalingNode(std::shared_ptr<Node> mergin return currNode; } +// XXX double check this ! +static bool nodeHasBias(std::shared_ptr<Node> node) +{ + if (node->getParents().size() == 3) + { + std::shared_ptr<Tensor> biasTensor = getBiasTensor(node); + if (biasTensor) + return true; + } + return false; +} + void normalizeParameters(std::shared_ptr<GraphView> graphView) { // CREATE THE ACCUMULATED RATIO MAP /////////////////////////////////////// @@ -387,15 +413,12 @@ void normalizeParameters(std::shared_ptr<GraphView> graphView) accumulatedRatios[node->name()] = accumulatedRatios[prevNode->name()] * ratio; } - // Handle the bias ... 
- bool nodeHasBias = (node->getParents().size() == 3); - if (nodeHasBias) + // Handle the bias .. + + if (nodeHasBias(node)) { std::shared_ptr<Tensor> biasTensor = getBiasTensor(node); - - // Check that a bias is present (as it is optional) - if (biasTensor) - rescaleTensor(biasTensor, accumulatedRatios[node->name()] ); + rescaleTensor(biasTensor, accumulatedRatios[node->name()] ); } } @@ -478,7 +501,7 @@ std::map<std::string, float> computeRanges(std::shared_ptr<GraphView> graphView, for (std::shared_ptr<Tensor> sample : inputDataSet) { - Log::info(" IT : {}", it++); + Log::debug(" IT : {}", it++); // Inference ... @@ -569,8 +592,9 @@ void normalizeActivations(std::shared_ptr<GraphView> graphView, std::map<std::st if (isAffine(prevNode)) { - bool prevNodeHasBias = (prevNode->getParents().size() == 3); - if (prevNodeHasBias) { + bool prevNodeHasBias = nodeHasBias(prevNode); + if (prevNodeHasBias) + { std::shared_ptr<Tensor> biasTensor = getBiasTensor(prevNode); rescaleTensor(biasTensor, 1.0 / prevScalingFactor); } @@ -722,14 +746,14 @@ std::map<std::string, std::pair<bool, bool>> computeSignMap(std::shared_ptr<Grap // SANITY CHECK (TEMPORARY) for (std::shared_ptr<Node> node : nodeVector) - if (node != firstNode) + { + for (std::shared_ptr<Node> child : node->getChildren()) { - for (std::shared_ptr<Node> parent : node->getParents()) - if (parent->type() != "Producer") - if (signMap[parent->name()].second != signMap[node->name()].first) - Log::error(" computeSignMap : link is not sane ! ({} -> {})", parent->name(), node->name()); + if (signMap[node->name()].second != signMap[child->name()].first) + Log::error(" computeSignMap : link is not sane ! 
({} -> {})", node->name(), child->name()); } - + } + return signMap; } @@ -769,8 +793,7 @@ void quantizeNormalizedNetwork(std::shared_ptr<GraphView> graphView, std::uint8_ // Rescale the bias tensor - bool nodeHasBias = (node->getParents().size() == 3); - if (nodeHasBias) + if (nodeHasBias(node)) { bool inputIsUnsigned = signMap[node->name()].first; float rescaling = inputIsUnsigned ? unsignedMax * signedMax : signedMax * signedMax; @@ -919,8 +942,7 @@ void performSingleShiftApproximation(std::shared_ptr<GraphView> graphView, bool if (applyRounding) roundTensor(weightTensor); - bool nodeHasBias = (node->getParents().size() == 3); - if (nodeHasBias) + if (nodeHasBias(node)) { std::shared_ptr<Tensor> biasTensor = getBiasTensor(node); rescaleTensor(biasTensor, ratio); diff --git a/src/QAT/QAT_FixedQ.cpp b/src/QAT/QAT_FixedQ.cpp new file mode 100644 index 0000000000000000000000000000000000000000..bfc88dc6c17c560a037eb71d2b0f13eafe7ef052 --- /dev/null +++ b/src/QAT/QAT_FixedQ.cpp @@ -0,0 +1,162 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. 
+ * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#include "aidge/quantization/QAT/QAT_FixedQ.hpp" +#include "aidge/operator/FixedQ.hpp" + +#include "aidge/data/Tensor.hpp" +#include "aidge/graph/GraphView.hpp" +#include "aidge/scheduler/SequentialScheduler.hpp" +#include "aidge/scheduler/Scheduler.hpp" +#include "aidge/graph/Matching.hpp" + +namespace Aidge { + +void QuantFixedQ::insertQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits, float span) +{ + const auto matches = SinglePassGraphMatching(graphView).match("(Conv#|FC#)"); + + for (const auto& match : matches) + { + auto linearNode = match.graph->rootNode(); + + // INPUT QUANTIZERS INSERTION + + auto inputQuantizerName = linearNode->name() + "_fixedq_i"; // TODO : double check this, and use createUniqueName() + auto inputQuantizerNode = FixedQ(nbBits, span, false, inputQuantizerName); + + // Absorb the ReLU when possible ... + + bool nodeHasParent = static_cast<bool> (linearNode->getParents()[0]); // XXX is this safe ??? + + if (nodeHasParent) { + auto parentNode = linearNode->getParents()[0]; + if (parentNode->type() == "ReLU") { + auto inputQuantizerOp = std::static_pointer_cast<FixedQ_Op> (inputQuantizerNode->getOperator()); + inputQuantizerOp->isOutputUnsigned() = true; + graphView->replace({parentNode}, {}); + } + } + + // We need to handle the case where the linear node is the first one ... 
+ + if (nodeHasParent) { + graphView->insertParent(linearNode, inputQuantizerNode, 0, 0, 0); + } else { + inputQuantizerNode->addChild(graphView); + graphView->add(inputQuantizerNode); + } + + // PARAM QUANTIZERS INSERTION + + auto paramQuantizerName = linearNode->name() + "_fixedq_p"; // TODO : double check this, and use createUniqueName() + auto paramQuantizerNode = FixedQ(nbBits, span, false, paramQuantizerName); + graphView->insertParent(linearNode, paramQuantizerNode, 1, 0, 0); + } +} + +static float getTensorStd(std::shared_ptr<Tensor> tensor) +{ + float acc = 0; + float * castedTensor = static_cast<float *> (tensor->getImpl()->rawPtr()); + for(std::size_t i = 0; i < tensor->size(); i++) + acc += castedTensor[i] * castedTensor[i]; + acc /= static_cast<float> (tensor->size()); + return std::sqrt(acc); +} + +static std::map<std::string, float> collectInputStats(std::shared_ptr<GraphView> graphView, std::shared_ptr<Tensor> calibrationData) +{ + // Propagate the calibration tensor + + SequentialScheduler scheduler(graphView); + scheduler.resetScheduling(); + scheduler.forward(true, {calibrationData}); + + // Store the input tensor statistics + + std::map<std::string, float> inputStats; + for (auto node : graphView->getNodes()) + { + if (node->type() == "FC" || node->type() == "Conv") // TODO: use graph matching !!! + { + const auto op = std::static_pointer_cast<FixedQ_Op>(node->getOperator()); + float inputStd = getTensorStd(op->getInput(0)); + inputStats.insert(std::make_pair(node->name(), inputStd)); + std::cout << node->name() << " -> " << inputStd << std::endl; + } + } + + return inputStats; +} + +static std::map<std::string, float> collectParamStats(std::shared_ptr<GraphView> graphView) +{ + std::map<std::string, float> paramStats; + for (auto node : graphView->getNodes()) + { + if (node->type() == "FC" || node->type() == "Conv") // TODO: use graph matching !!! 
+ { + const auto op = std::static_pointer_cast<FixedQ_Op>(node->getOperator()); + float paramStd = getTensorStd(op->getInput(1)); + paramStats.insert(std::make_pair(node->name(), paramStd)); + std::cout << node->name() << " -> " << paramStd << std::endl; + } + } + + return paramStats; +} + +static void adjustQuantizersSpans(std::shared_ptr<GraphView> graphView, std::map<std::string, float> inputStats, std::map<std::string, float> paramStats, float scale = 4.0f) +{ + const auto matches = SinglePassGraphMatching(graphView).match("(Conv#|FC#)"); + + for (const auto& match : matches) + { + auto linearNode = match.graph->rootNode(); + + // Adjust the input quantizers spans + + auto inputQuantNode = linearNode->getParent(0); + auto inputQuantOp = std::static_pointer_cast<FixedQ_Op>(inputQuantNode->getOperator()); + inputQuantOp->span() = inputStats[linearNode->name()] * scale; + + // Adjust the param quantizers spans + + auto paramQuantNode = linearNode->getParent(1); + auto paramQuantOp = std::static_pointer_cast<FixedQ_Op>(paramQuantNode->getOperator()); + paramQuantOp->span() = paramStats[linearNode->name()] * scale; + } +} + +void QuantFixedQ::insertAndInitQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits, std::shared_ptr<Tensor> calibrationData, float scale) +{ + // Collect the tensor statistics + auto inputStats = collectInputStats(graphView, calibrationData); + auto paramStats = collectParamStats(graphView); + + // Insert the quantizers + insertQuantizers(graphView, nbBits, 1.0); + + // Adjust the quantizers spans + adjustQuantizersSpans(graphView, inputStats, paramStats, scale); +} + +void QuantFixedQ::devQAT(std::shared_ptr<GraphView> graphView) +{ + SequentialScheduler scheduler(graphView); + scheduler.generateScheduling(); + auto s = scheduler.getStaticScheduling(); + for (std::shared_ptr<Node> node : s) + std::cout << " name : " << node->name() << std::endl; +} + +} \ No newline at end of file diff --git a/src/QAT/QAT_LSQ.cpp b/src/QAT/QAT_LSQ.cpp 
new file mode 100644 index 0000000000000000000000000000000000000000..1f18ae93aac2f620ed32ca8bc42fa82850246e0a --- /dev/null +++ b/src/QAT/QAT_LSQ.cpp @@ -0,0 +1,177 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. + * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#include "aidge/quantization/QAT/QAT_LSQ.hpp" +#include "aidge/operator/LSQ.hpp" + +#include "aidge/data/Tensor.hpp" +#include "aidge/graph/GraphView.hpp" +#include "aidge/scheduler/SequentialScheduler.hpp" +#include "aidge/scheduler/Scheduler.hpp" +#include "aidge/graph/Matching.hpp" + +namespace Aidge { + +void QuantLSQ::insertQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits, float stepSize) +{ + const auto matches = SinglePassGraphMatching(graphView).match("(Conv#|FC#)"); + + for (const auto& match : matches) + { + auto linearNode = match.graph->rootNode(); + + std::pair<int, int> signedRange = {-std::pow(2, nbBits - 1), std::pow(2, nbBits - 1) - 1}; + std::pair<int, int> unsignedRange = {0, std::pow(2, nbBits) - 1}; + + // INPUT QUANTIZERS INSERTION + + auto inputQuantizerName = linearNode->name() + "_lsq_i"; // TODO : double check this, and use createUniqueName() + auto inputQuantizerNode = LSQ(signedRange, inputQuantizerName); + + // Set the step size + + auto inputStepSizeOp = inputQuantizerNode->getParent(1)->getOperator(); + inputStepSizeOp->setOutput(0, std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}}))); + + // Absorb the ReLU when possible ... + + bool nodeHasParent = static_cast<bool> (linearNode->getParents()[0]); // XXX is this safe ??? 
+ + if (nodeHasParent) { + auto parentNode = linearNode->getParents()[0]; + if (parentNode->type() == "ReLU") { + auto inputQuantizerOp = std::static_pointer_cast<LSQ_Op> (inputQuantizerNode->getOperator()); + inputQuantizerOp->range() = unsignedRange; + graphView->replace({parentNode}, {}); + } + } + + // We need to handle the case where the linear node is the first one ... + + if (nodeHasParent) { + graphView->insertParent(linearNode, inputQuantizerNode, 0, 0, 0); + } else { + inputQuantizerNode->addChild(graphView); + graphView->add(inputQuantizerNode); + } + + // PARAM QUANTIZERS INSERTION + + auto paramQuantizerName = linearNode->name() + "_lsq_p"; // TODO : double check this, and use createUniqueName() + auto paramQuantizerNode = LSQ(signedRange, paramQuantizerName); + graphView->insertParent(linearNode, paramQuantizerNode, 1, 0, 0); + + // Set the step size + + auto paramStepSizeOp = paramQuantizerNode->getParent(1)->getOperator(); + paramStepSizeOp->setOutput(0, std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}}))); + + } +} + +static float getTensorAbsMean(std::shared_ptr<Tensor> tensor) +{ + float acc = 0; + float* castedTensor = static_cast<float *> (tensor->getImpl()->rawPtr()); + for(std::size_t i = 0; i < tensor->size(); i++) + acc += std::abs(castedTensor[i]); + acc /= static_cast<float> (tensor->size()); + return acc; +} + +static std::map<std::string, float> collectInputStats(std::shared_ptr<GraphView> graphView, std::shared_ptr<Tensor> calibrationData) +{ + // Propagate the calibration tensor + + SequentialScheduler scheduler(graphView); + scheduler.resetScheduling(); + scheduler.forward(true, {calibrationData}); + + // Store the input tensor statistics + + std::map<std::string, float> inputStats; + for (auto node : graphView->getNodes()) + { + if (node->type() == "FC" || node->type() == "Conv") // TODO: use graph matching !!! 
+ { + const auto op = std::static_pointer_cast<LSQ_Op>(node->getOperator()); + float inputAbsMean = getTensorAbsMean(op->getInput(0)); + inputStats.insert(std::make_pair(node->name(), inputAbsMean)); + std::cout << node->name() << " -> " << inputAbsMean << std::endl; + } + } + + return inputStats; +} + +static std::map<std::string, float> collectParamStats(std::shared_ptr<GraphView> graphView) +{ + std::map<std::string, float> paramStats; + for (auto node : graphView->getNodes()) + { + if (node->type() == "FC" || node->type() == "Conv") // TODO: use graph matching !!! + { + const auto op = std::static_pointer_cast<LSQ_Op>(node->getOperator()); + float paramAbsMean = getTensorAbsMean(op->getInput(1)); + paramStats.insert(std::make_pair(node->name(), paramAbsMean)); + std::cout << node->name() << " -> " << paramAbsMean << std::endl; + } + } + + return paramStats; +} + +static void adjustQuantizersStepSizes(std::shared_ptr<GraphView> graphView, std::map<std::string, float> inputStats, std::map<std::string, float> paramStats) +{ + const auto matches = SinglePassGraphMatching(graphView).match("(Conv#|FC#)"); + + for (const auto& match : matches) + { + auto linearNode = match.graph->rootNode(); + + // INPUT QUANTIZERS STEP-SIZES + + auto inputQuantNode = linearNode->getParent(0); + auto inputQuantOp = std::static_pointer_cast<LSQ_Op>(inputQuantNode->getOperator()); + + float absMean = inputStats[linearNode->name()]; + float stepSize = 2.0f * (absMean / std::sqrt(inputQuantOp->range().second)); + + auto inputStepSizeOp = inputQuantNode->getParent(1)->getOperator(); + inputStepSizeOp->setOutput(0, std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}}))); + + // PARAM QUANTIZERS STEP-SIZES + + auto paramQuantNode = linearNode->getParent(1); + auto paramQuantOp = std::static_pointer_cast<LSQ_Op>(paramQuantNode->getOperator()); + + absMean = paramStats[linearNode->name()]; + stepSize = 2.0f * (absMean / std::sqrt(paramQuantOp->range().second)); + + auto paramStepSizeOp = 
paramQuantNode->getParent(1)->getOperator(); + paramStepSizeOp->setOutput(0, std::make_shared<Tensor>(Array1D<float, 1>({{stepSize}}))); + } +} + +void QuantLSQ::insertAndInitQuantizers(std::shared_ptr<GraphView> graphView, size_t nbBits, std::shared_ptr<Tensor> calibrationData) +{ + // Collect the tensor statisics + auto inputStats = collectInputStats(graphView, calibrationData); + auto paramStats = collectParamStats(graphView); + + // Insert the quantizers + insertQuantizers(graphView, nbBits, 1.0); + + // Adjust the quantizers step-sizes + adjustQuantizersStepSizes(graphView, inputStats, paramStats); +} + +} \ No newline at end of file diff --git a/src/backend/cpu/operator/FixedQImpl.cpp b/src/backend/cpu/operator/FixedQImpl.cpp new file mode 100644 index 0000000000000000000000000000000000000000..3ea44adaf9987be6e873ce4f067d2b6e9c7b1674 --- /dev/null +++ b/src/backend/cpu/operator/FixedQImpl.cpp @@ -0,0 +1,76 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. 
+ * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#include <memory> +#include <vector> + +#include "aidge/data/Tensor.hpp" +#include "aidge/operator/FixedQ.hpp" +#include "aidge/utils/Types.h" +#include "aidge/backend/cpu/data/GetCPUPtr.h" +#include "aidge/utils/ErrorHandling.hpp" + +#include "aidge/backend/cpu/operator/FixedQImpl.hpp" +#include "aidge/backend/cpu/operator/FixedQImpl_kernels.hpp" + +// template<> +// Aidge::Elts_t Aidge::FixedQImpl_cpu::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) { +// // this implementation can be in-place +// return Elts_t::DataElts(0); +// } + +template<> +void Aidge::FixedQImpl_cpu::forward() +{ + const FixedQ_Op& op_ = dynamic_cast<const FixedQ_Op&>(mOp); + std::shared_ptr<Tensor> in0 = op_.getInput(0); + std::shared_ptr<Tensor> out0 = op_.getOutput(0); + AIDGE_ASSERT(in0, "missing input #0"); + + // Find the correct kernel type + const auto impl = Registrar<FixedQImpl_cpu>::create(getBestMatch(getRequiredSpec())); + + // Call kernel + impl.forward( + op_.nbBits(), + op_.span(), + op_.isOutputUnsigned(), + in0->size(), + getCPUPtr(mOp.getRawInput(0)), + getCPUPtr(mOp.getRawOutput(0)) + ); +} + + +template<> +void Aidge::FixedQImpl_cpu::backward() +{ + const FixedQ_Op& op_ = dynamic_cast<const FixedQ_Op&>(mOp); + std::shared_ptr<Tensor> in0 = op_.getInput(0); + std::shared_ptr<Tensor> out0 = op_.getOutput(0); + std::shared_ptr<Tensor> gra_int0 = op_.getInput(0)->grad(); + std::shared_ptr<Tensor> gra_out0 = op_.getOutput(0)->grad(); + AIDGE_ASSERT(out0, "missing output #0 for current {} operator", op_.type()); + + // Find the correct kernel type + const auto impl = Registrar<FixedQImpl_cpu>::create(getBestMatch(getRequiredSpec())); + + // Call kernel + impl.backward( + op_.nbBits(), + op_.span(), + op_.isOutputUnsigned(), + gra_int0->size(), + getCPUPtr(in0), + getCPUPtr(gra_out0), + getCPUPtr(gra_int0) + ); +} diff --git 
a/src/backend/cpu/operator/LSQImpl.cpp b/src/backend/cpu/operator/LSQImpl.cpp new file mode 100644 index 0000000000000000000000000000000000000000..32b494376d9e4e8ea2923fe7770ffddb821eeb11 --- /dev/null +++ b/src/backend/cpu/operator/LSQImpl.cpp @@ -0,0 +1,63 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. + * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#include <memory> +#include <vector> + +#include "aidge/data/Tensor.hpp" +#include "aidge/operator/LSQ.hpp" +#include "aidge/utils/Types.h" +#include "aidge/backend/cpu/data/GetCPUPtr.h" +#include "aidge/utils/ErrorHandling.hpp" + +#include "aidge/backend/cpu/operator/LSQImpl.hpp" +#include "aidge/backend/cpu/operator/LSQImpl_kernels.hpp" + +template<> +void Aidge::LSQImpl_cpu::forward() { + const LSQ_Op& op_ = dynamic_cast<const LSQ_Op&>(mOp); + std::shared_ptr<Tensor> in0 = op_.getInput(0); + std::shared_ptr<Tensor> in1 = op_.getInput(1); + std::shared_ptr<Tensor> out0 = op_.getOutput(0); + + // Find the correct kernel type + auto impl = Registrar<LSQImpl_cpu>::create(getBestMatch(getRequiredSpec())); + + // Call kernel + impl.forward(in0->size(), + op_.range(), + getCPUPtr(in0), + getCPUPtr(in1), + getCPUPtr(out0)); +} +template<> +void Aidge::LSQImpl_cpu::backward() { + const LSQ_Op& op_ = dynamic_cast<const LSQ_Op&>(mOp); + std::shared_ptr<Tensor> in0 = op_.getInput(0); + std::shared_ptr<Tensor> in1 = op_.getInput(1); + std::shared_ptr<Tensor> out0 = op_.getOutput(0); + std::shared_ptr<Tensor> gra_int0 = op_.getInput(0)->grad(); + std::shared_ptr<Tensor> gra_int1 = op_.getInput(1)->grad(); + std::shared_ptr<Tensor> gra_out0 = op_.getOutput(0)->grad(); + + // Find the correct 
kernel type + auto impl = Registrar<LSQImpl_cpu>::create(getBestMatch(getRequiredSpec())); + + // Call kernel + impl.backward( + gra_int0->size(), + op_.range(), + getCPUPtr(in0), + getCPUPtr(in1), + getCPUPtr(gra_out0), + getCPUPtr(gra_int0), + getCPUPtr(gra_int1)); +} diff --git a/src/backend/cpu/operator/SAT/DoReFaImpl.cpp b/src/backend/cpu/operator/SAT/DoReFaImpl.cpp new file mode 100644 index 0000000000000000000000000000000000000000..58aec791f575b50b46aeea956bfc50def18a6ce4 --- /dev/null +++ b/src/backend/cpu/operator/SAT/DoReFaImpl.cpp @@ -0,0 +1,65 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. + * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#include <memory> +#include <vector> + +#include "aidge/data/Tensor.hpp" +#include "aidge/operator/SAT/DoReFa.hpp" +#include "aidge/utils/Types.h" +#include "aidge/backend/cpu/data/GetCPUPtr.h" +#include "aidge/utils/ErrorHandling.hpp" + +#include "aidge/backend/cpu/operator/SAT/DoReFaImpl.hpp" +#include "aidge/backend/cpu/operator/SAT/DoReFaImpl_kernels.hpp" + +template<> +void Aidge::DoReFaImpl_cpu::forward() { + + const DoReFa_Op& op_ = dynamic_cast<const DoReFa_Op&>(mOp); + + std::shared_ptr<Tensor> in0 = op_.getInput(0); + std::shared_ptr<Tensor> out0 = op_.getOutput(0); + + // Find the correct kernel type + auto impl = Registrar<DoReFaImpl_cpu>::create(getBestMatch(getRequiredSpec())); + + // Call kernel + impl.forward( + in0->size(), + op_.range(), + op_.mode(), + getCPUPtr(in0), + getCPUPtr(out0)); +} + +template<> +void Aidge::DoReFaImpl_cpu::backward() { + + const DoReFa_Op& op_ = dynamic_cast<const DoReFa_Op&>(mOp); + + std::shared_ptr<Tensor> in0 = op_.getInput(0); + 
std::shared_ptr<Tensor> out0 = op_.getOutput(0); + std::shared_ptr<Tensor> gra_int0 = op_.getInput(0)->grad(); + std::shared_ptr<Tensor> gra_out0 = op_.getOutput(0)->grad(); + + // Find the correct kernel type + auto impl = Registrar<DoReFaImpl_cpu>::create(getBestMatch(getRequiredSpec())); + + // Call kernel + impl.backward( + gra_int0->size(), + op_.range(), + op_.mode(), + getCPUPtr(in0), + getCPUPtr(gra_out0), + getCPUPtr(gra_int0)); +} diff --git a/src/backend/cpu/operator/SAT/TanhClampImpl.cpp b/src/backend/cpu/operator/SAT/TanhClampImpl.cpp new file mode 100644 index 0000000000000000000000000000000000000000..f908715478572c11e2b30654334e620e965e2d32 --- /dev/null +++ b/src/backend/cpu/operator/SAT/TanhClampImpl.cpp @@ -0,0 +1,62 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. 
+ * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#include <memory> +#include <vector> + +#include "aidge/data/Tensor.hpp" +#include "aidge/operator/SAT/TanhClamp.hpp" +#include "aidge/utils/Types.h" +#include "aidge/backend/cpu/data/GetCPUPtr.h" +#include "aidge/utils/ErrorHandling.hpp" + +#include "aidge/backend/cpu/operator/SAT/TanhClampImpl.hpp" +#include "aidge/backend/cpu/operator/SAT/TanhClampImpl_kernels.hpp" + +template<> +void Aidge::TanhClampImpl_cpu::forward() { + + const TanhClamp_Op& op_ = dynamic_cast<const TanhClamp_Op&>(mOp); + std::shared_ptr<Tensor> in0 = op_.getInput(0); + std::shared_ptr<Tensor> out0 = op_.getOutput(0); + std::shared_ptr<Tensor> scaling = op_.getOutput(1); + + // Find the correct kernel type + auto impl = Registrar<TanhClampImpl_cpu>::create(getBestMatch(getRequiredSpec())); + + // Call kernel + impl.forward( + in0->size(), + getCPUPtr(in0), + getCPUPtr(scaling), + getCPUPtr(out0)); +} + +template<> +void Aidge::TanhClampImpl_cpu::backward() { + const TanhClamp_Op& op_ = dynamic_cast<const TanhClamp_Op&>(mOp); + std::shared_ptr<Tensor> in0 = op_.getInput(0); + std::shared_ptr<Tensor> out0 = op_.getOutput(0); + std::shared_ptr<Tensor> scaling = op_.getOutput(1); + std::shared_ptr<Tensor> gra_int0 = op_.getInput(0)->grad(); + std::shared_ptr<Tensor> gra_out0 = op_.getOutput(0)->grad(); + + // Find the correct kernel type + auto impl = Registrar<TanhClampImpl_cpu>::create(getBestMatch(getRequiredSpec())); + + // Call kernel + impl.backward( + gra_int0->size(), + getCPUPtr(in0), + getCPUPtr(scaling), + getCPUPtr(gra_out0), + getCPUPtr(gra_int0)); +} diff --git a/src/operator/FixedQ.cpp b/src/operator/FixedQ.cpp new file mode 100644 index 0000000000000000000000000000000000000000..879174032bfcf5b2958b0950d0ed7410ba83331c --- /dev/null +++ b/src/operator/FixedQ.cpp @@ -0,0 +1,30 @@ 
+/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. + * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#include "aidge/operator/FixedQ.hpp" + +#include <memory> +#include <string> + +#include "aidge/data/Tensor.hpp" +#include "aidge/utils/Registrar.hpp" +#include "aidge/utils/Types.h" + +const std::string Aidge::FixedQ_Op::Type = "FixedQ"; + +std::set<std::string> Aidge::FixedQ_Op::getAvailableBackends() const { + return Registrar<FixedQ_Op>::getKeys(); +} + +void Aidge::FixedQ_Op::setBackend(const std::string& name, DeviceIdx_t device) { + SET_IMPL_MACRO(FixedQ_Op, *this, name); + mOutputs[0]->setBackend(name, device); +} diff --git a/src/operator/LSQ.cpp b/src/operator/LSQ.cpp new file mode 100644 index 0000000000000000000000000000000000000000..1fd79365b0b70e8c4fa9f43df397e38f7a75e685 --- /dev/null +++ b/src/operator/LSQ.cpp @@ -0,0 +1,49 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. + * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#include "aidge/operator/LSQ.hpp" + +#include <memory> +#include <string> + +#include "aidge/data/Tensor.hpp" +#include "aidge/utils/Registrar.hpp" +#include "aidge/utils/Types.h" + +const std::string Aidge::LSQ_Op::Type = "LSQ"; + +bool Aidge::LSQ_Op::forwardDims(bool /*allowDataDependency*/) { + + // TODO : check if the step size is a scalar ! 
+ if (inputsAssociated()) { + const auto inputsDims = getInput(0)->dims(); + mOutputs[0]->resize(inputsDims); + return true; + } + return false; +} + +std::set<std::string> Aidge::LSQ_Op::getAvailableBackends() const { + return Registrar<LSQ_Op>::getKeys(); +} + +void Aidge::LSQ_Op::setBackend(const std::string& name, DeviceIdx_t device) { + SET_IMPL_MACRO(LSQ_Op, *this, name); + mOutputs[0]->setBackend(name, device); + + // By default, automatically set backend for alphas inputs + if (getInput(1)) { + getInput(1)->setBackend(name, device); + } + else { + Log::notice("LSQ_Op::setBackend(): could not set backend for step_size input, because input is not connected"); + } +} diff --git a/src/operator/SAT/DoReFa.cpp b/src/operator/SAT/DoReFa.cpp new file mode 100644 index 0000000000000000000000000000000000000000..b6124bad0e5f04c8e22e2d16c48dd4fe5de7945a --- /dev/null +++ b/src/operator/SAT/DoReFa.cpp @@ -0,0 +1,29 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. 
+ * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#include "aidge/operator/SAT/DoReFa.hpp" + +#include <memory> +#include <string> + +#include "aidge/data/Tensor.hpp" +#include "aidge/utils/Types.h" + +const std::string Aidge::DoReFa_Op::Type = "DoReFa"; + +std::set<std::string> Aidge::DoReFa_Op::getAvailableBackends() const { + return Registrar<DoReFa_Op>::getKeys(); +} + +void Aidge::DoReFa_Op::setBackend(const std::string& name, DeviceIdx_t device) { + SET_IMPL_MACRO(DoReFa_Op, *this, name); + mOutputs[0]->setBackend(name, device); +} \ No newline at end of file diff --git a/src/operator/SAT/TanhClamp.cpp b/src/operator/SAT/TanhClamp.cpp new file mode 100644 index 0000000000000000000000000000000000000000..2b8d63d7136c45589cba92018d2ecafe17d54e4e --- /dev/null +++ b/src/operator/SAT/TanhClamp.cpp @@ -0,0 +1,44 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. 
+ * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#include "aidge/operator/SAT/TanhClamp.hpp" + +#include <memory> +#include <string> + +#include "aidge/data/Tensor.hpp" +#include "aidge/utils/Registrar.hpp" +#include "aidge/utils/Types.h" + +const std::string Aidge::TanhClamp_Op::Type = "TanhClamp"; + +bool Aidge::TanhClamp_Op::forwardDims(bool /*allowDataDependency*/) { + + if (inputsAssociated()) { + const auto inputsDims = getInput(0)->dims(); + mOutputs[0]->resize(inputsDims); + mOutputs[1]->resize({1}); + return true; + } + return false; +} + +std::set<std::string> Aidge::TanhClamp_Op::getAvailableBackends() const { + return Registrar<TanhClamp_Op>::getKeys(); +} + +void Aidge::TanhClamp_Op::setBackend(const std::string& name, DeviceIdx_t device) { + SET_IMPL_MACRO(TanhClamp_Op, *this, name); + mOutputs[0]->setBackend(name, device); + + // Scale output is always on CPU for now + mOutputs[1]->setBackend("cpu"); // XXX why ? +} \ No newline at end of file diff --git a/version.txt b/version.txt index 8294c184368c0ec9f84fbcc80c6b36326940c770..5faa42c8a89ea0f5ab797259dce62bb190eb28c6 100644 --- a/version.txt +++ b/version.txt @@ -1 +1,2 @@ -0.1.2 \ No newline at end of file +0.2.0 +