diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..10cbf7d48f97b9587aa40af98252f1feaa874eac --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,143 @@ +cmake_minimum_required(VERSION 3.15) + + +file(READ "${CMAKE_SOURCE_DIR}/version.txt" version) +file(READ "${CMAKE_SOURCE_DIR}/project_name.txt" project) + +message(STATUS "Project name: ${project}") +message(STATUS "Project version: ${version}") + +# Note : project name is {project} and python module name is also {project} +set(module_name _${project}) # target name + +project(${project}) + +############################################## +# Import utils CMakeLists +set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake") +#include(PybindModuleCreation) + +############################################## +# Define options +option(PYBIND "python binding" ON) +option(WERROR "Warning as error" OFF) +option(TEST "Enable tests" ON) +option(COVERAGE "Enable coverage" OFF) + +############################################## +# Import utils CMakeLists +set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake") +#include(PybindModuleCreation) + +if(CMAKE_COMPILER_IS_GNUCXX AND COVERAGE) + Include(CodeCoverage) +endif() + +############################################## +# Find system dependencies +find_package(aidge_core REQUIRED) +find_package(aidge_backend_cpu REQUIRED) + +############################################## +# Create target and set properties + +file(GLOB_RECURSE src_files "src/*.cpp") +file(GLOB_RECURSE inc_files "include/*.hpp") + +add_library(${module_name} ${src_files} ${inc_files}) +target_link_libraries(${module_name} + PUBLIC + _aidge_core # _ is added because we link the target not the project + _aidge_backend_cpu +) +#Set target properties +set_property(TARGET ${module_name} PROPERTY POSITION_INDEPENDENT_CODE ON) + +target_include_directories(${module_name} + PUBLIC + $<INSTALL_INTERFACE:include> + $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include> + PRIVATE + ${CMAKE_CURRENT_SOURCE_DIR}/src +) + +# PYTHON BINDING +if (PYBIND) + generate_python_binding(${project} ${module_name}) + + # Handles Python + pybind11 headers dependencies + target_link_libraries(${module_name} + PUBLIC + pybind11::pybind11 + PRIVATE + Python::Python + ) +endif() + +target_compile_features(${module_name} PRIVATE cxx_std_14) + +target_compile_options(${module_name} PRIVATE + $<$<OR:$<CXX_COMPILER_ID:Clang>,$<CXX_COMPILER_ID:AppleClang>,$<CXX_COMPILER_ID:GNU>>: + -Wall -Wextra -Wold-style-cast -Winline -pedantic -Werror=narrowing -Wshadow $<$<BOOL:${WERROR}>:-Werror>>) +target_compile_options(${module_name} PRIVATE + $<$<CXX_COMPILER_ID:MSVC>: + /W4>) + +if(CMAKE_COMPILER_IS_GNUCXX AND COVERAGE) + append_coverage_compiler_flags() +endif() + +############################################## +# Installation instructions + +include(GNUInstallDirs) +set(INSTALL_CONFIGDIR ${CMAKE_INSTALL_LIBDIR}/cmake/${project}) + +install(TARGETS ${module_name} EXPORT ${project}-targets + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} + RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} +) + +install(DIRECTORY include/ DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) + +#Export the targets to a script + +install(EXPORT ${project}-targets + FILE "${project}-targets.cmake" + DESTINATION ${INSTALL_CONFIGDIR} + COMPONENT ${module_name} +) + +#Create a ConfigVersion.cmake file +include(CMakePackageConfigHelpers) +write_basic_package_version_file( + 
"${CMAKE_CURRENT_BINARY_DIR}/${project}-config-version.cmake" + VERSION ${version} + COMPATIBILITY AnyNewerVersion +) + +configure_package_config_file("${project}-config.cmake.in" + "${CMAKE_CURRENT_BINARY_DIR}/${project}-config.cmake" + INSTALL_DESTINATION ${INSTALL_CONFIGDIR} +) + +#Install the config, configversion and custom find modules +install(FILES + "${CMAKE_CURRENT_BINARY_DIR}/${project}-config.cmake" + "${CMAKE_CURRENT_BINARY_DIR}/${project}-config-version.cmake" + DESTINATION ${INSTALL_CONFIGDIR} +) + +############################################## +## Exporting from the build tree +export(EXPORT ${project}-targets + FILE "${CMAKE_CURRENT_BINARY_DIR}/${project}-targets.cmake") + + +############################################## +## Add test +if(TEST) + enable_testing() + add_subdirectory(unit_tests) +endif() diff --git a/README.md b/README.md index cb2edadb488365a331888f10c84505aff49885c7..9eedf04ae0fdf1b32e8f72d00d83e67c9cb912af 100644 --- a/README.md +++ b/README.md @@ -1,69 +1,45 @@ -# Aidge +# Aidge module quantization -[](LICENSE) [](https://eclipse-aidge.readthedocs.io/en/latest/?badge=latest) +This is C++ module for Post-training quantization only for the moment. +Python binding and Quantization-aware training will be added later. -The Eclipse Aidge platform is a comprehensive solution for fast and accurate Deep Neural Network (DNN) simulation and full and automated DNN-based applications building. The platform integrates database construction, data pre-processing, network building, benchmarking and hardware export to various targets. It is particularly useful for DNN design and exploration, allowing simple and fast prototyping of DNN with different topologies. It is possible to define and learn multiple network topology variations and compare the performances (in terms of recognition rate and computationnal cost) automatically. Export hardware targets include CPU, DSP and GPU with OpenMP, OpenCL, Cuda, cuDNN and TensorRT programming models as well as custom hardware IP code generation with High-Level Synthesis for FPGA and dedicated configurable DNN accelerator IP. +## General steps for PTQ developement +1) Simple case : conv/fc cell only, no bias +2) Simple case with the bias : conv/fc cell only, bias +2) More complex case : provide all for usual types of networks : mobilenet/resnet. -| Module | Status | Coverage | -| -------- | ------- | ------- | -| [aidge_core](https://gitlab.eclipse.org/eclipse/aidge/aidge_core) |  |   | -| [aidge_backend_cpu](https://gitlab.eclipse.org/eclipse/aidge/aidge_backend_cpu) |  |   | -| [aidge_onnx](https://gitlab.eclipse.org/eclipse/aidge/aidge_onnx) |  |  | +## Technical part +We separate normalization of the network, which is the step0 for PTQ, and quantization part. This fact will help to debug different networks in the future. -## Installation +The methods themselves are based on the ones written in [DeepNetQuantization class.](https://github.com/CEA-LIST/N2D2/blob/master/src/DeepNetQuantization.cpp) -### Build on Linux using pip -Each Aidge module are built independantly from one another. -To install Aidge on Linux using pip, follow those steps : -1. Create your python environnement with python >= 3.7. For example using virtualenv : -``` bash -virtualenv -p python3.8 py_env_aidge -source py_env_aidge/bin/activate -``` +## Weights (and bias) quantization -2. 
-2. Set the desired install path : -``` bash -export AIDGE_INSTALL = '<path_to_aidge>/install' -``` -3. First build aidge_core : -``` bash -cd aidge/aidge_core/ -pip install . -v -``` +## Activations quantization -4. Then build other modules (for example aidge_backend_cpu, aidge_onnx) : -``` bash -cd aidge/aidge_backend_cpu -pip install . -v -``` +1) Normalization between -1 and 1 (or 0 and 1); +2) Quantization to the correct range with the scaling factor; -## Docker Image +For both normalization and quantization we need the cell threshold: the maximum value of the cell outputs. +This is the most naive approach; it can be replaced with more sophisticated methods that find an optimal (instead of the maximum) clipping value. -Feel free to use one of the Dockerfiles available in the [`docker`](docker) folder. +To get the correct value of the activation scaling we need to take into account three factors: +1) Bias scaling +2) Scaling factor of the current cell +3) Parent scaling factor -To build the image, run where your DockerFile is -``` -docker build --pull --rm -f "name_of_os.Dockerfile" -t aidge:myenv . -``` +## Additional methods -Then to run a container, run -``` -docker run --name mycontainer aidge:myenv -``` +Additional methods are called during the [PTQ method](https://github.com/CEA-LIST/N2D2/blob/master/src/utils/Helper.cpp#L640): -## Contributing +1) [remove dropout](https://github.com/CEA-LIST/N2D2/blob/master/src/utils/Helper.cpp#L646) +2) [batch normalization fusion with conv](https://github.com/CEA-LIST/N2D2/blob/master/src/utils/Helper.cpp#L649) +3) [fuse padding](https://github.com/CEA-LIST/N2D2/blob/master/src/utils/Helper.cpp#L694) +4) [cross-layer equalization](https://github.com/CEA-LIST/N2D2/blob/master/src/utils/Helper.cpp#L699) -If you would like to contribute to the Aidge project, we’re happy to have your help! -Everyone is welcome to contribute code via merge requests, to file issues on Gitlab, -to help people asking for help, fix bugs that people have filed, -to add to our documentation, or to help out in any other way. -We grant commit access (which includes full rights to the issue database, such as being able to edit labels) -to people who have gained our trust and demonstrated a commitment to Aidge. - -## License - -Aidge has a Eclipse Public License 2.0, as found in the [LICENSE](LICENSE). \ No newline at end of file diff --git a/aidge_quantization-config.cmake.in b/aidge_quantization-config.cmake.in new file mode 100644 index 0000000000000000000000000000000000000000..0361c06c6aa9bdcb9a2380d3f1c0b7996e77cd84 --- /dev/null +++ b/aidge_quantization-config.cmake.in @@ -0,0 +1,5 @@ +@PACKAGE_INIT@ + +include(${CMAKE_CURRENT_LIST_DIR}/aidge_quantization-config-version.cmake) + +include(${CMAKE_CURRENT_LIST_DIR}/aidge_quantization-targets.cmake) diff --git a/include/aidge/QuantPTQ.hpp b/include/aidge/QuantPTQ.hpp new file mode 100644 index 0000000000000000000000000000000000000000..2967f9b0881a52400756a1778e7a98490f31f570 --- /dev/null +++ b/include/aidge/QuantPTQ.hpp @@ -0,0 +1,42 @@ +///\file QuantPTQ.hpp +///\brief QuantPTQ : Post-Training Quantization (PTQ) helpers for Aidge +///\version file 1.0.0 +///\date Creation 14 February 2023 +///\date 14 February 2023 +///\par ChangeLog +///\par +/// v1.0.0, 14 February 2023<br> +/// - Initial version. +///\author ik243221 +///\copyright +/// Copyright (c) 2023 CEA, LIST, Embedded Artificial Intelligence Laboratory.
All +/// rights reserved. + +#ifndef QuantPTQ_H_ +#define QuantPTQ_H_ + + +//#include <aidge/aidge.hpp> +#include "aidge/backend/cpu.hpp" + +#include <numeric> +#include <vector> +#include <cmath> +#include <cstdint> +#include <unordered_map> + +using namespace Aidge; + +namespace Aidge_HELPER{ + + float getCellThreshold(std::shared_ptr<Node> node); + float getMaxParentsScaling(std::shared_ptr<Node> node); + void rescaleParentsToScaling(std::shared_ptr<Node> node, std::unordered_map<std::string, long double>& scalingForCells, long double scaling); + + long double quantizeFreeParams(std::shared_ptr<Node> node, std::size_t nbBits, std::unordered_map<std::string, long double> biasScalings, bool verbose); + long double quantizeActivation(std::shared_ptr<Node> node, std::size_t nbBits, std::unordered_map<std::string, long double> biasScalings, std::unordered_map<std::string, long double> activationScalings, bool verbose); + void quantizeNetwork(std::vector<std::shared_ptr<Node>> orderedGraphView, std::size_t nbBits, bool verbose); + //void quantizeNetwork(std::shared_ptr<GraphView> graphView, std::size_t nbBits, bool verbose); +} + +#endif /* QuantPTQ_H_ */ diff --git a/project_name.txt b/project_name.txt new file mode 100644 index 0000000000000000000000000000000000000000..5b95ae655875a9066bc5294e08a2efa7dbc883f5 --- /dev/null +++ b/project_name.txt @@ -0,0 +1 @@ +aidge_quantization \ No newline at end of file diff --git a/src/QuantPTQ.cpp b/src/QuantPTQ.cpp new file mode 100644 index 0000000000000000000000000000000000000000..1a32faa3c703e0668250c0ebde5cede3cb4f1bd7 --- /dev/null +++ b/src/QuantPTQ.cpp @@ -0,0 +1,528 @@ +///\file QuantPTQ.h +///\brief QuantPTQ __insert lib module brief description__ +///\version file 1.0.0 +///\date Creation 14 February 2023 +///\date 14 February 2023 +///\par ChangeLog +///\par +/// v1.0.0, 14 February 2023<br> +/// - Initial version. +///\author ik243221 +///\copyright +/// Copyright (c) 2023 CEA, LIST, Embedded Artificial Intelligence Laboratory. All +/// rights reserved. 
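+// Typical usage of this module, sketched from unit_tests/Test_QuantPTQ.cpp
+// (for orientation only; the identifiers below mirror the ones used in that test):
+//
+//   std::shared_ptr<GraphView> graph = ...;   // graph whose operators carry "output_range" hooks
+//   SequentialScheduler scheduler(graph);
+//   scheduler.forward();                      // calibration pass, records the output maxima
+//   std::vector<std::shared_ptr<Node>> ordered = scheduler.getStaticScheduling();
+//   Aidge_HELPER::quantizeNetwork(ordered, 8, /*verbose=*/false);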
+ + +#include <numeric> +#include <vector> +#include <cmath> +#include <cstdint> +#include <unordered_map> + +#include "aidge/QuantPTQ.hpp" +using namespace Aidge; + +#include "aidge/hook/outputRange.hpp" +#include "aidge/operator/Scaling.hpp" +#include "aidge/graph/GraphView.hpp" + +namespace Aidge_HELPER{ + +//getMaxParentsScaling +long double getMaxParentsScaling(std::shared_ptr<Node> node, + std::unordered_map<std::string, long double>& scalingForCells){ + long double maxParentsScaling = 0.0; + //std::cout << " inside getMaxParentsScaling " << std::endl; + const std::vector<std::shared_ptr<Node>> parentsNodes = node->getParents(); + + //std::cout << "after parentsNodes " << std::endl; + //std::cout << "size of parents nodes = " << parentsNodes.size() << std::endl; + + for(const std::shared_ptr<Node>& parentNode: parentsNodes) { + //std::cout << " parent node type = " << parentNode->type() << std::endl; + //std::cout << " parent node name = " << parentNode->name() << std::endl; + const long double parentScaling = (parentNode->type()=="Producer" || scalingForCells.empty())?1.0:scalingForCells.at(parentNode->name()); + //std::cout << " parentScaling = " << parentScaling << std::endl; + maxParentsScaling = std::max(maxParentsScaling, parentScaling); + //std::cout << " maxParentsScaling = " << maxParentsScaling << std::endl; + assert(parentScaling > 0.0); + } + + return maxParentsScaling; +} + + //rescaleParentsToScaling +void rescaleParentsToScaling(std::shared_ptr<Node> node, + std::unordered_map<std::string, long double>& scalingForCells, + long double scaling) +{ + //std::cout << " inside rescaleParentsToScaling " << std::endl; + + // Get a copy, the loop modify the graph + const std::vector<std::shared_ptr<Node>> parentsNodes = node->getParents(); + + for(const std::shared_ptr<Node>& parentNode: parentsNodes) { + + //std::cout << " parent node type = " << parentNode->type() << std::endl; + //std::cout << " parent node name = " << parentNode->name() << std::endl; + const long double parentScaling = (parentNode->type()=="Producer" || scalingForCells.empty())?1.0:scalingForCells.at(parentNode->name()); + + if(parentScaling == scaling) { + continue; + } + + //std::cout << "parentScaling = " << parentScaling << std::endl; + //std::cout << "scaling = " << scaling << std::endl; + + assert(parentScaling < scaling); + + //IF parentScaling is smaller than scaling + //create scaling operator and add it to the graphView + /* + std::shared_ptr<Node> scaling_op = Scaling(scaling, parentCell->name()+"rescale_branch"); + scaling_op->getOperator()->setDatatype(DataType::Float32); + scaling_op->getOperator()->setBackend("cpu"); + */ + + //TODO : get all graphviews node is attached to and insert a scaling cell before it. 
+ /* + void Aidge::GraphView::insert(Node & newNode, Node & inNode, + std::initializer_list<Node> outNodes, + IOIndex_t tensorIdx) { + printf("Not implemented yet.\n"); + } + */ + //insert is not implemented yet...have to be smth like + //graphView->insert(scaling_op, parentCell, node); + + //N2D2 version + /* + auto scalingCell = Registrar<ScalingCell>::create<Float_T>(getCellModelType(*parentCell)) + (mDeepNet, + mDeepNet.generateNewCellName(parentCell->getName() + "_rescale_branch"), + parentCell->getNbOutputs(), + Scaling::floatingPointScaling( + std::vector<Float_T>(parentCell->getNbOutputs(), + parentScaling/scaling), false, std::vector<Float_T>(0.0f)) + ); + + mDeepNet.addCellBetween(scalingCell, parentCell, cell); + */ + } +} + +//N2D2 version +/* +std::unordered_map<std::string, long double> N2D2::DeepNetQuantization::quantizeFreeParemeters(std::size_t nbBits) { +#ifdef VERBOSE_QUANT + std::cout << " Quantizing free parameters:" << std::endl; +#endif + + std::unordered_map<std::string, long double> biasScalings; + + std::vector<std::vector<std::string>> layers = mDeepNet.getLayers(); + for (auto itLayer = layers.begin() + 1; itLayer != layers.end(); ++itLayer) { + for (auto itCell = itLayer->begin(); itCell != itLayer->end(); ++itCell) { + std::shared_ptr<Cell> cell = mDeepNet.getCell(*itCell); + if(!cell) { + throw std::runtime_error("Invalid cell."); + } + + long double biasScaling = getMaxParentsScaling(cell, biasScalings); + rescaleParentsToScaling(cell, biasScalings, biasScaling); + + + const long double wQuantScaling = std::pow(2, nbBits - 1) - 1; + const long double bQuantScaling = DeepNetExport::isCellInputsUnsigned(*cell, mDeepNet)? + wQuantScaling*(std::pow(2, nbBits) - 1): + wQuantScaling*(std::pow(2, nbBits - 1) - 1); + + + const std::pair<Float_T, Float_T> wMinMax + = cell->getFreeParametersRange(Cell::Multiplicative); + const Float_T wScalingCell = Utils::max_abs(wMinMax.first, wMinMax.second); + if(wScalingCell != 0.0) { + cell->processFreeParameters([&](Float_T w) { return w*(wQuantScaling/wScalingCell); }, + Cell::Multiplicative); + + biasScaling *= wScalingCell; + } + + cell->processFreeParameters([&](Float_T b) { return b*(bQuantScaling/biasScaling); }, + Cell::Additive); + biasScalings[cell->getName()] = biasScaling; + +#ifdef VERBOSE_QUANT + std::cout << " - " << cell->getName() << ": " << biasScaling + << std::endl; +#endif + } + } + + fuseScalingCells(); + + return biasScalings; +} +*/ + +long double quantizeFreeParams(std::shared_ptr<Node> node, std::size_t nbBits, std::unordered_map<std::string, long double> biasScalings, bool verbose){ + + + long double biasScaling = getMaxParentsScaling(node, biasScalings); + rescaleParentsToScaling(node, biasScalings, biasScaling); + + //weights are input 1 + std::shared_ptr<Tensor> weight_tensor = node->getOperator()->getInput(1); + if(verbose){ + printf("Weight init :\n"); + weight_tensor->print(); + } + + float max_value = 0.; + float * casted_weight_tensor = static_cast<float *>(weight_tensor->getImpl()->rawPtr()); + for(size_t i = 0; i<weight_tensor->size(); i++) { + if(abs(casted_weight_tensor[i]) > max_value){ + max_value = abs(casted_weight_tensor[i]); + } + } + + const long double wQuantScaling = std::pow(2, nbBits - 1) - 1; + //find out if the input to the cell is signed/unsigned, n2d2 example : + //return cellFrame.getOutputsRange().first >= 0.0; + /* + virtual std::pair<double, double> getOutputsRange() const { + const double inf = std::numeric_limits<double>::infinity(); + return 
mActivation?mActivation->getOutputRange():std::make_pair(-inf, inf); + } + */ + bool unsigned_input = true; + const long double bQuantScaling = unsigned_input?wQuantScaling*(std::pow(2, nbBits) - 1): + wQuantScaling*(std::pow(2, nbBits - 1) - 1); + + //bias also has to be scaled by the max of weights + biasScaling *= max_value; + + //new empty tensor + std::shared_ptr<Tensor> quant_weight_tensor = std::static_pointer_cast<Tensor>(weight_tensor); + + for(size_t i = 0; i<weight_tensor->size(); i++) { + static_cast<float *>(quant_weight_tensor->getImpl()->rawPtr())[i] = std::round(static_cast<float *>(weight_tensor->getImpl()->rawPtr())[i]*wQuantScaling/max_value); + } + + if(verbose){ + printf("Weight quantized :\n"); + quant_weight_tensor->print(); + } + + std::shared_ptr<Tensor> bias_tensor = node->getOperator()->getInput(2); + if(verbose){ + printf("Bias init :\n"); + bias_tensor->print(); + } + + std::shared_ptr<Tensor> quant_bias_tensor = std::static_pointer_cast<Tensor>(bias_tensor); + for(size_t i = 0; i<bias_tensor->size(); i++) { + static_cast<float *>(quant_bias_tensor->getImpl()->rawPtr())[i] = static_cast<float *>(bias_tensor->getImpl()->rawPtr())[i]*bQuantScaling/biasScaling; + } + + if(verbose){ + printf("Bias quantized :\n"); + quant_bias_tensor->print(); + } + + //replace weights in the node + weight_tensor->getImpl()->setRawPtr(quant_weight_tensor->getImpl()->rawPtr()); + //replace bias in the node + bias_tensor->getImpl()->setRawPtr(quant_bias_tensor->getImpl()->rawPtr()); + + return biasScaling; +} + +//N2D2 version with Histogram +/* + double N2D2::DeepNetQuantization::getCellThreshold(const std::string& cellName, + const std::unordered_map<std::string, Histogram>& outputsHistogram, + const std::unordered_map<std::string, RangeStats>& outputsRange, + std::size_t nbBits, ClippingMode actClippingMode, double quantileValue) + { + switch(actClippingMode) { + case ClippingMode::KL_DIVERGENCE: + return outputsHistogram.at(cellName).calibrateKLDivergence(nbBits); + case ClippingMode::MSE: + return outputsHistogram.at(cellName).calibrateMSE(nbBits); + case ClippingMode::QUANTILE: + return outputsHistogram.at(cellName).getQuantileValue(quantileValue); + default: { + const auto& range = outputsRange.at(cellName); + return Utils::max_abs(range.minVal(), range.maxVal()); + } + } + } +*/ +float getCellThreshold(std::shared_ptr<Node> node){ + float max_output = std::static_pointer_cast<OutputRange>(node->getOperator()->getHook("output_range"))->getOutput(0); + return max_output; +} + + +//N2D2 quantizeActivation method +/* +void N2D2::DeepNetQuantization::quantizeActivations( + const std::unordered_map<std::string, Histogram>& outputsHistogram, + const std::unordered_map<std::string, RangeStats>& outputsRange, + std::unordered_map<std::string, long double>& biasScalings, + std::size_t nbBits, ClippingMode actClippingMode, double quantileValue) +{ +#ifdef VERBOSE_QUANT + std::cout << " Quantizing activations:" << std::endl; +#endif + + std::unordered_map<std::string, long double> activationScalings; + + std::vector<std::vector<std::string>> layers = mDeepNet.getLayers(); + for (auto itLayer = layers.begin() + 1; itLayer != layers.end(); ++itLayer) { + for (auto itCell = itLayer->begin(); itCell != itLayer->end(); ++itCell) { + std::shared_ptr<Cell> cell = mDeepNet.getCell(*itCell); + std::shared_ptr<Cell_Frame_Top> cellFrame = std::dynamic_pointer_cast<Cell_Frame_Top>(cell); + if(!cell || !cellFrame) { + throw std::runtime_error("Invalid cell."); + } + + const long double 
prevActivationScaling = getMaxParentsScaling(cell, activationScalings); + rescaleParentsToScaling(cell, activationScalings, prevActivationScaling); + + + long double activationScaling; + + const std::shared_ptr<Activation>& activation = cellFrame->getActivation(); + if(cell->getType() == ElemWiseCell::Type) { + activationScaling = getCellThreshold(cell->getName(), + outputsHistogram, outputsRange, + nbBits, ClippingMode::NONE); + } + else if(cell->getType() == PaddingCell::Type || + cell->getType() == PoolCell::Type || + cell->getType() == ResizeCell::Type || + cell->getType() == ScalingCell::Type || + cell->getType() == SoftmaxCell::Type || + cell->getType() == TransposeCell::Type || + cell->getType() == ReshapeCell::Type) + { + activationScalings[cell->getName()] = prevActivationScaling; + continue; + } + else if(activation) { + const bool clip = cell->getNbOutputs() > 2 && + (activation->getType() == RectifierActivation::Type || + activation->getType() == LinearActivation::Type || + activation->getType() == SaturationActivation::Type); + + + auto childrenCells = mDeepNet.getChildCells(cell->getName()); + const bool isNextCellMaxPool = childrenCells.size() == 1 && + childrenCells[0]->getType() == PoolCell::Type && + dynamic_cast<const PoolCell&>(*childrenCells[0]).getPooling() == PoolCell::Max; + + + const std::string cellStatsName = clip && isNextCellMaxPool?childrenCells[0]->getName(): + cell->getName(); + activationScaling = getCellThreshold(cellStatsName, + outputsHistogram, outputsRange, + nbBits, clip?actClippingMode:ClippingMode::NONE, quantileValue); + } + else { + throw std::runtime_error("Quantization of cell '" + cell->getName() + "' of type '" + + cell->getType() + "' is not supported yet."); + } + + const long double biasScaling = biasScalings.at(cell->getName()); + +#ifdef VERBOSE_QUANT + std::cout << " - " << cell->getName() << ": " + << "prev=" << prevActivationScaling + << ", act=" << activationScaling + << ", bias=" << biasScaling << std::endl; +#endif + + activationScaling /= biasScaling; + activationScaling = (activationScaling == 0.0)?1.0:activationScaling; + + activationScalings[cell->getName()] = activationScaling; + + cell->processFreeParameters([&](Float_T d) { return d/prevActivationScaling; }, + Cell::Additive); + + + const long double actQuantScaling = getActivationQuantizationScaling(*cell, nbBits); + auto scalingCell = Registrar<ScalingCell>::create<Float_T>(getCellModelType(*cell)) + (mDeepNet, + mDeepNet.generateNewCellName(cell->getName() + "_rescale_act"), + cell->getNbOutputs(), + Scaling::floatingPointScaling( + std::vector<Float_T>(cell->getNbOutputs(), + (prevActivationScaling/activationScaling)/actQuantScaling + ), + false, + std::vector<Float_T>(0.0f) + ) + ); + + mDeepNet.addCellAfter(scalingCell, cell); + + activationScalings[scalingCell->getName()] = activationScalings[cell->getName()]; + biasScalings[scalingCell->getName()] = biasScaling; + +#ifdef VERBOSE_QUANT + std::cout << " quant=" << actQuantScaling + << ", global scaling=" << Utils::cnotice << activationScaling + << Utils::cdef << " -> cell scaling=" << Utils::cwarning + << ((prevActivationScaling/activationScaling) + /actQuantScaling) + << Utils::cdef << std::endl; +#endif + } + } + + fuseScalingCells(); +} +*/ +long double quantizeActivation(std::shared_ptr<Node> node, std::size_t nbBits, std::unordered_map<std::string, long double> biasScalings, std::unordered_map<std::string, long double> activationScalings, bool verbose){ + + const long double prevActivationScaling = 
getMaxParentsScaling(node, activationScalings); + rescaleParentsToScaling(node, activationScalings, prevActivationScaling); + + long double activationScaling; + long double biasScaling = 1.0; + + if(node->type() == "ElemWise") { + /* + activationScaling = getCellThreshold(node->name(), + outputsHistogram, outputsRange, + nbBits, ClippingMode::NONE); + */ + activationScaling = getCellThreshold(node); + } + else if(node->type() == "Padding" || + node->type() == "Pool" || + node->type() == "Resize" || + node->type() == "Scaling" || + node->type() == "Softmax" || + node->type() == "Transpose" || + node->type() == "Reshape") + { + return prevActivationScaling; + } + else if(node->type() == "ReLU" + || node->type() == "Linear" + || node->type() == "Saturation") { + //TODO :: nbOutputs > 2 is relevant for clip, check it + //const bool clip = node->nbOutputs() > 2 && isLinearActivation; + activationScaling = getCellThreshold(node); + + } + else { + throw std::runtime_error("Quantization of cell '" + node->name() + "' of type '" + + node->type() + "' is not supported yet."); + } + + //get the parent conv/fc bias + const std::vector<std::shared_ptr<Node>> parentsNodes = node->getParents(); + + for(const std::shared_ptr<Node>& parentNode: parentsNodes) { + if(parentNode->type() == "Conv"){ + biasScaling = biasScalings[parentNode->name()]; + } + } + + activationScaling /= biasScaling; + activationScaling = (activationScaling == 0.0)?1.0:activationScaling; + + return activationScaling; +} + +void quantizeNetwork(std::vector<std::shared_ptr<Node>> orderedGraphView, std::size_t nbBits, bool verbose){ +//void quantizeNetwork(std::shared_ptr<GraphView> g1, std::size_t nbBits, bool verbose){ + + //keep all bias scalings here + std::unordered_map<std::string, long double> biasScalings; + //and activations + std::unordered_map<std::string, long double> activationScalings; + + //loop on all nodes for weights/bias quantization + for (const std::shared_ptr<Node>& nodePtr : orderedGraphView) { + + if(verbose){ + printf("Node type %s, node name %s has children : \n", nodePtr->type().c_str(), nodePtr->name().c_str()); + + std::set<std::shared_ptr<Node>> childrenNodes = nodePtr->getChildren(); + for (const std::shared_ptr<Node>& child_node : childrenNodes) + { + printf("- type %s, and name %s \n", child_node->type().c_str(), child_node->name().c_str()); + } + const std::vector<std::shared_ptr<Node>> parentsNodes = nodePtr->getParents(); + printf("And %zu parents: \n", parentsNodes.size()); + for(const std::shared_ptr<Node>& parent_node: parentsNodes) { + printf("- name %s : \n", parent_node->name().c_str()); + } + } + + if (nodePtr->type() == "Conv") { + biasScalings[nodePtr->name()] = quantizeFreeParams(nodePtr, nbBits, biasScalings, verbose); + if(verbose){ + std::cout << "outside quantizeFreeParams :: biasScalings[node->name()] = " << biasScalings[nodePtr->name()] << std::endl; + } + } else if(nodePtr->type() == "ReLU") { + activationScalings[nodePtr->name()] = quantizeActivation(nodePtr, nbBits, biasScalings, activationScalings, verbose); + if(verbose){ + std::cout << "outside quantizeActivation :: activationScalings[node->name()] = " << activationScalings[nodePtr->name()] << std::endl; + } + + //TODO : + //correct bias for previous activation scaling here ... 
+ /* + cell->processFreeParameters([&](Float_T d) { return d/prevActivationScaling; }, + Cell::Additive); + */ + + std::shared_ptr<Node> scaling_node = Scaling(activationScalings[nodePtr->name()], (nodePtr->name()+"_rescale_act").c_str()); + scaling_node->getOperator()->setDatatype(DataType::Float32); + scaling_node->getOperator()->setBackend("cpu"); + + for (auto& graphPtr : nodePtr->views()) { + graphPtr->addChild(scaling_node); + } + //add scaling cell + /* + const long double actQuantScaling = getActivationQuantizationScaling(*cell, nbBits); + auto scalingCell = Registrar<ScalingCell>::create<Float_T>(getCellModelType(*cell)) + (mDeepNet, + mDeepNet.generateNewCellName(cell->getName() + "_rescale_act"), + cell->getNbOutputs(), + Scaling::floatingPointScaling( + std::vector<Float_T>(cell->getNbOutputs(), + (prevActivationScaling/activationScaling)/actQuantScaling + ), + false, + std::vector<Float_T>(0.0f) + ) + ); + + mDeepNet.addCellAfter(scalingCell, cell); + */ + + //add activation/bias to the maps for scaling cell ? + //activationScalings[scalingCell->getName()] = activationScalings[cell->getName()]; + //biasScalings[scalingCell->getName()] = biasScaling; + } + else{ + std::cout << "this is not Conv or ReLu, moving on" << std::endl; + } + } + +} +} + +//TODO : +//Scaling cell insert missing - for now it's a problem for activation quantization, as in simple cases there is no need to additional rescaling in rescaleParentsToScaling ... +//activation is no more attached to cell, now it's the child of conv/fc - quantize only after Relu/? +//Figure out how to check the range of operator : need to add the function to every possible operator ? diff --git a/unit_tests/CMakeLists.txt b/unit_tests/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..9d9f81516b0cd2611484ee9e3e06e838833200db --- /dev/null +++ b/unit_tests/CMakeLists.txt @@ -0,0 +1,22 @@ +Include(FetchContent) + +FetchContent_Declare( + Catch2 + GIT_REPOSITORY https://github.com/catchorg/Catch2.git + GIT_TAG v3.0.1 # or a later release +) + +FetchContent_MakeAvailable(Catch2) + +file(GLOB_RECURSE src_files "*.cpp") + +add_executable(tests${module_name} ${src_files}) + +target_link_libraries(tests${module_name} PUBLIC ${module_name}) + +target_link_libraries(tests${module_name} PRIVATE Catch2::Catch2WithMain) + +list(APPEND CMAKE_MODULE_PATH ${catch2_SOURCE_DIR}/extras) +include(CTest) +include(Catch) +catch_discover_tests(tests${module_name}) diff --git a/unit_tests/Test_QuantPTQ.cpp b/unit_tests/Test_QuantPTQ.cpp new file mode 100644 index 0000000000000000000000000000000000000000..b3e00739930f507075dae1dfbfd0ed3971ad7c1c --- /dev/null +++ b/unit_tests/Test_QuantPTQ.cpp @@ -0,0 +1,248 @@ +#include <catch2/catch_test_macros.hpp> + +#include "aidge/data/Tensor.hpp" +#include "aidge/backend/TensorImpl.hpp" +#include "aidge/backend/cpu.hpp" +#include "aidge/operator/Conv.hpp" +#include "aidge/operator/Scaling.hpp" +#include "aidge/operator/GenericOperator.hpp" +#include "aidge/graph/GraphView.hpp" +#include "aidge/QuantPTQ.hpp" +#include "aidge/scheduler/Scheduler.hpp" +#include "aidge/hook/outputRange.hpp" +#include "aidge/operator/Producer.hpp" + +#include <unordered_map> + +using namespace Aidge; +using namespace Aidge_HELPER; + +TEST_CASE("[aidge_module_template/ref_cpp/quantization] PTQ : Quantize Graph") { + + std::shared_ptr<GraphView> g1 = std::make_shared<GraphView>("TestGraph"); + + bool verbose = true; + + std::shared_ptr<Node> myConv1 = Conv(3,4,{3,3}, "myConv1"); + 
myConv1->getOperator()->setDatatype(DataType::Float32); + myConv1->getOperator()->setBackend("cpu"); + + Tensor myWeights = Array4D<float,4,3,3,3> { + { + { + {{ 0., 1., 2.}, + { 3., 4., 5.}, + { 6., 7., 8.}}, + {{ 9., 10., 11.}, + { 12., 13., 14.}, + { 15., 16., 17.}}, + {{ 18., 19., 20.}, + { 21., 22., 23.}, + { 24., 25., 26.}} + }, + { + {{ 27., 28., 29.}, + { 30., 31., 32.}, + { 33., 34., 35.}}, + {{ 36., 37., 38.}, + { 39., 40., 41.}, + { 42., 43., 44.}}, + {{ 45., 46., 47.}, + { 48., 49., 50.}, + { 51., 52., 53.}} + }, + { + {{ 54., 55., 56.}, + { 57., 58., 59.}, + { 60., 61., 62.}}, + {{ 63., 64., 65.}, + { 66., 67., 68.}, + { 69., 70., 71.}}, + {{ 72., 73., 74.}, + { 75., 76., 77.}, + { 78., 79., 80.}} + }, + { + {{ 81., 82., 83.}, + { 84., 85., 86.}, + { 87., 88., 89.}}, + {{ 90., 91., 92.}, + { 93., 94., 95.}, + { 96., 97., 98.}}, + {{ 99., 100., 101.}, + {102., 103., 104.}, + {105., 106., 107.}} + } + } + }; + Tensor myBias = Array1D<float,4> {{7.,0.,9.,0.}}; + + std::shared_ptr<Tensor> myInput = + std::make_shared<Tensor>( + Array4D<float,2,3,5,5> { + { + { + {{ 0., 1., 2., 3., 4.}, + { 5., 6., 7., 8., 9.}, + { 10, 11, 12, 13, 14.}, + { 15, 16, 17, 18, 19.}, + { 20, 21, 22, 23, 24.}}, + + {{ 25, 26., 27., 28., 29.}, + { 30., 31., 32., 33., 34.}, + { 35., 36., 37., 38., 39.}, + { 40., 41., 42., 43., 44.}, + { 45., 46., 47., 48., 49.}}, + + {{ 50., 51., 52., 53., 54.}, + { 55., 56., 57., 58., 59.}, + { 60., 61., 62., 63., 64.}, + { 65., 66., 67., 68., 69.}, + { 70., 71., 72., 73., 74.}} + }, + { + {{ 75., 76., 77., 78., 79.}, + { 80., 81., 82., 83., 84.}, + { 85., 86., 87., 88., 89.}, + { 90., 91., 92., 93., 94.}, + { 95., 96., 97., 98., 99.}}, + + {{100, 101, 102, 103, 104.}, + {105, 106, 107, 108, 109.}, + {110, 111, 112, 113, 114.}, + {115, 116, 117, 118, 119.}, + {120, 121, 122, 123, 124.}}, + + {{125, 126, 127, 128, 129.}, + {130, 131, 132, 133, 134.}, + {135, 136, 137, 138, 139.}, + {140, 141, 142, 143, 144.}, + {145, 146, 147, 148, 149.}} + } + } + } + ); + + auto dataProvider = Producer(myInput, "dataProvider"); + Tensor myOutput = Array4D<float,2,4,3,3> { + { + { + {{ 15226., 15577., 15928.}, + { 16981., 17332., 17683.}, + { 18736., 19087., 19438.}}, + {{ 37818., 38898., 39978.}, + { 43218., 44298., 45378.}, + { 48618., 49698., 50778.}}, + {{ 60426., 62235., 64044.}, + { 69471., 71280., 73089.}, + { 78516., 80325., 82134.}}, + {{ 83016., 85554., 88092.}, + { 95706., 98244., 100782.}, + {108396., 110934., 113472.}} + }, + { + {{ 41551., 41902., 42253.}, + { 43306., 43657., 44008.}, + { 45061., 45412., 45763.}}, + {{118818., 119898., 120978.}, + {124218., 125298., 126378.}, + {129618., 130698., 131778.}}, + {{196101., 197910., 199719.}, + {205146., 206955., 208764.}, + {214191., 216000., 217809.}}, + {{273366., 275904., 278442.}, + {286056., 288594., 291132.}, + {298746., 301284., 303822.}} + } + } + }; + myConv1->getOperator()->input(0) = *myInput; + myConv1->getOperator()->input(1) = myWeights; + myConv1->getOperator()->input(2) = myBias; + myConv1->getOperator()->computeOutputDims(); + myConv1->getOperator()->addHook("output_range"); + + dataProvider->getOperator()->setDatatype(DataType::Float32); + dataProvider->getOperator()->setBackend("cpu"); + dataProvider->addChild(myConv1, 0); + + g1->add(myConv1); + + std::shared_ptr<Node> myReLU1 = ReLU("ReLu1"); + myReLU1->getOperator()->setDatatype(DataType::Float32); + myReLU1->getOperator()->setBackend("cpu"); + myReLU1->getOperator()->computeOutputDims(); + myReLU1->getOperator()->addHook("output_range"); + 
g1->addChild(myReLU1); + g1->setBackend("cpu"); + g1->forwardDims(); + + //check hook functioning + SequentialScheduler scheduler(g1); + scheduler.forward(); + + float max_output_conv = std::static_pointer_cast<OutputRange>(myConv1->getOperator()->getHook("output_range"))->getOutput(0); + if(verbose) { + printf("[hook] OutputRange(forward) :: Conv output max: " + "\x1b[1;37m" + "%f" + "\n", + max_output_conv); + + } + + float max_output_relu = std::static_pointer_cast<OutputRange>(myReLU1->getOperator()->getHook("output_range"))->getOutput(0); + if(verbose) { + printf("[hook] OutputRange(forward) :: ReLU output max: " + "\x1b[1;37m" + "%f" + "\n", + max_output_relu); + } + + //no need to do this anymore, forward does it autimatically now ... + //scheduler.generateScheduling(true); + std::vector<std::shared_ptr<Node>> ordered_graph_view = scheduler.getStaticScheduling(); + + printf("Going to quantize network :\n"); + + quantizeNetwork(ordered_graph_view, 8, verbose); + + printf("After quantize network !!! \n"); + + if(verbose) { + printf("Graph after quantization :\n"); + for (const std::shared_ptr<Node>& nodePtr : g1->getNodes()) { + printf("\t- node type: " + "\x1b[1;37m" + "%s" + " , node name: " + "\x1b[1;37m" + "%s" + "\n", + (nodePtr->type()).c_str(), (nodePtr->name()).c_str()); + } + } + + SequentialScheduler scheduler_v2(g1); + + scheduler_v2.forward(); + scheduler_v2.generateScheduling(false); + std::vector<std::shared_ptr<Node>> ordered_graph_view_v2 = scheduler_v2.getStaticScheduling(); + + if(verbose) { + printf("Ordered graph after quantization :\n"); + for (const std::shared_ptr<Node>& nodePtr : ordered_graph_view_v2) { + printf("\t- node type: " + "\x1b[1;37m" + "%s" + " , node name: " + "\x1b[1;37m" + "%s" + "\n", + (nodePtr->type()).c_str(), (nodePtr->name()).c_str()); + } + } + + +} \ No newline at end of file diff --git a/unit_tests/hook/Test_execTime.cpp b/unit_tests/hook/Test_execTime.cpp new file mode 100644 index 0000000000000000000000000000000000000000..26dce611bc691cd7fe6e1bc6bdf3b5c6d233541f --- /dev/null +++ b/unit_tests/hook/Test_execTime.cpp @@ -0,0 +1,159 @@ +#include <catch2/catch_test_macros.hpp> + +//#include "aidge/aidge.hpp" +#include "aidge/backend/cpu.hpp" +#include "aidge/QuantPTQ.hpp" +#include "aidge/hook/execTime.hpp" + +using namespace Aidge; +using namespace Aidge_HELPER; + +#include <iostream> +#include <ctime> +#include <chrono> +#include <iomanip> + +TEST_CASE("[hook] ExecTime(forward)") { + std::shared_ptr<Node> myConv1 = Conv(3,4,{3,3}, "myConv1"); + myConv1->getOperator()->setDatatype(DataType::Float32); + myConv1->getOperator()->setBackend("cpu"); + + Tensor myWeights = Array4D<float,4,3,3,3> { + { + { + {{ 0., 1., 2.}, + { 3., 4., 5.}, + { 6., 7., 8.}}, + {{ 9., 10., 11.}, + { 12., 13., 14.}, + { 15., 16., 17.}}, + {{ 18., 19., 20.}, + { 21., 22., 23.}, + { 24., 25., 26.}} + }, + { + {{ 27., 28., 29.}, + { 30., 31., 32.}, + { 33., 34., 35.}}, + {{ 36., 37., 38.}, + { 39., 40., 41.}, + { 42., 43., 44.}}, + {{ 45., 46., 47.}, + { 48., 49., 50.}, + { 51., 52., 53.}} + }, + { + {{ 54., 55., 56.}, + { 57., 58., 59.}, + { 60., 61., 62.}}, + {{ 63., 64., 65.}, + { 66., 67., 68.}, + { 69., 70., 71.}}, + {{ 72., 73., 74.}, + { 75., 76., 77.}, + { 78., 79., 80.}} + }, + { + {{ 81., 82., 83.}, + { 84., 85., 86.}, + { 87., 88., 89.}}, + {{ 90., 91., 92.}, + { 93., 94., 95.}, + { 96., 97., 98.}}, + {{ 99., 100., 101.}, + {102., 103., 104.}, + {105., 106., 107.}} + } + } + }; + Tensor myBias = Array1D<float,4> {{7.,0.,9.,0.}}; + Tensor myInput = 
Array4D<float,2,3,5,5> { + { + { + {{ 0., 1., 2., 3., 4.}, + { 5., 6., 7., 8., 9.}, + { 10, 11, 12, 13, 14.}, + { 15, 16, 17, 18, 19.}, + { 20, 21, 22, 23, 24.}}, + + {{ 25, 26., 27., 28., 29.}, + { 30., 31., 32., 33., 34.}, + { 35., 36., 37., 38., 39.}, + { 40., 41., 42., 43., 44.}, + { 45., 46., 47., 48., 49.}}, + + {{ 50., 51., 52., 53., 54.}, + { 55., 56., 57., 58., 59.}, + { 60., 61., 62., 63., 64.}, + { 65., 66., 67., 68., 69.}, + { 70., 71., 72., 73., 74.}} + }, + { + {{ 75., 76., 77., 78., 79.}, + { 80., 81., 82., 83., 84.}, + { 85., 86., 87., 88., 89.}, + { 90., 91., 92., 93., 94.}, + { 95., 96., 97., 98., 99.}}, + + {{100, 101, 102, 103, 104.}, + {105, 106, 107, 108, 109.}, + {110, 111, 112, 113, 114.}, + {115, 116, 117, 118, 119.}, + {120, 121, 122, 123, 124.}}, + + {{125, 126, 127, 128, 129.}, + {130, 131, 132, 133, 134.}, + {135, 136, 137, 138, 139.}, + {140, 141, 142, 143, 144.}, + {145, 146, 147, 148, 149.}} + } + } + }; + Tensor myOutput = Array4D<float,2,4,3,3> { + { + { + {{ 15226., 15577., 15928.}, + { 16981., 17332., 17683.}, + { 18736., 19087., 19438.}}, + {{ 37818., 38898., 39978.}, + { 43218., 44298., 45378.}, + { 48618., 49698., 50778.}}, + {{ 60426., 62235., 64044.}, + { 69471., 71280., 73089.}, + { 78516., 80325., 82134.}}, + {{ 83016., 85554., 88092.}, + { 95706., 98244., 100782.}, + {108396., 110934., 113472.}} + }, + { + {{ 41551., 41902., 42253.}, + { 43306., 43657., 44008.}, + { 45061., 45412., 45763.}}, + {{118818., 119898., 120978.}, + {124218., 125298., 126378.}, + {129618., 130698., 131778.}}, + {{196101., 197910., 199719.}, + {205146., 206955., 208764.}, + {214191., 216000., 217809.}}, + {{273366., 275904., 278442.}, + {286056., 288594., 291132.}, + {298746., 301284., 303822.}} + } + } + }; + myConv1->getOperator()->input(0) = myInput; + myConv1->getOperator()->input(1) = myWeights; + myConv1->getOperator()->input(2) = myBias; + myConv1->getOperator()->computeOutputDims(); + myConv1->getOperator()->addHook("execution_time"); + myConv1->forward(); + + //std::static_pointer_cast<Tensor>(myConv1->getOperator()->getOutput(0))->print(); + assert(*std::static_pointer_cast<Tensor>(myConv1->getOperator()->getOutput(0)) == myOutput); + //std::static_pointer_cast<Tensor>(myConv1->getOperator()->getInput(1))->print(); + + std::chrono::time_point<std::chrono::system_clock> time_conv = std::static_pointer_cast<ExecTime>(myConv1->getOperator()->getHook("execution_time"))->getTime(0); + const std::time_t t_c = std::chrono::system_clock::to_time_t(time_conv); + //std::cout << "the execution time of the module was " << std::put_time(std::localtime(&t_c), "%F %T.\n") << std::flush; + +} \ No newline at end of file diff --git a/unit_tests/hook/Test_outputRange.cpp b/unit_tests/hook/Test_outputRange.cpp new file mode 100644 index 0000000000000000000000000000000000000000..2d84d04a8aede72bf1e2252effd678e8a9c1b13c --- /dev/null +++ b/unit_tests/hook/Test_outputRange.cpp @@ -0,0 +1,156 @@ +#include <catch2/catch_test_macros.hpp> +#include <iostream> +#include <ctime> +#include <chrono> +#include <iomanip> + +//#include "aidge/aidge.hpp" +#include "aidge/backend/cpu.hpp" +#include "aidge/QuantPTQ.hpp" +#include "aidge/hook/outputRange.hpp" + +using namespace Aidge; +using namespace Aidge_HELPER; + +TEST_CASE("[hook] OutputRange(forward)") { + std::shared_ptr<Node> myConv1 = Conv(3,4,{3,3}, "myConv1"); + myConv1->getOperator()->setDatatype(DataType::Float32); + myConv1->getOperator()->setBackend("cpu"); + + Tensor myWeights = Array4D<float,4,3,3,3> { + { + { + {{ 0., 1., 2.}, + { 
3., 4., 5.}, + { 6., 7., 8.}}, + {{ 9., 10., 11.}, + { 12., 13., 14.}, + { 15., 16., 17.}}, + {{ 18., 19., 20.}, + { 21., 22., 23.}, + { 24., 25., 26.}} + }, + { + {{ 27., 28., 29.}, + { 30., 31., 32.}, + { 33., 34., 35.}}, + {{ 36., 37., 38.}, + { 39., 40., 41.}, + { 42., 43., 44.}}, + {{ 45., 46., 47.}, + { 48., 49., 50.}, + { 51., 52., 53.}} + }, + { + {{ 54., 55., 56.}, + { 57., 58., 59.}, + { 60., 61., 62.}}, + {{ 63., 64., 65.}, + { 66., 67., 68.}, + { 69., 70., 71.}}, + {{ 72., 73., 74.}, + { 75., 76., 77.}, + { 78., 79., 80.}} + }, + { + {{ 81., 82., 83.}, + { 84., 85., 86.}, + { 87., 88., 89.}}, + {{ 90., 91., 92.}, + { 93., 94., 95.}, + { 96., 97., 98.}}, + {{ 99., 100., 101.}, + {102., 103., 104.}, + {105., 106., 107.}} + } + } + }; + Tensor myBias = Array1D<float,4> {{7.,0.,9.,0.}}; + Tensor myInput = Array4D<float,2,3,5,5> { + { + { + {{ 0., 1., 2., 3., 4.}, + { 5., 6., 7., 8., 9.}, + { 10, 11, 12, 13, 14.}, + { 15, 16, 17, 18, 19.}, + { 20, 21, 22, 23, 24.}}, + + {{ 25, 26., 27., 28., 29.}, + { 30., 31., 32., 33., 34.}, + { 35., 36., 37., 38., 39.}, + { 40., 41., 42., 43., 44.}, + { 45., 46., 47., 48., 49.}}, + + {{ 50., 51., 52., 53., 54.}, + { 55., 56., 57., 58., 59.}, + { 60., 61., 62., 63., 64.}, + { 65., 66., 67., 68., 69.}, + { 70., 71., 72., 73., 74.}} + }, + { + {{ 75., 76., 77., 78., 79.}, + { 80., 81., 82., 83., 84.}, + { 85., 86., 87., 88., 89.}, + { 90., 91., 92., 93., 94.}, + { 95., 96., 97., 98., 99.}}, + + {{100, 101, 102, 103, 104.}, + {105, 106, 107, 108, 109.}, + {110, 111, 112, 113, 114.}, + {115, 116, 117, 118, 119.}, + {120, 121, 122, 123, 124.}}, + + {{125, 126, 127, 128, 129.}, + {130, 131, 132, 133, 134.}, + {135, 136, 137, 138, 139.}, + {140, 141, 142, 143, 144.}, + {145, 146, 147, 148, 149.}} + } + } + }; + Tensor myOutput = Array4D<float,2,4,3,3> { + { + { + {{ 15226., 15577., 15928.}, + { 16981., 17332., 17683.}, + { 18736., 19087., 19438.}}, + {{ 37818., 38898., 39978.}, + { 43218., 44298., 45378.}, + { 48618., 49698., 50778.}}, + {{ 60426., 62235., 64044.}, + { 69471., 71280., 73089.}, + { 78516., 80325., 82134.}}, + {{ 83016., 85554., 88092.}, + { 95706., 98244., 100782.}, + {108396., 110934., 113472.}} + }, + { + {{ 41551., 41902., 42253.}, + { 43306., 43657., 44008.}, + { 45061., 45412., 45763.}}, + {{118818., 119898., 120978.}, + {124218., 125298., 126378.}, + {129618., 130698., 131778.}}, + {{196101., 197910., 199719.}, + {205146., 206955., 208764.}, + {214191., 216000., 217809.}}, + {{273366., 275904., 278442.}, + {286056., 288594., 291132.}, + {298746., 301284., 303822.}} + } + } + }; + myConv1->getOperator()->input(0) = myInput; + myConv1->getOperator()->input(1) = myWeights; + myConv1->getOperator()->input(2) = myBias; + myConv1->getOperator()->computeOutputDims(); + myConv1->getOperator()->addHook("output_range"); + myConv1->forward(); + + //std::static_pointer_cast<Tensor>(myConv->getOperator()->getOutput(0))->print(); + assert(*std::static_pointer_cast<Tensor>(myConv1->getOperator()->getOutput(0)) == myOutput); + + float max_output = std::static_pointer_cast<OutputRange>(myConv1->getOperator()->getHook("output_range"))->getOutput(0); + //std::cout << "the output of the conv was " << max_output << std::flush; + +} \ No newline at end of file