diff --git a/.gitignore b/.gitignore index 9dc376af32fb97a97de4bd60b025fbe8d707ce56..a02270fd05be2af03ecce21025d9fd0c5f785e80 100644 --- a/.gitignore +++ b/.gitignore @@ -13,6 +13,7 @@ __pycache__ dist*/ aidge_export_cpp/_version.py wheelhouse/* +env_aidge/ # Temp test folders aidge_export_cpp/unit_tests/*_temp_test @@ -26,3 +27,6 @@ xml*/ # ONNX *.onnx + +# GDB +.gdb_history \ No newline at end of file diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index ba0bf4b4acd33d20324f919422346eeb18abea4e..4a443fc403ab341d770455b203524c7f6e65f42a 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -12,12 +12,27 @@ stages: - deploy include: - - project: 'eclipse/aidge/gitlab_shared_files' + - project: 'eclipse/aidge/gitlab_shared_files' ref: 'main' - file: + file: # choose which jobs to run by including the corresponding files. - '.gitlab/ci/ubuntu_python.gitlab-ci.yml' - '.gitlab/ci/release/pip.gitlab-ci.yml' # Since aidge_export_cpp is a pure python package building on windows and on ubuntu doesn't differ - # - '.gitlab/ci/windows_python.gitlab-ci.yml' + # - '.gitlab/ci/windows_python.gitlab-ci.yml' + +test:ubuntu_python: + before_script: + - !reference [.setup:test:ubuntu_python, before_script] + - DEPS_NAMES=("aidge_onnx" "aidge_quantization") + - DEPENDENCY_JOB="build:ubuntu_python" + - !reference [.ubuntu:download:artifacts, script] + # Need to install extra dependencies for tests: + - python -m pip install torch torchvision +coverage:ubuntu_python: + before_script: + - !reference [.setup:coverage:ubuntu_python, before_script] + - DEPS_NAMES=("aidge_onnx" "aidge_quantization") + - DEPENDENCY_JOB="build:ubuntu_python" + - !reference [.ubuntu:download:artifacts, script] \ No newline at end of file diff --git a/aidge_export_cpp/__init__.py b/aidge_export_cpp/__init__.py index d0a7ca586d7ec9379230c6e08ad9972edcfb9e70..7a5bb301b229424aa1c896f7fd9c5351d79faec1 100644 --- a/aidge_export_cpp/__init__.py +++ b/aidge_export_cpp/__init__.py @@ -2,9 +2,15 @@ r""" Aidge Export for CPP standalone projects """ -from .utils import ROOT +from pathlib import Path + +# Constants +FILE = Path(__file__).resolve() +ROOT = FILE.parents[0] + from .export_registry import ExportLibCpp +from .export_utils import * from .operators import * -from collections import defaultdict from .export import * from . import benchmark +
diff --git a/aidge_export_cpp/export.py b/aidge_export_cpp/export.py index 42bf90f30eda3a2110cab5c8f0a315824f57d522..10741b0d7bd5116b4d5d833157d2bad504d9aca0 100644 --- a/aidge_export_cpp/export.py +++ b/aidge_export_cpp/export.py @@ -1,15 +1,96 @@ +import os +import shutil +import numpy as np +from pathlib import Path +from typing import List, Union + import aidge_core -from aidge_export_cpp import ExportLibCpp - -def export(export_folder_name, graphview, scheduler, mem_wrapping=False): - print("Warning: This function is deprecated, check tutorial https://eclipse.dev/aidge/source/Tutorial/export_cpp.html to find the new way to generate a C++ export.") - aidge_core.export_utils.scheduler_export( - scheduler, - export_folder_name, - ExportLibCpp, - memory_manager=aidge_core.mem_info.generate_optimized_memory_info, - memory_manager_args={ - "stats_folder": f"{export_folder_name}/stats", - "wrapping": mem_wrapping - } - ) +from aidge_core.mem_info import generate_optimized_memory_info +from aidge_core.export_utils import scheduler_export, generate_main_cpp, aidge2c, generate_file + +from aidge_export_cpp import ExportLibCpp, ROOT +from aidge_export_cpp.export_utils import read_log_file + + +def export(export_folder_name: str, + graphview: aidge_core.GraphView, + scheduler: Union[List[aidge_core.Node], + aidge_core.Scheduler], + inputs_tensor: aidge_core.Tensor = None, + labels: aidge_core.Tensor = None, + dev_mode: bool = False, + aidge_cmp: bool = False): + + """ Export an aidge_core.Scheduler to C++ code + + :param export_folder_name: Export folder name + :type export_folder_name: str + :param graphview: An instance of :py:class:`aidge_core.GraphView`, providing access to nodes and + ordered input/output data within the computational graph. + :type graphview: aidge_core.GraphView + :param scheduler: Scheduler instance managing the computation graph. + Uses `graph_view` and `get_sequential_static_scheduling` methods + to retrieve the computation graph layout and ordered nodes. + :type scheduler: aidge_core.Scheduler + :param inputs_tensor: **For future** argument to provide tensor to use in the main function, not implemented yet! + By default, the input of the given graph will be exported. + :type inputs_tensor: aidge_core.Tensor + :param labels: Argument to provide labels tensor to generate and use in the main function. + :type labels: aidge_core.Tensor + :param dev_mode: Whether or not the developer mode is enabled. If enabled, the export files + will be symlinks from the aidge_export_cpp module. Therefore, modifying + a file within the export will change the module as well.
+ :type dev_mode: bool + :param aidge_cmp: Whether or not to embed the Aidge log outputs into the export, + to be used as reference outputs (see below). + :type aidge_cmp: bool + """ + + export_folder_name = Path(export_folder_name) + + # Remove existing export + if os.path.isdir(export_folder_name): + print("Removing existing export directory...") + shutil.rmtree(export_folder_name) + + # Generate Model Files + """ + Perform the following tasks: + - Generate the parameters and layers config files + - Generate the forward.cpp file + - Copy all needed kernels + """ + + scheduler_export(scheduler, + export_folder_name, + ExportLibCpp, + memory_manager=generate_optimized_memory_info, + memory_manager_args={ + "stats_folder": f"{export_folder_name}/stats"}, + dev_mode=dev_mode) + + # Generate main file + generate_main_cpp(export_folder_name, graphview, labels=labels, inputs_tensor=inputs_tensor) + + # Generate log files (aidge_cmp option) + """ + If the aidge_cmp option has been enabled, the generated log_outputs will + be copied into the generated export in order to be used as reference. + """ + if aidge_cmp: + ranked_nodes = graphview.get_ranked_nodes_name("{0}[{1}#{3}]") + os.makedirs(export_folder_name / "data" / "aidge_outputs") + os.makedirs(export_folder_name / "data" / "export_outputs") + for node in graphview.get_nodes(): + if node.type() != "Producer": + file_path = 'log_outputs/' + ranked_nodes[node] + '/output_0.log' + data_t = aidge2c(node.get_operator().get_output(0).dtype()) + name = node.name() + '_output_0_aidge' + dims = node.get_operator().get_output(0).dims() + values = read_log_file(file_path) + + generate_file(export_folder_name / "data" / "aidge_outputs" / (node.name() + ".hpp"), + ROOT / "templates" / "data" / "aidge_tensor.jinja", + data_t=data_t, + name=name, + dims=dims, + values=values)
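(Editor's note) A minimal usage sketch of the new `export()` entry point. The model loading and scheduling below are hypothetical boilerplate; only `export()` and its keyword arguments come from the diff above:

    import aidge_core
    import aidge_onnx
    from aidge_export_cpp import export

    # Hypothetical setup: any scheduled GraphView works here.
    model = aidge_onnx.load_onnx("model.onnx")
    model.forward_dims()
    scheduler = aidge_core.SequentialScheduler(model)
    scheduler.generate_scheduling()

    export("export_cpp",      # output folder, wiped first if it already exists
           model,
           scheduler,
           labels=None,       # optional labels tensor used by the generated main.cpp
           dev_mode=False,    # True: export files are symlinks back into the module
           aidge_cmp=False)   # True: embed Aidge reference outputs for comparison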
+ """ + + cpp_recipes = OrderedDict({ + # Quantization + "QMul": "Mul->Quantizer", # Fixed Point Scaling + + # FC + "QFC": "FC->(Quantizer|QMul)", + "FCAct": "(FC|QFC)->ReLU", + + # Conv + "QConv": "Conv2D->(Quantizer|QMul)", + "PadConv": "(QConv|Conv2D)<-Pad2D", + "ConvAct": "(QConv|Conv2D)->ReLU", + "PadConvAct": "PadConv->ReLU", + + # ConvDw + "QConvDw": "ConvDepthWise2D->(Quantizer|QMul)", + "ConvDwPad": "(QConvDw|ConvDepthWise2D)->Pad2D", + "ConvDwAct": "(QConvDw|ConvConvDepthWise2D2D)->ReLU", + "ConvDwActPad": "ConvDwAct->Pad2D", + + # Max Pooling + "PadMaxPool": "MaxPooling2D<-Pad2D", + "MaxPoolAct": "MaxPooling2D->ReLU", + "PadMaxPoolAct": "PadMaxPool->ReLU", + + # Average Pooling + "PadAvgPool": "AvgPooling2D<-Pad2D", + "AvgPoolAct": "AvgPooling2D->ReLU", + "PadAvgPoolAct": "PadAvgPool->ReLU", + + # Global Average Pooling + "PadGlobalAvgPool": "GlobalAveragePooling2D<-Pad2D", + "GlobalAvgPoolAct": "GlobalAveragePooling2D->ReLU", + "PadGlobalAvgPoolAct": "PadGlobalAveragePool->ReLU", + + # ElemWise + "QAdd": "Add->(Quantizer|QMul)", + "QSub": "Sub->(Quantizer|QMul)", + # "QMul": "Mul->Quantizer", # Already defined + "AddAct": "(QAdd|Add)->ReLU", + "SubAct": "(QSub|Sub)->ReLU", + "MulAct": "(QMul|Mul)->ReLU", + + # Activation + "QReLU": "ReLU->(Quantizer|QMul)", + }) + + for node, recipe in cpp_recipes.items(): + aidge_core.fuse_to_metaops(graph_view, recipe, node) + + + +def set_nodes_names(scheduler): + """ + Set the CPP nodes names as well as their producers. + The producers naming is handled from their child node. + + [TODO] Fc and Conv layers will always have weights as parent 1 and + possibly biases as parent 2. It may be better to previously label the + producers. + + :param scheduler: Scheduler instance managing the computation graph. + Uses `graph_view` and `get_sequential_static_scheduling` methods + to retrieve the computation graph layout and ordered nodes. + :type scheduler: aidge_core.Scheduler + """ + + node_ids = {} # Dict holding the node type along with a counter + node_it = 0 # Node Iterator + + ## MetaOps + for node in scheduler.get_sequential_static_scheduling(): + node_type = node.type() + + if node_type != "Producer": + if node.type() not in node_ids: + node_ids[node_type] = 0 + + # Set node name + node.set_name("_" + str(node_it) + "_" + + str(node_type) + "_" + str(node_ids[node_type])) + node_ids[node_type] += 1 + node_it += 1 + + # Set producers names + ## Weights & Biases producers + if get_node_from_metaop(node, "FC") or \ + get_node_from_metaop(node, "Conv2D") or \ + get_node_from_metaop(node, "ConvDepthWise2D"): + + node.get_parent(1).set_name(node.name() + "_weights") + if node.get_parent(2) is not None: + node.get_parent(2).set_name(node.name() + "_biases") + + ## Scaling Producers + for node in scheduler.get_sequential_static_scheduling(): + """ + TODO: If multiple quantizer nodes are found, the producers will + all have the same name and this will not work properly. + """ + if node.type() == "Producer": + child_node = node.output(0)[0][0] + if node.attributes().has_attr("shift_prod"): + node.set_name(child_node.name() + "_shift") + if node.attributes().has_attr("coef_prod"): + node.set_name(child_node.name() + "_coef") + + + +def set_nodes_datatypes(graph_view: aidge_core.GraphView): + """ Set the nodes' datatypes + + The set_datatype function can't be used on Conv2D and FC nodes directly + as the biases datatype is different from the other inputs. 
+ + +def set_nodes_names(scheduler): + """ + Set the names of the CPP nodes as well as of their producers. + Producer naming is handled from the child node. + + [TODO] Fc and Conv layers will always have weights as parent 1 and + possibly biases as parent 2. It may be better to label the + producers beforehand. + + :param scheduler: Scheduler instance managing the computation graph. + Uses `graph_view` and `get_sequential_static_scheduling` methods + to retrieve the computation graph layout and ordered nodes. + :type scheduler: aidge_core.Scheduler + """ + + node_ids = {} # Dict holding the node type along with a counter + node_it = 0 # Node Iterator + + ## MetaOps + for node in scheduler.get_sequential_static_scheduling(): + node_type = node.type() + + if node_type != "Producer": + if node.type() not in node_ids: + node_ids[node_type] = 0 + + # Set node name + node.set_name("_" + str(node_it) + "_" + + str(node_type) + "_" + str(node_ids[node_type])) + node_ids[node_type] += 1 + node_it += 1 + + # Set producers names + ## Weights & Biases producers + if get_node_from_metaop(node, "FC") or \ + get_node_from_metaop(node, "Conv2D") or \ + get_node_from_metaop(node, "ConvDepthWise2D"): + + node.get_parent(1).set_name(node.name() + "_weights") + if node.get_parent(2) is not None: + node.get_parent(2).set_name(node.name() + "_biases") + + ## Scaling Producers + for node in scheduler.get_sequential_static_scheduling(): + """ + TODO: If multiple quantizer nodes are found, the producers will + all have the same name and this will not work properly. + """ + if node.type() == "Producer": + child_node = node.output(0)[0][0] + if node.attributes().has_attr("shift_prod"): + node.set_name(child_node.name() + "_shift") + if node.attributes().has_attr("coef_prod"): + node.set_name(child_node.name() + "_coef") + + + +def set_nodes_datatypes(graph_view: aidge_core.GraphView): + """ Set the nodes' datatypes + + The set_datatype function can't be used on Conv2D and FC nodes directly + as the biases datatype is different from the other inputs. + TODO: Should be using forward_datatype() + + :param graph_view: An instance of :py:class:`aidge_core.GraphView`, providing access to nodes and + ordered input/output data within the computational graph. + """ + for node in graph_view.get_nodes(): + if node.type() != "Producer": + if get_node_from_metaop(node, "FC") or \ + get_node_from_metaop(node, "Conv2D") or \ + get_node_from_metaop(node, "ConvDepthWise2D"): + + node.get_operator().get_input(0).set_datatype(aidge_core.dtype.int8) # Input + node.get_operator().get_input(1).set_datatype(aidge_core.dtype.int8) # Weights + if node.get_parent(2) is not None: + node.get_operator().get_input(2).set_datatype(aidge_core.dtype.int32) # Biases + node.get_operator().get_output(0).set_datatype(aidge_core.dtype.int8) # Output + else: + node.get_operator().set_datatype(aidge_core.dtype.int8) + + # Set input node's datatype + for n in graph_view.get_input_nodes(): + n.get_operator().get_input(0).set_datatype(aidge_core.dtype.int8) + + + +def read_log_file(file_path: str): + """ Read log file + Used to read the Aidge-generated log files containing the intermediate + tensors of the exported model. + + :param file_path: Path to the file to read. + :type file_path: str + """ + # Check if the file exists + if not os.path.isfile(file_path): + print(f"File not found: {file_path}") + return None + + with open(file_path, 'r') as file: + content = file.read() + return content + + + +def exclude_unwanted_producers(model): + """ Exclude some producers not needed for the export + + Currently excludes the producers attached to the Mul, BitShift and Clip nodes, + as they are tensors holding a single value. This value is retrieved during the + export generation process and passed directly as an argument within the layer + configuration. + """ + + nodes_to_ignore = ["Mul", "BitShift", "Clip"] + + for node in model.get_nodes(): + if node.type() == "Producer": + children_nodes = [n.type() for n in node.get_children()] + for node_type in nodes_to_ignore: + if node_type in children_nodes: + node.attributes().ignore = True + break + + + +def set_scaling_attributes(export_node: aidge_core.export_utils.ExportNode, node: aidge_core.Node): + """ + Look recursively for a Quantizer node inside of the given node, + then set shift and coef attributes of the given export node. + [TODO] Should be moved into aidge_core.ExportNode + + :param export_node: An instance of :py:class:`aidge_core.export_utils.ExportNode` to set the scaling + attributes needed for a quantized export. + :type export_node: aidge_core.export_utils.ExportNode + :param node: Node which may hold a Quantizer node.
+ :type node: aidge_core.Node + """ + + QNode = get_node_from_metaop(node, "Quantizer") + if QNode: + BNode = get_node_from_metaop(QNode[0], "BitShift") + export_node.attributes["shift_value"] = BNode[0].get_operator().get_input(1)[0] + + QMulNode = get_node_from_metaop(node, "QMul") + if QMulNode: + CNode = get_node_from_metaop(QMulNode[0], "Mul") + export_node.attributes["coef_value"] = CNode[0].get_operator().get_input(1)[0] + + + +def normalize(array): + """ + Normalize an input image between -1 and 1 + """ + if array.max() == array.min(): + return array/array.max() + array = (array - array.min()) / (array.max() - array.min()) + return 2 * array - 1 + diff --git a/aidge_export_cpp/kernels/activation.hpp b/aidge_export_cpp/kernels/activation.hpp index d6695159255e4c2c12ced879a90cbe6b01dae0eb..ee80ed275ab9edf574dee6e7d32276f00ba92412 100644 --- a/aidge_export_cpp/kernels/activation.hpp +++ b/aidge_export_cpp/kernels/activation.hpp @@ -1,61 +1,8 @@ #ifndef __AIDGE_EXPORT_CPP_KERNELS_ACTIVATION__ #define __AIDGE_EXPORT_CPP_KERNELS_ACTIVATION__ -#include <type_traits> -#include "network/typedefs.hpp" -#include "network/utils.hpp" -#include "kernels/rescaling.hpp" - -template<typename Output_T, typename T, - typename std::enable_if<std::is_floating_point<T>::value>::type* = nullptr> -__attribute__((always_inline)) inline -Output_T saturate (T value, int32_t /*sat*/) -{ - return value; -} - -template<typename Output_T, typename T, - typename std::enable_if<!std::is_floating_point<T>::value>::type* = nullptr> -__attribute__((always_inline)) inline -Output_T saturate (T value, uint32_t sat) -{ - if (std::is_unsigned<Output_T>::value) { - return clamp(value, T(0), (T(1) << sat) - 1); - } else { - return clamp(value, -(T(1) << (sat - 1)), (T(1) << (sat - 1)) - 1); - } -} - -template<typename Output_T, - typename Sum_T, - typename Rescaling_T> -__attribute__((always_inline)) inline -Output_T activation_forward_value (Sum_T weightedSum, - int output, - ActivationFunction_T func, - const Rescaling_T& __restrict rescaling) -{ - switch(func) { - case Linear: - case Saturation: { - break; - } - case Rectifier: { - if(weightedSum <= 0) weightedSum = 0; - break; - } - default: - // Unsupported activation function - break; - } - - // Value fixed here for now but it should be generated by - // the export module or determined by the type of Output_T - // For now only works for int8_t and uint8_t - const uint32_t NB_BITS = 8; - return saturate<Output_T>(rescaling(weightedSum, output), NB_BITS); -} - +#include "network/activation_utils.hpp" +#include "network/rescaling_utils.hpp" template<int NB_DATA, ActivationFunction_T ACTIVATION, diff --git a/aidge_export_cpp/kernels/batchnorm.hpp b/aidge_export_cpp/kernels/batchnorm.hpp index f05a047511e12f895ef88be0e402b89e5197432b..27866ab923eb8a519e684030cfb63f894c15ec98 100644 --- a/aidge_export_cpp/kernels/batchnorm.hpp +++ b/aidge_export_cpp/kernels/batchnorm.hpp @@ -2,7 +2,7 @@ #define __AIDGE_EXPORT_CPP_KERNELS_BATCHNORM__ #include "network/typedefs.hpp" -#include "kernels/activation.hpp" +#include "network/activation_utils.hpp" #include <math.h> diff --git a/aidge_export_cpp/kernels/convolution.hpp b/aidge_export_cpp/kernels/convolution.hpp index 5855654b39d5d7faf09e81735fbe80fa248ace94..ed62401e69ff8d53b23ba9f88917bb54acd3740a 100644 --- a/aidge_export_cpp/kernels/convolution.hpp +++ b/aidge_export_cpp/kernels/convolution.hpp @@ -2,10 +2,10 @@ #define __AIDGE_EXPORT_CPP_KERNELS_CONVOLUTION__ #include "network/typedefs.hpp" -#include "kernels/rescaling.hpp" 
+#include "network/rescaling_utils.hpp" #include "network/utils.hpp" -#include "kernels/macs.hpp" -#include "kernels/activation.hpp" +#include "network/macs.hpp" +#include "network/activation_utils.hpp" template<int NB_CHANNELS, @@ -159,158 +159,4 @@ void convolution_forward( (inputs, outputs, weights, b, rescaling); } -template<int NB_CHANNELS, - int CHANNELS_HEIGHT, int CHANNELS_WIDTH, - int NB_OUTPUTS, - int OUTPUTS_HEIGHT, int OUTPUTS_WIDTH, - int PADDING_Y, int PADDING_X, - int STRIDE_Y, int STRIDE_X, - int DILATION_Y, int DILATION_X, - int KERNEL_HEIGHT, int KERNEL_WIDTH, - ActivationFunction_T ACTIVATION, - typename Input_T, typename Output_T, - typename Weight_T, typename Bias_T, - typename Rescaling_T> -__attribute__((always_inline)) inline -void convolution_depthwise_forward( - const Input_T* __restrict inputs, - Output_T* __restrict outputs, - const Weight_T* __restrict weights, - const Bias_T* __restrict biases, - const Rescaling_T& __restrict rescaling) -{ - static_assert(NB_OUTPUTS % NB_CHANNELS == 0, - "NB_OUTPUTS should be a multiple of NB_CHANNELS."); - - constexpr int DILATED_KERNEL_HEIGHT - = KERNEL_HEIGHT + (DILATION_Y - 1) * (KERNEL_HEIGHT - 1); - - constexpr int DILATED_KERNEL_WIDTH - = KERNEL_WIDTH + (DILATION_X - 1) * (KERNEL_WIDTH - 1); - - constexpr int OUTPUTS_HEIGHT_NOPAD - = (CHANNELS_HEIGHT - DILATION_Y * (KERNEL_HEIGHT - 1) - 1 + STRIDE_Y) / STRIDE_Y; - constexpr int OUTPUTS_WIDTH_NOPAD - = (CHANNELS_WIDTH - DILATION_X * (KERNEL_WIDTH - 1) - 1 + STRIDE_X) / STRIDE_X; - - for (int oy = 0; oy < OUTPUTS_HEIGHT; ++oy) { - const int syMin = (PADDING_Y == 0) ? 0 - : max(PADDING_Y - (oy * STRIDE_Y), 0); - const int syMax = (PADDING_Y == 0 - && OUTPUTS_HEIGHT == OUTPUTS_HEIGHT_NOPAD) ? DILATED_KERNEL_HEIGHT - : clamp(CHANNELS_HEIGHT + PADDING_Y - (oy * STRIDE_Y), - 0, DILATED_KERNEL_HEIGHT); - const int iy = (oy * STRIDE_Y) - PADDING_Y; - -#ifdef _OPENMP -#pragma omp parallel for collapse(2) -#endif - for (int ox = 0; ox < OUTPUTS_WIDTH; ++ox) { - for (int output = 0; output < NB_OUTPUTS; ++output) { - // moved to inner loop for collapsing --> - const int sxMin = (PADDING_X == 0) ? 0 - : max(PADDING_X - (ox * STRIDE_X), 0); - const int sxMax = (PADDING_X == 0 - && OUTPUTS_WIDTH == OUTPUTS_WIDTH_NOPAD) - ? DILATED_KERNEL_WIDTH - : clamp(CHANNELS_WIDTH + PADDING_X - (ox * STRIDE_X), - 0, DILATED_KERNEL_WIDTH); - const int ix = (ox * STRIDE_X) - PADDING_X; - - const int oPos = (ox + OUTPUTS_WIDTH * oy); - const int oOffset = NB_OUTPUTS * oPos; - // <-- - - const int channel = (output * NB_CHANNELS) / NB_OUTPUTS; - - Bias_T weightedSum = biases ? 
biases[output] : 0; - - for (int sy = 0; sy < KERNEL_HEIGHT; ++sy) { - if ((PADDING_Y != 0 - || OUTPUTS_HEIGHT != OUTPUTS_HEIGHT_NOPAD) - && ((sy*DILATION_Y < syMin) || (sy*DILATION_Y >= syMax))) - { - continue; - } - - const int iPos = ix + CHANNELS_WIDTH * (iy + sy*DILATION_Y); - const int iOffset = NB_CHANNELS * iPos; - - const int wOffset = (output*KERNEL_HEIGHT + sy) - * KERNEL_WIDTH; - - if (DILATION_X == 1 && ((PADDING_X == 0 - && OUTPUTS_WIDTH == OUTPUTS_WIDTH_NOPAD) - || sxMax - sxMin == KERNEL_WIDTH)) - { - macsOnRange<KERNEL_WIDTH, NB_CHANNELS>( - inputs + iOffset + channel, - weights + wOffset, - weightedSum); - } - else { - for (int sx = 0; sx < KERNEL_WIDTH; ++sx) { - if ((PADDING_X != 0 - || OUTPUTS_WIDTH != OUTPUTS_WIDTH_NOPAD) - && ((sx*DILATION_X < sxMin) || (sx*DILATION_X >= sxMax))) - { - continue; - } - - const int iOffsetInRange = iOffset - + sx * DILATION_X * NB_CHANNELS; - - weightedSum += inputs[iOffsetInRange + channel] - * weights[wOffset + sx]; - } - } - } - - outputs[oOffset + output] = activation_forward_value<Output_T>(weightedSum, output, ACTIVATION, rescaling); - } - } - } -} - -// Template specialization when biases are not given to the convolution -template<int NB_CHANNELS, - int CHANNELS_HEIGHT, int CHANNELS_WIDTH, - int NB_OUTPUTS, - int OUTPUTS_HEIGHT, int OUTPUTS_WIDTH, - int PADDING_Y, int PADDING_X, - int STRIDE_Y, int STRIDE_X, - int DILATION_Y, int DILATION_X, - int KERNEL_HEIGHT, int KERNEL_WIDTH, - ActivationFunction_T ACTIVATION, - typename Input_T, typename Output_T, - typename Weight_T, - typename Rescaling_T> -__attribute__((always_inline)) inline -void convolution_depthwise_forward( - const Input_T* __restrict inputs, - Output_T* __restrict outputs, - const Weight_T* __restrict weights, - std::nullptr_t __restrict, - const Rescaling_T& __restrict rescaling) -{ - const float* b = nullptr; - - convolution_depthwise_forward<NB_CHANNELS, - CHANNELS_HEIGHT, - CHANNELS_WIDTH, - NB_OUTPUTS, - OUTPUTS_HEIGHT, - OUTPUTS_WIDTH, - PADDING_Y, - PADDING_X, - STRIDE_Y, - STRIDE_X, - DILATION_Y, - DILATION_X, - KERNEL_HEIGHT, - KERNEL_WIDTH, - ACTIVATION> - (inputs, outputs, weights, b, rescaling); -} - #endif // __AIDGE_EXPORT_CPP_KERNELS_CONVOLUTION__ diff --git a/aidge_export_cpp/kernels/convolution_depthwise.hpp b/aidge_export_cpp/kernels/convolution_depthwise.hpp new file mode 100644 index 0000000000000000000000000000000000000000..244dd86bc01be7142474380f1e3393ce32446aaf --- /dev/null +++ b/aidge_export_cpp/kernels/convolution_depthwise.hpp @@ -0,0 +1,164 @@ +#ifndef __AIDGE_EXPORT_CPP_KERNELS_CONVOLUTION_DEPTHWISE__ +#define __AIDGE_EXPORT_CPP_KERNELS_CONVOLUTION_DEPTHWISE__ + +#include "network/typedefs.hpp" +#include "network/rescaling_utils.hpp" +#include "network/utils.hpp" +#include "network/macs.hpp" +#include "network/activation_utils.hpp" + +template<int NB_CHANNELS, + int CHANNELS_HEIGHT, int CHANNELS_WIDTH, + int NB_OUTPUTS, + int OUTPUTS_HEIGHT, int OUTPUTS_WIDTH, + int PADDING_Y, int PADDING_X, + int STRIDE_Y, int STRIDE_X, + int DILATION_Y, int DILATION_X, + int KERNEL_HEIGHT, int KERNEL_WIDTH, + ActivationFunction_T ACTIVATION, + typename Input_T, typename Output_T, + typename Weight_T, typename Bias_T, + typename Rescaling_T> +__attribute__((always_inline)) inline +void convolution_depthwise_forward( + const Input_T* __restrict inputs, + Output_T* __restrict outputs, + const Weight_T* __restrict weights, + const Bias_T* __restrict biases, + const Rescaling_T& __restrict rescaling) +{ + static_assert(NB_OUTPUTS % NB_CHANNELS == 0, + 
"NB_OUTPUTS should be a multiple of NB_CHANNELS."); + + constexpr int DILATED_KERNEL_HEIGHT + = KERNEL_HEIGHT + (DILATION_Y - 1) * (KERNEL_HEIGHT - 1); + + constexpr int DILATED_KERNEL_WIDTH + = KERNEL_WIDTH + (DILATION_X - 1) * (KERNEL_WIDTH - 1); + + constexpr int OUTPUTS_HEIGHT_NOPAD + = (CHANNELS_HEIGHT - DILATION_Y * (KERNEL_HEIGHT - 1) - 1 + STRIDE_Y) / STRIDE_Y; + constexpr int OUTPUTS_WIDTH_NOPAD + = (CHANNELS_WIDTH - DILATION_X * (KERNEL_WIDTH - 1) - 1 + STRIDE_X) / STRIDE_X; + + for (int oy = 0; oy < OUTPUTS_HEIGHT; ++oy) { + const int syMin = (PADDING_Y == 0) ? 0 + : max(PADDING_Y - (oy * STRIDE_Y), 0); + const int syMax = (PADDING_Y == 0 + && OUTPUTS_HEIGHT == OUTPUTS_HEIGHT_NOPAD) ? DILATED_KERNEL_HEIGHT + : clamp(CHANNELS_HEIGHT + PADDING_Y - (oy * STRIDE_Y), + 0, DILATED_KERNEL_HEIGHT); + const int iy = (oy * STRIDE_Y) - PADDING_Y; + +#ifdef _OPENMP +#pragma omp parallel for collapse(2) +#endif + for (int ox = 0; ox < OUTPUTS_WIDTH; ++ox) { + for (int output = 0; output < NB_OUTPUTS; ++output) { + // moved to inner loop for collapsing --> + const int sxMin = (PADDING_X == 0) ? 0 + : max(PADDING_X - (ox * STRIDE_X), 0); + const int sxMax = (PADDING_X == 0 + && OUTPUTS_WIDTH == OUTPUTS_WIDTH_NOPAD) + ? DILATED_KERNEL_WIDTH + : clamp(CHANNELS_WIDTH + PADDING_X - (ox * STRIDE_X), + 0, DILATED_KERNEL_WIDTH); + const int ix = (ox * STRIDE_X) - PADDING_X; + + const int oPos = (ox + OUTPUTS_WIDTH * oy); + const int oOffset = NB_OUTPUTS * oPos; + // <-- + + const int channel = (output * NB_CHANNELS) / NB_OUTPUTS; + + Bias_T weightedSum = biases ? biases[output] : 0; + + for (int sy = 0; sy < KERNEL_HEIGHT; ++sy) { + if ((PADDING_Y != 0 + || OUTPUTS_HEIGHT != OUTPUTS_HEIGHT_NOPAD) + && ((sy*DILATION_Y < syMin) || (sy*DILATION_Y >= syMax))) + { + continue; + } + + const int iPos = ix + CHANNELS_WIDTH * (iy + sy*DILATION_Y); + const int iOffset = NB_CHANNELS * iPos; + + const int wOffset = (output*KERNEL_HEIGHT + sy) + * KERNEL_WIDTH; + + if (DILATION_X == 1 && ((PADDING_X == 0 + && OUTPUTS_WIDTH == OUTPUTS_WIDTH_NOPAD) + || sxMax - sxMin == KERNEL_WIDTH)) + { + macsOnRange<KERNEL_WIDTH, NB_CHANNELS>( + inputs + iOffset + channel, + weights + wOffset, + weightedSum); + } + else { + for (int sx = 0; sx < KERNEL_WIDTH; ++sx) { + if ((PADDING_X != 0 + || OUTPUTS_WIDTH != OUTPUTS_WIDTH_NOPAD) + && ((sx*DILATION_X < sxMin) || (sx*DILATION_X >= sxMax))) + { + continue; + } + + const int iOffsetInRange = iOffset + + sx * DILATION_X * NB_CHANNELS; + + weightedSum += inputs[iOffsetInRange + channel] + * weights[wOffset + sx]; + } + } + } + + outputs[oOffset + output] = activation_forward_value<Output_T>(weightedSum, output, ACTIVATION, rescaling); + } + } + } +} + +// Template specialization when biases are not given to the convolution +template<int NB_CHANNELS, + int CHANNELS_HEIGHT, int CHANNELS_WIDTH, + int NB_OUTPUTS, + int OUTPUTS_HEIGHT, int OUTPUTS_WIDTH, + int PADDING_Y, int PADDING_X, + int STRIDE_Y, int STRIDE_X, + int DILATION_Y, int DILATION_X, + int KERNEL_HEIGHT, int KERNEL_WIDTH, + ActivationFunction_T ACTIVATION, + typename Input_T, typename Output_T, + typename Weight_T, + typename Rescaling_T> +__attribute__((always_inline)) inline +void convolution_depthwise_forward( + const Input_T* __restrict inputs, + Output_T* __restrict outputs, + const Weight_T* __restrict weights, + std::nullptr_t __restrict, + const Rescaling_T& __restrict rescaling) +{ + const float* b = nullptr; + + convolution_depthwise_forward<NB_CHANNELS, + CHANNELS_HEIGHT, + CHANNELS_WIDTH, + NB_OUTPUTS, + 
OUTPUTS_HEIGHT, + OUTPUTS_WIDTH, + PADDING_Y, + PADDING_X, + STRIDE_Y, + STRIDE_X, + DILATION_Y, + DILATION_X, + KERNEL_HEIGHT, + KERNEL_WIDTH, + ACTIVATION> + (inputs, outputs, weights, b, rescaling); +} + +#endif // __AIDGE_EXPORT_CPP_KERNELS_CONVOLUTION_DEPTHWISE__ diff --git a/aidge_export_cpp/kernels/elemwise.hpp b/aidge_export_cpp/kernels/elemwise.hpp index 67ee574c1cb7d197f3c976ce80a2a63d36aec873..9468b33f6b9785f36f511b14daffe9cc4a0ed420 100644 --- a/aidge_export_cpp/kernels/elemwise.hpp +++ b/aidge_export_cpp/kernels/elemwise.hpp @@ -2,7 +2,7 @@ #define __AIDGE_EXPORT_CPP_KERNELS_ELEMWISE__ #include "network/typedefs.hpp" -#include "kernels/activation.hpp" +#include "network/activation_utils.hpp" // Generic function for two inputs diff --git a/aidge_export_cpp/kernels/fullyconnected.hpp b/aidge_export_cpp/kernels/fullyconnected.hpp index 60805e7b90fa29ba00c6736bb8771985aeca19b4..abaab59c355263a79c905ffeb8a2a72b6e976445 100644 --- a/aidge_export_cpp/kernels/fullyconnected.hpp +++ b/aidge_export_cpp/kernels/fullyconnected.hpp @@ -2,10 +2,10 @@ #define __AIDGE_EXPORT_CPP_KERNELS_FULLYCONNECTED__ #include "network/typedefs.hpp" -#include "kernels/rescaling.hpp" +#include "network/rescaling_utils.hpp" #include "network/utils.hpp" -#include "kernels/macs.hpp" -#include "kernels/activation.hpp" +#include "network/macs.hpp" +#include "network/activation_utils.hpp" template<int NB_CHANNELS, int CHANNELS_HEIGHT, int CHANNELS_WIDTH, diff --git a/aidge_export_cpp/kernels/matmul.hpp b/aidge_export_cpp/kernels/matmul.hpp index 4500993e02cf42fb698bc9004462800bdd3f7dc4..b507c4f1e37065a620a0ac37ed370cfa6847487d 100644 --- a/aidge_export_cpp/kernels/matmul.hpp +++ b/aidge_export_cpp/kernels/matmul.hpp @@ -2,7 +2,7 @@ #define __AIDGE_EXPORT_CPP_KERNELS_MATMUL__ #include "network/typedefs.hpp" -#include "kernels/activation.hpp" +#include "network/activation_utils.hpp" // Generic function for matmul and activation diff --git a/aidge_export_cpp/kernels/pooling.hpp b/aidge_export_cpp/kernels/pooling.hpp index 30fa766abbeded7eb55caf01902c216d95a2ed17..12ac69ffcf30e72c6d854753d4d2a22b1ce4419c 100644 --- a/aidge_export_cpp/kernels/pooling.hpp +++ b/aidge_export_cpp/kernels/pooling.hpp @@ -4,6 +4,7 @@ #include "network/typedefs.hpp" #include "network/utils.hpp" #include <limits> +#include <cmath> #include <stdexcept> @@ -88,7 +89,7 @@ void pooling_forward( outputs[oOffset + output] = maxVal; } else if (POOLING_TYPE == Average) { - Output_T sum = 0; + float sum = 0; for (int sy = 0; sy < POOL_HEIGHT; ++sy) { if ((PADDING_Y != 0 @@ -115,7 +116,10 @@ void pooling_forward( } } - outputs[oOffset + output] = (Output_T) (sum / (POOL_HEIGHT * POOL_WIDTH)); + outputs[oOffset + output] = static_cast<Output_T>( + std::is_integral<Output_T>::value ? 
std::round(sum / (POOL_HEIGHT * POOL_WIDTH)) : sum / (POOL_HEIGHT * POOL_WIDTH) + ); + } else { throw std::runtime_error("The export only supports Max and Average pooling."); diff --git a/aidge_export_cpp/kernels/rescaling.hpp b/aidge_export_cpp/kernels/rescaling.hpp index 856010d97501f9d5c5a8cc77365e7fd335b757c0..a831fa8730dfa45384c6f251d7fe079caa015ce6 100644 --- a/aidge_export_cpp/kernels/rescaling.hpp +++ b/aidge_export_cpp/kernels/rescaling.hpp @@ -1,16 +1,26 @@ #ifndef __AIDGE_EXPORT_CPP_NETWORK_RESCALING__ #define __AIDGE_EXPORT_CPP_NETWORK_RESCALING__ +#include "network/rescaling_utils.hpp" +#include "network/activation_utils.hpp" -struct NoScaling { - - template<typename Sum_T> - Sum_T operator()(Sum_T weightedSum, unsigned int /*output*/) const - { - return weightedSum; +template<int NB_DATA, + ActivationFunction_T ACTIVATION, + typename Input_T, + typename Output_T, + typename Rescaling_T> +__attribute__((always_inline)) inline +void rescaling_forward ( + const Input_T* __restrict inputs, + Output_T* __restrict outputs, + const Rescaling_T& __restrict rescaling) +{ +#ifdef _OPENMP + #pragma omp parallel for +#endif + for (int i = 0; i < NB_DATA; ++i) { + outputs[i] = activation_forward_value<Output_T>(inputs[i], 0, ACTIVATION, rescaling); } - -}; - +} #endif // __AIDGE_EXPORT_CPP_NETWORK_RESCALING__ diff --git a/aidge_export_cpp/kernels/softmax.hpp b/aidge_export_cpp/kernels/softmax.hpp index f5472cf6d807bc2f547e58616943f6e72dccd80e..d29e9b42cba35287c71d32f211550a51b784aa12 100644 --- a/aidge_export_cpp/kernels/softmax.hpp +++ b/aidge_export_cpp/kernels/softmax.hpp @@ -3,7 +3,6 @@ #include "network/typedefs.hpp" #include "network/utils.hpp" -#include "kernels/macs.hpp" #include <type_traits> #include <cmath> diff --git a/aidge_export_cpp/operators.py b/aidge_export_cpp/operators.py deleted file mode 100644 index e27dee224d1d2ad65546712f3cd9ef6edeffa1d8..0000000000000000000000000000000000000000 --- a/aidge_export_cpp/operators.py +++ /dev/null @@ -1,524 +0,0 @@ -import os -import numpy as np -from pathlib import Path -import aidge_core -from aidge_core.export_utils import ExportNode, ExportNodeCpp, generate_file -from aidge_export_cpp.utils import ROOT -from aidge_export_cpp import ExportLibCpp - -############################################## -############## Export functions ############## -############################################## -def numpy_dtype2ctype(dtype): - if dtype == np.int8: - return "int8_t" - elif dtype == np.int16: - return "int16_t" - elif dtype == np.int32: - return "int32_t" - elif dtype == np.int64: - return "int64_t" - elif dtype == np.float32: - return "float" - elif dtype == np.float64: - return "double" - # Add more dtype mappings as needed - else: - raise ValueError(f"Unsupported {dtype} dtype") - -def export_params(name: str, - array: np.ndarray, - filepath: str): - - # Get directory name of the file - dirname = os.path.dirname(filepath) - - # If directory doesn't exist, create it - if not os.path.exists(dirname): - os.makedirs(dirname) - - generate_file( - filepath, - str(ROOT / "templates" / "data" / "parameters.jinja"), - name=name, - data_t=numpy_dtype2ctype(array.dtype), - values=array.tolist() - ) - - -############################################## -############## Operators helper ############## -############################################## - -@ExportLibCpp.register("Producer", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.any))) -class ProducerCPP(ExportNode): - def __init__(self, node, mem_info): - super().__init__(node, mem_info) -
self.values = np.array(self.operator.get_output(0)) - - if len(self.values.shape) == 4: # Note: export in HWC - self.values = np.transpose(self.values, (0, 2, 3, 1)) - - def export(self, export_folder: Path): - header_path = f"include/parameters/{self.attributes['name']}.h" - export_params( - self.attributes['out_name'][0], - self.values.reshape(-1), - str(export_folder / header_path)) - return [header_path] - - def forward(self): - # A Producer does nothing during forward - return [] - -# TODO : find a way to remove this dummy exportnode -@ExportLibCpp.register("Pad2D", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.any))) -class PadCPP(ExportNodeCpp): - def __init__(self, node, mem_info): - super().__init__(node, mem_info) - self.attributes["padding"] = node.get_operator().attr.begin_end_borders - self.attributes["border_type"] = node.get_operator().attr.border_type - self.attributes["border_value"] = node.get_operator().attr.border_value - - assert self.attributes["border_type"] == aidge_core.pad_border_type.Constant, ( - f"export Pad2d: border_type == {node.get_operator().attr.border_type} not implemented" - ) - - self.config_template = str( - ROOT / "templates" / "configuration" / "pad_config.jinja") - self.forward_template = str( - ROOT / "templates" / "kernel_forward" / "pad_forward.jinja") - self.include_list = [] - self.kernels_to_copy = [ - str(ROOT / "kernels" / "pad.hpp") - ] - -@ExportLibCpp.register("ReLU", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.float32))) -class ReLUCPP(ExportNodeCpp): - def __init__(self, node, mem_info): - super().__init__(node, mem_info) - self.attributes["activation"] = "Rectifier" - self.attributes["rescaling"] = "NoScaling" - self.config_template = str( - ROOT / "templates" / "configuration" / "activation_config.jinja") - self.forward_template = str( - ROOT / "templates" / "kernel_forward" / "activation_forward.jinja") - self.include_list = [] - self.kernels_to_copy = [ - str(ROOT / "kernels" / "activation.hpp"), - str(ROOT / "kernels" / "rescaling.hpp") - ] - -@ExportLibCpp.register("Reshape", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.float32))) -class ReshapeCPP(ExportNodeCpp): - def __init__(self, node, mem_info): - super().__init__(node, mem_info) - self.config_template = str( - ROOT / "templates" / "configuration" / "reshape_config.jinja") - self.forward_template = str( - ROOT / "templates" / "kernel_forward" / "reshape_forward.jinja") - self.include_list = [] - self.kernels_to_copy = [ - str(ROOT / "kernels" / "reshape.hpp"), - ] - -@ExportLibCpp.register("MatMul", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.float32))) -class MatMulCPP(ExportNodeCpp): - def __init__(self, node, mem_info): - super().__init__(node, mem_info) - self.attributes["activation"] = "Linear" - self.attributes["rescaling"] = "NoScaling" - self.config_template = str( - ROOT / "templates" / "configuration" / "matmul_config.jinja") - self.forward_template = str( - ROOT / "templates" / "kernel_forward" / "matmul_forward.jinja") - self.include_list = [] - self.kernels_to_copy = [ - str(ROOT / "kernels" / "matmul.hpp"), - ] - -def _setup_conv2D(conv): - """Common setup code for convolutions: Conv2D and PaddedConv2D.""" - - # If biases are not provided we set it as nullptr instead of None - if (len(conv.attributes["in_name"]) > 2 and conv.attributes["in_name"][2] is None): - conv.attributes["in_name"][2] = "nullptr" - - conv.attributes["activation"] = "Linear" - conv.attributes["rescaling"] = "NoScaling" - conv.config_template = str( 
- ROOT / "templates" / "configuration" / "convolution_config.jinja") - conv.forward_template = str( - ROOT / "templates" / "kernel_forward" / "convolution_forward.jinja") - conv.include_list = [] - conv.kernels_to_copy = [ - str(ROOT / "kernels" / "convolution.hpp"), - str(ROOT / "kernels" / "macs.hpp"), - str(ROOT / "kernels" / "activation.hpp"), - str(ROOT / "kernels" / "rescaling.hpp") - ] - -@ExportLibCpp.register("Conv2D", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.float32))) -class ConvCPP(ExportNodeCpp): - def __init__(self, node, mem_info): - super().__init__(node, mem_info) - # No padding with Conv - # Use PaddedConv to add padding attribute - self.attributes["padding"] = [0, 0] - - _setup_conv2D(self) - -@ExportLibCpp.register_metaop("PaddedConv2D", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.float32))) -class PaddedConvCPP(ExportNodeCpp): - def __init__(self, node, mem_info): - super().__init__(node, mem_info) - # TODO find a way to retrive attr for meta op - for n in self.operator.get_micro_graph().get_nodes(): - if n.type() == "Pad2D": - self.attributes["padding"] = n.get_operator( - ).attr.begin_end_borders - if n.type() == "Conv2D": - self.attributes["kernel_dims"] = n.get_operator( - ).attr.kernel_dims - self.attributes["stride_dims"] = n.get_operator( - ).attr.stride_dims - self.attributes["dilation_dims"] = n.get_operator( - ).attr.dilation_dims - - _setup_conv2D(self) - -@ExportLibCpp.register("ConvDepthWise2D", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.float32))) -class ConvDepthWiseCPP(ExportNodeCpp): - def __init__(self, node, mem_info): - super().__init__(node, mem_info) - self.attributes["depthwise"] = True - - # No padding with Conv - # Use PaddedConv to add padding attribute - self.attributes["padding"] = [0, 0] - - _setup_conv2D(self) - -@ExportLibCpp.register_metaop("PaddedConvDepthWise2D", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.float32))) -class PaddedConvDepthWiseCPP(ExportNodeCpp): - def __init__(self, node, mem_info): - super().__init__(node, mem_info) - self.attributes["depthwise"] = True - - # TODO find a way to retrive attr for meta op - for n in self.operator.get_micro_graph().get_nodes(): - if n.type() == "Pad2D": - self.attributes["padding"] = n.get_operator( - ).attr.begin_end_borders - if n.type() == "ConvDepthWise2D": - self.attributes["kernel_dims"] = n.get_operator( - ).attr.kernel_dims - self.attributes["stride_dims"] = n.get_operator( - ).attr.stride_dims - self.attributes["dilation_dims"] = n.get_operator( - ).attr.dilation_dims - - _setup_conv2D(self) - -def _setup_elemwise_op(elemwise, op): - """Common code (template and kernel setup) shared across all the different elementWise operator (Add, Sub,...).""" - - elemwise.attributes["elemwise_op"] = op - elemwise.attributes["activation"] = "Linear" - elemwise.attributes["rescaling"] = "NoScaling" - elemwise.config_template = str( - ROOT / "templates" / "configuration" / "elemwise_config.jinja") - elemwise.forward_template = str( - ROOT / "templates" / "kernel_forward" / "elemwise_forward.jinja") - elemwise.include_list = [] - elemwise.kernels_to_copy = [ - str(ROOT / "kernels" / "elemwise.hpp"), - str(ROOT / "kernels" / "activation.hpp"), - str(ROOT / "kernels" / "rescaling.hpp") - ] - -@ExportLibCpp.register("Add", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.float32))) -class AddCPP(ExportNodeCpp): - def __init__(self, node, mem_info): - super().__init__(node, mem_info) - - _setup_elemwise_op(self, "Add") - 
-@ExportLibCpp.register("Sub", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.float32))) -class SubCPP(ExportNodeCpp): - def __init__(self, node, mem_info): - super().__init__(node, mem_info) - - _setup_elemwise_op(self, "Sub") - -@ExportLibCpp.register("Mul", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.float32))) -class MulCPP(ExportNodeCpp): - def __init__(self, node, mem_info): - super().__init__(node, mem_info) - - _setup_elemwise_op(self, "Mul") - -def _setup_pooling(pooling): - """Common code (template and kernel setup) shared across all the different pooling operator.""" - - pooling.config_template = str( - ROOT / "templates" / "configuration" / "pooling_config.jinja") - pooling.forward_template = str( - ROOT / "templates" / "kernel_forward" / "pooling_forward.jinja") - pooling.include_list = [] - pooling.kernels_to_copy = [ - str(ROOT / "kernels" / "pooling.hpp"), - str(ROOT / "kernels" / "activation.hpp"), - str(ROOT / "kernels" / "rescaling.hpp") - ] - -@ExportLibCpp.register("MaxPooling2D", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.float32))) -class MaxPoolCPP(ExportNodeCpp): - def __init__(self, node, mem_info): - super().__init__(node, mem_info) - - # No padding with MaxPooling - # Use PaddedMaxPooling to add padding attribute - self.attributes["padding"] = [0, 0] - self.attributes["pool_type"] = "Max" - self.attributes["activation"] = "Linear" - - _setup_pooling(self) - -@ExportLibCpp.register("AvgPooling2D", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.float32))) -class AvgPoolCPP(ExportNodeCpp): - def __init__(self, node, mem_info): - super().__init__(node, mem_info) - - # No padding with MaxPooling - # Use PaddedMaxPooling to add padding attribute - self.attributes["padding"] = [0, 0] - self.attributes["pool_type"] = "Average" - self.attributes["activation"] = "Linear" - self.attributes["rescaling"] = "NoScaling" - - _setup_pooling(self) - -@ExportLibCpp.register_metaop("PaddedMaxPooling2D", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.float32))) -class PaddedMaxPoolCPP(ExportNodeCpp): - def __init__(self, node, mem_info): - super().__init__(node, mem_info) - for n in self.operator.get_micro_graph().get_nodes(): - if n.type() == "Pad2D": - self.attributes["padding"] = n.get_operator( - ).attr.begin_end_borders - if n.type() == "MaxPooling2D": - self.attributes["kernel_dims"] = n.get_operator( - ).attr.kernel_dims - self.attributes["stride_dims"] = n.get_operator( - ).attr.stride_dims - self.attributes["pool_type"] = "Max" - self.attributes["activation"] = "Linear" - - _setup_pooling(self) - -@ExportLibCpp.register("GlobalAveragePooling", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.float32))) -class GlobalAveragePoolCPP(ExportNodeCpp): - def __init__(self, node, mem_info): - super().__init__(node, mem_info) - - self.attributes["stride_dims"] = [1, 1] - # No padding with MaxPooling - # Use PaddedMaxPooling to add padding attribute - self.attributes["padding"] = [0, 0] - self.attributes["kernel_dims"] = [ - self.attributes["in_height"][0], - self.attributes["in_width"][0], - ] - self.attributes["pool_type"] = "Average" - self.attributes["activation"] = "Linear" - - _setup_pooling(self) - -@ExportLibCpp.register("FC", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.float32))) -class FcCPP(ExportNodeCpp): - def __init__(self, node, mem_info): - super().__init__(node, mem_info) - self.attributes["activation"] = "Linear" - self.attributes["rescaling"] = "NoScaling" - self.config_template = str( - ROOT / 
"templates" / "configuration" / "fullyconnected_config.jinja") - self.forward_template = str( - ROOT / "templates" / "kernel_forward" / "fullyconnected_forward.jinja") - self.include_list = [] - self.kernels_to_copy = [ - str(ROOT / "kernels" / "fullyconnected.hpp"), - str(ROOT / "kernels" / "macs.hpp"), - str(ROOT / "kernels" / "activation.hpp"), - str(ROOT / "kernels" / "rescaling.hpp") - ] - -@ExportLibCpp.register("Transpose", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.any))) -class TransposeCPP(ExportNodeCpp): - def __init__(self, node, mem_info): - super().__init__(node, mem_info) - self.config_template = str( - ROOT / "templates" / "configuration" / "transpose_ND_config.jinja") - self.forward_template = str( - ROOT / "templates" / "kernel_forward" / "transpose_ND_forward.jinja") - self.include_list = [] - self.kernels_to_copy = [ - str(ROOT / "kernels" / "transpose.hpp") - ] - -@ExportLibCpp.register("Softmax", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.float32))) -class SoftmaxCPP(ExportNodeCpp): - def __init__(self, node, mem_info): - super().__init__(node, mem_info) - assert self.node.get_nb_inputs() == 1, ( - f"export softmax: nb_inputs == {self.node.get_nb_inputs()} not implemented" - ) - - tensor = self.operator.get_input(0) - nbDims = len(tensor.dims()) - axis = node.get_operator().attr.axis if node.get_operator().attr.axis >= 0 else node.get_operator().attr.axis + nbDims - - assert axis < nbDims, ( - f"export softmax: attribute axis == {node.get_operator().attr.axis} should be less than {nbDims}" - ) - - postAxisElems = 1 - for i in range(axis + 1, nbDims): - postAxisElems *= tensor.dims()[i] - - preAxisElems = 1 - for i in range(axis): - preAxisElems *= tensor.dims()[i] - - self.attributes["axis_size"] = tensor.dims()[axis] - self.attributes["axis_size_post"] = postAxisElems - self.attributes["axis_size_pre"] = preAxisElems - - self.config_template = str( - ROOT / "templates" / "configuration" / "softmax_config.jinja") - self.forward_template = str( - ROOT / "templates" / "kernel_forward" / "softmax_forward.jinja") - self.include_list = [] - self.kernels_to_copy = [ - str(ROOT / "kernels" / "softmax.hpp"), - str(ROOT / "kernels" / "macs.hpp"), - ] - -@ExportLibCpp.register("Hardmax", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.float32))) -class HardmaxCPP(ExportNodeCpp): - def __init__(self, node, mem_info): - super().__init__(node, mem_info) - assert self.node.get_nb_inputs() == 1, ( - f"export hardmax: nb_inputs == {self.node.get_nb_inputs()} not implemented" - ) - - tensor = self.operator.get_input(0) - nbDims = len(tensor.dims()) - axis = node.get_operator().attr.axis if node.get_operator().attr.axis >= 0 else node.get_operator().attr.axis + nbDims - - assert axis >= -nbDims and axis < nbDims, ( - f"export hardmax: attribute axis == {node.get_operator().attr.axis} should be comprised within [-{nbDims},{nbDims}]." 
- ) - - - post_axis_elems = 1 - for i in range(axis + 1, nbDims): - post_axis_elems *= tensor.dims()[i] - - preaxis_elems = 1 - for i in range(axis): - preaxis_elems *= tensor.dims()[i] - - axis_elems = post_axis_elems * tensor.dims()[axis] - nb_elems = preaxis_elems * axis_elems - - self.attributes["axis_dim_size"] = tensor.dims()[axis] - self.attributes["preaxis_stride"] = preaxis_elems - self.attributes["axis_stride"] = axis_elems - self.attributes["postaxis_stride"] = post_axis_elems - self.attributes["out_nb_elts"] = nb_elems - - self.config_template = str( - ROOT / "templates" / "configuration" / "hardmax_config.jinja") - self.forward_template = str( - ROOT / "templates" / "kernel_forward" / "hardmax_forward.jinja") - self.include_list = [] - self.kernels_to_copy = [ - str(ROOT / "kernels" / "hardmax.hpp"), - str(ROOT / "kernels" / "macs.hpp"), - ] - -@ExportLibCpp.register("BatchNorm2D", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.float32))) -class BatchNorm2DCPP(ExportNodeCpp): - def __init__(self, node, mem_info): - super().__init__(node, mem_info) - self.attributes["activation"] = "Linear" - self.attributes["rescaling"] = "NoScaling" - self.attributes["epsilon"] = node.get_operator().attr.epsilon - self.config_template = str( - ROOT / "templates" / "configuration" / "batchnorm_config.jinja") - self.forward_template = str( - ROOT / "templates" / "kernel_forward" / "batchnorm_forward.jinja") - self.include_list = [] - self.kernels_to_copy = [ - str(ROOT / "kernels" / "batchnorm.hpp"), - str(ROOT / "kernels" / "macs.hpp"), - str(ROOT / "kernels" / "activation.hpp"), - str(ROOT / "kernels" / "rescaling.hpp") - ] - -@ExportLibCpp.register("Concat", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.float32))) -class Concat(ExportNodeCpp): - def __init__(self, node, mem_info): - super().__init__(node, mem_info) - assert self.node.get_nb_inputs() >= 1, ( - f"export softmax: nb_inputs == {self.node.get_nb_inputs()} not implemented" - ) - - inputIndex = 0 - - tensor = self.operator.get_input(0) - for idx, _ in enumerate(self.node.inputs()): - if self.operator.get_input(idx) is not None: - tensor = self.operator.get_input(idx) - nbDims = len(tensor.dims()) - axis = node.get_operator().attr.axis if node.get_operator().attr.axis >= 0 else node.get_operator().attr.axis + nbDims - - assert axis < nbDims, ( - f"export softmax: attribute axis == {axis} should be less than {nbDims}" - ) - - postAxisElems = 1 - for i in range(axis + 1, nbDims): - postAxisElems *= tensor.dims()[i] - - preAxisElems = 1 - for i in range(axis): - preAxisElems *= tensor.dims()[i] - - if (inputIndex == 0): - self.attributes["axis_size_post"] = postAxisElems - self.attributes["axis_size_pre"] = preAxisElems - - self.attributes["axis_size"] = [None] * self.attributes["nb_in"] - else: - assert self.attributes["axis_size_post"] == postAxisElems, ( - f"export concat: axis_size_post {self.attributes['axis_size_post']} != {postAxisElems}" - ) - assert self.attributes["axis_size_pre"] == preAxisElems, ( - f"export concat: axis_size_pre {self.attributes['axis_size_pre']} != {preAxisElems}" - ) - - self.attributes["axis_size"][idx] = tensor.dims()[axis] - else: - assert false, ( - f"export concat: input {idx} is None, not implemented") - - inputIndex += 1 - - self.config_template = str(ROOT / "templates" / "configuration" / "concat_config.jinja") - self.forward_template = str(ROOT / "templates" / "kernel_forward" / "concat_forward.jinja") - self.include_list = [] - self.kernels_to_copy = [ - str(ROOT / "kernels" / 
"concat.hpp"), - ] diff --git a/aidge_export_cpp/operators/BatchNorm.py b/aidge_export_cpp/operators/BatchNorm.py new file mode 100644 index 0000000000000000000000000000000000000000..b0f5a16f195fb27846db9a8727b3804d84520d12 --- /dev/null +++ b/aidge_export_cpp/operators/BatchNorm.py @@ -0,0 +1,34 @@ +import aidge_core +from aidge_core.export_utils import ExportNodeCpp +from aidge_export_cpp import ROOT +from aidge_export_cpp import ExportLibCpp + +@ExportLibCpp.register("BatchNorm2D", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.float32))) +class BatchNorm(ExportNodeCpp): + def __init__(self, node, mem_info): + super().__init__(node, mem_info) + + # Initialize kernel attributes + self.attributes["activation"] = "Linear" + self.attributes["rescaling"] = "NoScaling" + self.attributes["epsilon"] = node.get_operator().attr.epsilon + self.attributes["aidge_cmp"] = node.attributes().has_attr("aidge_cmp") + + # Template for layer configutation file generation + self.config_template = str( ROOT / "templates" / "configuration" / "batchnorm_config.jinja") + + # Template layer call function generation within the forward file + self.forward_template = str(ROOT / "templates" / "kernel_forward" / "batchnorm_forward.jinja") + + # Files to include within the generated forward.cpp file + self.include_list = [] + + # Path to the kernel(s) files to copy + self.add_kernel_to_copy(ROOT / "kernels" / "batchnorm.hpp") + self.add_kernel_to_copy(ROOT / "static" / "macs.hpp", "include/network", fwd_include=False) + + # Include aidge outputs within the fwd file + if self.attributes["aidge_cmp"]: + self.include_list.append("network/utils.hpp") # aidge_cmp function + self.include_list.append("data/aidge_outputs/" + node.name() + ".hpp") + \ No newline at end of file diff --git a/aidge_export_cpp/operators/Concat.py b/aidge_export_cpp/operators/Concat.py new file mode 100644 index 0000000000000000000000000000000000000000..ea65f8d3cd4debc01b388b71086620e6ba7b3d0b --- /dev/null +++ b/aidge_export_cpp/operators/Concat.py @@ -0,0 +1,74 @@ + +import aidge_core +from aidge_core.export_utils import ExportNodeCpp +from aidge_export_cpp import ROOT +from aidge_export_cpp import ExportLibCpp + +@ExportLibCpp.register("Concat", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.float32))) +class Concat(ExportNodeCpp): + def __init__(self, node, mem_info): + super().__init__(node, mem_info) + + self.attributes["aidge_cmp"] = node.attributes().has_attr("aidge_cmp") + + assert self.node.get_nb_inputs() >= 1, ( + f"export softmax: nb_inputs == {self.node.get_nb_inputs()} not implemented" + ) + + inputIndex = 0 + + tensor = self.operator.get_input(0) + for idx, _ in enumerate(self.node.inputs()): + if self.operator.get_input(idx) is not None: + tensor = self.operator.get_input(idx) + nbDims = len(tensor.dims()) + axis = node.get_operator().attr.axis if node.get_operator().attr.axis >= 0 else node.get_operator().attr.axis + nbDims + + assert axis < nbDims, ( + f"export softmax: attribute axis == {axis} should be less than {nbDims}" + ) + + postAxisElems = 1 + for i in range(axis + 1, nbDims): + postAxisElems *= tensor.dims()[i] + + preAxisElems = 1 + for i in range(axis): + preAxisElems *= tensor.dims()[i] + + if (inputIndex == 0): + self.attributes["axis_size_post"] = postAxisElems + self.attributes["axis_size_pre"] = preAxisElems + + self.attributes["axis_size"] = [None] * self.attributes["nb_in"] + else: + assert self.attributes["axis_size_post"] == postAxisElems, ( + f"export concat: axis_size_post 
diff --git a/aidge_export_cpp/operators/Concat.py b/aidge_export_cpp/operators/Concat.py new file mode 100644 index 0000000000000000000000000000000000000000..ea65f8d3cd4debc01b388b71086620e6ba7b3d0b --- /dev/null +++ b/aidge_export_cpp/operators/Concat.py @@ -0,0 +1,74 @@ + +import aidge_core +from aidge_core.export_utils import ExportNodeCpp +from aidge_export_cpp import ROOT +from aidge_export_cpp import ExportLibCpp + +@ExportLibCpp.register("Concat", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.float32))) +class Concat(ExportNodeCpp): + def __init__(self, node, mem_info): + super().__init__(node, mem_info) + + self.attributes["aidge_cmp"] = node.attributes().has_attr("aidge_cmp") + + assert self.node.get_nb_inputs() >= 1, ( + f"export concat: nb_inputs == {self.node.get_nb_inputs()} not implemented" + ) + + inputIndex = 0 + + tensor = self.operator.get_input(0) + for idx, _ in enumerate(self.node.inputs()): + if self.operator.get_input(idx) is not None: + tensor = self.operator.get_input(idx) + nbDims = len(tensor.dims()) + axis = node.get_operator().attr.axis if node.get_operator().attr.axis >= 0 else node.get_operator().attr.axis + nbDims + + assert axis < nbDims, ( + f"export concat: attribute axis == {axis} should be less than {nbDims}" + ) + + postAxisElems = 1 + for i in range(axis + 1, nbDims): + postAxisElems *= tensor.dims()[i] + + preAxisElems = 1 + for i in range(axis): + preAxisElems *= tensor.dims()[i] + + if (inputIndex == 0): + self.attributes["axis_size_post"] = postAxisElems + self.attributes["axis_size_pre"] = preAxisElems + + self.attributes["axis_size"] = [None] * self.attributes["nb_in"] + else: + assert self.attributes["axis_size_post"] == postAxisElems, ( + f"export concat: axis_size_post {self.attributes['axis_size_post']} != {postAxisElems}" + ) + assert self.attributes["axis_size_pre"] == preAxisElems, ( + f"export concat: axis_size_pre {self.attributes['axis_size_pre']} != {preAxisElems}" + ) + + self.attributes["axis_size"][idx] = tensor.dims()[axis] + else: + assert False, ( + f"export concat: input {idx} is None, not implemented") + + inputIndex += 1 + + # Template for the layer configuration file generation + self.config_template = str(ROOT / "templates" / "configuration" / "concat_config.jinja") + + # Template for the layer call function generation within the forward file + self.forward_template = str(ROOT / "templates" / "kernel_forward" / "concat_forward.jinja") + + # Files to include within the generated forward.cpp file + self.include_list = [] + + # Path to the kernel(s) files to copy + self.add_kernel_to_copy(ROOT / "kernels" / "concat.hpp") + + # Include aidge outputs within the fwd file + if self.attributes["aidge_cmp"]: + self.include_list.append("network/utils.hpp") # aidge_cmp function + self.include_list.append("data/aidge_outputs/" + node.name() + ".hpp") \ No newline at end of file
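(Editor's note) The `preAxisElems`/`postAxisElems` loops above decompose a row-major tensor around the concat axis: `pre` is the number of slices before the axis and `post` the number of contiguous elements after it. A standalone check of that decomposition:

    from functools import reduce
    from operator import mul

    # Mirrors the Concat export logic: (pre, axis_size, post) element counts.
    def axis_decomposition(dims, axis):
        axis = axis if axis >= 0 else axis + len(dims)
        pre = reduce(mul, dims[:axis], 1)
        post = reduce(mul, dims[axis + 1:], 1)
        return pre, dims[axis], post

    # Concatenating NHWC tensors along the channel axis:
    assert axis_decomposition([1, 8, 8, 16], 3) == (64, 16, 1)
    assert axis_decomposition([1, 8, 8, 16], -1) == (64, 16, 1)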
diff --git a/aidge_export_cpp/operators/Conv.py b/aidge_export_cpp/operators/Conv.py
new file mode 100644
index 0000000000000000000000000000000000000000..c8137c51377c103855b9c2d133707124ebef64c5
--- /dev/null
+++ b/aidge_export_cpp/operators/Conv.py
@@ -0,0 +1,81 @@
+import aidge_core
+from aidge_core.export_utils import ExportNodeCpp, get_node_from_metaop
+from aidge_export_cpp import ROOT, ExportLibCpp, set_scaling_attributes
+
+@ExportLibCpp.register("Conv2D", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.any)))
+class Conv(ExportNodeCpp):
+    def __init__(self, node, mem_info):
+        super().__init__(node, mem_info)
+
+        # Initialize kernel attributes
+        self.attributes["padding"] = [0, 0, 0, 0]
+        self.attributes["activation"] = "Linear"
+        self.attributes["aidge_cmp"] = node.attributes().has_attr("aidge_cmp")
+
+        ## Scaling
+        self.attributes["rescaling"] = "NoScaling"
+        self.attributes["shift_value"] = 0
+
+        # Browse the metaop to update kernel attributes
+        ConvNode = get_node_from_metaop(node, "Conv2D")
+        self.attributes["kernel_dims"] = ConvNode[0].get_operator().attr.kernel_dims
+        self.attributes["stride_dims"] = ConvNode[0].get_operator().attr.stride_dims
+        self.attributes["dilation_dims"] = ConvNode[0].get_operator().attr.dilation_dims
+
+        # Template for layer configuration file generation
+        self.config_template = str(ROOT / "templates" / "configuration" / "convolution_config.jinja")
+
+        # Template for layer call function generation within the forward file
+        self.forward_template = str(ROOT / "templates" / "kernel_forward" / "convolution_forward.jinja")
+
+        # Files to include within the generated forward.cpp file
+        self.include_list = []
+
+        # Path to the kernel(s) files to copy
+        self.add_kernel_to_copy(ROOT / "kernels" / "convolution.hpp")
+        self.add_kernel_to_copy(ROOT / "static" / "macs.hpp", "include/network", fwd_include=False)
+
+        # Include aidge outputs within the fwd file
+        if self.attributes["aidge_cmp"]:
+            self.include_list.append("network/utils.hpp")  # aidge_cmp function
+            self.include_list.append("data/aidge_outputs/" + node.name() + ".hpp")
+
+
+@ExportLibCpp.register_metaop("QConv", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.any)))
+class QConv(Conv):
+    def __init__(self, node, mem_info):
+        super().__init__(node, mem_info)
+
+        # Look for Quantizer node and set shift and coef export node attributes
+        set_scaling_attributes(self, node)
+
+        ## Set the scaling type
+        if self.attributes["shift_value"] != 0:
+            self.attributes["rescaling"] = "SingleShiftScaling"
+
+
+@ExportLibCpp.register_metaop("PadConv", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.any)))
+class PadConv(QConv):
+    def __init__(self, node, mem_info):
+        super().__init__(node, mem_info)
+
+        # Browse the metaop to update kernel attributes
+        PadNode = get_node_from_metaop(node, "Pad2D")
+        self.attributes["padding"] = PadNode[0].get_operator().attr.begin_end_borders
+
+
+@ExportLibCpp.register_metaop("ConvAct", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.any)))
+class ConvAct(QConv):
+    def __init__(self, node, mem_info):
+        super().__init__(node, mem_info)
+
+        # Browse the metaop to update kernel attributes
+        if get_node_from_metaop(node, "ReLU"):
+            self.attributes["activation"] = "Rectifier"
+        else:
+            aidge_core.Log.error(f"{node.type()} activation is not yet supported.")
+
+@ExportLibCpp.register_metaop("PadConvAct", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.any)))
+class PadConvAct(PadConv, ConvAct):
+    def __init__(self, node, mem_info):
+        super().__init__(node, mem_info)
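Editor's note: `PadConvAct(PadConv, ConvAct)` above relies on Python's cooperative multiple inheritance; because every `__init__` calls `super().__init__`, both the padding and the activation updates run exactly once along the MRO. A minimal sketch of that mechanism, with simplified signatures (these are not the real classes):

```python
class Conv:
    def __init__(self):
        self.attributes = {"padding": [0, 0, 0, 0], "activation": "Linear"}

class PadConv(Conv):
    def __init__(self):
        super().__init__()
        self.attributes["padding"] = [1, 1, 1, 1]    # stands in for the Pad2D borders

class ConvAct(Conv):
    def __init__(self):
        super().__init__()
        self.attributes["activation"] = "Rectifier"  # stands in for the fused ReLU

class PadConvAct(PadConv, ConvAct):
    pass

# MRO: PadConvAct -> PadConv -> ConvAct -> Conv, so both updates apply once each.
print(PadConvAct().attributes)  # {'padding': [1, 1, 1, 1], 'activation': 'Rectifier'}
```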
diff --git a/aidge_export_cpp/operators/ConvDw.py b/aidge_export_cpp/operators/ConvDw.py
new file mode 100644
index 0000000000000000000000000000000000000000..936c3b6a9a6b96012d3c01a1ab6961e1adcc0c1e
--- /dev/null
+++ b/aidge_export_cpp/operators/ConvDw.py
@@ -0,0 +1,82 @@
+import aidge_core
+from aidge_core.export_utils import ExportNodeCpp, get_node_from_metaop
+from aidge_export_cpp import ROOT, ExportLibCpp, set_scaling_attributes
+
+@ExportLibCpp.register("ConvDepthWise2D", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.any)))
+class ConvDw(ExportNodeCpp):
+    def __init__(self, node, mem_info):
+        super().__init__(node, mem_info)
+
+        # Initialize kernel attributes
+        self.attributes["padding"] = [0, 0, 0, 0]
+        self.attributes["activation"] = "Linear"
+        self.attributes["depthwise"] = True
+        self.attributes["aidge_cmp"] = node.attributes().has_attr("aidge_cmp")
+
+        ## Scaling
+        self.attributes["rescaling"] = "NoScaling"
+        self.attributes["shift_value"] = 0
+
+        # Browse the metaop to update kernel attributes
+        ConvDwNode = get_node_from_metaop(node, "ConvDepthWise2D")
+        self.attributes["kernel_dims"] = ConvDwNode[0].get_operator().attr.kernel_dims
+        self.attributes["stride_dims"] = ConvDwNode[0].get_operator().attr.stride_dims
+        self.attributes["dilation_dims"] = ConvDwNode[0].get_operator().attr.dilation_dims
+
+        # Template for layer configuration file generation
+        self.config_template = str(ROOT / "templates" / "configuration" / "convolution_config.jinja")
+
+        # Template for layer call function generation within the forward file
+        self.forward_template = str(ROOT / "templates" / "kernel_forward" / "convolution_forward.jinja")
+
+        # Files to include within the generated forward.cpp file
+        self.include_list = []
+
+        # Path to the kernel(s) files to copy
+        self.add_kernel_to_copy(ROOT / "kernels" / "convolution_depthwise.hpp")
+        self.add_kernel_to_copy(ROOT / "static" / "macs.hpp", "include/network", fwd_include=False)
+
+        # Include aidge outputs within the fwd file
+        if self.attributes["aidge_cmp"]:
+            self.include_list.append("network/utils.hpp")  # aidge_cmp function
+            self.include_list.append("data/aidge_outputs/" + node.name() + ".hpp")
+
+
+@ExportLibCpp.register_metaop("QConvDw", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.any)))
+class QConvDw(ConvDw):
+    def __init__(self, node, mem_info):
+        super().__init__(node, mem_info)
+
+        # Look for Quantizer node and set shift and coef export node attributes
+        set_scaling_attributes(self, node)
+
+        ## Set the scaling type
+        if self.attributes["shift_value"] != 0:
+            self.attributes["rescaling"] = "SingleShiftScaling"
+
+
+@ExportLibCpp.register_metaop("PadConvDw", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.any)))
+class PadConvDw(QConvDw):
+    def __init__(self, node, mem_info):
+        super().__init__(node, mem_info)
+
+        # Browse the metaop to update kernel attributes
+        PadNode = get_node_from_metaop(node, "Pad2D")
+        self.attributes["padding"] = PadNode[0].get_operator().attr.begin_end_borders
+
+
+@ExportLibCpp.register_metaop("ConvDwAct", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.any)))
+class ConvDwAct(QConvDw):
+    def __init__(self, node, mem_info):
+        super().__init__(node, mem_info)
+
+        # Browse the metaop to update kernel attributes
+        if get_node_from_metaop(node, "ReLU"):
+            self.attributes["activation"] = "Rectifier"
+        else:
+            aidge_core.Log.error(f"{node.type()} activation is not yet supported.")
+
+@ExportLibCpp.register_metaop("PadConvDwAct", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.any)))
+class PadConvDwAct(PadConvDw, ConvDwAct):
+    def __init__(self, node, mem_info):
+        super().__init__(node, mem_info)
diff --git a/aidge_export_cpp/operators/ElemWise.py b/aidge_export_cpp/operators/ElemWise.py
new file mode 100644
index 0000000000000000000000000000000000000000..7d073ca9549aa0fb67a2c63562536a7c6808e6cd
--- /dev/null
+++ b/aidge_export_cpp/operators/ElemWise.py
@@ -0,0 +1,116 @@
+import aidge_core
+from aidge_core.export_utils import ExportNodeCpp, get_node_from_metaop
+from aidge_export_cpp import ROOT, ExportLibCpp, set_scaling_attributes
+
+class ElemWise(ExportNodeCpp):
+    def __init__(self, node, mem_info):
+        super().__init__(node, mem_info)
+
+        # Initialize kernel attributes
+        self.attributes["activation"] = "Linear"
+        self.attributes["aidge_cmp"] = node.attributes().has_attr("aidge_cmp")
+
+        ## Scaling
+        self.attributes["rescaling"] = "NoScaling"
+        self.attributes["shift_value"] = 0
+        self.attributes["coef_value"] = 1
+
+        # Template for layer configuration file generation
+        self.config_template = str(ROOT / "templates" / "configuration" / "elemwise_config.jinja")
+
+        # Template for layer call function generation within the forward file
+        self.forward_template = str(ROOT / "templates" / "kernel_forward" / "elemwise_forward.jinja")
+
+        # Files to include within the generated forward.cpp file
+        self.include_list = []
+
+        # Path to the kernel(s) files to copy
+        self.add_kernel_to_copy(ROOT / "kernels" / "elemwise.hpp")
+
+        # Include aidge outputs within the fwd file
+        if self.attributes["aidge_cmp"]:
+            self.include_list.append("network/utils.hpp")  # aidge_cmp function
+            self.include_list.append("data/aidge_outputs/" + node.name() + ".hpp")
+
+
+class QElemWise(ElemWise):
+    def __init__(self, node, mem_info):
+        super().__init__(node, mem_info)
+
+        # Browse the metaop to update kernel attributes
+        set_scaling_attributes(self, node)
+
+        ## Set the scaling type
+        if self.attributes["coef_value"] != 1:
+            self.attributes["rescaling"] = "FixedPointScaling"
+        elif self.attributes["shift_value"] != 0:
+            self.attributes["rescaling"] = "SingleShiftScaling"
+
+
+@ExportLibCpp.register("Add", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.any)))
+class Add(ElemWise):
+    def __init__(self, node, mem_info):
+        super().__init__(node, mem_info)
+        self.attributes["elemwise_op"] = "Add"
+
+
+@ExportLibCpp.register_metaop("QAdd", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.any)))
+class QAdd(QElemWise, Add):
+    def __init__(self, node, mem_info):
+        super().__init__(node, mem_info)
+
+
+@ExportLibCpp.register_metaop("AddAct", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.any)))
+class AddAct(QAdd):
+    def __init__(self, node, mem_info):
+        super().__init__(node, mem_info)
+
+        # Browse the metaop to update kernel attributes
+        if get_node_from_metaop(node, "ReLU"):
+            self.attributes["activation"] = "Rectifier"
+        else:
+            aidge_core.Log.error(f"{node.type()} activation is not yet supported.")
+
+
+@ExportLibCpp.register("Sub", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.any)))
+class Sub(ElemWise):
+    def __init__(self, node, mem_info):
+        super().__init__(node, mem_info)
+        self.attributes["elemwise_op"] = "Sub"
+
+
+@ExportLibCpp.register_metaop("QSub", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.any)))
+class QSub(QElemWise, Sub):
+    def __init__(self, node, mem_info):
+        super().__init__(node, mem_info)
+
+
+@ExportLibCpp.register_metaop("SubAct", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.any)))
+class SubAct(QSub):
+    def __init__(self, node, mem_info):
+        super().__init__(node, mem_info)
+
+        # Browse the metaop to update kernel attributes
+        if get_node_from_metaop(node, "ReLU"):
+            self.attributes["activation"] = "Rectifier"
+        else:
+            aidge_core.Log.error(f"{node.type()} activation is not yet supported.")
+
+
+@ExportLibCpp.register("Mul", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.any)))
+class Mul(QElemWise):
+    def __init__(self, node, mem_info):
+        super().__init__(node, mem_info)
+        self.attributes["elemwise_op"] = "Mul"
+
+
+@ExportLibCpp.register_metaop("MulAct", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.any)))
+class MulAct(Mul):
+    def __init__(self, node, mem_info):
+        super().__init__(node, mem_info)
+
+        # Browse the metaop to update kernel attributes
+        if get_node_from_metaop(node, "ReLU"):
+            self.attributes["activation"] = "Rectifier"
+        else:
+            aidge_core.Log.error(f"{node.type()} activation is not yet supported.")
\ No newline at end of file
"static" / "macs.hpp", "include/network", fwd_include=False) + + # Include aidge outputs within the fwd file + if self.attributes["aidge_cmp"]: + self.include_list.append("network/utils.hpp") # aidge_cmp function + self.include_list.append("data/aidge_outputs/" + node.name() + ".hpp") + + +@ExportLibCpp.register_metaop("QFC", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.any))) +class QFC(FC): + def __init__(self, node, mem_info): + super().__init__(node, mem_info) + + # Browse the metaop to update kernel attributes + set_scaling_attributes(self, node) + + ## Set the scaling type + if self.attributes["shift_value"] != 0: + self.attributes["rescaling"] = "SingleShiftScaling" + + +@ExportLibCpp.register_metaop("FCAct", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.any))) +class FCAct(QFC): + def __init__(self, node, mem_info): + super().__init__(node, mem_info) + + # Browse the metaop to update kernel attributes + if get_node_from_metaop(node, "ReLU"): + self.attributes["activation"] = "Rectifier" + else: + aidge_core.Log.error(f"{node.type()} activation is not yet supported.") diff --git a/aidge_export_cpp/operators/Hardmax.py b/aidge_export_cpp/operators/Hardmax.py new file mode 100644 index 0000000000000000000000000000000000000000..1dabf0090165011298b60d3eb274420330e8c0f1 --- /dev/null +++ b/aidge_export_cpp/operators/Hardmax.py @@ -0,0 +1,43 @@ +import aidge_core +from aidge_core.export_utils import ExportNodeCpp +from aidge_export_cpp import ROOT, ExportLibCpp + +@ExportLibCpp.register("Hardmax", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.float32))) +class HardmaxCPP(ExportNodeCpp): + def __init__(self, node, mem_info): + super().__init__(node, mem_info) + assert self.node.get_nb_inputs() == 1, ( + f"export hardmax: nb_inputs == {self.node.get_nb_inputs()} not implemented" + ) + + tensor = self.operator.get_input(0) + nbDims = len(tensor.dims()) + axis = node.get_operator().attr.axis if node.get_operator().attr.axis >= 0 else node.get_operator().attr.axis + nbDims + + assert axis >= -nbDims and axis < nbDims, ( + f"export hardmax: attribute axis == {node.get_operator().attr.axis} should be comprised within [-{nbDims},{nbDims}]." 
diff --git a/aidge_export_cpp/operators/Hardmax.py b/aidge_export_cpp/operators/Hardmax.py
new file mode 100644
index 0000000000000000000000000000000000000000..1dabf0090165011298b60d3eb274420330e8c0f1
--- /dev/null
+++ b/aidge_export_cpp/operators/Hardmax.py
@@ -0,0 +1,43 @@
+import aidge_core
+from aidge_core.export_utils import ExportNodeCpp
+from aidge_export_cpp import ROOT, ExportLibCpp
+
+@ExportLibCpp.register("Hardmax", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.float32)))
+class HardmaxCPP(ExportNodeCpp):
+    def __init__(self, node, mem_info):
+        super().__init__(node, mem_info)
+        assert self.node.get_nb_inputs() == 1, (
+            f"export hardmax: nb_inputs == {self.node.get_nb_inputs()} not implemented"
+        )
+
+        tensor = self.operator.get_input(0)
+        nbDims = len(tensor.dims())
+        axis = node.get_operator().attr.axis if node.get_operator().attr.axis >= 0 else node.get_operator().attr.axis + nbDims
+
+        # axis has been normalized above, so the original attribute must lie in [-nbDims, nbDims)
+        assert 0 <= axis < nbDims, (
+            f"export hardmax: attribute axis == {node.get_operator().attr.axis} should be within [-{nbDims}, {nbDims})."
+        )
+
+        post_axis_elems = 1
+        for i in range(axis + 1, nbDims):
+            post_axis_elems *= tensor.dims()[i]
+
+        preaxis_elems = 1
+        for i in range(axis):
+            preaxis_elems *= tensor.dims()[i]
+
+        axis_elems = post_axis_elems * tensor.dims()[axis]
+        nb_elems = preaxis_elems * axis_elems
+
+        self.attributes["axis_dim_size"] = tensor.dims()[axis]
+        self.attributes["preaxis_stride"] = preaxis_elems
+        self.attributes["axis_stride"] = axis_elems
+        self.attributes["postaxis_stride"] = post_axis_elems
+        self.attributes["out_nb_elts"] = nb_elems
+
+        self.config_template = str(
+            ROOT / "templates" / "configuration" / "hardmax_config.jinja")
+        self.forward_template = str(
+            ROOT / "templates" / "kernel_forward" / "hardmax_forward.jinja")
+        self.include_list = []
+        self.add_kernel_to_copy(ROOT / "kernels" / "hardmax.hpp")
diff --git a/aidge_export_cpp/operators/Pad.py b/aidge_export_cpp/operators/Pad.py
new file mode 100644
index 0000000000000000000000000000000000000000..f84f2cff0f1df283327c1122ddb7b77049bddd3f
--- /dev/null
+++ b/aidge_export_cpp/operators/Pad.py
@@ -0,0 +1,36 @@
+import aidge_core
+from aidge_core.export_utils import ExportNodeCpp
+from aidge_export_cpp import ROOT
+from aidge_export_cpp import ExportLibCpp
+
+@ExportLibCpp.register("Pad2D", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.any)))
+class CppPad(ExportNodeCpp):
+    def __init__(self, node, mem_info):
+        super().__init__(node, mem_info)
+
+        # Initialize kernel attributes
+        self.attributes["padding"] = node.get_operator().attr.begin_end_borders
+        self.attributes["border_type"] = node.get_operator().attr.border_type
+        self.attributes["border_value"] = node.get_operator().attr.border_value
+        self.attributes["aidge_cmp"] = node.attributes().has_attr("aidge_cmp")
+
+        assert self.attributes["border_type"] == aidge_core.pad_border_type.Constant, (
+            f"export Pad2d: border_type == {node.get_operator().attr.border_type} not implemented"
+        )
+
+        # Template for layer configuration file generation
+        self.config_template = str(ROOT / "templates" / "configuration" / "pad_config.jinja")
+
+        # Template for layer call function generation within the forward file
+        self.forward_template = str(ROOT / "templates" / "kernel_forward" / "pad_forward.jinja")
+
+        # Files to include within the generated forward.cpp file
+        self.include_list = []
+
+        # Path to the kernel(s) files to copy
+        self.add_kernel_to_copy(ROOT / "kernels" / "pad.hpp")
+
+        # Include aidge outputs within the fwd file
+        if self.attributes["aidge_cmp"]:
+            self.include_list.append("network/utils.hpp")  # aidge_cmp function
+            self.include_list.append("data/aidge_outputs/" + node.name() + ".hpp")
\ No newline at end of file
diff --git a/aidge_export_cpp/operators/Pool.py b/aidge_export_cpp/operators/Pool.py
new file mode 100644
index 0000000000000000000000000000000000000000..10d595e5ed4a76c22bcc15f90d8c693b8dbf2144
--- /dev/null
+++ b/aidge_export_cpp/operators/Pool.py
@@ -0,0 +1,139 @@
+import aidge_core
+from aidge_core.export_utils import ExportNodeCpp, get_node_from_metaop
+from aidge_export_cpp import ROOT
+from aidge_export_cpp import ExportLibCpp
+
+class Pool(ExportNodeCpp):
+    def __init__(self, node, mem_info):
+        super().__init__(node, mem_info)
+
+        # Initialize kernel attributes
+        self.attributes["stride_dims"] = [1, 1]
+        self.attributes["padding"] = [0, 0, 0, 0]
+        self.attributes["pool_type"] = "Max"
+        self.attributes["activation"] = "Linear"
+        self.attributes["aidge_cmp"] = node.attributes().has_attr("aidge_cmp")
+
+        # Template for layer configuration file generation
+
self.config_template = str(ROOT / "templates" / "configuration" / "pooling_config.jinja") + + # Template layer call function generation within the forward file + self.forward_template = str(ROOT / "templates" / "kernel_forward" / "pooling_forward.jinja") + + # Files to include within the generated forward.cpp file + self.include_list = [] + + # Path to the kernel(s) files to copy + self.add_kernel_to_copy(ROOT / "kernels" / "pooling.hpp") + + # Include aidge outputs within the fwd file + if self.attributes["aidge_cmp"]: + self.include_list.append("network/utils.hpp") # aidge_cmp function + self.include_list.append("data/aidge_outputs/" + node.name() + ".hpp") + + +class PadPool(Pool): + def __init__(self, node, mem_info): + super().__init__(node, mem_info) + + # Browse the metaop to update kernel attributes + PadNode = get_node_from_metaop(node, "Pad2D") + self.attributes["padding"] = PadNode[0].get_operator().attr.begin_end_borders + + +class PoolAct(Pool): + def __init__(self, node, mem_info): + super().__init__(node, mem_info) + + # Browse the metaop to update kernel attributes + if get_node_from_metaop(node, "ReLU"): + self.attributes["activation"] = "Rectifier" + else: + aidge_core.Log.error(f"{node.type()} activation is not yet supported.") + + +@ExportLibCpp.register("MaxPooling2D", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.any))) +class MaxPool(Pool): + def __init__(self, node, mem_info): + super().__init__(node, mem_info) + + # Browse the metaop to update kernel attributes + PoolNode = get_node_from_metaop(node, "MaxPooling2D") + self.attributes["pool_type"] = "Max" + self.attributes["kernel_dims"] = PoolNode[0].get_operator().attr.kernel_dims + self.attributes["stride_dims"] = PoolNode[0].get_operator().attr.stride_dims + + +@ExportLibCpp.register_metaop("PadMaxPool", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.any))) +class PadMaxPool(MaxPool, PadPool): + def __init__(self, node, mem_info): + super().__init__(node, mem_info) + + +@ExportLibCpp.register_metaop("MaxPoolAct", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.any))) +class MaxPoolAct(MaxPool, PoolAct): + def __init__(self, node, mem_info): + super().__init__(node, mem_info) + + +@ExportLibCpp.register_metaop("PadMaxPoolAct", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.any))) +class PadMaxPoolAct(PadMaxPool, MaxPoolAct): + def __init__(self, node, mem_info): + super().__init__(node, mem_info) + + +@ExportLibCpp.register("AvgPooling2D", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.any))) +class AvgPool(Pool): + def __init__(self, node, mem_info): + super().__init__(node, mem_info) + + # Browse the metaop to update kernel attributes + PoolNode = get_node_from_metaop(node, "AvgPooling2D") + self.attributes["pool_type"] = "Average" + self.attributes["kernel_dims"] = PoolNode[0].get_operator().attr.kernel_dims + self.attributes["stride_dims"] = PoolNode[0].get_operator().attr.stride_dims + + +@ExportLibCpp.register_metaop("PadAvgPool", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.any))) +class PadAvgPool(AvgPool, PadPool): + def __init__(self, node, mem_info): + super().__init__(node, mem_info) + + +@ExportLibCpp.register_metaop("AvgPoolAct", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.any))) +class AvgPoolAct(AvgPool, PoolAct): + def __init__(self, node, mem_info): + super().__init__(node, mem_info) + + +@ExportLibCpp.register_metaop("PadAvgPoolAct", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.any))) +class PadAvgPoolAct(PadAvgPool, 
AvgPoolAct): + def __init__(self, node, mem_info): + super().__init__(node, mem_info) + + +@ExportLibCpp.register("GlobalAveragePooling", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.any))) +class GlobalAvgPool(Pool): + def __init__(self, node, mem_info): + super().__init__(node, mem_info) + + self.attributes["pool_type"] = "Average" + self.attributes["kernel_dims"] = [self.attributes["in_width"][0], self.attributes["in_height"][0]] + + +@ExportLibCpp.register_metaop("PadGlobalAvgPool", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.any))) +class PadGlobalAvgPool(GlobalAvgPool, PadPool): + def __init__(self, node, mem_info): + super().__init__(node, mem_info) + + +@ExportLibCpp.register_metaop("GlobalAvgPoolAct", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.any))) +class GlobalAvgPoolAct(GlobalAvgPool, PoolAct): + def __init__(self, node, mem_info): + super().__init__(node, mem_info) + + +@ExportLibCpp.register_metaop("PadGlobalAvgPoolAct", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.any))) +class PadGlobalAvgPoolAct(PadGlobalAvgPool, GlobalAvgPoolAct): + def __init__(self, node, mem_info): + super().__init__(node, mem_info) \ No newline at end of file diff --git a/aidge_export_cpp/operators/Producer.py b/aidge_export_cpp/operators/Producer.py new file mode 100644 index 0000000000000000000000000000000000000000..627dcb29ed21e2779a30978b5b22768c0c2cc9c4 --- /dev/null +++ b/aidge_export_cpp/operators/Producer.py @@ -0,0 +1,67 @@ +import os +from pathlib import Path +import numpy as np +import aidge_core +from aidge_core.export_utils import ExportNode, generate_file +from aidge_export_cpp import ROOT +from aidge_export_cpp import ExportLibCpp + +def numpy_dtype2ctype(dtype): + if dtype == np.int8: + return "int8_t" + elif dtype == np.int16: + return "int16_t" + elif dtype == np.int32: + return "int32_t" + elif dtype == np.int64: + return "int64_t" + elif dtype == np.float32: + return "float" + elif dtype == np.float64: + return "double" + # Add more dtype mappings as needed + else: + raise ValueError(f"Unsupported {dtype} dtype") + +def export_params(name: str, + array: np.ndarray, + filepath: str): + + # Get directory name of the file + dirname = os.path.dirname(filepath) + + # If directory doesn't exist, create it + if not os.path.exists(dirname): + os.makedirs(dirname) + + generate_file( + filepath, + str(ROOT / "templates" / "data" / "parameters.jinja"), + name=name, + data_t=numpy_dtype2ctype(array.dtype), + values=array.tolist() + ) + +@ExportLibCpp.register("Producer", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.any))) +class ProducerCPP(ExportNode): + def __init__(self, node, mem_info): + super().__init__(node, mem_info) + self.values = np.array(self.operator.get_output(0)) + self.ignore = node.attributes().has_attr("ignore") + + if len(self.values.shape) == 4: # Note: export in HWC + self.values = np.transpose(self.values, (0, 2, 3, 1)) + + def export(self, export_folder: Path): + if not self.ignore : + header_path = f"include/parameters/{self.attributes['name']}.h" + export_params( + self.attributes['out_name'][0], + self.values.reshape(-1), + str(export_folder / header_path)) + return [header_path] + return [] + + def forward(self): + # A Producer does nothing during forward + return [] \ No newline at end of file diff --git a/aidge_export_cpp/operators/Quantizer.py b/aidge_export_cpp/operators/Quantizer.py new file mode 100644 index 0000000000000000000000000000000000000000..51f5c23da24e7c6a47c162314f54a15c8845fc00 --- /dev/null +++ 
b/aidge_export_cpp/operators/Quantizer.py
@@ -0,0 +1,50 @@
+import aidge_core
+from aidge_core.export_utils import ExportNodeCpp, get_node_from_metaop
+from aidge_export_cpp import ROOT, ExportLibCpp, set_scaling_attributes
+
+@ExportLibCpp.register_metaop("Quantizer", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.any)))
+class Quantizer(ExportNodeCpp):
+    def __init__(self, node, mem_info):
+        super().__init__(node, mem_info)
+
+        # Initialize kernel attributes
+        self.attributes["activation"] = "Linear"
+        self.attributes["rescaling"] = "NoScaling"
+        self.attributes["shift_value"] = 0
+        self.attributes["coef_value"] = 1
+        self.attributes["aidge_cmp"] = node.attributes().has_attr("aidge_cmp")
+
+        # Browse the metaop to update kernel attributes
+        if get_node_from_metaop(node, "ReLU"):
+            self.attributes["activation"] = "Rectifier"
+
+        # Set scaling attributes
+        set_scaling_attributes(self, node)
+
+        ## Set the scaling type
+        if self.attributes["coef_value"] != 1:
+            self.attributes["rescaling"] = "FixedPointScaling"
+        elif self.attributes["shift_value"] != 0:
+            self.attributes["rescaling"] = "SingleShiftScaling"
+
+        # Template for layer configuration file generation
+        self.config_template = str(ROOT / "templates" / "configuration" / "rescaling_config.jinja")
+
+        # Template for layer call function generation within the forward file
+        self.forward_template = str(ROOT / "templates" / "kernel_forward" / "rescaling_forward.jinja")
+
+        # Files to include within the generated forward.cpp file
+        self.include_list = []
+
+        # Path to the kernel(s) files to copy
+        self.add_kernel_to_copy(ROOT / "kernels" / "rescaling.hpp")
+
+        # Include aidge outputs within the fwd file
+        if self.attributes["aidge_cmp"]:
+            self.include_list.append("network/utils.hpp")  # aidge_cmp function
+            self.include_list.append("data/aidge_outputs/" + node.name() + ".hpp")
+
+@ExportLibCpp.register_metaop("QMul", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.any)))
+class QMul(Quantizer):
+    def __init__(self, node, mem_info):
+        super().__init__(node, mem_info)
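Editor's note: Quantizer.py above spells out the rescaling-selection rule shared by the Q* nodes in this patch (QConv, QConvDw, QFC, QElemWise, QReLU): a coefficient other than 1 selects FixedPointScaling, otherwise a non-zero shift selects SingleShiftScaling, and NoScaling is the fallback. The rule distilled into a hypothetical helper (not a function from this patch):

```python
def pick_rescaling(shift_value: int, coef_value: int) -> str:
    # Mirrors the if/elif chains in the Q* export nodes above.
    if coef_value != 1:
        return "FixedPointScaling"   # multiply by a fixed-point coefficient, then shift
    if shift_value != 0:
        return "SingleShiftScaling"  # power-of-two rescaling only
    return "NoScaling"

assert pick_rescaling(0, 1) == "NoScaling"
assert pick_rescaling(3, 1) == "SingleShiftScaling"
assert pick_rescaling(3, 7) == "FixedPointScaling"
```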
self.include_list.append("network/utils.hpp") # aidge_cmp function + self.include_list.append("data/aidge_outputs/" + node.name() + ".hpp") + + +@ExportLibCpp.register_metaop("QReLU", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.any))) +class QReLU(ReLU): + def __init__(self, node, mem_info): + super().__init__(node, mem_info) + + # Browse the metaop to update kernel attributes + set_scaling_attributes(self, node) + + # Update the scaling type + if self.attributes["coef_value"] != 1: + self.attributes["rescaling"] = "FixedPointScaling" + elif self.attributes["shift_value"] != 0: + self.attributes["rescaling"] = "SingleShiftScaling" diff --git a/aidge_export_cpp/operators/Softmax.py b/aidge_export_cpp/operators/Softmax.py new file mode 100644 index 0000000000000000000000000000000000000000..aa8300867143f5cc5c7bc7ecdd4d89f8e5a792f3 --- /dev/null +++ b/aidge_export_cpp/operators/Softmax.py @@ -0,0 +1,53 @@ +import aidge_core +from aidge_core.export_utils import ExportNodeCpp +from aidge_export_cpp import ROOT +from aidge_export_cpp import ExportLibCpp + +@ExportLibCpp.register("Softmax", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.float32))) +class Softmax(ExportNodeCpp): + def __init__(self, node, mem_info): + super().__init__(node, mem_info) + + self.attributes["aidge_cmp"] = node.attributes().has_attr("aidge_cmp") + + assert self.node.get_nb_inputs() == 1, ( + f"export softmax: nb_inputs == {self.node.get_nb_inputs()} not implemented" + ) + + tensor = self.operator.get_input(0) + nbDims = len(tensor.dims()) + axis = node.get_operator().attr.axis if node.get_operator().attr.axis >= 0 else node.get_operator().attr.axis + nbDims + + assert axis < nbDims, ( + f"export softmax: attribute axis == {node.get_operator().attr.axis} should be less than {nbDims}" + ) + + postAxisElems = 1 + for i in range(axis + 1, nbDims): + postAxisElems *= tensor.dims()[i] + + preAxisElems = 1 + for i in range(axis): + preAxisElems *= tensor.dims()[i] + + # Set kernel attributes + self.attributes["axis_size"] = tensor.dims()[axis] + self.attributes["axis_size_post"] = postAxisElems + self.attributes["axis_size_pre"] = preAxisElems + + # Template for layer configutation file generation + self.config_template = str(ROOT / "templates" / "configuration" / "softmax_config.jinja") + + # Template layer call function generation within the forward file + self.forward_template = str(ROOT / "templates" / "kernel_forward" / "softmax_forward.jinja") + + # Files to include within the generated forward.cpp file + self.include_list = [] + + # Path to the kernel(s) files to copy + self.add_kernel_to_copy(ROOT / "kernels" / "softmax.hpp") + + # Include aidge outputs within the fwd file + if self.attributes["aidge_cmp"]: + self.include_list.append("network/utils.hpp") # aidge_cmp function + self.include_list.append("data/aidge_outputs/" + node.name() + ".hpp") \ No newline at end of file diff --git a/aidge_export_cpp/operators/__init__.py b/aidge_export_cpp/operators/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..37d674ac84f72d643ba1a628a86fbcde9780f4a4 --- /dev/null +++ b/aidge_export_cpp/operators/__init__.py @@ -0,0 +1,14 @@ +""" +Copyright (c) 2023 CEA-List + +This program and the accompanying materials are made available under the +terms of the Eclipse Public License 2.0 which is available at +http://www.eclipse.org/legal/epl-2.0. 
+ +SPDX-License-Identifier: EPL-2.0 +""" +from pathlib import Path + +DIR_PATH = Path(__file__).parent +modules = [Path(module).stem for module in DIR_PATH.glob("*.py")] +__all__ = [ f for f in modules if f != "__init__"] diff --git a/aidge_export_cpp/static/activation_utils.hpp b/aidge_export_cpp/static/activation_utils.hpp new file mode 100644 index 0000000000000000000000000000000000000000..c6a1bcdc0ce289a384519673ca04a001a5ca9692 --- /dev/null +++ b/aidge_export_cpp/static/activation_utils.hpp @@ -0,0 +1,56 @@ +#pragma once + +#include <type_traits> +#include "network/typedefs.hpp" +#include "network/utils.hpp" +#include "network/rescaling_utils.hpp" + +template<typename Output_T, typename T, + typename std::enable_if<std::is_floating_point<T>::value>::type* = nullptr> +__attribute__((always_inline)) inline +Output_T saturate (T value, int32_t /*sat*/) +{ + return value; +} + +template<typename Output_T, typename T, + typename std::enable_if<!std::is_floating_point<T>::value>::type* = nullptr> +__attribute__((always_inline)) inline +Output_T saturate (T value, uint32_t sat) +{ + if (std::is_unsigned<Output_T>::value) { + return clamp(value, T(0), (T(1) << sat) - 1); + } else { + return clamp(value, -(T(1) << (sat - 1)), (T(1) << (sat - 1)) - 1); + } +} + +template<typename Output_T, + typename Sum_T, + typename Rescaling_T> +__attribute__((always_inline)) inline +Output_T activation_forward_value (Sum_T weightedSum, + int output, + ActivationFunction_T func, + const Rescaling_T& __restrict rescaling) +{ + switch(func) { + case Linear: + case Saturation: { + break; + } + case Rectifier: { + if(weightedSum <= 0) weightedSum = 0; + break; + } + default: + // Unsupported activation function + break; + } + + // Value fixed here for now but it should be generated by + // the export module or determined by the type of Output_T + // For now only works for int8_t and uint8_t + const uint32_t NB_BITS = 8; + return saturate<Output_T>(rescaling(weightedSum, output), NB_BITS); +} diff --git a/aidge_export_cpp/kernels/macs.hpp b/aidge_export_cpp/static/macs.hpp similarity index 100% rename from aidge_export_cpp/kernels/macs.hpp rename to aidge_export_cpp/static/macs.hpp diff --git a/aidge_export_cpp/static/rescaling_utils.hpp b/aidge_export_cpp/static/rescaling_utils.hpp new file mode 100644 index 0000000000000000000000000000000000000000..4fdb321820f92f8d33e474aabc4665a99cb0d4b0 --- /dev/null +++ b/aidge_export_cpp/static/rescaling_utils.hpp @@ -0,0 +1,78 @@ +#pragma once + +// --------------------------------------------------- +// ----------------- Saturate Utils ------------------ +// --------------------------------------------------- + +static int64_t toInt64(uint32_t lo, uint32_t hi) { + return (int64_t) (((uint64_t) hi) << 32ull) | ((uint64_t) lo); +} + +static int64_t smlal(int32_t lhs, int32_t rhs, + uint32_t accumLo, uint32_t accumHi) +{ + return ((int64_t) lhs) * ((int64_t) rhs) + toInt64(accumLo, accumHi); +} + +// --------------------------------------------------- +// --------------- Scaling by Shifting --------------- +// --------------------------------------------------- + +template<int SHIFT> +struct SingleShiftScaling { + + template<typename Sum_T> + Sum_T operator()(Sum_T weightedSum, size_t /*output*/) const + { + return (SHIFT != 0) ? 
((weightedSum >> (SHIFT - 1)) + 1) >> 1 // Rounding + : weightedSum; + } + + // // Shift attribute + // static const int mShift = SHIFT; + // static const Scaling_T mScalingType = SingleShift; + + // // FP Attribute + // static const int32_t mScaling = 0; + // static const int64_t mFractionalBits = 0; + +}; + +// --------------------------------------------------- +// --------------- Fixed Point Scaling --------------- +// --------------------------------------------------- + +template<int64_t SHIFT, int32_t COEF> +struct FixedPointScaling { + + template<typename Sum_T> + Sum_T operator()(Sum_T weightedSum, size_t /*output*/) const + { + return smlal(weightedSum, COEF, HALF_LO, HALF_HI) >> SHIFT; + } + + // Attributes + static const uint32_t HALF_LO = (SHIFT > 0) + ? (1ull << (SHIFT - 1)) & 0xFFFFFFFF : 0; + static const uint32_t HALF_HI = (SHIFT > 0) + ? (1ull << (SHIFT - 1)) >> 32u : 0; + + // static const int32_t mScaling = SCALING; + // static const int64_t mFractionalBits = FRACTIONAL_BITS; + // static const Scaling_T mScalingType = FixedPoint; + // static const int mShift = 0; +}; + +// --------------------------------------------------- +// ------------------- No Scaling -------------------- +// --------------------------------------------------- + +struct NoScaling { + + template<typename Sum_T> + Sum_T operator()(Sum_T weightedSum, unsigned int /*output*/) const + { + return weightedSum; + } + +}; diff --git a/aidge_export_cpp/static/include/network/typedefs.hpp b/aidge_export_cpp/static/typedefs.hpp similarity index 100% rename from aidge_export_cpp/static/include/network/typedefs.hpp rename to aidge_export_cpp/static/typedefs.hpp diff --git a/aidge_export_cpp/static/include/network/utils.hpp b/aidge_export_cpp/static/utils.hpp similarity index 53% rename from aidge_export_cpp/static/include/network/utils.hpp rename to aidge_export_cpp/static/utils.hpp index e2bfbe2f35b3522d0600f10e8481e0879338f43a..b9b739269216f6e02e05f5da3bcb3c2a8df30150 100644 --- a/aidge_export_cpp/static/include/network/utils.hpp +++ b/aidge_export_cpp/static/utils.hpp @@ -1,13 +1,17 @@ #ifndef __AIDGE_EXPORT_CPP_NETWORK_UTILS__ #define __AIDGE_EXPORT_CPP_NETWORK_UTILS__ -#ifdef SAVE_OUTPUTS +#if SAVE_OUTPUTS #include <sys/types.h> #include <sys/stat.h> #include <cstdio> // fprintf #include <type_traits> // std::is_floating_point #endif +#if AIDGE_CMP +#include <string> +#endif + /** * @brief Integer clamping * @param[in] v Value to be clamped @@ -49,7 +53,7 @@ int min (int lhs, int rhs) } -#ifdef SAVE_OUTPUTS +#if SAVE_OUTPUTS enum class Format { Default, NCHW, @@ -65,11 +69,11 @@ template<typename Output_T> inline void saveOutputs( int NB_OUTPUTS, int OUTPUTS_HEIGHT, int OUTPUTS_WIDTH, - int OUTPUT_MEM_CONT_OFFSET, - int OUTPUT_MEM_CONT_SIZE, - int OUTPUT_MEM_WRAP_OFFSET, - int OUTPUT_MEM_WRAP_SIZE, - int OUTPUT_MEM_STRIDE, + // int OUTPUT_MEM_CONT_OFFSET, + // int OUTPUT_MEM_CONT_SIZE, + // int OUTPUT_MEM_WRAP_OFFSET, + // int OUTPUT_MEM_WRAP_SIZE, + // int OUTPUT_MEM_STRIDE, const Output_T* __restrict outputs, FILE* pFile, Format format) @@ -77,27 +81,29 @@ inline void saveOutputs( // default is NHCW ! 
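// Editorial note on the simplification below: the rewritten loops walk the
// output buffer with a plain running offset, which assumes the outputs are
// stored contiguously in NHWC order. The commented-out OUTPUT_MEM_* parameters
// used to handle strided/wrapped buffers, so this dump is no longer valid when
// memory wrapping is enabled. Note also that the NCHW branch below prints every
// value with "%d"; if floating-point dumps are needed there, the same
// std::is_floating_point guard kept in the NHWC branch would apply.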
if (format == Format::NHWC) { fprintf(pFile, "("); + auto oOffset = 0; for(int oy = 0; oy < OUTPUTS_HEIGHT; oy++) { fprintf(pFile, "("); for(int ox = 0; ox < OUTPUTS_WIDTH; ox++) { fprintf(pFile, "("); - const int oPos = (ox + OUTPUTS_WIDTH * oy); - int oOffset = OUTPUT_MEM_STRIDE * oPos; + // const int oPos = (ox + OUTPUTS_WIDTH * oy); + // int oOffset = OUTPUT_MEM_STRIDE * oPos; - if (OUTPUT_MEM_WRAP_SIZE > 0 - && oOffset >= OUTPUT_MEM_CONT_SIZE) - { - oOffset += OUTPUT_MEM_WRAP_OFFSET - OUTPUT_MEM_CONT_OFFSET - - OUTPUT_MEM_CONT_SIZE; - } + // if (OUTPUT_MEM_WRAP_SIZE > 0 + // && oOffset >= OUTPUT_MEM_CONT_SIZE) + // { + // oOffset += OUTPUT_MEM_WRAP_OFFSET - OUTPUT_MEM_CONT_OFFSET + // - OUTPUT_MEM_CONT_SIZE; + // } for (int output = 0; output < NB_OUTPUTS; output++) { if (std::is_floating_point<Output_T>::value) - fprintf(pFile, "%f", static_cast<float>(outputs[oOffset + output])); + fprintf(pFile, "%f", static_cast<float>(outputs[oOffset])); else - fprintf(pFile, "%d", static_cast<int>(outputs[oOffset + output])); + fprintf(pFile, "%d", static_cast<int>(outputs[oOffset])); + oOffset += 1; fprintf(pFile, ", "); } @@ -111,25 +117,14 @@ inline void saveOutputs( fprintf(pFile, ")\n"); } else if (format == Format::NCHW || format == Format::Default) { + auto ofst = 0; for(int output = 0; output < NB_OUTPUTS; output++) { fprintf(pFile, "%d:\n", output); for(int oy = 0; oy < OUTPUTS_HEIGHT; oy++) { for(int ox = 0; ox < OUTPUTS_WIDTH; ox++) { - const int oPos = (ox + OUTPUTS_WIDTH * oy); - int oOffset = OUTPUT_MEM_STRIDE * oPos; - if (OUTPUT_MEM_WRAP_SIZE > 0 - && oOffset >= OUTPUT_MEM_CONT_SIZE) - { - oOffset += OUTPUT_MEM_WRAP_OFFSET - - OUTPUT_MEM_CONT_OFFSET - OUTPUT_MEM_CONT_SIZE; - } - - if (std::is_floating_point<Output_T>::value) - fprintf(pFile, "%f", static_cast<float>(outputs[oOffset + output])); - else - fprintf(pFile, "%d", static_cast<int>(outputs[oOffset + output])); - + fprintf(pFile, "%d", static_cast<int>(outputs[ofst])); fprintf(pFile, " "); + ofst += 1; } fprintf(pFile, "\n"); @@ -146,4 +141,35 @@ inline void saveOutputs( } #endif // SAVE_OUTPUTS +#if AIDGE_CMP + +template<int NB_OUTPUTS, int OUT_WIDTH, int OUT_HEIGHT, typename AidgeOutput_T, typename DevOutput_T> +void aidge_cmp(std::string layer_name, AidgeOutput_T* aidge_output, DevOutput_T* dev_output) { + + printf("[AIDGE COMPARE] - %s\n", layer_name.c_str()); + + for (auto out = 0; out < NB_OUTPUTS; ++out) { + for (auto h = 0; h < OUT_HEIGHT; ++h) { + for (auto w = 0; w < OUT_WIDTH; ++w) { + const int aidge_ofst = out * OUT_HEIGHT * OUT_WIDTH + h * OUT_WIDTH + w; + const int dev_ofst = h * OUT_WIDTH * NB_OUTPUTS + w * NB_OUTPUTS + out; + if (aidge_output[aidge_ofst] != dev_output[dev_ofst]) { + if (std::is_floating_point<DevOutput_T>::value) { + printf("[ERROR] - First error detected at %dx%dx%d (out x h x w) : aidge_out = %f vs dev_out = %f\n", + out, h, w, static_cast<double>(aidge_output[aidge_ofst]), static_cast<double>(dev_output[dev_ofst])); + } else { + printf("[ERROR] - First error detected at %dx%dx%d (out x h x w) : aidge_out = %d vs dev_out = %d\n", + out, h, w, static_cast<int>(aidge_output[aidge_ofst]), static_cast<int>(dev_output[dev_ofst])); + } + printf("Abort program.\n"); + exit(1); + } + } + } + } + printf("[SUCCESS]\n\n"); +} + +#endif // AIDGE_CMP + #endif // __AIDGE_EXPORT_CPP_NETWORK_UTILS__ diff --git a/aidge_export_cpp/templates/configuration/_def_io.jinja b/aidge_export_cpp/templates/configuration/_def_io.jinja index 
f44454769bc66e5d15e93834b28e088525930271..314ae39a491f2bf1eafec4b0ee5aaab156d3c987 100644 --- a/aidge_export_cpp/templates/configuration/_def_io.jinja +++ b/aidge_export_cpp/templates/configuration/_def_io.jinja @@ -1,11 +1,16 @@ {# NOTE: Suppose input is first #} -// INPUT CONF +{# // INPUT CONF {% for inidx in range(nb_in) -%} #define {{ in_name[inidx]|upper }}_NB_CHANNELS {{ in_chan[inidx] }} #define {{ in_name[inidx]|upper }}_IN_HEIGHT {{ in_height[inidx] }} #define {{ in_name[inidx]|upper }}_IN_WIDTH {{ in_width[inidx] }} -#define {{ in_name[inidx]|upper }}_IN_BATCH {{ in_batch[inidx] }} -{% endfor %} +{% endfor %} #} + +// INPUT CONF +#define {{ in_name[0]|upper }}_IN_BATCH {{ in_batch[0] }} +#define {{ in_name[0]|upper }}_NB_CHANNELS {{ in_chan[0] }} +#define {{ in_name[0]|upper }}_IN_HEIGHT {{ in_height[0] }} +#define {{ in_name[0]|upper }}_IN_WIDTH {{ in_width[0] }} // OUTPUT CONF {% for outidx in range(nb_out) -%} diff --git a/aidge_export_cpp/templates/configuration/_rescaling.jinja b/aidge_export_cpp/templates/configuration/_rescaling.jinja new file mode 100644 index 0000000000000000000000000000000000000000..8f3ad3d05a63af33f911ad516e98c2e2db662e88 --- /dev/null +++ b/aidge_export_cpp/templates/configuration/_rescaling.jinja @@ -0,0 +1,7 @@ +{%- if rescaling == "NoScaling" %} +static const NoScaling {{ name|upper }}_RESCALING = {}; +{%- elif rescaling == "SingleShiftScaling" %} +static const SingleShiftScaling<{{ shift_value }}> {{ name|upper }}_RESCALING = {}; +{%- elif rescaling == "FixedPointScaling" %} +static const FixedPointScaling<{{ shift_value }}, {{ coef_value }}> {{ name|upper }}_RESCALING = {}; +{%- endif %} \ No newline at end of file diff --git a/aidge_export_cpp/templates/configuration/activation_config.jinja b/aidge_export_cpp/templates/configuration/activation_config.jinja index 1ab5b21e2915a95318b531d14964077005839ffe..84b122ba5207f0022d72f35bb4f8e7064bf7fe32 100644 --- a/aidge_export_cpp/templates/configuration/activation_config.jinja +++ b/aidge_export_cpp/templates/configuration/activation_config.jinja @@ -1,7 +1,7 @@ {#- For name header -#} #ifndef {{ name|upper }}_LAYER_H #define {{ name|upper }}_LAYER_H -#include "kernels/rescaling.hpp" +#include "network/rescaling_utils.hpp" {# For layer configuration -#} {%- set nb_data = in_chan[0] * in_height[0] * in_width[0] %} @@ -9,6 +9,6 @@ #define {{ name|upper }}_ACTIVATION {{ activation }} {% include "./_def_io.jinja" %} {% include "./_meminfo.jinja" %} -static const {{ rescaling }} {{ name|upper }}_RESCALING = {}; +{% include "./_rescaling.jinja" %} #endif /* {{ name|upper }}_LAYER_H */ diff --git a/aidge_export_cpp/templates/configuration/batchnorm_config.jinja b/aidge_export_cpp/templates/configuration/batchnorm_config.jinja index ae7ef5760a63689d11f6d7369e387b55b7cb3d15..0c0bc49b521556eee1a4e455486caae44a2b86cb 100644 --- a/aidge_export_cpp/templates/configuration/batchnorm_config.jinja +++ b/aidge_export_cpp/templates/configuration/batchnorm_config.jinja @@ -1,13 +1,13 @@ {#- For name header -#} #ifndef {{ name|upper }}_LAYER_H #define {{ name|upper }}_LAYER_H -#include "kernels/rescaling.hpp" +#include "network/rescaling_utils.hpp" {# For layer configuration -#} {% include "./_def_io.jinja" %} {% include "./_meminfo.jinja" %} #define {{ name|upper }}_ACTIVATION {{ activation }} #define {{ name|upper }}_EPSILON {{ epsilon }} -static const {{ rescaling }} {{ name|upper }}_RESCALING = {}; +{% include "./_rescaling.jinja" %} #endif /* {{ name|upper }}_LAYER_H */ diff --git 
a/aidge_export_cpp/templates/configuration/convolution_config.jinja b/aidge_export_cpp/templates/configuration/convolution_config.jinja index beb7de3f081dbbcdd8c5ff69864547444d1e62f5..b72df4d10f5342f661e921f4b2a7dbaf79d32e85 100644 --- a/aidge_export_cpp/templates/configuration/convolution_config.jinja +++ b/aidge_export_cpp/templates/configuration/convolution_config.jinja @@ -1,7 +1,7 @@ {#- For name header -#} #ifndef {{ name|upper }}_LAYER_H #define {{ name|upper }}_LAYER_H -#include "kernels/rescaling.hpp" +#include "network/rescaling_utils.hpp" {# For layer configuration -#} {% include "./_def_io.jinja" %} {% include "./_meminfo.jinja" %} @@ -14,10 +14,11 @@ #define {{ name|upper }}_KERNEL_HEIGHT {{ kernel_dims[0] }} #define {{ name|upper }}_KERNEL_WIDTH {{ kernel_dims[1] }} #define {{ name|upper }}_ACTIVATION {{ activation }} -static const {{ rescaling }} {{ name|upper }}_RESCALING = {}; +{% include "./_rescaling.jinja" %} {#- Calculate sizes #} -{%- set weights_size = out_chan[0] * in_chan[0] * kernel_dims[1] * kernel_dims[0] %} +{%- set weights_size = out_chan[0] * kernel_dims[1] * kernel_dims[0] if depthwise is defined + else out_chan[0] * in_chan[0] * kernel_dims[1] * kernel_dims[0] %} #define {{ name|upper }}_WEIGHTS_SIZE {{ weights_size }} #define {{ name|upper }}_BIASES_SIZE {{ out_chan[0] }} diff --git a/aidge_export_cpp/templates/configuration/elemwise_config.jinja b/aidge_export_cpp/templates/configuration/elemwise_config.jinja index 91a0be4cc4b6fc15e8b979ecd3ca01f122ebc63d..f839602fff707bc4dc30b11835846c977130cab4 100644 --- a/aidge_export_cpp/templates/configuration/elemwise_config.jinja +++ b/aidge_export_cpp/templates/configuration/elemwise_config.jinja @@ -1,7 +1,7 @@ {#- For name header -#} #ifndef {{ name|upper }}_LAYER_H #define {{ name|upper }}_LAYER_H -#include "kernels/rescaling.hpp" +#include "network/rescaling_utils.hpp" {% include "./_def_io.jinja" %} {% include "./_meminfo.jinja" %} @@ -9,5 +9,6 @@ #define {{ name|upper }}_NB_ELTS {{ in_dims[0]|join('*') }} #define {{ name|upper }}_ACTIVATION {{ activation }} #define {{ name|upper }}_ELEM_OP {{ elemwise_op }} -static const {{ rescaling }} {{ name|upper }}_RESCALING = {}; +{% include "./_rescaling.jinja" %} + #endif /* {{ name|upper }}_LAYER_H */ diff --git a/aidge_export_cpp/templates/configuration/fullyconnected_config.jinja b/aidge_export_cpp/templates/configuration/fullyconnected_config.jinja index 3c803388894935b99d60d740c7abdb0cfc853482..856d727abc11ceb6f914e9d71d286ef5882322d6 100644 --- a/aidge_export_cpp/templates/configuration/fullyconnected_config.jinja +++ b/aidge_export_cpp/templates/configuration/fullyconnected_config.jinja @@ -1,12 +1,12 @@ {#- For name header -#} #ifndef {{ name|upper }}_LAYER_H #define {{ name|upper }}_LAYER_H -#include "kernels/rescaling.hpp" +#include "network/rescaling_utils.hpp" {# For layer configuration -#} {% include "./_def_io.jinja" %} {% include "./_meminfo.jinja" %} #define {{ name|upper }}_ACTIVATION {{ activation }} -static const {{ rescaling }} {{ name|upper }}_RESCALING = {}; +{% include "./_rescaling.jinja" %} {#- Calculate sizes #} {%- set weights_size = out_chan[0] * in_chan[0] * in_height[0] * in_width[0] %} diff --git a/aidge_export_cpp/templates/configuration/matmul_config.jinja b/aidge_export_cpp/templates/configuration/matmul_config.jinja index 38316f20947fa726085bf3577ead510e6c5096f3..d0d4958e505b3208598fe387bba357a0c3d84602 100644 --- a/aidge_export_cpp/templates/configuration/matmul_config.jinja +++ 
b/aidge_export_cpp/templates/configuration/matmul_config.jinja @@ -1,6 +1,7 @@ {#- For name header -#} #ifndef {{ name|upper }}_LAYER_H #define {{ name|upper }}_LAYER_H +#include "network/rescaling_utils.hpp" {% include "./_def_io.jinja" %} {% include "./_meminfo.jinja" %} @@ -10,7 +11,7 @@ #define {{ name|upper }}_K {{ in_dims[0][1] }} #define {{ name|upper }}_N {{ in_dims[1][1] }} #define {{ name|upper }}_ACTIVATION {{ activation }} -static const {{ rescaling }} {{ name|upper }}_RESCALING = {}; +{% include "./_rescaling.jinja" %} {#- Calculate sizes #} diff --git a/aidge_export_cpp/templates/configuration/rescaling_config.jinja b/aidge_export_cpp/templates/configuration/rescaling_config.jinja new file mode 100644 index 0000000000000000000000000000000000000000..6f4e3ad80e4f72d180a9add5fd7978181a71031d --- /dev/null +++ b/aidge_export_cpp/templates/configuration/rescaling_config.jinja @@ -0,0 +1,16 @@ +{#- For name header -#} +#ifndef {{ name|upper }}_LAYER_H +#define {{ name|upper }}_LAYER_H +#include "network/rescaling_utils.hpp" + +{# For layer configuration -#} +{% include "./_def_io.jinja" %} +{% include "./_meminfo.jinja" %} + +#define {{ name|upper }}_NB_DATA {{ in_chan[0] * in_height[0] * in_width[0] }} + +// Activation +#define {{ name|upper }}_ACTIVATION {{ activation }} +{% include "./_rescaling.jinja" %} + +#endif /* {{ name|upper }}_LAYER_H */ diff --git a/aidge_export_cpp/templates/configuration/transpose_ND_config.jinja b/aidge_export_cpp/templates/configuration/transpose_ND_config.jinja index e5ef4ffbf8ced740f00b5ba716348bc9cc06ca8c..8879fe0098aa81344d1a9ee855c12c9e4cb055d7 100644 --- a/aidge_export_cpp/templates/configuration/transpose_ND_config.jinja +++ b/aidge_export_cpp/templates/configuration/transpose_ND_config.jinja @@ -11,5 +11,4 @@ static constexpr unsigned int {{ name|upper }}_PERMUTE[] = { {{ output_dims_order | join(', ') }} }; static constexpr unsigned int {{ name|upper }}_DIMS[] = { {{ in_dims[0] | join(', ') }}}; - #endif /* {{ name|upper }}_LAYER_H */ \ No newline at end of file diff --git a/aidge_export_cpp/templates/data/aidge_tensor.jinja b/aidge_export_cpp/templates/data/aidge_tensor.jinja new file mode 100644 index 0000000000000000000000000000000000000000..3f086afd0a8f9a479a6073b463cefce21fc3e752 --- /dev/null +++ b/aidge_export_cpp/templates/data/aidge_tensor.jinja @@ -0,0 +1,7 @@ +#include <stdint.h> + +static const {{ data_t }} {{ name }} +{%- for dim in dims -%} + [{{ dim }}] +{%- endfor %} = +{{ values }}; diff --git a/aidge_export_cpp/templates/kernel_forward/_aidge_cmp.jinja b/aidge_export_cpp/templates/kernel_forward/_aidge_cmp.jinja new file mode 100644 index 0000000000000000000000000000000000000000..bf8b4d7d404abb7905d24e581c20b6625b4db291 --- /dev/null +++ b/aidge_export_cpp/templates/kernel_forward/_aidge_cmp.jinja @@ -0,0 +1,8 @@ +{%- if aidge_cmp %} +#if AIDGE_CMP + aidge_cmp<{{ out_name[0] | upper }}_NB_OUTPUTS, + {{ out_name[0] | upper }}_OUT_HEIGHT, + {{ out_name[0] | upper }}_OUT_WIDTH> + ("{{ name }}", ({{out_cdtype[0]}}*) {{ out_name[0] }}_aidge, {{ out_name[0] }}); +#endif +{%- endif %} \ No newline at end of file diff --git a/aidge_export_cpp/templates/kernel_forward/_mem_offset.jinja b/aidge_export_cpp/templates/kernel_forward/_mem_offset.jinja index b85aae8f6cde13a9314b2ffef231f5dfbe416883..f3bea038c52dc1500f6a827a24557f2bc4f7dcc9 100644 --- a/aidge_export_cpp/templates/kernel_forward/_mem_offset.jinja +++ b/aidge_export_cpp/templates/kernel_forward/_mem_offset.jinja @@ -1,6 +1,3 @@ -{% filter indent(width=4, first=False) %} - 
-{% for outidx in range(nb_out) -%} +{%- for outidx in range(nb_out) %} {{out_cdtype[outidx]}}* {{out_name[outidx]}} = ({{out_cdtype[outidx]}}*) mem + {{out_name[outidx]|upper}}_OFFSET; -{% endfor %} -{% endfilter %} +{%- endfor %} diff --git a/aidge_export_cpp/templates/kernel_forward/_save_outputs.jinja b/aidge_export_cpp/templates/kernel_forward/_save_outputs.jinja index ddefc0c7bfbb5fc5e9298091f755b0438496fe53..6865be575a613af16fc6a88fd969525abba80d0d 100644 --- a/aidge_export_cpp/templates/kernel_forward/_save_outputs.jinja +++ b/aidge_export_cpp/templates/kernel_forward/_save_outputs.jinja @@ -1,20 +1,19 @@ -/* -#ifdef SAVE_OUTPUTS + +#if SAVE_OUTPUTS {% for outidx in range(nb_out) -%} - FILE* {{out_name[outidx]|upper}}_STREAM = fopen("outputs/{{out_name[outidx]}}.txt", "w"); + FILE* {{out_name[outidx]|upper}}_STREAM = fopen("data/export_outputs/{{out_name[outidx]}}.txt", "w"); saveOutputs<{{out_cdtype[outidx]}}>( {{out_name[outidx]|upper}}_NB_OUTPUTS, {{out_name[outidx]|upper}}_OUT_HEIGHT, {{out_name[outidx]|upper}}_OUT_WIDTH, - {{out_name[outidx]|upper}}_CONT_OFFSET, + {#- {{out_name[outidx]|upper}}_CONT_OFFSET, {{out_name[outidx]|upper}}_CONT_SIZE, {{out_name[outidx]|upper}}_WRAP_OFFSET, {{out_name[outidx]|upper}}_WRAP_SIZE, - {{out_name[outidx]|upper}}_STRIDE, + {{out_name[outidx]|upper}}_STRIDE, #} {{out_name[outidx]}}, {{out_name[outidx]|upper}}_STREAM, - Format::{{out_format[outidx]}}); + Format::NHWC); fclose({{out_name[outidx]|upper}}_STREAM); {% endfor %} #endif -*/ diff --git a/aidge_export_cpp/templates/kernel_forward/activation_forward.jinja b/aidge_export_cpp/templates/kernel_forward/activation_forward.jinja index 9a39495e268361a16ee5215ecb15c3b3b9bd9479..1dc4eb530aeafdac10b59b3b2c8a0313d4411659 100644 --- a/aidge_export_cpp/templates/kernel_forward/activation_forward.jinja +++ b/aidge_export_cpp/templates/kernel_forward/activation_forward.jinja @@ -4,4 +4,5 @@ activation_forward<{{name|upper}}_NB_DATA, {{name|upper}}_ACTIVATION> ({{in_name[0]}}, {{out_name[0]}}, {{name|upper}}_RESCALING); {% include "./_save_outputs.jinja" %} +{% include "./_aidge_cmp.jinja" %} {% endfilter %} diff --git a/aidge_export_cpp/templates/kernel_forward/batchnorm_forward.jinja b/aidge_export_cpp/templates/kernel_forward/batchnorm_forward.jinja index 03fd8e89921bfa27f4eeb33b05a47b40329fa5de..69fa69e2b9cb2c7d307c476e0d756fc0f406bc81 100644 --- a/aidge_export_cpp/templates/kernel_forward/batchnorm_forward.jinja +++ b/aidge_export_cpp/templates/kernel_forward/batchnorm_forward.jinja @@ -5,6 +5,10 @@ batchnorm_forward<{{ out_name[0]|upper }}_OUT_BATCH, {{ out_name[0]|upper }}_OUT_HEIGHT, {{ out_name[0]|upper }}_OUT_WIDTH, {{name|upper}}_ACTIVATION> - ({{in_name[0]}}, {{out_name[0]}}, {{in_name[1]}}, {{in_name[2]}}, {{in_name[3]}}, {{in_name[4]}}, {{name|upper}}_EPSILON, {{name|upper}}_RESCALING); + ({{in_name[0]}}, {{out_name[0]}}, + {{in_name[1]}}, {{in_name[2]}}, + {{in_name[3]}}, {{in_name[4]}}, + {{name|upper}}_EPSILON, {{name|upper}}_RESCALING); {% include "./_save_outputs.jinja" %} +{% include "./_aidge_cmp.jinja" %} {% endfilter %} diff --git a/aidge_export_cpp/templates/kernel_forward/concat_forward.jinja b/aidge_export_cpp/templates/kernel_forward/concat_forward.jinja index 7a77e904db6c18f338f93099f4f117c9285bf6fc..88cbc9a25f6e5342c2d3cc14f8e40fe452716944 100644 --- a/aidge_export_cpp/templates/kernel_forward/concat_forward.jinja +++ b/aidge_export_cpp/templates/kernel_forward/concat_forward.jinja @@ -14,9 +14,9 @@ unsigned int {{ name|upper }}_SIZES[] = { concat_forward<{{ name|upper 
}}_AXIS_SIZE_POST, {{ name|upper }}_AXIS_SIZE_PRE, - {{ nb_in }}, - float> ( - {{ name|upper }}_INPUTS, - {{ name|upper }}_SIZES, - {{ out_name[0] }}); - {% endfilter %} + {{ nb_in }}, float> + ({{ name|upper }}_INPUTS, + {{ name|upper }}_SIZES, + {{ out_name[0] }}); + +{%- endfilter %} diff --git a/aidge_export_cpp/templates/kernel_forward/convolution_forward.jinja b/aidge_export_cpp/templates/kernel_forward/convolution_forward.jinja index 7d0af8c6f75df47825e67a8b47258c3f8469fc6a..bdde325707eeb497a93ba2084c0672bd7f7e5daa 100644 --- a/aidge_export_cpp/templates/kernel_forward/convolution_forward.jinja +++ b/aidge_export_cpp/templates/kernel_forward/convolution_forward.jinja @@ -17,4 +17,5 @@ convolution{{ "_depthwise" if depthwise is defined else "" }}_forward<{{ in_name {{name|upper}}_ACTIVATION> ({{in_name[0]}}, {{out_name[0]}}, {{in_name[1]}}, {{in_name[2]}}, {{name|upper}}_RESCALING); {% include "./_save_outputs.jinja" %} +{% include "./_aidge_cmp.jinja" %} {% endfilter %} diff --git a/aidge_export_cpp/templates/kernel_forward/elemwise_forward.jinja b/aidge_export_cpp/templates/kernel_forward/elemwise_forward.jinja index f60d163dcbfd6eff75e6b66c37bc5e57cf2cfca9..1a99921c185d14f4494c923092e2c36ab684945e 100644 --- a/aidge_export_cpp/templates/kernel_forward/elemwise_forward.jinja +++ b/aidge_export_cpp/templates/kernel_forward/elemwise_forward.jinja @@ -3,6 +3,10 @@ elemwise_forward<{{name|upper}}_NB_ELTS, {{name|upper}}_ELEM_OP, {{name|upper}}_ACTIVATION> - ({{out_name[0]}}, {{name|upper}}_RESCALING, {{in_name[0]}}, {{in_name[1]}}); + ({{out_name[0]}}, + {{name|upper}}_RESCALING, + {{in_name[0]}}, + {{in_name[1]}}); {% include "./_save_outputs.jinja" %} +{% include "./_aidge_cmp.jinja" %} {% endfilter %} diff --git a/aidge_export_cpp/templates/kernel_forward/fullyconnected_forward.jinja b/aidge_export_cpp/templates/kernel_forward/fullyconnected_forward.jinja index cac97de22b20c4c8e0953e0d6cb2f40a18d0cb30..9a35d799be09d1bc5b311f750e64b38656f723c1 100644 --- a/aidge_export_cpp/templates/kernel_forward/fullyconnected_forward.jinja +++ b/aidge_export_cpp/templates/kernel_forward/fullyconnected_forward.jinja @@ -9,4 +9,5 @@ fullyconnected_forward<{{ in_name[0]|upper }}_NB_CHANNELS, {{name|upper}}_ACTIVATION> ({{in_name[0]}}, {{out_name[0]}}, {{in_name[1]}}, {{in_name[2]}}, {{name|upper}}_RESCALING); {% include "./_save_outputs.jinja" %} +{% include "./_aidge_cmp.jinja" %} {% endfilter %} diff --git a/aidge_export_cpp/templates/kernel_forward/leakyrelu_forward.jinja b/aidge_export_cpp/templates/kernel_forward/leakyrelu_forward.jinja index 591fafeec996f9b7dc8f52a779cda5eea8a53eae..89cf2591139ef2719d516c2fba522534dbb806c8 100644 --- a/aidge_export_cpp/templates/kernel_forward/leakyrelu_forward.jinja +++ b/aidge_export_cpp/templates/kernel_forward/leakyrelu_forward.jinja @@ -3,4 +3,5 @@ leakyrelu_forward<{{name|upper}}_NB_DATA> ({{input_name}}, {{output_name}}, {{name|upper}}_ALPHA); {% include "./_save_outputs.jinja" %} +{% include "./_aidge_cmp.jinja" %} {% endfilter %} diff --git a/aidge_export_cpp/templates/kernel_forward/matmul_forward.jinja b/aidge_export_cpp/templates/kernel_forward/matmul_forward.jinja index 64b3df301794e1cb3d56170646a6b9524f18a6ab..090fbac398b207d29a1f4ebf94eca564e032a53b 100644 --- a/aidge_export_cpp/templates/kernel_forward/matmul_forward.jinja +++ b/aidge_export_cpp/templates/kernel_forward/matmul_forward.jinja @@ -6,4 +6,5 @@ matmul_forward<{{name|upper}}_M, {{name|upper}}_ACTIVATION> ({{in_name[0]}}, {{in_name[1]}}, {{out_name[0]}}, {{name|upper}}_RESCALING); {% 
include "./_save_outputs.jinja" %} +{% include "./_aidge_cmp.jinja" %} {% endfilter %} diff --git a/aidge_export_cpp/templates/kernel_forward/pooling_forward.jinja b/aidge_export_cpp/templates/kernel_forward/pooling_forward.jinja index c730923cfc4f8b534cab85a82b4fce5161a528de..fb1f2b7e0a1b33602c93b96856533a93eeec9023 100644 --- a/aidge_export_cpp/templates/kernel_forward/pooling_forward.jinja +++ b/aidge_export_cpp/templates/kernel_forward/pooling_forward.jinja @@ -16,4 +16,5 @@ pooling_forward<{{ in_name[0]|upper }}_NB_CHANNELS, {{name|upper}}_ACTIVATION> ({{in_name[0]}}, {{out_name[0]}}); {% include "./_save_outputs.jinja" %} +{% include "./_aidge_cmp.jinja" %} {% endfilter %} diff --git a/aidge_export_cpp/templates/kernel_forward/rescaling_forward.jinja b/aidge_export_cpp/templates/kernel_forward/rescaling_forward.jinja new file mode 100644 index 0000000000000000000000000000000000000000..ce4ffb869c7f99ac789311a9bc98b926253f968f --- /dev/null +++ b/aidge_export_cpp/templates/kernel_forward/rescaling_forward.jinja @@ -0,0 +1,9 @@ +{% filter indent(width=4, first=False) %} +{% include "./_mem_offset.jinja" %} +rescaling_forward<{{name|upper}}_NB_DATA, + {{name|upper}}_ACTIVATION> + ({{in_name[0]}}, + {{out_name[0]}}, + {{name|upper}}_RESCALING); +{% include "./_save_outputs.jinja" %} +{% endfilter %} diff --git a/aidge_export_cpp/templates/kernel_forward/reshape_forward.jinja b/aidge_export_cpp/templates/kernel_forward/reshape_forward.jinja index f9752bcc85255ba321082fbf5cf599f45b3ab4c4..6af8ece3428b563689f8373c0dc7560d867fff1d 100644 --- a/aidge_export_cpp/templates/kernel_forward/reshape_forward.jinja +++ b/aidge_export_cpp/templates/kernel_forward/reshape_forward.jinja @@ -3,4 +3,5 @@ reshape_forward<{{name|upper}}_NB_ELTS> ({{in_name[0]}}, {{in_name[1]}}, {{out_name[0]}}); {% include "./_save_outputs.jinja" %} +{% include "./_aidge_cmp.jinja" %} {% endfilter %} diff --git a/aidge_export_cpp/templates/kernel_forward/transpose_ND_forward.jinja b/aidge_export_cpp/templates/kernel_forward/transpose_ND_forward.jinja index 25af5bd9a3cdab4c91d5f2f09dae9144348729db..5fc06d664563bfc576386f9fcf8cbcfe227ab5b9 100644 --- a/aidge_export_cpp/templates/kernel_forward/transpose_ND_forward.jinja +++ b/aidge_export_cpp/templates/kernel_forward/transpose_ND_forward.jinja @@ -1 +1,12 @@ -transpose_ND_forward<{{in_cdtype[0]}},{{name|upper}}_NB_DIMS>({{in_name[0]}},{{name|upper}}_DIMS,{{name|upper}}_PERMUTE,{{ out_name[0]|upper }}_SIZE,{{out_name[0]}}); \ No newline at end of file +{% filter indent(width=4, first=False) %} +{% include "./_mem_offset.jinja" %} +transpose_ND_forward<{{in_cdtype[0]}}, + {{name|upper}}_NB_DIMS> + ({{in_name[0]}}, + {{name|upper}}_DIMS, + {{name|upper}}_PERMUTE, + {{out_name[0]|upper}}_SIZE, + {{out_name[0]}}); +{% include "./_save_outputs.jinja" %} +{% include "./_aidge_cmp.jinja" %} +{% endfilter %} diff --git a/aidge_export_cpp/unit_tests/test_examples.py b/aidge_export_cpp/unit_tests/test_examples.py new file mode 100644 index 0000000000000000000000000000000000000000..d3bb40636eba420b457263873ca00fd6b2d3b6a9 --- /dev/null +++ b/aidge_export_cpp/unit_tests/test_examples.py @@ -0,0 +1,48 @@ +import subprocess +import sys +import os +import pytest +from pathlib import Path + +CURRENT_DIR = Path(__file__).parent +EXAMPLES_DIR = CURRENT_DIR / "../../examples" + +# Dictionary of test cases: {id: (script_name, script_args)} +TEST_CASES = { + "lenet-no-args": ("export_LeNet/lenet.py", []), + "lenet-int8": ("export_LeNet/lenet.py", ["--dtype=int8"]), + "resnet18-no-args": 
("export_ResNet18/resnet18.py", ["--mock_db"]), + "resnet18-int8": ("export_ResNet18/resnet18.py", ["--mock_db", "--dtype=int8"]) +} + +def generate_test_cases(): + """Parse TEST_CASES to provide valid pytest params. + """ + for test_id, (script, args) in TEST_CASES.items(): + yield pytest.param(script, args, id=test_id) + +@pytest.mark.parametrize(("script_name", "script_args"), generate_test_cases()) +def test_example_scripts_run_without_error(script_name, script_args): + """Basic test to verify that examples script run withoput raising an Error. + This test DO NOT check that the examples are working only that they are not broken. + """ + script_path = os.path.join(EXAMPLES_DIR, script_name) + result = subprocess.run( + [sys.executable, script_path] + script_args, # Or any lightweight args + capture_output=True, + text=True + ) + assert result.returncode == 0, f"{script_name} failed with error:\n{result.stderr}\n\nTraceback:\n{result.stdout}" + + +def main(): + import sys + + print( + f"{sys.argv[0]}: Warning: skipped: run with: pytest {sys.argv[0]}", + file=sys.stderr, +) + + +if __name__ == "__main__": + main() diff --git a/aidge_export_cpp/unit_tests/test_export.py b/aidge_export_cpp/unit_tests/test_export.py index 9215ff8cece31d829b5ecb024f2cbd32eb10bcaf..82a9a3512e0dd469a82a4effd4a44134141e1da8 100644 --- a/aidge_export_cpp/unit_tests/test_export.py +++ b/aidge_export_cpp/unit_tests/test_export.py @@ -11,6 +11,8 @@ import re import shutil from aidge_core.utils import run_command +from aidge_export_cpp import cpp_fuse_to_metaops, set_nodes_names + def initFiller(model): # Initialize parameters (weights and biases) for node in model.get_nodes(): @@ -98,6 +100,8 @@ class test_operator_export(unittest.TestCase): else: aidge_core.constant_filler(value, default_value) + # Fuse operators to match implemented cpp kernels + cpp_fuse_to_metaops(graph_view) scheduler = aidge_core.SequentialScheduler(graph_view) @@ -108,6 +112,9 @@ class test_operator_export(unittest.TestCase): scheduler.forward(data=in_tensor) + # Name the metaops + set_nodes_names(scheduler) + # Note the convention ``<op_name>_test`` is useful for gitignore to avoid pushing generated export by accident. export_folder = op_name + "_test" diff --git a/aidge_export_cpp/utils.py b/aidge_export_cpp/utils.py index 915c2c63f9dee838a0dc77ca6304cbf56720a9bf..f25e969da0c9d602cd739bfe4c2cef7f6fdc8db5 100644 --- a/aidge_export_cpp/utils.py +++ b/aidge_export_cpp/utils.py @@ -1,11 +1,5 @@ -from pathlib import Path from importlib.metadata import version -# Constants -FILE = Path(__file__).resolve() -ROOT = FILE.parents[0] - - def show_version(): version_aidge_export_cpp = version("aidge_export_cpp") print(f"Aidge Export CPP: {version_aidge_export_cpp}") diff --git a/examples/export_LeNet/.gitignore b/examples/export_LeNet/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..98ce649a943a90590bb8f4f067a3c1ac9691dcbc --- /dev/null +++ b/examples/export_LeNet/.gitignore @@ -0,0 +1,6 @@ +# Exclude export artefacts +export_lenet_int8/ +log_outputs/* +assets/* +data/* +log.txt diff --git a/examples/export_LeNet/lenet.py b/examples/export_LeNet/lenet.py new file mode 100644 index 0000000000000000000000000000000000000000..1cda87b9583a37007b61ef481d49c2bed037020f --- /dev/null +++ b/examples/export_LeNet/lenet.py @@ -0,0 +1,504 @@ +""" +lenet.py + +Run this file to export a LeNet using the Aidge CPP Export module. 
+""" + +import os +import shutil +import random +import aidge_core.utils +import numpy as np +import subprocess + +# Aidge Modules +import aidge_core +import aidge_onnx +import aidge_backend_cpu +import aidge_quantization +import aidge_export_cpp + +from aidge_export_cpp.export_utils import ( + cpp_fuse_to_metaops, + set_nodes_names, + set_nodes_datatypes, + exclude_unwanted_producers) + +from aidge_core.export_utils import remove_optional_inputs, get_node_from_metaop + +# Torch (Dataset) +import torch +import torch.nn.functional as F +from torch import nn +from torchvision import transforms, datasets + +# Arguments +import argparse + +supported_types = ["float32", "int8"] + +parser = argparse.ArgumentParser(description="Export the LeNet model with the aidge_export_cpp module.") +parser.add_argument("--dev", action="store_true", help="Export in dev mode") +parser.add_argument("--no_cuda", action="store_true", help="Disable USE_CUDA usage to perform inferences and training.") +parser.add_argument("--dtype", type=str, choices=supported_types, default="float32", help="Specify the targeted datatype : [int8, float32]") +parser.add_argument("--aidge_cmp", action="store_true", help="Use aidge tensor results as reference.") +parser.add_argument( + '-v', '--verbose', + action='count', + default=0, + help = ( + "Set the verbosity level of the console output." + "Use -v to increase verbosity, with the following levels in ascending ordern" + "default WARN - Only warnings and higher (WARN, ERROR, FATAL) are displayed.n" + "-v NOTICE - Notices and higher (NOTICE, WARN, ERROR, FATAL) are displayed.n" + "-vv INFO - Informational messages and higher (INFO, NOTICE, WARN, ERROR, FATAL) are displayed.n" + "-vvv DEBUG - All messages, including debug information, are displayed.n" + "Available levels in descending order of severityn" + "DEBUG < INFO < NOTICE < WARN < ERROR < FATAL." + ) +) +args = parser.parse_args() + +USE_CUDA = not args.no_cuda + +# Setting Aidge verbose level +if args.verbose == 0: + aidge_core.Log.set_console_level(aidge_core.Level.Error) +elif args.verbose == 1: + aidge_core.Log.set_console_level(aidge_core.Level.Notice) +elif args.verbose == 2: + aidge_core.Log.set_console_level(aidge_core.Level.Info) +elif args.verbose >= 3: + aidge_core.Log.set_console_level(aidge_core.Level.Debug) + +if USE_CUDA: + import aidge_backend_cuda + +# ------------------------------------------------------------ +# EXPORT CONFIG +# ------------------------------------------------------------ + +""" +Export configuration details : +- RNG_SEED : Fix a random seed for torch to always get the same images from the dataset, + therefore always getting the same output. +- NB_TEST : Number of example inferences to perform (used to get an accuracy approximation). +- NB_CALIB : Number of samples used for the calibration step of quantization. +- MODEL_NAME : Should be the same name as the onnx file you want to load and export. +- DO_EXAMPLES : Perform example inferences (and allow to get accuracy approximation) +- NB_BITS : Quantization output precision. Should be 8 to work with this export. +- TARGET_TYPE : The aidge datatype for tensors to be casted after the quantization step [float64, float32, int32]. +- OPTIM_SIGN : Quantization optional optimization based on data sign. +- SINGLE_SHIFT : Quantization option specifying if inserted scaling nodes should be + single shift or floating point. +- NO_QUANT : Skip the quantization step. +- CLIPPING : Clipping method during quantization. 
+- FOLD_GRAPH : The quantization step adds cast nodes to cast the graph into the given TARGET_TYPE.
+               Enabling FOLD_GRAPH will automatically fold these nodes into the following
+               ones at the end of the quantization step.
+- USE_CUDA : Determines whether the quantization step uses the GPU. It is generally recommended
+             to enable this option if you have access to GPUs, as the quantization step
+             may take a while to complete.
+- DEV_MODE : The dev mode makes it easier to identify errors, by exporting the model with
+             symbolic links so that the source files can be modified directly in the
+             generated export (make sure you installed the export plugin by running
+             `pip install -e .`).
+             Enabled by running this python file with the --dev argument.
+- AIDGE_CMP : Saves and exports the outputs generated by the aidge inferences in order
+              to compare them with the export outputs.
+              Enabled by running this python file with the --aidge_cmp argument.
+"""
+
+print(" Available backends : ", aidge_core.Tensor.get_available_backends())
+
+quantize_model = False
+NB_BITS = 32
+TARGET_TYPE = aidge_core.dtype.float32
+
+if args.dtype == "float32":
+    quantize_model = False
+elif args.dtype == "int8":
+    quantize_model = True
+    NB_BITS = 8
+    TARGET_TYPE = aidge_core.dtype.int32 # int8 not yet available
+else:
+    print(f"[ERROR] Datatype '{args.dtype}' not supported.")
+    print(f"[ERROR] Supported datatypes: {supported_types}.")
+    exit(1)
+
+RNG_SEED = 1234
+NB_TEST = 10 # Example inferences
+NB_CALIB = 20 # Calibration set
+MODEL_NAME = 'lenet'
+EXPORT_FOLDER = f"export_{MODEL_NAME}_int8"
+DO_EXAMPLES = True
+
+# Quantization params
+OPTIM_SIGN = False
+SINGLE_SHIFT = True
+ROUNDING = True
+NO_QUANT = False
+CLIPPING = aidge_quantization.Clipping.MSE # 'MAX'
+FOLD_GRAPH = True
+
+# Export modes
+DEV_MODE = args.dev
+AIDGE_CMP = args.aidge_cmp
+
+print('\n RNG_SEED = ', RNG_SEED)
+print(' MODEL_NAME = ', MODEL_NAME)
+print(' NB_TEST = ', NB_TEST)
+print(' NB_CALIB = ', NB_CALIB)
+print(' NB_BITS = ', NB_BITS)
+print(' OPTIM_SIGN = ', OPTIM_SIGN)
+print(' NO_QUANT = ', NO_QUANT)
+print(' CLIPPING = ', CLIPPING)
+print(' SINGLE_SHIFT = ', SINGLE_SHIFT)
+print(' USE_CUDA = ', USE_CUDA)
+print(' DEV_MODE = ', DEV_MODE)
+
+torch.manual_seed(RNG_SEED)
+random.seed(RNG_SEED)
+
+backend = "cuda" if USE_CUDA else "cpu"
+
+# ------------------------------------------------------------
+# CREATE THE LENET MODEL
+# ------------------------------------------------------------
+"""
+The LeNet model is created and trained using the create_lenet file.
+If a lenet.onnx file is already present in the current folder, this step will be skipped.
+The generated network is not yet quantized.
+""" +# Define the target path and filename +file_url = "https://huggingface.co/EclipseAidge/LeNet/resolve/main/lenet_mnist.onnx?download=true" +file_path = MODEL_NAME + "_mnist.onnx" +aidge_core.utils.download_file(file_path, file_url) + +# -------------------------------------------------------------- +# CREATE THE SAMPLES +# -------------------------------------------------------------- + +transform = transforms.ToTensor() +test_set = datasets.MNIST(root='./data', train=False, transform=transform, download=True) + +tensors = [] +labels = [] +index = 0 +for in_tensor, label in test_set: + array = np.array(in_tensor) + array = np.reshape(array, (1, 1, 28, 28)) + tensor = aidge_core.Tensor(array) + tensor.set_backend(backend) + tensor.set_datatype(aidge_core.dtype.float32) + tensors.append(tensor) + labels.append(label) + index += 1 + if (index == max(NB_TEST, NB_CALIB)): + break + +# -------------------------------------------------------------- +# LOAD THE MODEL +# -------------------------------------------------------------- + +""" +Load the .onnx model and perform some usual graph modifications : + - Remove the flatten nodes; + - Fuse the batchnorm nodes into the biases producers. + - Expand the metaOperators to perform the desired fusions. +""" + +model = aidge_onnx.load_onnx(file_path, verbose=False) +aidge_core.remove_flatten(model) +aidge_core.fuse_batchnorm(model) +aidge_core.expand_metaops(model) +model.save("imported_model") + +# -------------------------------------------------------------- +# SET UP THE AIDGE SCHEDULER +# -------------------------------------------------------------- + +""" +The scheduler is an ordered version of the model, allowing to schedule +nodes to be able to run inferences, for instance. +""" + +# Set up the backend +model.set_datatype(aidge_core.dtype.float32) +model.set_backend(backend) + +# Create the Scheduler +scheduler = aidge_core.SequentialScheduler(model) + +# -------------------------------------------------------------- +# RUN SOME EXAMPLES INFERENCES +# -------------------------------------------------------------- + +def propagate(model, scheduler, tensor): + """ + Propagate the given tensor into the model and return the + output tensor. 
+ """ + print(f"Propagate: {tensor.backend()}") + # Run the inference + scheduler.forward(True, [tensor]) + # Gather the results + output_node = model.get_output_nodes().pop() + output_tensor = output_node.get_operator().get_output(0).clone() + output_tensor.set_backend("cpu") + return np.array(output_tensor) + +accuracy = 0 +if (DO_EXAMPLES): + print('\n EXAMPLE INFERENCES :') + nb_valid = 0 + base_values = [] + for i in range(NB_TEST): + print(f"Inférence: {tensors[i].backend()}") + output_array = propagate(model, scheduler, tensors[i]) + print(labels[i], ' VS ', np.argmax(output_array), ' -> ', np.max(output_array)) + base_values.append(np.max(output_array)) + if (labels[i] == np.argmax(output_array)): + nb_valid += 1 + accuracy = nb_valid / NB_TEST + print('\n MODEL ACCURACY = ', accuracy * 100, '%') + +# -------------------------------------------------------------- +# PERFORM THE QUANTIZATION +# -------------------------------------------------------------- + +if quantize_model: + aidge_quantization.quantize_network( + network = model, + nb_bits = NB_BITS, + calibration_set = tensors[0:NB_CALIB], + clipping_mode = CLIPPING, + target_type = TARGET_TYPE, + no_quant = NO_QUANT, + optimize_signs = OPTIM_SIGN, + single_shift = SINGLE_SHIFT, + use_cuda = USE_CUDA, + fold_graph = FOLD_GRAPH) + +# Tag the scaling producers +for node in model.get_nodes(): + if node.type() == "Quantizer": + for SNode in get_node_from_metaop(node, "BitShift"): + SNode.get_parent(1).attributes().shift_prod = True + for CNode in get_node_from_metaop(node, "Mul"): + CNode.get_parent(1).attributes().coef_prod = True + +model.save("post_ptq_model") + +# -------------------------------------------------------------- +# RESCALE THE INPUT SAMPLES +# -------------------------------------------------------------- + +""" +Once the quantization is done, the graph now only accepts integer inputs. +So we need to rescale the dataset for the data to be within [0, 255]. +Also, tensors should be casted to be the same type as TARGET_TYPE. +""" + +if quantize_model: + rescaling = 2**(NB_BITS-1)-1 + for i in range(NB_TEST): + tensors[i].set_backend("cpu") + array = np.array(tensors[i]) * rescaling + array = np.round(array).astype(int) + tensors[i] = aidge_core.Tensor(array) + tensors[i].set_datatype(TARGET_TYPE) + tensors[i].set_backend("cpu") + # Setting modele to CPU for export + model.set_backend("cpu") + + +# -------------------------------------------------------------- +# GENERATE NEW SCHEDULER +# -------------------------------------------------------------- + +""" +Each time the graph has been change, it has to be reset. +Here some Quantizer and Cast nodes have been added. +""" + +""" [Issue] +We need first to manually add an input tensor with the correct datatype, +as it is not automatically done in PTQ. 
+""" +if quantize_model: + input_node = model.get_ordered_inputs()[0] + input_node[0].get_operator().set_input(0, tensors[0]) + scheduler.reset_scheduling() + +# -------------------------------------------------------------- +# PERFORM THE EXAMPLE INFERENCES AGAIN +# -------------------------------------------------------------- + +if (DO_EXAMPLES and quantize_model): + print('\n QUANTIZED EXAMPLE INFERENCES:') + nb_valid = 0 + post_values = [] + for i in range(NB_TEST): + print(f"QEI: {tensors[i].backend()}") + output_array = propagate(model, scheduler, tensors[i]) + print(labels[i], ' VS ', np.argmax(output_array), ' -> ', np.max(output_array)) + post_values.append(np.max(output_array)) + if (labels[i] == np.argmax(output_array)): + nb_valid += 1 + + quant_accuracy = nb_valid / NB_TEST + print('\n MODEL ACCURACY = ', accuracy * 100, '%') + print('\n QUANTIZED ACCURACY = ', quant_accuracy * 100, '%') + + +# -------------------------------------------------------------- +# FUSE NODES INTO METAOPS +# -------------------------------------------------------------- + +""" +Here is made the link between the Aidge model and the CPP +kernels implementation. In aidge, all the nodes calculations +are performed separately (Pad -> Conv -> Quantizer -> ReLU -> ...). + +However within the CPP export, some core operators are merged +in meta operators. For instance, the padding, scaling and ReLU are +performed within the Conv kernel. + +In this step, we use graph regex techniques to find the desired patterns +within the graph in order to match the export implementation of the kernels. +""" + +# Exclude unwanted producers +""" +Before fusing the nodes, we set a tag on the Producers in order to exclude +from the export the ones holding coefficients, as they are directly handled +within the layers parameters. +""" +exclude_unwanted_producers(model) + +# Fuse nodes +cpp_fuse_to_metaops(model) + +# Remove optional inputs +""" +Some optional inputs may be added by the quantization step (for instance with the clipping nodes). +Here we make sure that they will not be considered as actual graph inputs by the export, by +excluding them from the ordered_inputs list of the model. +""" +remove_optional_inputs(model) + +# Reset scheduler to apply graph modifications +""" +The scheduler always needs to be reset after graph manipulation. +""" +scheduler.reset_scheduling() + +# Name newly created MetaOps +""" +As names are optional in Aidge, the fuse_to_metaops function will not automatically +give a name to the newly created metaOps. However, in an export context, we need +our operators to be named, as this will be used to name the corresponding files. +""" + +scheduler.generate_scheduling() # Scheduler needs to be generated as it has just been reset +set_nodes_names(scheduler) + +# -------------------------------------------------------------- +# LOG OUTPUTS FOR THE FIRST IMAGE OF THE TEST DATASET +# -------------------------------------------------------------- + +""" +Here a final inference is made on the input we want to export and run. +This will ensure that all the feature maps tensors (between the layers) +hold the data corresponding to this specific input. +Then, the "log_outputs()" function (called later) will store these tensors +into log files that may be exported as well for comparison purpose. 
+""" + +output_array = propagate(model, scheduler, tensors[0]) + +print("### Exported Sample ###") +print("Aidge prediction :", np.argmax(output_array), "(" + str(np.max(output_array)) + ")") +print("Label :", labels[0]) + +# -------------------------------------------------------------- +# HANDLING DATATYPE +# -------------------------------------------------------------- + +""" +Now, despite the quantization stage, all the tensors of the model are +still "virtually" in Int32. Before exporting the model, we have to set +tensors' datatypes to Int8, except for biases which should remain in Int32. +""" + +if quantize_model: + set_nodes_datatypes(model) + +# Store tensors values into log files +""" +Once the tensors have been casted, the log_outputs() function can be +called to store their values into log files. +""" + +if os.path.isdir("log_outputs"): + shutil.rmtree("log_outputs") +model.log_outputs("log_outputs") + +# -------------------------------------------------------------- +# TEST MODE +# -------------------------------------------------------------- + +""" +The test mode is mainly used for validation and benchmark. The model will be +exported in a way that each layer's result will be compared with the CPU implementation. +The timings for each layer will be displayed. +In case of error, you will be able to enter debug mode, showing in-layer data or +changing the inputs of the layer, to isolate the source of the issue. +""" + +for node in model.get_nodes(): + node.attributes().dev_mode = DEV_MODE + +# -------------------------------------------------------------- +# AIDGE CMP +# -------------------------------------------------------------- + +""" +If the --aidge_cmp option is enabled, the feature maps generated by aidge with the +backend cpu will be exported in the generated export. It will be used as reference +to verify that the results with the optimized kernels are correct for the exported +model. +This option has to be passed to each node in order to be used within the Export Nodes. +(JConv, JPad, ...) that you can find in the "operators" folder. 
+""" + +if AIDGE_CMP: + for node in model.get_nodes(): + node.attributes().aidge_cmp = True + +# -------------------------------------------------------------- +# EXPORT THE MODEL +# -------------------------------------------------------------- + +model.save("exported_model") + +aidge_export_cpp.export(EXPORT_FOLDER, + model, + scheduler, + # tensors[0], + labels = aidge_core.Tensor(labels[0]), + dev_mode = DEV_MODE, + aidge_cmp = AIDGE_CMP) +print("\n### Compiling the export ###") +try: + for std_line in aidge_core.utils.run_command(["make"], cwd=EXPORT_FOLDER): + print(std_line, end="") +except subprocess.CalledProcessError as e: + raise RuntimeError(0, f"An error occurred, failed to build export.") from e +print("\n### Running the export ###") +try: + for std_line in aidge_core.utils.run_command(["./bin/run_export"], cwd=EXPORT_FOLDER): + print(std_line, end="") +except subprocess.CalledProcessError as e: + raise RuntimeError(0, f"An error occurred, failed to run export.") from e \ No newline at end of file diff --git a/examples/export_ResNet18/.gitignore b/examples/export_ResNet18/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..a6e4e9706bae169c65e15f7a2a8c090fe21618c7 --- /dev/null +++ b/examples/export_ResNet18/.gitignore @@ -0,0 +1,6 @@ +# Exclude export artefacts +export_resnet18_int8/ +log_outputs/* +assets/* +data/* +log.txt diff --git a/examples/export_ResNet18/resnet18.py b/examples/export_ResNet18/resnet18.py new file mode 100644 index 0000000000000000000000000000000000000000..81e335567194a83794bd4be8c2dcbd8056fa3ebf --- /dev/null +++ b/examples/export_ResNet18/resnet18.py @@ -0,0 +1,580 @@ +""" +resnet.py + +This file allows the generation of a resnet18 CPP export. + +In order for this file to work properly, you should first download the imagenet dataset +(search for "ILSVRC2012"). +""" + +import random +import numpy as np +import os +import shutil +from PIL import Image +import requests +from pathlib import Path +import subprocess +from random import randint +# Aidge Modules +import aidge_core +import aidge_onnx +import aidge_backend_cpu +import aidge_quantization +import aidge_export_cpp + +from aidge_export_cpp.export_utils import ( + cpp_fuse_to_metaops, + exclude_unwanted_producers, + set_nodes_names, + set_nodes_datatypes, + normalize) + +from aidge_core.export_utils import remove_optional_inputs, get_node_from_metaop + +# Torch (Dataset) +import torch +from torchvision import transforms +# Arguments +import argparse + +supported_types = ["float32", "int8"] + +parser = argparse.ArgumentParser(description="Export the ResNet18 model with the aidge_export_cpp module.") +parser.add_argument("--dev", action="store_true", help="Export in dev mode") +parser.add_argument("--no_cuda", action="store_true", help="Disable USE_CUDA usage to perform inferences and training.") +parser.add_argument("--dtype", type=str, choices=supported_types, default="float32", help="Specify the targeted datatype : [int8, float32]") +parser.add_argument("--aidge_cmp", action="store_true", help="Use aidge tensor results as reference.") +parser.add_argument( + '-v', '--verbose', + action='count', + default=0, + help = ( + "Set the verbosity level of the console output." 
+ "Use -v to increase verbosity, with the following levels in ascending ordern" + "default WARN - Only warnings and higher (WARN, ERROR, FATAL) are displayed.n" + "-v NOTICE - Notices and higher (NOTICE, WARN, ERROR, FATAL) are displayed.n" + "-vv INFO - Informational messages and higher (INFO, NOTICE, WARN, ERROR, FATAL) are displayed.n" + "-vvv DEBUG - All messages, including debug information, are displayed.n" + "Available levels in descending order of severityn" + "DEBUG < INFO < NOTICE < WARN < ERROR < FATAL." + ) +) + +parser.add_argument("--mock_db", action="store_true", help="Use a mock database instead of real one (TEST ONLY).") + + +parser.add_argument( + "--imagenet_path", + type=str, + default="/database/ILSVRC2012/val", + help="Path to the ImageNet validation images folder (default: /database/ILSVRC2012/val)" +) + +parser.add_argument( + "--imagenet_labels", + type=str, + default="/database/ILSVRC2012/val.txt", + help="Path to the file containing validation image labels (default: /database/ILSVRC2012/val.txt)" +) + +args = parser.parse_args() + +USE_CUDA = not args.no_cuda + +# Setting Aidge verbose level +if args.verbose == 0: + aidge_core.Log.set_console_level(aidge_core.Level.Error) +elif args.verbose == 1: + aidge_core.Log.set_console_level(aidge_core.Level.Notice) +elif args.verbose == 2: + aidge_core.Log.set_console_level(aidge_core.Level.Info) +elif args.verbose >= 3: + aidge_core.Log.set_console_level(aidge_core.Level.Debug) + +if USE_CUDA: + import aidge_backend_cuda + +# ------------------------------------------------------------ +# EXPORT CONFIG +# ------------------------------------------------------------ + +""" +Export configuration details : +- RNG_SEED : Fix a random seed for torch to always get the same images from the dataset, + therefore always getting the same output. +- NB_TEST : Number of example inferences to perform (used to get an accuracy approximation). +- NB_CALIB : Number of samples used for the calibration step of quantization. +- MODEL_NAME : Should be the same name as the onnx file you want to load and export. +- DO_EXAMPLES : Perform example inferences (and allow to get accuracy approximation) +- NB_BITS : Quantization output precision. Should be 8 to work with this export. +- TARGET_TYPE : The aidge datatype for tensors to be casted after the quantization step. +- OPTIM_SIGN : Quantization optional optimization based on data sign. +- SINGLE_SHIFT : Quantization option specifying if inserted scaling nodes should be + single shift or floating point. +- NO_QUANT : Skip the quantization step. Should be set to False. +- CLIPPING : Clipping method during quantization. +- FOLD_GRAPH : The quantization step adds cast nodes to cast the graph into the given TARGET_TYPE. + Enabling the FOLD_GRAPH will automatically fold these nodes into the following + ones at the end of quantization step. +- USE_CUDA : Determine if the quantization step uses the GPU. It is generally recommended + to enable this option if you have access to GPUs as the quantization step + may take a while to complete. +- DEV_MODE : The dev mode allows to identify errors more easily export the model with + symbolic links enabling to modify the source files directly in the + generated export (make sure you installed the export plugin running + `pip install -e .`). + Enabled running this python file, adding the --test argument. +- AIDGE_MODE : Saves and export the outputs generated by the aidge inferences in order + to compare it with the export outputs. 
+              Enabled by running this python file with the --aidge_cmp argument.
+"""
+
+print(" Available backends : ", aidge_core.Tensor.get_available_backends())
+
+quantize_model = False
+NB_BITS = 32
+TARGET_TYPE = aidge_core.dtype.float32
+
+if args.dtype == "float32":
+    quantize_model = False
+elif args.dtype == "int8":
+    quantize_model = True
+    NB_BITS = 8
+    TARGET_TYPE = aidge_core.dtype.int32 # int8 not yet available
+else:
+    print(f"[ERROR] Datatype '{args.dtype}' not supported.")
+    print(f"[ERROR] Supported datatypes: {supported_types}.")
+    exit(1)
+
+RNG_SEED = 1234
+NB_TEST = 20 # Test set
+NB_CALIB = 20 # Calibration set
+MODEL_NAME = 'resnet18'
+EXPORT_FOLDER = f"export_{MODEL_NAME}_int8"
+DO_EXAMPLES = True
+
+# Quantization params
+OPTIM_SIGN = False
+SINGLE_SHIFT = True
+ROUNDING = True
+NO_QUANT = False
+CLIPPING = aidge_quantization.Clipping.MSE # 'MAX'
+FOLD_GRAPH = True
+
+# Export modes
+DEV_MODE = args.dev
+AIDGE_CMP = args.aidge_cmp
+
+# Path to databases
+IMAGENET_PATH = args.imagenet_path # Path to the ImageNet database
+LABEL_PATH = args.imagenet_labels # File containing the labels of the images in the val folder (look for val.txt)
+###########################
+
+def print_cfg():
+    print("")
+    print(' RNG_SEED = ', RNG_SEED)
+    print(' MODEL_NAME = ', MODEL_NAME)
+    print(' NB_TEST = ', NB_TEST)
+    print(' NB_CALIB = ', NB_CALIB)
+    print(' NB_BITS = ', NB_BITS)
+    print(' OPTIM_SIGN = ', OPTIM_SIGN)
+    print(' NO_QUANT = ', NO_QUANT)
+    print(' CLIPPING = ', CLIPPING)
+    print(' SINGLE_SHIFT = ', SINGLE_SHIFT)
+    print(' TARGET_TYPE = ', TARGET_TYPE)
+    print(' FOLD_GRAPH = ', FOLD_GRAPH)
+    print(' USE_CUDA = ', USE_CUDA)
+    print(' DEV_MODE = ', DEV_MODE)
+    print(' IMAGENET_PATH = ', IMAGENET_PATH)
+    print(' LABEL_PATH = ', LABEL_PATH)
+    print(' MOCK_DB = ', args.mock_db)
+
+print_cfg()
+
+torch.manual_seed(RNG_SEED)
+random.seed(RNG_SEED)
+np.random.seed(RNG_SEED)
+
+backend = "cuda" if USE_CUDA else "cpu"
+aidge_tensors = []
+labels = []
+if args.mock_db:
+    for i in range(NB_TEST):
+        aidge_tensor = aidge_core.Tensor(dims=(1, 3, 224, 224))
+        aidge_tensor.set_backend(backend)
+        aidge_tensor.set_datatype(aidge_core.dtype.float32)
+        aidge_core.uniform_filler(aidge_tensor, -1.0, 1.0)
+        aidge_tensors.append(aidge_tensor)
+        labels.append(randint(1, 1000))
+else:
+    image_label_pairs = []
+    with open(LABEL_PATH, 'r') as f:
+        for line in f:
+            parts = line.strip().split()
+            if len(parts) == 2:
+                image_name, label = parts
+                image_label_pairs.append((image_name, int(label)))
+
+    np.random.seed(RNG_SEED)
+    NB_SELECT = max(NB_TEST, NB_CALIB) # Select enough samples for both test and calibration
+    selected_pairs = image_label_pairs[:NB_SELECT]
+
+    # --------------------------------------------------------------
+    # CREATE THE SAMPLES
+    # --------------------------------------------------------------
+
+    transform_val = transforms.Compose([transforms.Resize(256),
+                                        transforms.CenterCrop(224),
+                                        transforms.ToTensor(),
+                                        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
+                                        ])
+
+    tensors = []
+    labels = []
+    paths = []
+    index = 0
+
+    for image_name, label in selected_pairs:
+        image_path = os.path.join(IMAGENET_PATH, image_name)
+        if os.path.exists(image_path):
+            try:
+                image = Image.open(image_path)
+                if image.mode != 'RGB':
+                    image = image.convert('RGB')
+                tensor = transform_val(image)
+                tensors.append(tensor)
+                labels.append(label)
+                paths.append(image_path)
+            except Exception as e:
+                print(f"Error with image {image_path}: {e}")
+
+
+    for tensor in tensors:
+        array = tensor.numpy()
+        array = np.reshape(array, (1, 3, 224, 224))
+        array = normalize(array)
+        aidge_tensor = aidge_core.Tensor(array)
+        aidge_tensor.set_backend(backend)
+        aidge_tensor.set_datatype(aidge_core.dtype.float32)
+        aidge_tensors.append(aidge_tensor)
+
+
+# --------------------------------------------------------------
+# LOAD THE MODEL
+# --------------------------------------------------------------
+
+"""
+Load the .onnx model and perform the usual graph modifications:
+    - Remove the flatten nodes;
+    - Fuse the batchnorm nodes into the bias producers;
+    - Expand the metaOperators to perform the desired fusions.
+"""
+
+# Define the target path and filename
+file_url = "https://huggingface.co/EclipseAidge/resnet18/resolve/main/resnet18_imagenet_1k.onnx?download=true"
+file_path = Path(MODEL_NAME + "_imagenet_1k.onnx")
+
+aidge_core.utils.download_file(file_path, file_url)
+
+model = aidge_onnx.load_onnx(file_path, verbose=False)
+
+model.save("imported_model")
+aidge_core.remove_flatten(model)
+aidge_core.fuse_batchnorm(model)
+aidge_core.expand_metaops(model)
+model.save("imported_model_fused_bn")
+
+# --------------------------------------------------------------
+# SET UP THE AIDGE SCHEDULER
+# --------------------------------------------------------------
+
+"""
+The scheduler is an ordered version of the model, which makes it possible,
+for instance, to run the nodes in the right order for inference.
+"""
+
+# Set up the backend
+model.set_datatype(aidge_core.dtype.float32)
+model.set_backend(backend)
+
+# Create the Scheduler
+scheduler = aidge_core.SequentialScheduler(model)
+
+# --------------------------------------------------------------
+# RUN SOME EXAMPLE INFERENCES
+# --------------------------------------------------------------
+
+def propagate(model, scheduler, aidge_tensor):
+    """ Propagate the given tensor into the model
+    """
+    # Run the inference
+    scheduler.forward(True, [aidge_tensor])
+    # Gather the results
+    output_node = model.get_output_nodes().pop()
+    output_tensor = output_node.get_operator().get_output(0).clone()
+    output_tensor.set_backend("cpu")
+    return np.array(output_tensor)
+
+accuracy = 0
+if (DO_EXAMPLES):
+    print('\n EXAMPLE INFERENCES:')
+    nb_valid = 0
+    base_values = []
+    for i in range(NB_TEST):
+        output_array = propagate(model, scheduler, aidge_tensors[i])
+        print(labels[i], ' VS ', np.argmax(output_array), ' -> ', np.max(output_array))
+        base_values.append(np.max(output_array))
+        if (labels[i] == np.argmax(output_array)):
+            nb_valid += 1
+    accuracy = nb_valid / NB_TEST
+    print('\n MODEL ACCURACY = ', accuracy * 100, '%')
+
+
+# --------------------------------------------------------------
+# PERFORM THE QUANTIZATION
+# --------------------------------------------------------------
+
+if quantize_model:
+    aidge_quantization.quantize_network(
+        network = model,
+        nb_bits = NB_BITS,
+        calibration_set = aidge_tensors[0:NB_CALIB],
+        clipping_mode = CLIPPING,
+        target_type = TARGET_TYPE,
+        no_quant = NO_QUANT,
+        optimize_signs = OPTIM_SIGN,
+        single_shift = SINGLE_SHIFT,
+        use_cuda = USE_CUDA,
+        fold_graph = FOLD_GRAPH)
+
+# Tag the scaling producers
+for node in model.get_nodes():
+    if node.type() == "Quantizer":
+        for SNode in get_node_from_metaop(node, "BitShift"):
+            SNode.get_parent(1).attributes().shift_prod = True
+        for CNode in get_node_from_metaop(node, "Mul"):
+            CNode.get_parent(1).attributes().coef_prod = True
+
+model.save("post_ptq_model")
+
+# --------------------------------------------------------------
+# RESCALE THE INPUT SAMPLES
+# --------------------------------------------------------------
+
+"""
+Once the quantization is done, the graph only accepts integer inputs,
+so we need to rescale the dataset to the quantized input range.
+Also, tensors should be cast to the same type as TARGET_TYPE.
+"""
+if quantize_model:
+    rescaling = 2**(NB_BITS-1)-1
+    for i in range(max(NB_TEST, NB_CALIB)):
+        array = np.array(aidge_tensors[i]) * rescaling
+        array = np.round(array).astype(int)
+        aidge_tensors[i] = aidge_core.Tensor(array)
+        aidge_tensors[i].set_datatype(TARGET_TYPE)
+        aidge_tensors[i].set_backend("cpu")
+    # Set the model to CPU for export
+    model.set_backend("cpu")
+
+# --------------------------------------------------------------
+# GENERATE NEW SCHEDULER
+# --------------------------------------------------------------
+
+"""
+Each time the graph is modified, the scheduler has to be reset.
+Here some Quantizer and Cast nodes have been added.
+"""
+
+""" [Issue]
+We first need to manually add an input tensor with the correct datatype,
+as it is not automatically done in PTQ.
+"""
+if quantize_model:
+    input_node = model.get_ordered_inputs()[0]
+    input_node[0].get_operator().set_input(0, aidge_tensors[0])
+    scheduler.reset_scheduling()
+
+# --------------------------------------------------------------
+# PERFORM THE EXAMPLE INFERENCES AGAIN
+# --------------------------------------------------------------
+
+model.save("post_ptq")
+
+if (DO_EXAMPLES and quantize_model):
+    print('\n QUANTIZED EXAMPLE INFERENCES:')
+    nb_valid = 0
+    post_values = []
+    for i in range(NB_TEST):
+        output_array = propagate(model, scheduler, aidge_tensors[i])
+        print(labels[i], ' VS ', np.argmax(output_array), ' -> ', np.max(output_array))
+        post_values.append(np.max(output_array))
+        if (labels[i] == np.argmax(output_array)):
+            nb_valid += 1
+
+    quant_accuracy = nb_valid / NB_TEST
+    print('\n MODEL ACCURACY = ', accuracy * 100, '%')
+    print('\n QUANTIZED ACCURACY = ', quant_accuracy * 100, '%')
+
+    output_array = propagate(model, scheduler, aidge_tensors[0])
+
+if USE_CUDA:
+    model.set_backend("cpu")
+    for aidge_tensor in aidge_tensors:
+        aidge_tensor.set_backend("cpu")
+
+# --------------------------------------------------------------
+# FUSE NODES INTO METAOPS
+# --------------------------------------------------------------
+
+"""
+This step makes the link between the Aidge model and the CPP kernel
+implementations. In aidge, all the node computations are performed
+separately (Pad -> Conv -> Quantizer -> ReLU -> ...).
+
+However, within the CPP export, some core operators are merged
+into meta operators. For instance, the padding, scaling and ReLU are
+performed within the Conv kernel.
+
+In this step, we use graph regex techniques to find the desired patterns
+within the graph in order to match the export implementation of the kernels.
+"""
+
+# Exclude unwanted producers
+"""
+Before fusing the nodes, we set a tag on the Producers in order to exclude
+from the export the ones holding coefficients, as they are directly handled
+within the layer parameters.
+"""
+exclude_unwanted_producers(model)
+
+# Fuse nodes
+cpp_fuse_to_metaops(model)
+
+# Remove optional inputs
+"""
+Some optional inputs may be added by the quantization step (for instance with the clipping nodes).
+Here we make sure that they will not be considered as actual graph inputs by the export, by
+excluding them from the ordered_inputs list of the model.
+""" +remove_optional_inputs(model) + +# Reset scheduler to apply graph modifications +""" +The scheduler always needs to be reset after graph manipulation. +""" +scheduler.reset_scheduling() + +# Name newly created MetaOps +""" +As names are optional in Aidge, the fuse_to_metaops function will not automatically +give a name to the newly created metaOps. However, in an export context, we need +our operators to be named, as this will be used to name the corresponding files. +""" +scheduler.generate_scheduling() # Scheduler needs to be generated as it has just been reset +set_nodes_names(scheduler) + +# -------------------------------------------------------------- +# LOG OUTPUTS FOR THE LAST IMAGE OF THE TEST DATASET +# -------------------------------------------------------------- + +""" +Here a final inference is made on the input we want to export and run. +This will ensure that all the feature maps tensors (between the layers) +hold the data corresponding to this specific input. +Then, the "log_outputs()" function (called later) will store these tensors +into log files that may be exported as well for comparison purpose. +""" + +output_array = propagate(model, scheduler, aidge_tensors[0]) + +print("### Exported Sample ###") +print("Aidge prediction after quantization :", np.argmax(output_array), "(" + str(np.max(output_array)) + ")") +print("Label :", labels[0]) + +# -------------------------------------------------------------- +# HANDLING DATATYPE +# -------------------------------------------------------------- + +""" +Now, despite the quantization stage, all the tensors of the model are +still "virtually" in Int32. Before exporting the model, we have to set +tensors' datatypes to Int8, except for biases which should remain in Int32. +""" +if quantize_model: + set_nodes_datatypes(model) + +# Store tensors values into log files +""" +Once the tensors has been casted, the log_outputs() function can be +called to store their values into log files. +""" + +if os.path.isdir("log_outputs"): + shutil.rmtree("log_outputs") +model.log_outputs("log_outputs") + +# -------------------------------------------------------------- +# TEST MODE +# -------------------------------------------------------------- + +""" +The test mode is mainly used for validation and benchmark. The model will be +exported in a way that each layer's result will be compared with the CPU implementation. +The timings for each layer will be displayed. +In case of error, you will be able to enter debug mode, showing in-layer data or +changing the inputs of the layer, to isolate the source of the issue. +""" + +for node in model.get_nodes(): + node.attributes().dev_mode = DEV_MODE + +# -------------------------------------------------------------- +# AIDGE CMP +# -------------------------------------------------------------- + +""" +If the --aidge_cmp option is enabled, the feature maps generated by aidge with the +backend cpu will be exported in the generated export. It will be used as reference +to verify that the results with the optimized kernels are correct for the exported +model. +This option has to be passed to each node in order to be used within the Export Nodes. +(JConv, JPad, ...) that you can find in the "export_gen/operator_export" folder. 
+""" + +if AIDGE_CMP: + for node in model.get_nodes(): + node.attributes().aidge_cmp = True + +# -------------------------------------------------------------- +# EXPORT THE MODEL +# -------------------------------------------------------------- + +model.save("exported_model") +inputs_tensor = aidge_core.Tensor(np.array(aidge_tensors[0])) +inputs_tensor.set_data_format(aidge_core.dformat.nchw) # Init the dataformat (default -> nchw) +inputs_tensor.set_data_format(aidge_core.dformat.nhwc) # Transpose the data (nchw -> nhwc) +if args.dtype == "int8": + inputs_tensor.set_datatype(aidge_core.dtype.int8) + +aidge_export_cpp.export(EXPORT_FOLDER, + model, + scheduler, + labels = aidge_core.Tensor(labels[0]), + inputs_tensor=inputs_tensor, + dev_mode = DEV_MODE, + aidge_cmp = AIDGE_CMP) + +print("\n### Compiling the export ###") +try: + for std_line in aidge_core.utils.run_command(["make"], cwd=EXPORT_FOLDER): + print(std_line, end="") +except subprocess.CalledProcessError as e: + raise RuntimeError(0, f"An error occurred, failed to build export.") from e +print("\n### Running the export ###") +try: + for std_line in aidge_core.utils.run_command(["./bin/run_export"], cwd=EXPORT_FOLDER): + print(std_line, end="") +except subprocess.CalledProcessError as e: + raise RuntimeError(0, f"An error occurred, failed to run export.") from e \ No newline at end of file