From 29a0fec1af61ea5fb77cf63e5ebf36b0d6273e82 Mon Sep 17 00:00:00 2001 From: cmoineau <cyril.moineau@cea.fr> Date: Thu, 12 Dec 2024 09:38:50 +0000 Subject: [PATCH 01/22] Update export_cpp with https://gitlab.eclipse.org/eclipse/aidge/aidge_core/-/merge_requests/277 --- aidge_export_cpp/__init__.py | 2 +- aidge_export_cpp/utils/__init__.py | 25 ++++++------------------- pyproject.toml | 17 +++++++++++------ setup.cfg | 4 ++++ 4 files changed, 22 insertions(+), 26 deletions(-) create mode 100644 setup.cfg diff --git a/aidge_export_cpp/__init__.py b/aidge_export_cpp/__init__.py index 99df130..16c9be6 100644 --- a/aidge_export_cpp/__init__.py +++ b/aidge_export_cpp/__init__.py @@ -10,7 +10,7 @@ import aidge_core from aidge_export_cpp.utils import ROOT -from ._version import * +# from ._version import * from .export import * diff --git a/aidge_export_cpp/utils/__init__.py b/aidge_export_cpp/utils/__init__.py index 0728388..5b15131 100644 --- a/aidge_export_cpp/utils/__init__.py +++ b/aidge_export_cpp/utils/__init__.py @@ -1,27 +1,14 @@ from pathlib import Path -import os +from importlib.metadata import version # Constants FILE = Path(__file__).resolve() ROOT = FILE.parents[1] -OPERATORS_REGISTRY = {} +def show_version(): + version_aidge_export_cpp = version("aidge_export_cpp") + print(f"Aidge Export CPP: {version_aidge_export_cpp}") -def operator_register(*args): - - key_list = [arg for arg in args] - - def decorator(operator): - class Wrapper(operator): - def __init__(self, *args, **kwargs): - return operator(*args, **kwargs) - - for key in key_list: - OPERATORS_REGISTRY[key] = operator - - return Wrapper - return decorator - -def supported_operators(): - return list(OPERATORS_REGISTRY.keys()) +def get_project_version()->str: + return version("aidge_export_cpp") diff --git a/pyproject.toml b/pyproject.toml index 870f193..b3d85aa 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -9,7 +9,7 @@ dependencies = [ requires-python = ">= 3.7" readme = "README.md" license = { file = "LICENSE" } -classifiers = [ +classifiers = [ "Development Status :: 2 - Pre-Alpha", "Intended Audience :: Developers", "Intended Audience :: Education", @@ -26,7 +26,14 @@ classifiers = [ "Topic :: Scientific/Engineering :: Artificial Intelligence", "Topic :: Software Development" ] -dynamic = ["version"] # defined in tool.setuptools_scm +dynamic = ["version"] # defined by pbr + +[project.urls] +Homepage = "https://www.deepgreen.ai/en/platform" +Documentation = "https://eclipse-aidge.readthedocs.io/en/latest/" +Repository = "https://gitlab.eclipse.org/eclipse/aidge/aidge_export_cpp" +Issues = "https://gitlab.eclipse.org/eclipse/aidge/aidge_export_cpp/-/issues/" +Changelog = "https://gitlab.eclipse.org/eclipse/aidge/aidge_export_cpp/-/releases" [project.optional-dependencies] test = ["pytest"] @@ -34,7 +41,7 @@ test = ["pytest"] [build-system] requires = [ "setuptools>=64", - "setuptools_scm[toml]==7.1.0" + "pbr" ] build-backend = "setuptools.build_meta" @@ -47,6 +54,4 @@ include = ["aidge_export_cpp"] # package names should match these glob patterns namespaces = false # to disable scanning PEP 420 namespaces (true by default) [tool.setuptools.exclude-package-data] aidge_export_cpp = ["unit_tests*"] # exclude unit_tests which may be included as data -# SETUPTOOLS_SCM -[tool.setuptools_scm] -write_to = "aidge_export_cpp/_version.py" + diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 0000000..bb5e124 --- /dev/null +++ b/setup.cfg @@ -0,0 +1,4 @@ + +# pbr file +[metadata] +version = file: version.txt -- GitLab 
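Patch 01 above replaces the setuptools_scm-generated `_version.py` with pbr metadata read from `version.txt`, so the package version is now resolved at runtime through `importlib.metadata`. A minimal usage sketch of the two helpers introduced in `aidge_export_cpp/utils/__init__.py` (assuming the package has been installed, e.g. via `pip install .`, so that its distribution metadata is visible to importlib):

# Minimal sketch: runtime version lookup after the switch to pbr.
# Assumes aidge_export_cpp is installed so importlib.metadata can resolve it.
from aidge_export_cpp.utils import show_version, get_project_version

show_version()                 # prints "Aidge Export CPP: <version>"
ver = get_project_version()    # returns the same version string
print(ver)
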
From d7d6330bdfadb2e09fe541ddff1f8c9315e679c3 Mon Sep 17 00:00:00 2001 From: cmoineau <cyril.moineau@cea.fr> Date: Thu, 2 Jan 2025 14:07:54 +0000 Subject: [PATCH 02/22] Fix import path + remove deprecated functions. --- aidge_export_cpp/__init__.py | 8 +- aidge_export_cpp/export.py | 128 +----------------- aidge_export_cpp/export_registry.py | 2 +- aidge_export_cpp/operators.py | 17 ++- .../{utils/__init__.py => utils.py} | 2 +- aidge_export_cpp/utils/converter.py | 18 --- aidge_export_cpp/utils/generation.py | 51 ------- 7 files changed, 25 insertions(+), 201 deletions(-) rename aidge_export_cpp/{utils/__init__.py => utils.py} (93%) delete mode 100644 aidge_export_cpp/utils/converter.py delete mode 100644 aidge_export_cpp/utils/generation.py diff --git a/aidge_export_cpp/__init__.py b/aidge_export_cpp/__init__.py index 16c9be6..4eff598 100644 --- a/aidge_export_cpp/__init__.py +++ b/aidge_export_cpp/__init__.py @@ -2,15 +2,9 @@ r""" Aidge Export for CPP standalone projects """ +from .utils import ROOT from .export_registry import ExportLibCpp - from .operators import * from collections import defaultdict -import aidge_core - -from aidge_export_cpp.utils import ROOT - -# from ._version import * - from .export import * diff --git a/aidge_export_cpp/export.py b/aidge_export_cpp/export.py index ebac7a8..42bf90f 100644 --- a/aidge_export_cpp/export.py +++ b/aidge_export_cpp/export.py @@ -1,131 +1,15 @@ -import re -import os -import numpy as np - import aidge_core - -from aidge_core.export_utils.code_generation import * -from aidge_core.mem_info import compute_default_mem_info - -from aidge_export_cpp.utils import ROOT -from aidge_export_cpp.utils.converter import numpy_dtype2ctype from aidge_export_cpp import ExportLibCpp -from aidge_export_cpp.utils.generation import * -# from aidge_export_cpp.memory import * - - -def generate_input_file(export_folder:str, - array_name:str, - array: np.ndarray): - - # If directory doesn't exist, create it - if not os.path.exists(export_folder): - os.makedirs(export_folder) - - generate_file( - file_path=f"{export_folder}/{array_name}.h", - template_path=str(ROOT / "templates" / "data" / "inputs.jinja"), - dims = array.shape, - data_t = numpy_dtype2ctype(array.dtype), - name = array_name, - values = array.tolist() - ) - def export(export_folder_name, graphview, scheduler, mem_wrapping=False): + print("Warning: This function is deprecated, check tutorial https://eclipse.dev/aidge/source/Tutorial/export_cpp.html to find the new way to generate a C++ export.") aidge_core.export_utils.scheduler_export( scheduler, export_folder_name, ExportLibCpp, - memory_manager=compute_default_mem_info + memory_manager=aidge_core.mem_info.generate_optimized_memory_info, + memory_manager_args={ + "stats_folder": f"{export_folder_name}/stats", + "wrapping": mem_wrapping + } ) - - # export_folder = Path().absolute() / export_folder_name - - # os.makedirs(str(export_folder), exist_ok=True) - - # dnn_folder = export_folder / "dnn" - # os.makedirs(str(dnn_folder), exist_ok=True) - - # list_actions = [] - # list_configs = [] - # peak_mem, mem_info = compute_default_mem_info(scheduler) - # list_forward_nodes = scheduler.get_static_scheduling() - - # for node in list_forward_nodes: - # if ExportLibCpp.exportable(node): - # op = ExportLibCpp.get_export_node(node)(node, mem_info[node]) - # # For configuration files - # list_configs = op.export(dnn_folder, list_configs) - - # # For forward file - # list_actions = op.forward(list_actions) - # else: - # raise RuntimeError(f"Operator not 
supported: {node.type()} !") - - # # Memory management - # # stats_folder = export_folder / "statistics" - # # os.makedirs(str(stats_folder), exist_ok=True) - # # mem_size, mem_info = generate_optimized_memory_info(stats_folder, scheduler, mem_wrapping) - # # peak_mem, mem_info = compute_default_mem_info(scheduler) - - # # Generate the memory file - # # generate_file( - # # str(dnn_folder / "memory" / "mem_info.h"), - # # str(ROOT / "templates" / "memory" / "mem_info.jinja"), - # # mem_size = mem_size, - # # mem_info_legends = MEMORY_INFO_TEMPLATE, - # # mem_info = mem_info - # # ) - # # list_configs.append("memory/mem_info.h") - - # # Get entry nodes - # # Store the datatype & name - # list_inputs_name = [] - # for node in graphview.get_input_nodes(): - # for idx, node_input_tuple in enumerate(node.inputs()): - # node_input, _ = node_input_tuple - # if node_input is None: - # export_type = aidge2c(node.get_operator().get_output(0).dtype()) - # list_inputs_name.append((export_type, f"{node.name()}_input_{idx}")) - # elif node_input not in graphview.get_nodes(): - # export_type = aidge2c(node_input.get_operator().get_output(0).dtype()) - # list_inputs_name.append((export_type, node_input.name())) - - - # # Get output nodes - # # Store the datatype & name, like entry nodes - # list_outputs_name = [] - # for node in graphview.get_nodes(): - # if len(node.get_children()) == 0: - # export_type = aidge2c(node.get_operator().get_output(0).dtype()) - # list_outputs_name.append((export_type, f"{node.name()}_output_0")) - - # # Generate forward file - # # TODO: for now the mem type is bound for all intermediate results, should change. - # # Note that we may have all inputs constants, hence select output type - # assert len(list_outputs_name) >= 1, f"TODO: requires some output to determine mem type" - # mem_ctype = list_outputs_name[0][0] - # generate_file( - # str(dnn_folder / "src" / "forward.cpp"), - # str(ROOT / "templates" / "network" / "network_forward.jinja"), - # headers=set(list_configs), - # actions=list_actions, - # inputs= list_inputs_name, - # outputs=list_outputs_name, - # mem_ctype=mem_ctype, - # peak_mem=peak_mem - # ) - - # # Generate dnn API - # generate_file( - # str(dnn_folder / "include" / "dnn.hpp"), - # str(ROOT / "templates" / "network" / "dnn_header.jinja"), - # libraries=[], - # functions=get_functions_from_c_file(str(dnn_folder / "src" / "forward.cpp")), - # ) - - # # Copy all static files in the export - # shutil.copy(str(ROOT / "static" / "main.cpp"), str(export_folder)) - # shutil.copy(str(ROOT / "static" / "Makefile"), str(export_folder)) - # shutil.copytree(str(ROOT / "static" / "include"), str(dnn_folder / "include"), dirs_exist_ok=True) diff --git a/aidge_export_cpp/export_registry.py b/aidge_export_cpp/export_registry.py index f1aa83b..876e4ff 100644 --- a/aidge_export_cpp/export_registry.py +++ b/aidge_export_cpp/export_registry.py @@ -1,5 +1,5 @@ from aidge_core.export_utils import ExportLib -from aidge_export_cpp.utils import ROOT +from aidge_export_cpp import ROOT class ExportLibCpp(ExportLib): _name="export_cpp" diff --git a/aidge_export_cpp/operators.py b/aidge_export_cpp/operators.py index 9654a20..f04dbb3 100644 --- a/aidge_export_cpp/operators.py +++ b/aidge_export_cpp/operators.py @@ -4,12 +4,27 @@ from pathlib import Path import aidge_core from aidge_core.export_utils import ExportNode, ExportNodeCpp, generate_file from aidge_export_cpp.utils import ROOT -from aidge_export_cpp.utils.converter import numpy_dtype2ctype from aidge_export_cpp import 
ExportLibCpp ############################################## ############## Export functions ############## ############################################## +def numpy_dtype2ctype(dtype): + if dtype == np.int8: + return "int8_t" + elif dtype == np.int16: + return "int16_t" + elif dtype == np.int32: + return "int32_t" + elif dtype == np.int64: + return "int64_t" + elif dtype == np.float32: + return "float" + elif dtype == np.float64: + return "double" + # Add more dtype mappings as needed + else: + raise ValueError(f"Unsupported {dtype} dtype") def export_params(name: str, array: np.ndarray, diff --git a/aidge_export_cpp/utils/__init__.py b/aidge_export_cpp/utils.py similarity index 93% rename from aidge_export_cpp/utils/__init__.py rename to aidge_export_cpp/utils.py index 5b15131..915c2c6 100644 --- a/aidge_export_cpp/utils/__init__.py +++ b/aidge_export_cpp/utils.py @@ -3,7 +3,7 @@ from importlib.metadata import version # Constants FILE = Path(__file__).resolve() -ROOT = FILE.parents[1] +ROOT = FILE.parents[0] def show_version(): diff --git a/aidge_export_cpp/utils/converter.py b/aidge_export_cpp/utils/converter.py deleted file mode 100644 index d4af124..0000000 --- a/aidge_export_cpp/utils/converter.py +++ /dev/null @@ -1,18 +0,0 @@ -import numpy as np - -def numpy_dtype2ctype(dtype): - if dtype == np.int8: - return "int8_t" - elif dtype == np.int16: - return "int16_t" - elif dtype == np.int32: - return "int32_t" - elif dtype == np.int64: - return "int64_t" - elif dtype == np.float32: - return "float" - elif dtype == np.float64: - return "double" - # Add more dtype mappings as needed - else: - raise ValueError(f"Unsupported {dtype} dtype") diff --git a/aidge_export_cpp/utils/generation.py b/aidge_export_cpp/utils/generation.py deleted file mode 100644 index 4478ef7..0000000 --- a/aidge_export_cpp/utils/generation.py +++ /dev/null @@ -1,51 +0,0 @@ -import re -import os -import shutil -from jinja2 import Environment, FileSystemLoader - - -def get_functions_from_c_file(file_path): - functions = [] - pattern = r'\w+\s+(\w+)\s*\(([^)]*)\)\s*{' - - with open(file_path, 'r') as file: - file_content = file.read() - - matches = re.findall(pattern, file_content) - for match in matches: - function_name = match[0] - arguments = match[1].split(',') - arguments = [arg.strip() for arg in arguments] - - return_type = get_return_type(file_content, function_name) - - function_string = f"{return_type} {function_name}({', '.join(arguments)});" - functions.append(function_string) - - return functions - - -def get_return_type(file_content, function_name): - pattern = rf'\w+\s+{function_name}\s*\([^)]*\)\s*{{' - return_type = re.search(pattern, file_content).group() - return_type = return_type.split()[0].strip() - return return_type - - -def get_functions_from_c_folder(folder_path): - functions = [] - - for _, _, files in os.walk(folder_path): - for file in files: - functions += get_functions_from_c_file(os.path.join(folder_path, file)) - - return functions - - -def copyfile(filename, dst_folder): - - # If directory doesn't exist, create it - if not os.path.exists(dst_folder): - os.makedirs(dst_folder) - - shutil.copy(filename, dst_folder) -- GitLab From 847a27b15d16bf6b62964c070573f6e4761194ea Mon Sep 17 00:00:00 2001 From: Olivier BICHLER <olivier.bichler@cea.fr> Date: Sun, 19 Jan 2025 15:48:57 +0100 Subject: [PATCH 03/22] Hotfix: source files were not included anymore --- pyproject.toml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index b3d85aa..25fb9d3 100644 --- 
a/pyproject.toml +++ b/pyproject.toml @@ -52,6 +52,8 @@ build-backend = "setuptools.build_meta" where = ["."] # list of folders that contain the packages (["."] by default) include = ["aidge_export_cpp"] # package names should match these glob patterns (["*"] by default) namespaces = false # to disable scanning PEP 420 namespaces (true by default) +[tool.setuptools.package-data] +'aidge_export_cpp' = ['**/*'] [tool.setuptools.exclude-package-data] aidge_export_cpp = ["unit_tests*"] # exclude unit_tests which may be included as data -- GitLab From 4b92baa187381698f8cee9619b19cebf31a40a63 Mon Sep 17 00:00:00 2001 From: Matthew Newson <matthew.newson@cea.fr> Date: Wed, 12 Feb 2025 14:17:37 +0000 Subject: [PATCH 04/22] Upload New File --- aidge_export_cpp/kernels/erf.hpp | 40 ++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) create mode 100644 aidge_export_cpp/kernels/erf.hpp diff --git a/aidge_export_cpp/kernels/erf.hpp b/aidge_export_cpp/kernels/erf.hpp new file mode 100644 index 0000000..b509133 --- /dev/null +++ b/aidge_export_cpp/kernels/erf.hpp @@ -0,0 +1,40 @@ +#ifndef __AIDGE_EXPORT_CPP_KERNELS_ERP__ +#define __AIDGE_EXPORT_CPP_KERNELS_ERP__ + +#include "network/typedefs.hpp" +#include <cmath> +#include <math.h> +#include <iostream> + +template<int _NB_ELTS, + typename Input_T, typename Output_T> +__attribute__((always_inline)) inline +void erf_forward ( + const Input_T* __restrict inputs, + Output_T* __restrict outputs) +{ + double a1 = 0.254829592; + double a2 = -0.284496736; + double a3 = 1.421413741; + double a4 = -1.453152027; + double a5 = 1.061405429; + double p = 0.3275911; + + +#pragma omp parallel for + for (int i = 0; i < _NB_ELTS; ++i) { + int sign = 1; + if (inputs[i] < 0) + sign = -1; + double abs_value = abs(inputs[i]); + + // A&S formula 7.1.26 + double t = 1.0/(1.0 + p*abs_value); + double y = 1.0 - (((((a5*t + a4)*t) + a3)*t + a2)*t + a1)*t*exp(-abs_value*abs_value); + outputs[i] = sign*y; + + } +} + + +#endif // __AIDGE_EXPORT_CPP_KERNELS_ERP_ \ No newline at end of file -- GitLab From 9b6fb714ff531a47da042fce67ef28cd864418ae Mon Sep 17 00:00:00 2001 From: Matthew Newson <matthew.newson@cea.fr> Date: Wed, 12 Feb 2025 14:21:31 +0000 Subject: [PATCH 05/22] Upload New File --- aidge_export_cpp/kernels/transpose.hpp | 56 ++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) create mode 100644 aidge_export_cpp/kernels/transpose.hpp diff --git a/aidge_export_cpp/kernels/transpose.hpp b/aidge_export_cpp/kernels/transpose.hpp new file mode 100644 index 0000000..a6ddce2 --- /dev/null +++ b/aidge_export_cpp/kernels/transpose.hpp @@ -0,0 +1,56 @@ +#ifndef __AIDGE_EXPORT_CPP_KERNELS_TRANSPOSE__ +#define __AIDGE_EXPORT_CPP_KERNELS_TRANSPOSE__ + +#include "network/typedefs.hpp" +#include <cstring> +#include <cstdio> +#include <iostream> + +using namespace std; + +template< int INPUT_DIMS[], int PERM[], int OUTPUT_DIMS[], + int SIZE_OUTPUT_DIMS, int SIZE, + typename Input_T, typename Output_T> +__attribute__((always_inline)) inline +void transpose_forward ( + const Input_T* __restrict inputs, + Output_T* __restrict outputs) + { + + int newStrides[SIZE_OUTPUT_DIMS]; + for (int i = 0; i<SIZE_OUTPUT_DIMS;++i){newStrides[i] = 1;} + for (int i = 0; i < SIZE_OUTPUT_DIMS; ++i) { + for (int j = i + 1; j < SIZE_OUTPUT_DIMS; ++j) { + newStrides[i] *= OUTPUT_DIMS[j]; + } + } + + int indices[SIZE_OUTPUT_DIMS]; + for (int i = 0; i<SIZE_OUTPUT_DIMS;++i){indices[i] = 0;} + + for (int i = 0; i < SIZE; ++i) { + int idx = 0; + for (int j = SIZE_OUTPUT_DIMS -1; j >=0; --j) { + 
idx += indices[PERM[j]] * newStrides[j]; + } + + outputs[idx] = inputs[i]; + + + for (int j = SIZE_OUTPUT_DIMS - 1; j >= 0; --j) { + if (indices[j] < INPUT_DIMS[j] - 1) { + indices[j]++; + break; + } + else { + indices[j] = 0; + } + } + } + + +} + + + +#endif // __AIDGE_EXPORT_CPP_KERNELS_TRANSPOSE__ \ No newline at end of file -- GitLab From 1fd2f8da29e2b797bbc9e036cfff5e869b6fa52c Mon Sep 17 00:00:00 2001 From: Matthew Newson <matthew.newson@cea.fr> Date: Tue, 25 Feb 2025 12:56:48 +0000 Subject: [PATCH 06/22] Upload Erf jinja forward --- aidge_export_cpp/templates/kernel_forward/erf_forward.jinja | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 aidge_export_cpp/templates/kernel_forward/erf_forward.jinja diff --git a/aidge_export_cpp/templates/kernel_forward/erf_forward.jinja b/aidge_export_cpp/templates/kernel_forward/erf_forward.jinja new file mode 100644 index 0000000..9f3fbf3 --- /dev/null +++ b/aidge_export_cpp/templates/kernel_forward/erf_forward.jinja @@ -0,0 +1,6 @@ +{% filter indent(width=4, first=False) %} +{% include "./_mem_offset.jinja" %} +erf_forward<{{name|upper}}_NB_ELTS> + ({{in_name[0]}}, {{out_name[0]}}); +{% include "./_save_outputs.jinja" %} +{% endfilter %} \ No newline at end of file -- GitLab From 0264ce4b97ff330aea589f650a10b30c07bccf18 Mon Sep 17 00:00:00 2001 From: Matthew Newson <matthew.newson@cea.fr> Date: Tue, 25 Feb 2025 12:57:58 +0000 Subject: [PATCH 07/22] Upload Transpose Forward File --- .../templates/kernel_forward/transpose_forward.jinja | 11 +++++++++++ 1 file changed, 11 insertions(+) create mode 100644 aidge_export_cpp/templates/kernel_forward/transpose_forward.jinja diff --git a/aidge_export_cpp/templates/kernel_forward/transpose_forward.jinja b/aidge_export_cpp/templates/kernel_forward/transpose_forward.jinja new file mode 100644 index 0000000..2f8d939 --- /dev/null +++ b/aidge_export_cpp/templates/kernel_forward/transpose_forward.jinja @@ -0,0 +1,11 @@ +{% filter indent(width=4, first=False) %} +{% include "./_mem_offset.jinja" %} +transpose_forward<{{ name|upper }}_INPUT_DIMS, + {{ name|upper }}_PERM, + {{ name|upper}}_OUTPUT_DIMS, + {{ name|upper}}_SIZE_OUTPUT_DIMS, + {{name|upper}}_SIZE> + ({{in_name[0]}}, {{out_name[0]}}); +{% include "./_save_outputs.jinja" %} +{% endfilter %} + -- GitLab From bb72026146e36ecdf51188f2f9d047dcfd6c4329 Mon Sep 17 00:00:00 2001 From: Matthew Newson <matthew.newson@cea.fr> Date: Wed, 26 Feb 2025 14:27:14 +0000 Subject: [PATCH 08/22] Adding erf and transpose config --- .../templates/configuration/erf_config.jinja | 11 +++++++++++ .../configuration/transpose_config.jinja | 17 +++++++++++++++++ 2 files changed, 28 insertions(+) create mode 100644 aidge_export_cpp/templates/configuration/erf_config.jinja create mode 100644 aidge_export_cpp/templates/configuration/transpose_config.jinja diff --git a/aidge_export_cpp/templates/configuration/erf_config.jinja b/aidge_export_cpp/templates/configuration/erf_config.jinja new file mode 100644 index 0000000..b273472 --- /dev/null +++ b/aidge_export_cpp/templates/configuration/erf_config.jinja @@ -0,0 +1,11 @@ +{#- For name header -#} +#ifndef {{ name|upper }}_LAYER_H +#define {{ name|upper }}_LAYER_H + +{# For layer configuration -#} +{# For layer configuration -#} +{% include "./_def_io.jinja" %} +{% include "./_meminfo.jinja" %} +#define {{ name|upper }}_NB_ELTS {{ in_dims[0]|join('*') }} + +#endif /* {{ name|upper }}_LAYER_H */ \ No newline at end of file diff --git a/aidge_export_cpp/templates/configuration/transpose_config.jinja 
b/aidge_export_cpp/templates/configuration/transpose_config.jinja new file mode 100644 index 0000000..c3eabc5 --- /dev/null +++ b/aidge_export_cpp/templates/configuration/transpose_config.jinja @@ -0,0 +1,17 @@ +{#- For name header -#} +#ifndef {{ name|upper }}_LAYER_H +#define {{ name|upper }}_LAYER_H + +{# For layer configuration -#} +{% include "./_def_io.jinja" %} +{% include "./_meminfo.jinja" %} +#define {{ name|upper }}_SIZE {{out_size[0]}} +#define {{name|upper }}_SIZE_OUTPUT_DIMS {{out_dims[0]|length}} + +int {{name|upper}}_OUTPUT_DIMS[] = { {{ out_dims[0]|join(", ") }} }; +int {{name|upper}}_INPUT_DIMS[] = { {{ in_dims[0]|join(", ") }} }; +int {{name|upper}}_PERM[] = { {{ output_dims_order|join(", ") }} }; + + + +#endif /* {{ name|upper }}_LAYER_H */ -- GitLab From 659a8f4bb519f6b7f1863bb9de994608b4b5ff57 Mon Sep 17 00:00:00 2001 From: Matthew Newson <matthew.newson@cea.fr> Date: Wed, 26 Feb 2025 15:19:25 +0000 Subject: [PATCH 09/22] Adding new files for add, div, mul and sub to make it easier to read and manipulated --- aidge_export_cpp/kernels/add.hpp | 104 ++++++++++++++++++ aidge_export_cpp/kernels/div.hpp | 103 +++++++++++++++++ aidge_export_cpp/kernels/mul.hpp | 104 ++++++++++++++++++ aidge_export_cpp/kernels/sub.hpp | 103 +++++++++++++++++ .../templates/configuration/add_config.jinja | 25 +++++ .../templates/configuration/div_config.jinja | 25 +++++ .../templates/configuration/mul_config.jinja | 25 +++++ .../templates/configuration/sub_config.jinja | 25 +++++ .../kernel_forward/add_forward.jinja | 14 +++ .../kernel_forward/div_forward.jinja | 14 +++ .../kernel_forward/mul_forward.jinja | 14 +++ .../kernel_forward/sub_forward.jinja | 14 +++ 12 files changed, 570 insertions(+) create mode 100644 aidge_export_cpp/kernels/add.hpp create mode 100644 aidge_export_cpp/kernels/div.hpp create mode 100644 aidge_export_cpp/kernels/mul.hpp create mode 100644 aidge_export_cpp/kernels/sub.hpp create mode 100644 aidge_export_cpp/templates/configuration/add_config.jinja create mode 100644 aidge_export_cpp/templates/configuration/div_config.jinja create mode 100644 aidge_export_cpp/templates/configuration/mul_config.jinja create mode 100644 aidge_export_cpp/templates/configuration/sub_config.jinja create mode 100644 aidge_export_cpp/templates/kernel_forward/add_forward.jinja create mode 100644 aidge_export_cpp/templates/kernel_forward/div_forward.jinja create mode 100644 aidge_export_cpp/templates/kernel_forward/mul_forward.jinja create mode 100644 aidge_export_cpp/templates/kernel_forward/sub_forward.jinja diff --git a/aidge_export_cpp/kernels/add.hpp b/aidge_export_cpp/kernels/add.hpp new file mode 100644 index 0000000..03ba2c5 --- /dev/null +++ b/aidge_export_cpp/kernels/add.hpp @@ -0,0 +1,104 @@ +#ifndef __AIDGE_EXPORT_CPP_KERNELS_ADD__ +#define __AIDGE_EXPORT_CPP_KERNELS_ADD__ + +#include "network/typedefs.hpp" +#include "kernels/activation.hpp" +#include <iostream> +#include <cassert> + + + +template<int NB_ELTS, + int INPUT_A_DIMS[], int INPUT_B_DIMS[], int OUTPUT_DIMS[], + int SIZE_DIM_IN_A, int SIZE_DIM_IN_B, int SIZE_DIM_OUT, int OUT_SIZE, + ActivationFunction_T ACTIVATION, + typename Input_T, typename Output_T> +__attribute__((always_inline)) inline +void add_forward ( + Output_T* __restrict outputs, + const Input_T* __restrict inputs1, + const Input_T* __restrict inputs2) +{ + int ndim_a[SIZE_DIM_OUT]; + int ndim_b[SIZE_DIM_OUT]; + for (int i= 0; i<SIZE_DIM_OUT; i++){ + int idx = SIZE_DIM_OUT-SIZE_DIM_IN_A; + ndim_a[i] = (i< idx) ? 
1 : INPUT_A_DIMS[i-idx]; + } + for (int i= 0; i<SIZE_DIM_OUT; i++){ + int idx = SIZE_DIM_OUT-SIZE_DIM_IN_B; + ndim_b[i] = (i< idx) ? 1 : INPUT_B_DIMS[i-idx]; + } + + // Find the highest equal dimension + int contiguousidx = SIZE_DIM_OUT -1 ; + + for (int i = contiguousidx ; ndim_a[i] == ndim_b[i]; i--) { + contiguousidx = i; + } + + // Compute the highest number of contiguous data for each Tensor + int input0_contiguous_size = 1; + for(int i = contiguousidx ; i<SIZE_DIM_OUT; ++i){ + input0_contiguous_size *= ndim_a[i]; + } + int input1_contiguous_size = 1; + for(int i = contiguousidx ; i<SIZE_DIM_OUT; ++i){ + input1_contiguous_size *= ndim_b[i]; + } + int output_contiguous_size = 1; + for(int i = contiguousidx ; i<SIZE_DIM_OUT; ++i){ + output_contiguous_size *= OUTPUT_DIMS[i]; + } + // initialize strides to iterate through data because of broadcasting + int stride_post0[contiguousidx ] ; + int stride_post1[contiguousidx ] ; + int stride_step0[contiguousidx ] ; + int stride_step1[contiguousidx ] ; + if (contiguousidx > 0) { + stride_post0[contiguousidx - 1] = 1; + stride_post1[contiguousidx - 1] = 1; + #pragma omp parallel for + for (int i = contiguousidx -2; i != -1; --i) { + stride_post0[i] = stride_post0[i+1]*ndim_a[i+1]; + stride_post1[i] = stride_post1[i+1]*ndim_b[i+1]; + } + #pragma omp parallel for + for (int i = 0; i < contiguousidx ; ++i) { + stride_step0[i] = (ndim_a[i] == 1) ? 1 - stride_post0[i] : 1; + stride_step1[i] = (ndim_b[i] == 1) ? 1 - stride_post1[i] : 1; + } + } + int offsetIn0 = 0; + int offsetIn1 = 0; + int offsetOut = 0; + int nbMatrices = 1; + for(int i = 0; i<contiguousidx ; ++i){ + nbMatrices *= OUTPUT_DIMS[i]; + + } + int dim = contiguousidx - 1; + for(int stack = 0; stack < nbMatrices;){ + for(int i = 0; i < output_contiguous_size; ++i){ + int in0_id = (input0_contiguous_size != 1) ? i : 0; + int in1_id = (input1_contiguous_size != 1) ? i : 0; + outputs[i + offsetOut*output_contiguous_size] = inputs1[in0_id + offsetIn0*input0_contiguous_size] + inputs2[in1_id + offsetIn1*input1_contiguous_size]; + } + if (++stack < nbMatrices) { + int tmp_stack = stack; + while(tmp_stack % OUTPUT_DIMS[dim] == 0) { + tmp_stack /= OUTPUT_DIMS[dim]; + dim--; + } + offsetIn0 += stride_step0[dim]; + offsetIn1 += stride_step1[dim]; + ++offsetOut; + dim = contiguousidx - 1; + } + } +} + + + + +#endif // __AIDGE_EXPORT_CPP_KERNELS_ADD__ \ No newline at end of file diff --git a/aidge_export_cpp/kernels/div.hpp b/aidge_export_cpp/kernels/div.hpp new file mode 100644 index 0000000..f1ff7d0 --- /dev/null +++ b/aidge_export_cpp/kernels/div.hpp @@ -0,0 +1,103 @@ +#ifndef __AIDGE_EXPORT_CPP_KERNELS_DIV__ +#define __AIDGE_EXPORT_CPP_KERNELS_DIV__ + +#include "network/typedefs.hpp" +#include "kernels/activation.hpp" +#include <iostream> +#include <cassert> + + + +template<int NB_ELTS, + int INPUT_A_DIMS[], int INPUT_B_DIMS[], int OUTPUT_DIMS[], + int SIZE_DIM_IN_A, int SIZE_DIM_IN_B, int SIZE_DIM_OUT, int OUT_SIZE, + ActivationFunction_T ACTIVATION, + typename Input_T, typename Output_T> +__attribute__((always_inline)) inline +void div_forward ( + Output_T* __restrict outputs, + const Input_T* __restrict inputs1, + const Input_T* __restrict inputs2) +{ + + int ndim_a[SIZE_DIM_OUT]; + int ndim_b[SIZE_DIM_OUT]; + for (int i= 0; i<SIZE_DIM_OUT; i++){ + int idx = SIZE_DIM_OUT-SIZE_DIM_IN_A; + ndim_a[i] = (i< idx) ? 1 : INPUT_A_DIMS[i-idx]; + } + for (int i= 0; i<SIZE_DIM_OUT; i++){ + int idx = SIZE_DIM_OUT-SIZE_DIM_IN_B; + ndim_b[i] = (i< idx) ? 
1 : INPUT_B_DIMS[i-idx]; + } + + // Find the highest equal dimension + int contiguousidx = SIZE_DIM_OUT -1 ; + + for (int i = contiguousidx ; ndim_a[i] == ndim_b[i]; i--) { + contiguousidx = i; + } + + // Compute the highest number of contiguous data for each Tensor + int input0_contiguous_size = 1; + for(int i = contiguousidx ; i<SIZE_DIM_OUT; ++i){ + input0_contiguous_size *= ndim_a[i]; + } + int input1_contiguous_size = 1; + for(int i = contiguousidx ; i<SIZE_DIM_OUT; ++i){ + input1_contiguous_size *= ndim_b[i]; + } + int output_contiguous_size = 1; + for(int i = contiguousidx ; i<SIZE_DIM_OUT; ++i){ + output_contiguous_size *= OUTPUT_DIMS[i]; + } + // initialize strides to iterate through data because of broadcasting + int stride_post0[contiguousidx ] ; + int stride_post1[contiguousidx ] ; + int stride_step0[contiguousidx ] ; + int stride_step1[contiguousidx ] ; + if (contiguousidx > 0) { + stride_post0[contiguousidx - 1] = 1; + stride_post1[contiguousidx - 1] = 1; + for (int i = contiguousidx -2; i != -1; --i) { + stride_post0[i] = stride_post0[i+1]*ndim_a[i+1]; + stride_post1[i] = stride_post1[i+1]*ndim_b[i+1]; + } + for (int i = 0; i < contiguousidx ; ++i) { + stride_step0[i] = (ndim_a[i] == 1) ? 1 - stride_post0[i] : 1; + stride_step1[i] = (ndim_b[i] == 1) ? 1 - stride_post1[i] : 1; + } + } + int offsetIn0 = 0; + int offsetIn1 = 0; + int offsetOut = 0; + int nbMatrices = 1; + for(int i = 0; i<contiguousidx ; ++i){ + nbMatrices *= OUTPUT_DIMS[i]; + + } + int dim = contiguousidx - 1; + for(int stack = 0; stack < nbMatrices;){ + for(int i = 0; i < output_contiguous_size; ++i){ + int in0_id = (input0_contiguous_size != 1) ? i : 0; + int in1_id = (input1_contiguous_size != 1) ? i : 0; + outputs[i + offsetOut*output_contiguous_size] = inputs1[in0_id + offsetIn0*input0_contiguous_size] / inputs2[in1_id + offsetIn1*input1_contiguous_size]; + } + if (++stack < nbMatrices) { + int tmp_stack = stack; + while(tmp_stack % OUTPUT_DIMS[dim] == 0) { + tmp_stack /= OUTPUT_DIMS[dim]; + dim--; + } + offsetIn0 += stride_step0[dim]; + offsetIn1 += stride_step1[dim]; + ++offsetOut; + dim = contiguousidx - 1; + } + } +} + + + + +#endif // __AIDGE_EXPORT_CPP_KERNELS_DIV__ \ No newline at end of file diff --git a/aidge_export_cpp/kernels/mul.hpp b/aidge_export_cpp/kernels/mul.hpp new file mode 100644 index 0000000..cbed0f6 --- /dev/null +++ b/aidge_export_cpp/kernels/mul.hpp @@ -0,0 +1,104 @@ +#ifndef __AIDGE_EXPORT_CPP_KERNELS_MUL__ +#define __AIDGE_EXPORT_CPP_KERNELS_MUL__ + +#include "network/typedefs.hpp" +#include "kernels/activation.hpp" +#include <iostream> +#include <cassert> + + + +template<int NB_ELTS, + int INPUT_A_DIMS[], int INPUT_B_DIMS[], int OUTPUT_DIMS[], + int SIZE_DIM_IN_A, int SIZE_DIM_IN_B, int SIZE_DIM_OUT, int OUT_SIZE, + ActivationFunction_T ACTIVATION, + typename Input_T, typename Output_T> +__attribute__((always_inline)) inline +void mul_forward ( + Output_T* __restrict outputs, + const Input_T* __restrict inputs1, + const Input_T* __restrict inputs2) +{ + int ndim_a[SIZE_DIM_OUT]; + int ndim_b[SIZE_DIM_OUT]; + for (int i= 0; i<SIZE_DIM_OUT; i++){ + int idx = SIZE_DIM_OUT-SIZE_DIM_IN_A; + ndim_a[i] = (i< idx) ? 1 : INPUT_A_DIMS[i-idx]; + } + for (int i= 0; i<SIZE_DIM_OUT; i++){ + int idx = SIZE_DIM_OUT-SIZE_DIM_IN_B; + ndim_b[i] = (i< idx) ? 
1 : INPUT_B_DIMS[i-idx]; + } + + // Find the highest equal dimension + int contiguousidx = SIZE_DIM_OUT -1 ; + + for (int i = contiguousidx ; ndim_a[i] == ndim_b[i]; i--) { + contiguousidx = i; + } + + // Compute the highest number of contiguous data for each Tensor + int input0_contiguous_size = 1; + for(int i = contiguousidx ; i<SIZE_DIM_OUT; ++i){ + input0_contiguous_size *= ndim_a[i]; + } + int input1_contiguous_size = 1; + for(int i = contiguousidx ; i<SIZE_DIM_OUT; ++i){ + input1_contiguous_size *= ndim_b[i]; + } + int output_contiguous_size = 1; + for(int i = contiguousidx ; i<SIZE_DIM_OUT; ++i){ + output_contiguous_size *= OUTPUT_DIMS[i]; + } + // initialize strides to iterate through data because of broadcasting + int stride_post0[contiguousidx ] ; + int stride_post1[contiguousidx ] ; + int stride_step0[contiguousidx ] ; + int stride_step1[contiguousidx ] ; + if (contiguousidx > 0) { + stride_post0[contiguousidx - 1] = 1; + stride_post1[contiguousidx - 1] = 1; + #pragma omp parallel for + for (int i = contiguousidx -2; i != -1; --i) { + stride_post0[i] = stride_post0[i+1]*ndim_a[i+1]; + stride_post1[i] = stride_post1[i+1]*ndim_b[i+1]; + } + #pragma omp parallel for + for (int i = 0; i < contiguousidx ; ++i) { + stride_step0[i] = (ndim_a[i] == 1) ? 1 - stride_post0[i] : 1; + stride_step1[i] = (ndim_b[i] == 1) ? 1 - stride_post1[i] : 1; + } + } + int offsetIn0 = 0; + int offsetIn1 = 0; + int offsetOut = 0; + int nbMatrices = 1; + for(int i = 0; i<contiguousidx ; ++i){ + nbMatrices *= OUTPUT_DIMS[i]; + + } + int dim = contiguousidx - 1; + for(int stack = 0; stack < nbMatrices;){ + for(int i = 0; i < output_contiguous_size; ++i){ + int in0_id = (input0_contiguous_size != 1) ? i : 0; + int in1_id = (input1_contiguous_size != 1) ? i : 0; + outputs[i + offsetOut*output_contiguous_size] = inputs1[in0_id + offsetIn0*input0_contiguous_size] * inputs2[in1_id + offsetIn1*input1_contiguous_size]; + } + if (++stack < nbMatrices) { + int tmp_stack = stack; + while(tmp_stack % OUTPUT_DIMS[dim] == 0) { + tmp_stack /= OUTPUT_DIMS[dim]; + dim--; + } + offsetIn0 += stride_step0[dim]; + offsetIn1 += stride_step1[dim]; + ++offsetOut; + dim = contiguousidx - 1; + } + } +} + + + + +#endif // __AIDGE_EXPORT_CPP_KERNELS_MUL__ \ No newline at end of file diff --git a/aidge_export_cpp/kernels/sub.hpp b/aidge_export_cpp/kernels/sub.hpp new file mode 100644 index 0000000..07637cd --- /dev/null +++ b/aidge_export_cpp/kernels/sub.hpp @@ -0,0 +1,103 @@ +#ifndef __AIDGE_EXPORT_CPP_KERNELS_SUB__ +#define __AIDGE_EXPORT_CPP_KERNELS_SUB__ + +#include "network/typedefs.hpp" +#include "kernels/activation.hpp" +#include <iostream> +#include <cassert> + + + +template<int NB_ELTS, + int INPUT_A_DIMS[], int INPUT_B_DIMS[], int OUTPUT_DIMS[], + int SIZE_DIM_IN_A, int SIZE_DIM_IN_B, int SIZE_DIM_OUT, int OUT_SIZE, + ActivationFunction_T ACTIVATION, + typename Input_T, typename Output_T> +__attribute__((always_inline)) inline +void sub_forward ( + Output_T* __restrict outputs, + const Input_T* __restrict inputs1, + const Input_T* __restrict inputs2) +{ + + int ndim_a[SIZE_DIM_OUT]; + int ndim_b[SIZE_DIM_OUT]; + for (int i= 0; i<SIZE_DIM_OUT; i++){ + int idx = SIZE_DIM_OUT-SIZE_DIM_IN_A; + ndim_a[i] = (i< idx) ? 1 : INPUT_A_DIMS[i-idx]; + } + for (int i= 0; i<SIZE_DIM_OUT; i++){ + int idx = SIZE_DIM_OUT-SIZE_DIM_IN_B; + ndim_b[i] = (i< idx) ? 
1 : INPUT_B_DIMS[i-idx]; + } + + // Find the highest equal dimension + int contiguousidx = SIZE_DIM_OUT -1 ; + + for (int i = contiguousidx ; ndim_a[i] == ndim_b[i]; i--) { + contiguousidx = i; + } + + // Compute the highest number of contiguous data for each Tensor + int input0_contiguous_size = 1; + for(int i = contiguousidx ; i<SIZE_DIM_OUT; ++i){ + input0_contiguous_size *= ndim_a[i]; + } + int input1_contiguous_size = 1; + for(int i = contiguousidx ; i<SIZE_DIM_OUT; ++i){ + input1_contiguous_size *= ndim_b[i]; + } + int output_contiguous_size = 1; + for(int i = contiguousidx ; i<SIZE_DIM_OUT; ++i){ + output_contiguous_size *= OUTPUT_DIMS[i]; + } + // initialize strides to iterate through data because of broadcasting + int stride_post0[contiguousidx ] ; + int stride_post1[contiguousidx ] ; + int stride_step0[contiguousidx ] ; + int stride_step1[contiguousidx ] ; + if (contiguousidx > 0) { + stride_post0[contiguousidx - 1] = 1; + stride_post1[contiguousidx - 1] = 1; + for (int i = contiguousidx -2; i != -1; --i) { + stride_post0[i] = stride_post0[i+1]*ndim_a[i+1]; + stride_post1[i] = stride_post1[i+1]*ndim_b[i+1]; + } + for (int i = 0; i < contiguousidx ; ++i) { + stride_step0[i] = (ndim_a[i] == 1) ? 1 - stride_post0[i] : 1; + stride_step1[i] = (ndim_b[i] == 1) ? 1 - stride_post1[i] : 1; + } + } + int offsetIn0 = 0; + int offsetIn1 = 0; + int offsetOut = 0; + int nbMatrices = 1; + for(int i = 0; i<contiguousidx ; ++i){ + nbMatrices *= OUTPUT_DIMS[i]; + + } + int dim = contiguousidx - 1; + for(int stack = 0; stack < nbMatrices;){ + for(int i = 0; i < output_contiguous_size; ++i){ + int in0_id = (input0_contiguous_size != 1) ? i : 0; + int in1_id = (input1_contiguous_size != 1) ? i : 0; + outputs[i + offsetOut*output_contiguous_size] = inputs1[in0_id + offsetIn0*input0_contiguous_size] - inputs2[in1_id + offsetIn1*input1_contiguous_size]; + } + if (++stack < nbMatrices) { + int tmp_stack = stack; + while(tmp_stack % OUTPUT_DIMS[dim] == 0) { + tmp_stack /= OUTPUT_DIMS[dim]; + dim--; + } + offsetIn0 += stride_step0[dim]; + offsetIn1 += stride_step1[dim]; + ++offsetOut; + dim = contiguousidx - 1; + } + } +} + + + + +#endif // __AIDGE_EXPORT_CPP_KERNELS_SUB__ \ No newline at end of file diff --git a/aidge_export_cpp/templates/configuration/add_config.jinja b/aidge_export_cpp/templates/configuration/add_config.jinja new file mode 100644 index 0000000..143d004 --- /dev/null +++ b/aidge_export_cpp/templates/configuration/add_config.jinja @@ -0,0 +1,25 @@ +{#- For name header -#} +#ifndef {{ name|upper }}_LAYER_H +#define {{ name|upper }}_LAYER_H +#include "kernels/rescaling.hpp" + +{% include "./_def_io.jinja" %} +{% include "./_meminfo.jinja" %} +{# For layer configuration -#} +#define {{ name|upper }}_NB_ELTS {{ in_dims[0]|join('*') }} +#define {{ name|upper }}_NB_ELTS_B {{ in_dims[1]|join('*')}} + +int {{name|upper}}_OUTPUT_DIMS[] = { {{ out_dims[0]|join(", ") }} }; +int {{name|upper}}_INPUT_A_DIMS[] = { {{ in_dims[0]|join(", ") }} }; +int {{name|upper}}_INPUT_B_DIMS[] = { {{ in_dims[1]|join(", ") }} }; + +#define {{name|upper}}_SIZE_DIM_IN_A {{in_dims[0]|length}} +#define {{name|upper}}_SIZE_DIM_IN_B {{in_dims[1]|length}} +#define {{name|upper}}_SIZE_DIM_OUT {{out_dims[0]|length}} + +#define {{ name|upper }}_OUT_SIZE {{out_size[0]}} +#define {{name|upper }}_SIZE_DIM_OUT {{out_dims[0]|length}} + +#define {{ name|upper }}_ACTIVATION {{ activation }} +static const {{ rescaling }} {{ name|upper }}_RESCALING = {}; +#endif /* {{ name|upper }}_LAYER_H */ diff --git 
a/aidge_export_cpp/templates/configuration/div_config.jinja b/aidge_export_cpp/templates/configuration/div_config.jinja new file mode 100644 index 0000000..143d004 --- /dev/null +++ b/aidge_export_cpp/templates/configuration/div_config.jinja @@ -0,0 +1,25 @@ +{#- For name header -#} +#ifndef {{ name|upper }}_LAYER_H +#define {{ name|upper }}_LAYER_H +#include "kernels/rescaling.hpp" + +{% include "./_def_io.jinja" %} +{% include "./_meminfo.jinja" %} +{# For layer configuration -#} +#define {{ name|upper }}_NB_ELTS {{ in_dims[0]|join('*') }} +#define {{ name|upper }}_NB_ELTS_B {{ in_dims[1]|join('*')}} + +int {{name|upper}}_OUTPUT_DIMS[] = { {{ out_dims[0]|join(", ") }} }; +int {{name|upper}}_INPUT_A_DIMS[] = { {{ in_dims[0]|join(", ") }} }; +int {{name|upper}}_INPUT_B_DIMS[] = { {{ in_dims[1]|join(", ") }} }; + +#define {{name|upper}}_SIZE_DIM_IN_A {{in_dims[0]|length}} +#define {{name|upper}}_SIZE_DIM_IN_B {{in_dims[1]|length}} +#define {{name|upper}}_SIZE_DIM_OUT {{out_dims[0]|length}} + +#define {{ name|upper }}_OUT_SIZE {{out_size[0]}} +#define {{name|upper }}_SIZE_DIM_OUT {{out_dims[0]|length}} + +#define {{ name|upper }}_ACTIVATION {{ activation }} +static const {{ rescaling }} {{ name|upper }}_RESCALING = {}; +#endif /* {{ name|upper }}_LAYER_H */ diff --git a/aidge_export_cpp/templates/configuration/mul_config.jinja b/aidge_export_cpp/templates/configuration/mul_config.jinja new file mode 100644 index 0000000..143d004 --- /dev/null +++ b/aidge_export_cpp/templates/configuration/mul_config.jinja @@ -0,0 +1,25 @@ +{#- For name header -#} +#ifndef {{ name|upper }}_LAYER_H +#define {{ name|upper }}_LAYER_H +#include "kernels/rescaling.hpp" + +{% include "./_def_io.jinja" %} +{% include "./_meminfo.jinja" %} +{# For layer configuration -#} +#define {{ name|upper }}_NB_ELTS {{ in_dims[0]|join('*') }} +#define {{ name|upper }}_NB_ELTS_B {{ in_dims[1]|join('*')}} + +int {{name|upper}}_OUTPUT_DIMS[] = { {{ out_dims[0]|join(", ") }} }; +int {{name|upper}}_INPUT_A_DIMS[] = { {{ in_dims[0]|join(", ") }} }; +int {{name|upper}}_INPUT_B_DIMS[] = { {{ in_dims[1]|join(", ") }} }; + +#define {{name|upper}}_SIZE_DIM_IN_A {{in_dims[0]|length}} +#define {{name|upper}}_SIZE_DIM_IN_B {{in_dims[1]|length}} +#define {{name|upper}}_SIZE_DIM_OUT {{out_dims[0]|length}} + +#define {{ name|upper }}_OUT_SIZE {{out_size[0]}} +#define {{name|upper }}_SIZE_DIM_OUT {{out_dims[0]|length}} + +#define {{ name|upper }}_ACTIVATION {{ activation }} +static const {{ rescaling }} {{ name|upper }}_RESCALING = {}; +#endif /* {{ name|upper }}_LAYER_H */ diff --git a/aidge_export_cpp/templates/configuration/sub_config.jinja b/aidge_export_cpp/templates/configuration/sub_config.jinja new file mode 100644 index 0000000..143d004 --- /dev/null +++ b/aidge_export_cpp/templates/configuration/sub_config.jinja @@ -0,0 +1,25 @@ +{#- For name header -#} +#ifndef {{ name|upper }}_LAYER_H +#define {{ name|upper }}_LAYER_H +#include "kernels/rescaling.hpp" + +{% include "./_def_io.jinja" %} +{% include "./_meminfo.jinja" %} +{# For layer configuration -#} +#define {{ name|upper }}_NB_ELTS {{ in_dims[0]|join('*') }} +#define {{ name|upper }}_NB_ELTS_B {{ in_dims[1]|join('*')}} + +int {{name|upper}}_OUTPUT_DIMS[] = { {{ out_dims[0]|join(", ") }} }; +int {{name|upper}}_INPUT_A_DIMS[] = { {{ in_dims[0]|join(", ") }} }; +int {{name|upper}}_INPUT_B_DIMS[] = { {{ in_dims[1]|join(", ") }} }; + +#define {{name|upper}}_SIZE_DIM_IN_A {{in_dims[0]|length}} +#define {{name|upper}}_SIZE_DIM_IN_B {{in_dims[1]|length}} +#define {{name|upper}}_SIZE_DIM_OUT 
{{out_dims[0]|length}} + +#define {{ name|upper }}_OUT_SIZE {{out_size[0]}} +#define {{name|upper }}_SIZE_DIM_OUT {{out_dims[0]|length}} + +#define {{ name|upper }}_ACTIVATION {{ activation }} +static const {{ rescaling }} {{ name|upper }}_RESCALING = {}; +#endif /* {{ name|upper }}_LAYER_H */ diff --git a/aidge_export_cpp/templates/kernel_forward/add_forward.jinja b/aidge_export_cpp/templates/kernel_forward/add_forward.jinja new file mode 100644 index 0000000..3176ced --- /dev/null +++ b/aidge_export_cpp/templates/kernel_forward/add_forward.jinja @@ -0,0 +1,14 @@ +{% filter indent(width=4, first=False) %} +{% include "./_mem_offset.jinja" %} +add_forward<{{name|upper}}_NB_ELTS, + {{name|upper}}_INPUT_A_DIMS, + {{name|upper}}_INPUT_B_DIMS, + {{name|upper}}_OUTPUT_DIMS, + {{name|upper}}_SIZE_DIM_IN_A, + {{name|upper}}_SIZE_DIM_IN_B, + {{name|upper}}_SIZE_DIM_OUT, + {{name|upper}}_OUT_SIZE, + {{name|upper}}_ACTIVATION> + ({{out_name[0]}}, {{in_name[0]}}, {{in_name[1]}}); +{% include "./_save_outputs.jinja" %} +{% endfilter %} diff --git a/aidge_export_cpp/templates/kernel_forward/div_forward.jinja b/aidge_export_cpp/templates/kernel_forward/div_forward.jinja new file mode 100644 index 0000000..4b79357 --- /dev/null +++ b/aidge_export_cpp/templates/kernel_forward/div_forward.jinja @@ -0,0 +1,14 @@ +{% filter indent(width=4, first=False) %} +{% include "./_mem_offset.jinja" %} +div_forward<{{name|upper}}_NB_ELTS, + {{name|upper}}_INPUT_A_DIMS, + {{name|upper}}_INPUT_B_DIMS, + {{name|upper}}_OUTPUT_DIMS, + {{name|upper}}_SIZE_DIM_IN_A, + {{name|upper}}_SIZE_DIM_IN_B, + {{name|upper}}_SIZE_DIM_OUT, + {{name|upper}}_OUT_SIZE, + {{name|upper}}_ACTIVATION> + ({{out_name[0]}}, {{in_name[0]}}, {{in_name[1]}}); +{% include "./_save_outputs.jinja" %} +{% endfilter %} diff --git a/aidge_export_cpp/templates/kernel_forward/mul_forward.jinja b/aidge_export_cpp/templates/kernel_forward/mul_forward.jinja new file mode 100644 index 0000000..9a7170b --- /dev/null +++ b/aidge_export_cpp/templates/kernel_forward/mul_forward.jinja @@ -0,0 +1,14 @@ +{% filter indent(width=4, first=False) %} +{% include "./_mem_offset.jinja" %} +mul_forward<{{name|upper}}_NB_ELTS, + {{name|upper}}_INPUT_A_DIMS, + {{name|upper}}_INPUT_B_DIMS, + {{name|upper}}_OUTPUT_DIMS, + {{name|upper}}_SIZE_DIM_IN_A, + {{name|upper}}_SIZE_DIM_IN_B, + {{name|upper}}_SIZE_DIM_OUT, + {{name|upper}}_OUT_SIZE, + {{name|upper}}_ACTIVATION> + ({{out_name[0]}}, {{in_name[0]}}, {{in_name[1]}}); +{% include "./_save_outputs.jinja" %} +{% endfilter %} diff --git a/aidge_export_cpp/templates/kernel_forward/sub_forward.jinja b/aidge_export_cpp/templates/kernel_forward/sub_forward.jinja new file mode 100644 index 0000000..51b47a8 --- /dev/null +++ b/aidge_export_cpp/templates/kernel_forward/sub_forward.jinja @@ -0,0 +1,14 @@ +{% filter indent(width=4, first=False) %} +{% include "./_mem_offset.jinja" %} +sub_forward<{{name|upper}}_NB_ELTS, + {{name|upper}}_INPUT_A_DIMS, + {{name|upper}}_INPUT_B_DIMS, + {{name|upper}}_OUTPUT_DIMS, + {{name|upper}}_SIZE_DIM_IN_A, + {{name|upper}}_SIZE_DIM_IN_B, + {{name|upper}}_SIZE_DIM_OUT, + {{name|upper}}_OUT_SIZE, + {{name|upper}}_ACTIVATION> + ({{out_name[0]}}, {{in_name[0]}}, {{in_name[1]}}); +{% include "./_save_outputs.jinja" %} +{% endfilter %} -- GitLab From a4dd75a361f3d98565287160a44426ebe1fa8515 Mon Sep 17 00:00:00 2001 From: Matthew Newson <matthew.newson@cea.fr> Date: Tue, 4 Mar 2025 12:29:22 +0000 Subject: [PATCH 10/22] Debug for batchnorm and for matmul and transpose able to broadcast --- 
aidge_export_cpp/kernels/batchnorm.hpp | 32 +++++--- aidge_export_cpp/kernels/matmul.hpp | 108 +++++++++++++++++++++---- aidge_export_cpp/kernels/transpose.hpp | 3 +- 3 files changed, 114 insertions(+), 29 deletions(-) diff --git a/aidge_export_cpp/kernels/batchnorm.hpp b/aidge_export_cpp/kernels/batchnorm.hpp index 740ea21..0ed5080 100644 --- a/aidge_export_cpp/kernels/batchnorm.hpp +++ b/aidge_export_cpp/kernels/batchnorm.hpp @@ -3,7 +3,9 @@ #include "network/typedefs.hpp" #include "kernels/rescaling.hpp" +#include "kernels/activation.hpp" #include <math.h> +#include <iostream> // WARNING: this kernel only works for 32-bits floating point values @@ -12,30 +14,34 @@ template<int NB_OUTPUTS, ActivationFunction_T ACTIVATION, typename Input_T, typename Output_T, typename Param_T> -__attribute__((always_inline)) inline +__attribute__((always_inline)) inline void batchnorm_forward ( const Input_T* __restrict inputs, Output_T* __restrict outputs, + const Param_T* __restrict scales, const Param_T* __restrict biases, - const Param_T* __restrict variances, const Param_T* __restrict means, - const Param_T* __restrict scales, + const Param_T* __restrict variances, const double epsilon) { - for (unsigned int output = 0; output < NB_OUTPUTS; ++output) { - const Output_T var = sqrt(variances[output] + epsilon); - for (int oy = 0; oy < OUTPUTS_HEIGHT; ++oy) { - for (int ox = 0; ox < OUTPUTS_WIDTH; ++ox) { - const int outputOffset = OUTPUTS_HEIGHT * oy + ox; - - const Output_T normalized = (inputs[outputOffset + output] - means[output]) / var; - const Output_T sAs = scales[output] * normalized + biases[output]; - outputs[outputOffset + output] = sat<Output_T>(sAs, output, ACTIVATION, NoScaling); - } + int featureMapSize = OUTPUTS_HEIGHT * OUTPUTS_WIDTH; + #pragma omp parallel for + for (int ch = 0; ch < NB_OUTPUTS; ++ch) { + int ioIndex = ch * featureMapSize; + #pragma omp parallel for + for (int i = ioIndex; i < ioIndex + featureMapSize; i++) { + outputs[i] = biases[ch]; + } + float var = sqrt(variances[ch] + epsilon); + #pragma omp parallel for + for (int feature = 0; feature < featureMapSize; ++feature) { + outputs[ioIndex + feature] += (scales[ch] * (inputs[ioIndex + feature] - means[ch]) / var); } } + } + #endif // __AIDGE_EXPORT_CPP_KERNELS_BATCHNORM__ diff --git a/aidge_export_cpp/kernels/matmul.hpp b/aidge_export_cpp/kernels/matmul.hpp index 4500993..b284214 100644 --- a/aidge_export_cpp/kernels/matmul.hpp +++ b/aidge_export_cpp/kernels/matmul.hpp @@ -3,31 +3,109 @@ #include "network/typedefs.hpp" #include "kernels/activation.hpp" - +#include <iostream> // Generic function for matmul and activation -template<int M, - int K, - int N, +template<int INPUT_A_DIMS[], int INPUT_B_DIMS[], int OUTPUT_DIMS[], + int _SIZE_DIM_IN_A, int _SIZE_DIM_IN_B, int SIZE_DIM_OUT, ActivationFunction_T ACTIVATION, - typename Input_T, typename Output_T, - typename Rescaling_T> + typename Input_T, typename Output_T> __attribute__((always_inline)) inline void matmul_forward ( const Input_T* __restrict inputs1, const Input_T* __restrict inputs2, - Output_T* __restrict outputs, - const Rescaling_T& __restrict rescaling) + Output_T* __restrict outputs) { - for (int m = 0; m < M; ++m) { - for (int n = 0; n < N; ++n) { - Output_T sum = Output_T(0); - for (int k = 0; k < K; ++k) { - sum += inputs1[K*m + k] * inputs2[N*k + n]; + + //initialize arrays storing broadcasted(or not) dims + int ndim_a[SIZE_DIM_OUT]; + int ndim_b[SIZE_DIM_OUT]; + if ( _SIZE_DIM_IN_A == 1){ + ndim_a[0] = 1; + ndim_a[1] =INPUT_A_DIMS[0]; + } + if ( 
_SIZE_DIM_IN_B == 1){ + ndim_b[0] =INPUT_B_DIMS[0]; + ndim_b[1] = 1; + } + + for (int i= 0; i<SIZE_DIM_OUT; i++){ + int idx = SIZE_DIM_OUT-_SIZE_DIM_IN_A; + ndim_a[i] = (i< idx) ? 1 :INPUT_A_DIMS[i-idx]; + } + for (int i= 0; i<SIZE_DIM_OUT; i++){ + int idx = SIZE_DIM_OUT-_SIZE_DIM_IN_B; + ndim_b[i] = (i< idx) ? 1 :INPUT_B_DIMS[i-idx]; + } + + // initialize strides to iterate through data because of broadcasting + int stride_post0[SIZE_DIM_OUT-2] ; + int stride_post1[SIZE_DIM_OUT-2] ; + int stride_step0[SIZE_DIM_OUT-2] ; + int stride_step1[SIZE_DIM_OUT-2] ; + if (SIZE_DIM_OUT > 2){ + stride_post0[SIZE_DIM_OUT - 3] = 1; + stride_post1[SIZE_DIM_OUT - 3] = 1; + for (int i = SIZE_DIM_OUT-4; i != -1; --i) { + stride_post0[i] = stride_post0[i+1]*ndim_a[i+1]; + stride_post1[i] = stride_post1[i+1]*ndim_b[i+1]; + } + for (int i = 0; i < SIZE_DIM_OUT-2; ++i) { + stride_step0[i] = (ndim_a[i] == 1) ? 1 - stride_post0[i] : 1; + stride_step1[i] = (ndim_b[i] == 1) ? 1 - stride_post1[i] : 1; + } + + } + + + // if _SIZE_DIM_IN_B == _SIZE_DIM_IN_A, then _SIZE_DIM_IN_A == SIZE_DIM_OUT == _SIZE_DIM_IN_B; + // else it will be broadcasted to the correct dims + + int nbMatrices = 1; + for(int i = SIZE_DIM_OUT -3; i>=0; --i){ + nbMatrices *= OUTPUT_DIMS[i]; + } + int dim = SIZE_DIM_OUT -3; + + + int offsetIn0 = 0; + int offsetIn1 = 0; + int offsetOut = 0; + const int n = ndim_a[SIZE_DIM_OUT - 2]; + const int k = ndim_a[SIZE_DIM_OUT - 1]; + const int m = ndim_b[SIZE_DIM_OUT - 1]; + const int matrix0Size = n*k; + const int matrix1Size = k*m; + const int matrixOutSize = n*m; + + for(int stack = 0; stack < nbMatrices;){ + + for (int i = 0; i < n; ++i) { + + for (int j = 0; j < m; ++j) { + float sum = 0; + + for (int l = 0; l < k; ++l) { + sum += (inputs1[ offsetIn0*matrix0Size + i*k + l] * inputs2[offsetIn1*matrix1Size + l*m + j]); + } + outputs[ offsetOut*matrixOutSize + i*m + j] = sum; + } + } + + if (++stack < nbMatrices) { + int tmp_stack = stack; + while(tmp_stack % OUTPUT_DIMS[dim] == 0) { + tmp_stack /= OUTPUT_DIMS[dim]; + dim--; } - outputs[N*m + n] = activation_forward_value<Output_T>(sum, 0/*not applicable*/, ACTIVATION, rescaling); + offsetIn0 += stride_step0[dim]; + offsetIn1 += stride_step1[dim]; + ++offsetOut; + dim = SIZE_DIM_OUT -3; } + } + } -#endif // __AIDGE_EXPORT_CPP_KERNELS_MATMUL__ +#endif // __AIDGE_EXPORT_CPP_KERNELS_MATMUL__ \ No newline at end of file diff --git a/aidge_export_cpp/kernels/transpose.hpp b/aidge_export_cpp/kernels/transpose.hpp index a6ddce2..2a89e3c 100644 --- a/aidge_export_cpp/kernels/transpose.hpp +++ b/aidge_export_cpp/kernels/transpose.hpp @@ -27,9 +27,10 @@ void transpose_forward ( int indices[SIZE_OUTPUT_DIMS]; for (int i = 0; i<SIZE_OUTPUT_DIMS;++i){indices[i] = 0;} - + #pragma omp parallel for for (int i = 0; i < SIZE; ++i) { int idx = 0; + #pragma omp parallel for for (int j = SIZE_OUTPUT_DIMS -1; j >=0; --j) { idx += indices[PERM[j]] * newStrides[j]; } -- GitLab From ace17ef9620f1ba1843f795c349be513ac47d21c Mon Sep 17 00:00:00 2001 From: Matthew Newson <matthew.newson@cea.fr> Date: Wed, 5 Mar 2025 14:45:09 +0000 Subject: [PATCH 11/22] Jinja files matmul modified to accomodate for broadcasting modificatio of matmul --- .../configuration/matmul_config.jinja | 21 +++++++++++++++---- .../kernel_forward/matmul_forward.jinja | 15 +++++++++---- 2 files changed, 28 insertions(+), 8 deletions(-) diff --git a/aidge_export_cpp/templates/configuration/matmul_config.jinja b/aidge_export_cpp/templates/configuration/matmul_config.jinja index fece988..0c28e06 100644 --- 
a/aidge_export_cpp/templates/configuration/matmul_config.jinja +++ b/aidge_export_cpp/templates/configuration/matmul_config.jinja @@ -1,13 +1,26 @@ {#- For name header -#} #ifndef {{ name|upper }}_LAYER_H #define {{ name|upper }}_LAYER_H +#include "kernels/rescaling.hpp" {# For layer configuration -#} -#define {{ name|upper }}_M {{ inputs_dims[0][0] }} -#define {{ name|upper }}_K {{ inputs_dims[0][1] }} -#define {{ name|upper }}_N {{ inputs_dims[1][1] }} +{% include "./_def_io.jinja" %} +{% include "./_meminfo.jinja" %} +#define {{ name|upper }}_B {{ in_dims[0][0]}} +#define {{ name|upper }}_C {{ in_chan[0]}} +#define {{ name|upper }}_M {{ in_height[0]}} +#define {{ name|upper }}_K {{ in_width[0] }} +#define {{ name|upper }}_N {{ out_width[0] }} + +#define {{name|upper}}_SIZE_DIM_IN_A {{in_dims[0]|length}} +#define {{name|upper}}_SIZE_DIM_IN_B {{in_dims[1]|length}} +#define {{name|upper}}_SIZE_DIM_OUT {{out_dims[0]|length}} + +int {{name|upper}}_OUTPUT_DIMS[] = { {{ out_dims[0]|join(", ") }} }; +int {{name|upper}}_INPUT_A_DIMS[] = { {{ in_dims[0]|join(", ") }} }; +int {{name|upper}}_INPUT_B_DIMS[] = { {{ in_dims[1]|join(", ") }} }; + #define {{ name|upper }}_ACTIVATION {{ activation }} -static const {{ rescaling }} {{ name|upper }}_RESCALING = {}; {#- Calculate sizes #} diff --git a/aidge_export_cpp/templates/kernel_forward/matmul_forward.jinja b/aidge_export_cpp/templates/kernel_forward/matmul_forward.jinja index ce80ffd..4ed0264 100644 --- a/aidge_export_cpp/templates/kernel_forward/matmul_forward.jinja +++ b/aidge_export_cpp/templates/kernel_forward/matmul_forward.jinja @@ -1,5 +1,12 @@ -matmul_forward<{{name|upper}}_M, - {{name|upper}}_K, - {{name|upper}}_N, +{% filter indent(width=4, first=False) %} +{% include "./_mem_offset.jinja" %} +matmul_forward<{{name|upper}}_INPUT_A_DIMS, + {{name|upper}}_INPUT_B_DIMS, + {{name|upper}}_OUTPUT_DIMS, + {{name|upper}}_SIZE_DIM_IN_A, + {{name|upper}}_SIZE_DIM_IN_B, + {{name|upper}}_SIZE_DIM_OUT, {{name|upper}}_ACTIVATION> - ({{inputs1_name}}, {{inputs2_name}}, {{outputs_name}}, {{name|upper}}_RESCALING); \ No newline at end of file + ({{in_name[0]}}, {{in_name[1]}}, {{out_name[0]}}); +{% include "./_save_outputs.jinja" %} +{% endfilter %} \ No newline at end of file -- GitLab From 8a315efbf4986f9903d163216bdac188ee2faeec Mon Sep 17 00:00:00 2001 From: Matthew Newson <matthew.newson@cea.fr> Date: Tue, 25 Mar 2025 08:37:11 +0000 Subject: [PATCH 12/22] Add operator helpers for transpose, erf, matmul and batchnorm --- aidge_export_cpp/operators.py | 166 ++++++++++++++++++++++++++++------ 1 file changed, 138 insertions(+), 28 deletions(-) diff --git a/aidge_export_cpp/operators.py b/aidge_export_cpp/operators.py index f04dbb3..16890e1 100644 --- a/aidge_export_cpp/operators.py +++ b/aidge_export_cpp/operators.py @@ -4,27 +4,14 @@ from pathlib import Path import aidge_core from aidge_core.export_utils import ExportNode, ExportNodeCpp, generate_file from aidge_export_cpp.utils import ROOT +from aidge_export_cpp.utils.converter import numpy_dtype2ctype from aidge_export_cpp import ExportLibCpp ############################################## ############## Export functions ############## ############################################## -def numpy_dtype2ctype(dtype): - if dtype == np.int8: - return "int8_t" - elif dtype == np.int16: - return "int16_t" - elif dtype == np.int32: - return "int32_t" - elif dtype == np.int64: - return "int64_t" - elif dtype == np.float32: - return "float" - elif dtype == np.float64: - return "double" - # Add more dtype mappings as 
needed - else: - raise ValueError(f"Unsupported {dtype} dtype") + + def export_params(name: str, array: np.ndarray, @@ -103,6 +90,7 @@ class ConvCPP(ExportNodeCpp): self.attributes["padding"] = [0, 0] self.attributes["activation"] = "Linear" self.attributes["rescaling"] = "NoScaling" + self.attributes["groups"] = 1 self.config_template = str( ROOT / "templates" / "configuration" / "convolution_config.jinja") self.forward_template = str( @@ -144,21 +132,53 @@ class PaddedConvCPP(ExportNodeCpp): str(ROOT / "kernels" / "activation.hpp"), str(ROOT / "kernels" / "rescaling.hpp") ] + self.attributes["groups"] = 1 + +@ExportLibCpp.register_metaop("PaddedConvDepthWise2D", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.float32))) +class PaddedConvDepthWiseCPP(ExportNodeCpp): + def __init__(self, node, mem_info): + super().__init__(node, mem_info) + # TODO find a way to retrive attr for meta op + for n in self.operator.get_micro_graph().get_nodes(): + if n.type() == "Pad2D": + self.attributes["padding"] = n.get_operator( + ).attr.begin_end_borders + if n.type() == "ConvDepthWise2D": + self.attributes["kernel_dims"] = n.get_operator( + ).attr.kernel_dims + self.attributes["stride_dims"] = n.get_operator( + ).attr.stride_dims + self.attributes["dilation_dims"] = n.get_operator( + ).attr.dilation_dims + self.attributes["activation"] = "Linear" + self.attributes["rescaling"] = "NoScaling" + self.config_template = str( + ROOT / "templates" / "configuration" / "convolution_config.jinja") + self.forward_template = str( + ROOT / "templates" / "kernel_forward" / "convolution_forward.jinja") + self.include_list = [] + self.kernels_to_copy = [ + str(ROOT / "kernels" / "convolution.hpp"), + str(ROOT / "kernels" / "macs.hpp"), + str(ROOT / "kernels" / "activation.hpp"), + str(ROOT / "kernels" / "rescaling.hpp") + ] + self.attributes["groups"] = self.attributes["out_chan"][0] @ExportLibCpp.register("Add", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.float32))) class AddCPP(ExportNodeCpp): def __init__(self, node, mem_info): super().__init__(node, mem_info) - self.attributes["elemwise_op"] = "Add" + self.attributes["add_op"] = "Add" self.attributes["activation"] = "Linear" self.attributes["rescaling"] = "NoScaling" self.config_template = str( - ROOT / "templates" / "configuration" / "elemwise_config.jinja") + ROOT / "templates" / "configuration" / "add_config.jinja") self.forward_template = str( - ROOT / "templates" / "kernel_forward" / "elemwise_forward.jinja") + ROOT / "templates" / "kernel_forward" / "add_forward.jinja") self.include_list = [] self.kernels_to_copy = [ - str(ROOT / "kernels" / "elemwise.hpp"), + str(ROOT / "kernels" / "add.hpp"), str(ROOT / "kernels" / "activation.hpp"), str(ROOT / "kernels" / "rescaling.hpp") ] @@ -167,16 +187,16 @@ class AddCPP(ExportNodeCpp): class SubCPP(ExportNodeCpp): def __init__(self, node, mem_info): super().__init__(node, mem_info) - self.attributes["elemwise_op"] = "Sub" + self.attributes["sub_op"] = "Sub" self.attributes["activation"] = "Linear" self.attributes["rescaling"] = "NoScaling" self.config_template = str( - ROOT / "templates" / "configuration" / "elemwise_config.jinja") + ROOT / "templates" / "configuration" / "sub_config.jinja") self.forward_template = str( - ROOT / "templates" / "kernel_forward" / "elemwise_forward.jinja") + ROOT / "templates" / "kernel_forward" / "sub_forward.jinja") self.include_list = [] self.kernels_to_copy = [ - str(ROOT / "kernels" / "elemwise.hpp"), + str(ROOT / "kernels" / "sub.hpp"), str(ROOT / "kernels" / 
"activation.hpp"), str(ROOT / "kernels" / "rescaling.hpp") ] @@ -186,20 +206,39 @@ class SubCPP(ExportNodeCpp): class MulCPP(ExportNodeCpp): def __init__(self, node, mem_info): super().__init__(node, mem_info) - self.attributes["elemwise_op"] = "Mul" + self.attributes["mul_op"] = "Mul" + self.attributes["activation"] = "Linear" + self.attributes["rescaling"] = "NoScaling" + self.config_template = str( + ROOT / "templates" / "configuration" / "mul_config.jinja") + self.forward_template = str( + ROOT / "templates" / "kernel_forward" / "mul_forward.jinja") + self.include_list = [] + self.kernels_to_copy = [ + str(ROOT / "kernels" / "mul.hpp"), + str(ROOT / "kernels" / "activation.hpp"), + str(ROOT / "kernels" / "rescaling.hpp") + ] + +@ExportLibCpp.register("Div", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.float32))) +class DivCPP(ExportNodeCpp): + def __init__(self, node, mem_info): + super().__init__(node, mem_info) + self.attributes["div_op"] = "Div" self.attributes["activation"] = "Linear" self.attributes["rescaling"] = "NoScaling" self.config_template = str( - ROOT / "templates" / "configuration" / "elemwise_config.jinja") + ROOT / "templates" / "configuration" / "div_config.jinja") self.forward_template = str( - ROOT / "templates" / "kernel_forward" / "elemwise_forward.jinja") + ROOT / "templates" / "kernel_forward" / "div_forward.jinja") self.include_list = [] self.kernels_to_copy = [ - str(ROOT / "kernels" / "elemwise.hpp"), + str(ROOT / "kernels" / "div.hpp"), str(ROOT / "kernels" / "activation.hpp"), str(ROOT / "kernels" / "rescaling.hpp") ] + @ExportLibCpp.register("MaxPooling2D", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.float32))) class MaxPoolCPP(ExportNodeCpp): def __init__(self, node, mem_info): @@ -295,3 +334,74 @@ class FcCPP(ExportNodeCpp): str(ROOT / "kernels" / "activation.hpp"), str(ROOT / "kernels" / "rescaling.hpp") ] + +@ExportLibCpp.register("MatMul", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.float32))) +class MatMulCPP(ExportNodeCpp): + def __init__(self, node, mem_info): + super().__init__(node, mem_info) + self.attributes["activation"] = "Linear" + self.attributes["rescaling"] = "NoScaling" + self.config_template = str( + ROOT / "templates" / "configuration" / "matmul_config.jinja") + self.forward_template = str( + ROOT / "templates" / "kernel_forward" / "matmul_forward.jinja") + self.include_list = [] + self.kernels_to_copy = [ + str(ROOT / "kernels" / "matmul.hpp"), + str(ROOT / "kernels" / "activation.hpp"), + str(ROOT / "kernels" / "rescaling.hpp") + ] + +@ExportLibCpp.register("Erf", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.float32))) +class ErfCPP(ExportNodeCpp): + def __init__(self, node, mem_info): + super().__init__(node, mem_info) + self.attributes["activation"] = "Linear" + self.attributes["rescaling"] = "NoScaling" + self.config_template = str( + ROOT / "templates" / "configuration" / "erf_config.jinja") + self.forward_template = str( + ROOT / "templates" / "kernel_forward" / "erf_forward.jinja") + self.include_list = [] + self.kernels_to_copy = [ + str(ROOT / "kernels" / "erf.hpp"), + str(ROOT / "kernels" / "activation.hpp"), + str(ROOT / "kernels" / "rescaling.hpp") + ] + +@ExportLibCpp.register("Transpose", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.float32))) +class TransposeCPP(ExportNodeCpp): + def __init__(self, node, mem_info): + super().__init__(node, mem_info) + # Get parameter permutation from transpose + self.attributes["output_dims_order"] = 
self.operator.attr.get_attr("output_dims_order") + + self.attributes["activation"] = "Linear" + self.attributes["rescaling"] = "NoScaling" + self.config_template = str( + ROOT / "templates" / "configuration" / "transpose_config.jinja") + self.forward_template = str( + ROOT / "templates" / "kernel_forward" / "transpose_forward.jinja") + self.include_list = [] + self.kernels_to_copy = [ + str(ROOT / "kernels" / "transpose.hpp"), + str(ROOT / "kernels" / "activation.hpp"), + str(ROOT / "kernels" / "rescaling.hpp") + ] + +@ExportLibCpp.register("BatchNorm2D", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.float32))) +class BatchNorm2DCPP(ExportNodeCpp): + def __init__(self, node, mem_info): + super().__init__(node, mem_info) + self.attributes["activation"] = "Linear" + self.attributes["rescaling"] = "NoScaling" + self.config_template = str( + ROOT / "templates" / "configuration" / "batchnorm_config.jinja") + self.forward_template = str( + ROOT / "templates" / "kernel_forward" / "batchnorm_forward.jinja") + self.include_list = [] + self.kernels_to_copy = [ + str(ROOT / "kernels" / "batchnorm.hpp"), + str(ROOT / "kernels" / "activation.hpp"), + str(ROOT / "kernels" / "rescaling.hpp") + ] \ No newline at end of file -- GitLab From f64c927a746af341cd70a75937ced5992cb4827a Mon Sep 17 00:00:00 2001 From: Matthew Newson <matthew.newson@cea.fr> Date: Thu, 27 Mar 2025 10:26:36 +0000 Subject: [PATCH 13/22] Added new parameter groups --- aidge_export_cpp/kernels/convolution.hpp | 149 +++++++++-------------- 1 file changed, 56 insertions(+), 93 deletions(-) diff --git a/aidge_export_cpp/kernels/convolution.hpp b/aidge_export_cpp/kernels/convolution.hpp index efc7ee7..b623369 100644 --- a/aidge_export_cpp/kernels/convolution.hpp +++ b/aidge_export_cpp/kernels/convolution.hpp @@ -6,114 +6,77 @@ #include "network/utils.hpp" #include "kernels/macs.hpp" #include "kernels/activation.hpp" +#include <omp.h> +#include <iostream> + +// Weights index en NHWC +constexpr int inds_pos(int n, int c, int h, int w, int N, int C, int H, int W) { + return n * (H * W * C) + + h * (W * C) + + w * C + + c; +} +// Image index in CHW +constexpr int inds_pos(int c, int h, int w, int C, int H, int W) { + return c * (H * W) + + h * W + + w; +} -template<int NB_CHANNELS, - int CHANNELS_HEIGHT, int CHANNELS_WIDTH, - int NB_OUTPUTS, - int OUTPUTS_HEIGHT, int OUTPUTS_WIDTH, +template<int NB_CHANNELS, + int IN_HEIGHT, int IN_WIDTH, + int NB_OUTPUTS, int GROUPS, + int OUT_HEIGHT, int OUT_WIDTH, int PADDING_Y, int PADDING_X, int STRIDE_Y, int STRIDE_X, int DILATION_Y, int DILATION_X, int KERNEL_HEIGHT, int KERNEL_WIDTH, ActivationFunction_T ACTIVATION, - typename Input_T, typename Output_T, + typename Input_T, typename Output_T, typename Weight_T, typename Bias_T, typename Rescaling_T> -__attribute__((always_inline)) inline +__attribute__((always_inline)) inline void convolution_forward( - const Input_T* __restrict inputs, + const Input_T* __restrict inputs, Output_T* __restrict outputs, const Weight_T* __restrict weights, const Bias_T* __restrict biases, const Rescaling_T& __restrict rescaling) { - constexpr int DILATED_KERNEL_HEIGHT - = KERNEL_HEIGHT + (DILATION_Y - 1) * (KERNEL_HEIGHT - 1); - - constexpr int DILATED_KERNEL_WIDTH - = KERNEL_WIDTH + (DILATION_X - 1) * (KERNEL_WIDTH - 1); - - constexpr int OUTPUTS_HEIGHT_NOPAD - = (CHANNELS_HEIGHT - DILATION_Y * (KERNEL_HEIGHT - 1) - 1 + STRIDE_Y) / STRIDE_Y; - constexpr int OUTPUTS_WIDTH_NOPAD - = (CHANNELS_WIDTH - DILATION_X * (KERNEL_WIDTH - 1) - 1 + STRIDE_X) / STRIDE_X; - - 
for (int oy = 0; oy < OUTPUTS_HEIGHT; ++oy) { - const int syMin = (PADDING_Y == 0) ? 0 - : max(PADDING_Y - (oy * STRIDE_Y), 0); - const int syMax = (PADDING_Y == 0 - && OUTPUTS_HEIGHT == OUTPUTS_HEIGHT_NOPAD) ? DILATED_KERNEL_HEIGHT - : clamp(CHANNELS_HEIGHT + PADDING_Y - (oy * STRIDE_Y), - 0, DILATED_KERNEL_HEIGHT); - const int iy = (oy * STRIDE_Y) - PADDING_Y; - -#pragma omp parallel for collapse(2) - for (int ox = 0; ox < OUTPUTS_WIDTH; ++ox) { - for (int output = 0; output < NB_OUTPUTS; ++output) { - // moved to inner loop for collapsing --> - const int sxMin = (PADDING_X == 0) ? 0 - : max(PADDING_X - (ox * STRIDE_X), 0); - const int sxMax = (PADDING_X == 0 - && OUTPUTS_WIDTH == OUTPUTS_WIDTH_NOPAD) - ? DILATED_KERNEL_WIDTH - : clamp(CHANNELS_WIDTH + PADDING_X - (ox * STRIDE_X), - 0, DILATED_KERNEL_WIDTH); - const int ix = (ox * STRIDE_X) - PADDING_X; - - const int oPos = (ox + OUTPUTS_WIDTH * oy); - int oOffset = NB_OUTPUTS * oPos; - - // <-- - - Bias_T weightedSum = biases[output]; - for (int sy = 0; sy < KERNEL_HEIGHT; ++sy) { - if ((PADDING_Y != 0 - || OUTPUTS_HEIGHT != OUTPUTS_HEIGHT_NOPAD) - && ((sy*DILATION_Y < syMin) || (sy*DILATION_Y >= syMax))) - { - continue; - } - - const int iPos = ix + CHANNELS_WIDTH * (iy + sy*DILATION_Y); - int iOffset = NB_CHANNELS * iPos; - - const int wOffset = (output*KERNEL_HEIGHT + sy) * KERNEL_WIDTH * NB_CHANNELS; - - if (DILATION_X == 1 && ((PADDING_X == 0 && OUTPUTS_WIDTH == OUTPUTS_WIDTH_NOPAD) - || sxMax - sxMin == KERNEL_WIDTH)) - { - macsOnRange<KERNEL_WIDTH * NB_CHANNELS>( - inputs + iOffset, - weights + wOffset, - weightedSum); - } - else { - for (int sx = 0; sx < KERNEL_WIDTH; ++sx) { - if ((PADDING_X != 0 - || OUTPUTS_WIDTH != OUTPUTS_WIDTH_NOPAD) - && ((sx*DILATION_X < sxMin) || (sx*DILATION_X >= sxMax))) - { - continue; - } - - int iOffsetInRange = iOffset - + sx * DILATION_X * NB_CHANNELS; - - macsOnRange<NB_CHANNELS>( - // same input line so no wrapping can occur - inputs + iOffsetInRange, - weights + wOffset + sx * NB_CHANNELS, - weightedSum); - } - } - } - - outputs[oOffset + output] = activation_forward_value<Output_T>(weightedSum, output, ACTIVATION, rescaling); - } - } + if (NB_CHANNELS % GROUPS != 0 || NB_OUTPUTS % GROUPS != 0) { + throw std::invalid_argument("Groups must be a divisor of both NB_CHANNELS and NB_OUTPUTS!"); + } + + int c_in_g = NB_CHANNELS / GROUPS; + int c_out_g = NB_OUTPUTS / GROUPS; + #pragma omp parallel for + for (int oc = 0; oc < NB_OUTPUTS; oc++) { + int g_oc = oc / c_out_g; + #pragma omp parallel for + for (int i = 0; i < OUT_HEIGHT; ++i) { + #pragma omp parallel for + for (int j = 0; j < OUT_WIDTH; ++j) { + Output_T value = biases[oc]; + #pragma omp parallel for + for (int ic = g_oc * c_in_g; ic < (g_oc + 1) * c_in_g; ++ic) { + #pragma omp parallel for + for (int m = 0; m < KERNEL_HEIGHT; ++m) { + #pragma omp parallel for + for (int n = 0; n < KERNEL_WIDTH; ++n) { + int i_p = i * STRIDE_X - PADDING_X + m * DILATION_X; + int j_p = j * STRIDE_Y - PADDING_Y + n * DILATION_Y; + if (i_p >= 0 && i_p < IN_HEIGHT && j_p >= 0 && j_p < IN_WIDTH) { + value += weights[inds_pos(oc, ic % c_in_g, m, n, NB_OUTPUTS, c_in_g, KERNEL_HEIGHT, KERNEL_WIDTH)] * + inputs[inds_pos(ic, i_p, j_p, NB_CHANNELS, IN_HEIGHT, IN_WIDTH)]; + } + } + } + } + outputs[inds_pos(oc, i, j, NB_OUTPUTS, OUT_HEIGHT, OUT_WIDTH)] = activation_forward_value<Output_T>(value, oc, ACTIVATION, rescaling); + } + } } } - -#endif // __AIDGE_EXPORT_CPP_KERNELS_CONVOLUTION__ +#endif // __AIDGE_EXPORT_CPP_KERNELS_CONVOLUTION__ \ No newline at end of file -- 
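The grouped kernel introduced above indexes weights as (output channel, kernel row, kernel column, per-group input channel) and feature maps as (channel, row, column); with GROUPS equal to the channel count the per-group input range collapses to a single channel, which is the depthwise case. A minimal standalone sketch of those index formulas follows; the helper names and layer sizes here are illustrative only and are not taken from an actual export.

#include <cstdio>

// Same formulas as the inds_pos() helpers added by this patch:
// weights laid out as [out_ch][kernel_h][kernel_w][in_ch_per_group],
// feature maps laid out as [ch][h][w].
constexpr int w_pos(int n, int c, int h, int w, int C, int H, int W) {
    return n * (H * W * C) + h * (W * C) + w * C + c;
}
constexpr int x_pos(int c, int h, int w, int H, int W) {
    return c * (H * W) + h * W + w;
}

int main() {
    // Hypothetical depthwise 3x3 layer: 4 channels, 4 outputs, GROUPS = 4,
    // so each group sees a single input channel (c_in_g == 1).
    constexpr int kKernel = 3, kInH = 8, kInW = 8, kCinPerGroup = 1;
    // Weight of output channel 2 at kernel tap (1, 1).
    std::printf("weight index: %d\n", w_pos(2, 0, 1, 1, kCinPerGroup, kKernel, kKernel));
    // Input pixel (channel 2, row 5, col 3) in CHW layout.
    std::printf("input  index: %d\n", x_pos(2, 5, 3, kInH, kInW));
    return 0;
}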
GitLab From 05c867bef62d9508b7ef1eae0c33edec1718b887 Mon Sep 17 00:00:00 2001 From: Matthew Newson <matthew.newson@cea.fr> Date: Fri, 28 Mar 2025 13:12:16 +0000 Subject: [PATCH 14/22] Cleaned file --- aidge_export_cpp/kernels/elemwise.hpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/aidge_export_cpp/kernels/elemwise.hpp b/aidge_export_cpp/kernels/elemwise.hpp index 6f73bc0..1b45c59 100644 --- a/aidge_export_cpp/kernels/elemwise.hpp +++ b/aidge_export_cpp/kernels/elemwise.hpp @@ -26,7 +26,6 @@ void elemwise_forward ( switch (ELEM_OP) { case Add: { - // std::cout <<"Add " << std::endl; int ndim_a[SIZE_DIM_OUT]; int ndim_b[SIZE_DIM_OUT]; for (int i= 0; i<SIZE_DIM_OUT; i++){ @@ -182,7 +181,6 @@ void elemwise_forward ( break; } case Mul: { - // std::cout<< "MUL " << std::endl; int ndim_a[SIZE_DIM_OUT]; int ndim_b[SIZE_DIM_OUT]; @@ -258,8 +256,6 @@ void elemwise_forward ( break; } case Div: { - - std::cout<< "DIV " << std::endl; int ndim_a[SIZE_DIM_OUT]; int ndim_b[SIZE_DIM_OUT]; for (int i= 0; i<SIZE_DIM_OUT; i++){ -- GitLab From 81d728acbb0b50c07b5c86dff61c87d250b97113 Mon Sep 17 00:00:00 2001 From: Matthew Newson <matthew.newson@cea.fr> Date: Fri, 28 Mar 2025 13:48:54 +0000 Subject: [PATCH 15/22] New transpose method used for exporting ConvNeXt onnx --- aidge_export_cpp/kernels/transpose_diff.hpp | 57 +++++++++++++++++++ .../configuration/transpose_config.jinja | 4 +- .../kernel_forward/transpose_forward.jinja | 3 +- 3 files changed, 59 insertions(+), 5 deletions(-) create mode 100644 aidge_export_cpp/kernels/transpose_diff.hpp diff --git a/aidge_export_cpp/kernels/transpose_diff.hpp b/aidge_export_cpp/kernels/transpose_diff.hpp new file mode 100644 index 0000000..298e5d9 --- /dev/null +++ b/aidge_export_cpp/kernels/transpose_diff.hpp @@ -0,0 +1,57 @@ +#ifndef __AIDGE_EXPORT_CPP_KERNELS_TRANSPOSE__ +#define __AIDGE_EXPORT_CPP_KERNELS_TRANSPOSE__ + +#include "network/typedefs.hpp" +#include <cstring> +#include <cstdio> +#include <iostream> + +using namespace std; + +template< int INPUT_DIMS[], int PERM[], int OUTPUT_DIMS[], + int SIZE_OUTPUT_DIMS, int SIZE, + typename Input_T, typename Output_T> +__attribute__((always_inline)) inline +void transpose_forward ( + const Input_T* __restrict inputs, + Output_T* __restrict outputs) + { + + int newStrides[SIZE_OUTPUT_DIMS]; + for (int i = 0; i<SIZE_OUTPUT_DIMS;++i){newStrides[i] = 1;} + for (int i = 0; i < SIZE_OUTPUT_DIMS; ++i) { + for (int j = i + 1; j < SIZE_OUTPUT_DIMS; ++j) { + newStrides[i] *= OUTPUT_DIMS[j]; + } + } + + int indices[SIZE_OUTPUT_DIMS]; + for (int i = 0; i<SIZE_OUTPUT_DIMS;++i){indices[i] = 0;} +// #pragma omp parallel for + for (int i = 0; i < SIZE; ++i) { + int idx = 0; +// #pragma omp parallel for + for (int j = SIZE_OUTPUT_DIMS -1; j >=0; --j) { + idx += indices[PERM[j]] * newStrides[j]; + } + + outputs[idx] = inputs[i]; + + + for (int j = SIZE_OUTPUT_DIMS - 1; j >= 0; --j) { + if (indices[j] < INPUT_DIMS[j] - 1) { + indices[j]++; + break; + } + else { + indices[j] = 0; + } + } + } + + +} + + + +#endif // __AIDGE_EXPORT_CPP_KERNELS_TRANSPOSE__ diff --git a/aidge_export_cpp/templates/configuration/transpose_config.jinja b/aidge_export_cpp/templates/configuration/transpose_config.jinja index c3eabc5..01e57b8 100644 --- a/aidge_export_cpp/templates/configuration/transpose_config.jinja +++ b/aidge_export_cpp/templates/configuration/transpose_config.jinja @@ -12,6 +12,4 @@ int {{name|upper}}_OUTPUT_DIMS[] = { {{ out_dims[0]|join(", ") }} }; int {{name|upper}}_INPUT_DIMS[] = { {{ in_dims[0]|join(", ") }} }; int 
{{name|upper}}_PERM[] = { {{ output_dims_order|join(", ") }} }; - - -#endif /* {{ name|upper }}_LAYER_H */ +#endif /* {{ name|upper }}_LAYER_H */ \ No newline at end of file diff --git a/aidge_export_cpp/templates/kernel_forward/transpose_forward.jinja b/aidge_export_cpp/templates/kernel_forward/transpose_forward.jinja index 2f8d939..2a5433c 100644 --- a/aidge_export_cpp/templates/kernel_forward/transpose_forward.jinja +++ b/aidge_export_cpp/templates/kernel_forward/transpose_forward.jinja @@ -7,5 +7,4 @@ transpose_forward<{{ name|upper }}_INPUT_DIMS, {{name|upper}}_SIZE> ({{in_name[0]}}, {{out_name[0]}}); {% include "./_save_outputs.jinja" %} -{% endfilter %} - +{% endfilter %} \ No newline at end of file -- GitLab From 2a06f059865e9581daf58fece9f128c06a333ee2 Mon Sep 17 00:00:00 2001 From: Matthew Newson <matthew.newson@cea.fr> Date: Fri, 28 Mar 2025 14:07:16 +0000 Subject: [PATCH 16/22] Add new parameter groups for convolution --- .../kernels/convolution_groups.hpp | 84 ++++++++++ aidge_export_cpp/operators.py | 150 ++++++++---------- .../configuration/convolution_config.jinja | 1 - .../kernel_forward/convolution_forward.jinja | 2 +- 4 files changed, 154 insertions(+), 83 deletions(-) create mode 100644 aidge_export_cpp/kernels/convolution_groups.hpp diff --git a/aidge_export_cpp/kernels/convolution_groups.hpp b/aidge_export_cpp/kernels/convolution_groups.hpp new file mode 100644 index 0000000..7d73f79 --- /dev/null +++ b/aidge_export_cpp/kernels/convolution_groups.hpp @@ -0,0 +1,84 @@ +#ifndef __AIDGE_EXPORT_CPP_KERNELS_CONVOLUTION__ +#define __AIDGE_EXPORT_CPP_KERNELS_CONVOLUTION__ + +#include "network/typedefs.hpp" +#include "kernels/rescaling.hpp" +#include "network/utils.hpp" +#include "kernels/macs.hpp" +#include "kernels/activation.hpp" +#include <omp.h> +#include <iostream> + +// Weights index en NHWC +constexpr int inds_pos(int n, int c, int h, int w, int N, int C, int H, int W) { + return n * (H * W * C) + + h * (W * C) + + w * C + + c; +} + +// Image index in CHW +constexpr int inds_pos(int c, int h, int w, int C, int H, int W) { + return c * (H * W) + + h * W + + w; +} + + + +template<int NB_CHANNELS, + int IN_HEIGHT, int IN_WIDTH, + int NB_OUTPUTS, int GROUPS, + int OUT_HEIGHT, int OUT_WIDTH, + int PADDING_Y, int PADDING_X, + int STRIDE_Y, int STRIDE_X, + int DILATION_Y, int DILATION_X, + int KERNEL_HEIGHT, int KERNEL_WIDTH, + ActivationFunction_T ACTIVATION, + typename Input_T, typename Output_T, + typename Weight_T, typename Bias_T, + typename Rescaling_T> +__attribute__((always_inline)) inline +void convolution_forward( + const Input_T* __restrict inputs, + Output_T* __restrict outputs, + const Weight_T* __restrict weights, + const Bias_T* __restrict biases, + const Rescaling_T& __restrict rescaling) +{ + + if (NB_CHANNELS % GROUPS != 0 || NB_OUTPUTS % GROUPS != 0) { + throw std::invalid_argument("Groups must be a divisor of both NB_CHANNELS and NB_OUTPUTS!"); + } + + int c_in_g = NB_CHANNELS / GROUPS; + int c_out_g = NB_OUTPUTS / GROUPS; + #pragma omp parallel for + for (int oc = 0; oc < NB_OUTPUTS; oc++) { + int g_oc = oc / c_out_g; + #pragma omp parallel for + for (int i = 0; i < OUT_HEIGHT; ++i) { + #pragma omp parallel for + for (int j = 0; j < OUT_WIDTH; ++j) { + Output_T value = biases[oc]; + #pragma omp parallel for + for (int ic = g_oc * c_in_g; ic < (g_oc + 1) * c_in_g; ++ic) { + #pragma omp parallel for + for (int m = 0; m < KERNEL_HEIGHT; ++m) { + #pragma omp parallel for + for (int n = 0; n < KERNEL_WIDTH; ++n) { + int i_p = i * STRIDE_X - PADDING_X + m * 
DILATION_X; + int j_p = j * STRIDE_Y - PADDING_Y + n * DILATION_Y; + if (i_p >= 0 && i_p < IN_HEIGHT && j_p >= 0 && j_p < IN_WIDTH) { + value += weights[inds_pos(oc, ic % c_in_g, m, n, NB_OUTPUTS, c_in_g, KERNEL_HEIGHT, KERNEL_WIDTH)] * + inputs[inds_pos(ic, i_p, j_p, NB_CHANNELS, IN_HEIGHT, IN_WIDTH)]; + } + } + } + } + outputs[inds_pos(oc, i, j, NB_OUTPUTS, OUT_HEIGHT, OUT_WIDTH)] = activation_forward_value<Output_T>(value, oc, ACTIVATION, rescaling); + } + } + } +} +#endif // __AIDGE_EXPORT_CPP_KERNELS_CONVOLUTION__ \ No newline at end of file diff --git a/aidge_export_cpp/operators.py b/aidge_export_cpp/operators.py index a511637..5ee9992 100644 --- a/aidge_export_cpp/operators.py +++ b/aidge_export_cpp/operators.py @@ -4,14 +4,27 @@ from pathlib import Path import aidge_core from aidge_core.export_utils import ExportNode, ExportNodeCpp, generate_file from aidge_export_cpp.utils import ROOT -from aidge_export_cpp.utils.converter import numpy_dtype2ctype from aidge_export_cpp import ExportLibCpp ############################################## ############## Export functions ############## ############################################## - - +def numpy_dtype2ctype(dtype): + if dtype == np.int8: + return "int8_t" + elif dtype == np.int16: + return "int16_t" + elif dtype == np.int32: + return "int32_t" + elif dtype == np.int64: + return "int64_t" + elif dtype == np.float32: + return "float" + elif dtype == np.float64: + return "double" + # Add more dtype mappings as needed + else: + raise ValueError(f"Unsupported {dtype} dtype") def export_params(name: str, array: np.ndarray, @@ -43,7 +56,7 @@ class ProducerCPP(ExportNode): super().__init__(node, mem_info) self.values = np.array(self.operator.get_output(0)) - if len(self.values.shape) == 4: # Note: export in HWC + if len(self.values.shape) == 4: # Note: export in HWC self.values = np.transpose(self.values, (0, 2, 3, 1)) def export(self, export_folder: Path): @@ -130,6 +143,24 @@ def _setup_conv2D(conv): str(ROOT / "kernels" / "rescaling.hpp") ] + +def _setup_elemwise_op(elemwise, op): + """Common code (template and kernel setup) shared across all the different elementWise operator (Add, Sub,...).""" + + elemwise.attributes["elemwise_op"] = op + elemwise.attributes["activation"] = "Linear" + elemwise.attributes["rescaling"] = "NoScaling" + elemwise.config_template = str( + ROOT / "templates" / "configuration" / "elemwise_config.jinja") + elemwise.forward_template = str( + ROOT / "templates" / "kernel_forward" / "elemwise_forward.jinja") + elemwise.include_list = [] + elemwise.kernels_to_copy = [ + str(ROOT / "kernels" / "elemwise.hpp"), + str(ROOT / "kernels" / "activation.hpp"), + str(ROOT / "kernels" / "rescaling.hpp") + ] + @ExportLibCpp.register("Conv2D", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.float32))) class ConvCPP(ExportNodeCpp): def __init__(self, node, mem_info): @@ -137,7 +168,7 @@ class ConvCPP(ExportNodeCpp): # No padding with Conv # Use PaddedConv to add padding attribute self.attributes["padding"] = [0, 0] - + self.attributes["groups"] = 1 _setup_conv2D(self) @ExportLibCpp.register_metaop("PaddedConv2D", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.float32))) @@ -156,25 +187,28 @@ class PaddedConvCPP(ExportNodeCpp): ).attr.stride_dims self.attributes["dilation_dims"] = n.get_operator( ).attr.dilation_dims - + self.attributes["groups"] = 1 _setup_conv2D(self) -def _setup_elemwise_op(elemwise, op): - """Common code (template and kernel setup) shared across all the different elementWise operator (Add, 
Sub,...).""" +@ExportLibCpp.register_metaop("PaddedConvDepthWise2D", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.float32))) +class PaddedConvDepthWiseCPP(ExportNodeCpp): + def __init__(self, node, mem_info): + super().__init__(node, mem_info) + # TODO find a way to retrive attr for meta op + for n in self.operator.get_micro_graph().get_nodes(): + if n.type() == "Pad2D": + self.attributes["padding"] = n.get_operator( + ).attr.begin_end_borders + if n.type() == "ConvDepthWise2D": + self.attributes["kernel_dims"] = n.get_operator( + ).attr.kernel_dims + self.attributes["stride_dims"] = n.get_operator( + ).attr.stride_dims + self.attributes["dilation_dims"] = n.get_operator( + ).attr.dilation_dims - elemwise.attributes["elemwise_op"] = op - elemwise.attributes["activation"] = "Linear" - elemwise.attributes["rescaling"] = "NoScaling" - elemwise.config_template = str( - ROOT / "templates" / "configuration" / "elemwise_config.jinja") - elemwise.forward_template = str( - ROOT / "templates" / "kernel_forward" / "elemwise_forward.jinja") - elemwise.include_list = [] - elemwise.kernels_to_copy = [ - str(ROOT / "kernels" / "elemwise.hpp"), - str(ROOT / "kernels" / "activation.hpp"), - str(ROOT / "kernels" / "rescaling.hpp") - ] + self.attributes["groups"] = self.attributes["out_chan"][0] + _setup_conv2D(self) @ExportLibCpp.register("Add", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.float32))) class AddCPP(ExportNodeCpp): @@ -197,6 +231,14 @@ class MulCPP(ExportNodeCpp): _setup_elemwise_op(self, "Mul") +@ExportLibCpp.register("Div", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.float32))) +class MulCPP(ExportNodeCpp): + def __init__(self, node, mem_info): + super().__init__(node, mem_info) + + _setup_elemwise_op(self, "Div") + + def _setup_pooling(pooling): """Common code (template and kernel setup) shared across all the different pooling operator.""" @@ -211,25 +253,6 @@ def _setup_pooling(pooling): str(ROOT / "kernels" / "rescaling.hpp") ] -@ExportLibCpp.register("Div", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.float32))) -class DivCPP(ExportNodeCpp): - def __init__(self, node, mem_info): - super().__init__(node, mem_info) - self.attributes["div_op"] = "Div" - self.attributes["activation"] = "Linear" - self.attributes["rescaling"] = "NoScaling" - self.config_template = str( - ROOT / "templates" / "configuration" / "div_config.jinja") - self.forward_template = str( - ROOT / "templates" / "kernel_forward" / "div_forward.jinja") - self.include_list = [] - self.kernels_to_copy = [ - str(ROOT / "kernels" / "div.hpp"), - str(ROOT / "kernels" / "activation.hpp"), - str(ROOT / "kernels" / "rescaling.hpp") - ] - - @ExportLibCpp.register("MaxPooling2D", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.float32))) class MaxPoolCPP(ExportNodeCpp): def __init__(self, node, mem_info): @@ -297,23 +320,20 @@ class FcCPP(ExportNodeCpp): str(ROOT / "kernels" / "rescaling.hpp") ] -@ExportLibCpp.register("MatMul", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.float32))) -class MatMulCPP(ExportNodeCpp): +@ExportLibCpp.register("Transpose", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.any))) +class TransposeCPP(ExportNodeCpp): def __init__(self, node, mem_info): super().__init__(node, mem_info) - self.attributes["activation"] = "Linear" - self.attributes["rescaling"] = "NoScaling" self.config_template = str( - ROOT / "templates" / "configuration" / "matmul_config.jinja") + ROOT / "templates" / "configuration" / "transpose_ND_config.jinja") 
self.forward_template = str( - ROOT / "templates" / "kernel_forward" / "matmul_forward.jinja") + ROOT / "templates" / "kernel_forward" / "transpose_ND_forward.jinja") self.include_list = [] self.kernels_to_copy = [ - str(ROOT / "kernels" / "matmul.hpp"), - str(ROOT / "kernels" / "activation.hpp"), - str(ROOT / "kernels" / "rescaling.hpp") + str(ROOT / "kernels" / "transpose.hpp") ] + @ExportLibCpp.register("Erf", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.float32))) class ErfCPP(ExportNodeCpp): def __init__(self, node, mem_info): @@ -331,26 +351,6 @@ class ErfCPP(ExportNodeCpp): str(ROOT / "kernels" / "rescaling.hpp") ] -@ExportLibCpp.register("Transpose", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.float32))) -class TransposeCPP(ExportNodeCpp): - def __init__(self, node, mem_info): - super().__init__(node, mem_info) - # Get parameter permutation from transpose - self.attributes["output_dims_order"] = self.operator.attr.get_attr("output_dims_order") - - self.attributes["activation"] = "Linear" - self.attributes["rescaling"] = "NoScaling" - self.config_template = str( - ROOT / "templates" / "configuration" / "transpose_config.jinja") - self.forward_template = str( - ROOT / "templates" / "kernel_forward" / "transpose_forward.jinja") - self.include_list = [] - self.kernels_to_copy = [ - str(ROOT / "kernels" / "transpose.hpp"), - str(ROOT / "kernels" / "activation.hpp"), - str(ROOT / "kernels" / "rescaling.hpp") - ] - @ExportLibCpp.register("BatchNorm2D", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.float32))) class BatchNorm2DCPP(ExportNodeCpp): def __init__(self, node, mem_info): @@ -366,16 +366,4 @@ class BatchNorm2DCPP(ExportNodeCpp): str(ROOT / "kernels" / "batchnorm.hpp"), str(ROOT / "kernels" / "activation.hpp"), str(ROOT / "kernels" / "rescaling.hpp") - ] -@ExportLibCpp.register("Transpose", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.any))) -class TransposeCPP(ExportNodeCpp): - def __init__(self, node, mem_info): - super().__init__(node, mem_info) - self.config_template = str( - ROOT / "templates" / "configuration" / "transpose_ND_config.jinja") - self.forward_template = str( - ROOT / "templates" / "kernel_forward" / "transpose_ND_forward.jinja") - self.include_list = [] - self.kernels_to_copy = [ - str(ROOT / "kernels" / "transpose.hpp") ] \ No newline at end of file diff --git a/aidge_export_cpp/templates/configuration/convolution_config.jinja b/aidge_export_cpp/templates/configuration/convolution_config.jinja index 041e5b5..417f240 100644 --- a/aidge_export_cpp/templates/configuration/convolution_config.jinja +++ b/aidge_export_cpp/templates/configuration/convolution_config.jinja @@ -23,5 +23,4 @@ static const {{ rescaling }} {{ name|upper }}_RESCALING = {}; #define {{ name|upper }}_WEIGHTS_SIZE {{ weights_size }} #define {{ name|upper }}_BIASES_SIZE {{ out_chan[0] }} - #endif /* {{ name|upper }}_LAYER_H */ diff --git a/aidge_export_cpp/templates/kernel_forward/convolution_forward.jinja b/aidge_export_cpp/templates/kernel_forward/convolution_forward.jinja index 1760aa3..98b9e03 100644 --- a/aidge_export_cpp/templates/kernel_forward/convolution_forward.jinja +++ b/aidge_export_cpp/templates/kernel_forward/convolution_forward.jinja @@ -19,4 +19,4 @@ convolution_forward<{{ in_name[0]|upper }}_NB_CHANNELS, ({{in_name[0]}}, {{out_name[0]}}, {{in_name[1]}}, {{in_name[2]}}, {{name|upper}}_RESCALING); {% include "./_save_outputs.jinja" %} -{% endfilter %} +{% endfilter %} \ No newline at end of file -- GitLab From 
f06fd92b9b665e781d1fb0c9370d880a6751dce6 Mon Sep 17 00:00:00 2001 From: Matthew Newson <matthew.newson@cea.fr> Date: Fri, 28 Mar 2025 14:09:16 +0000 Subject: [PATCH 17/22] Add for div operator --- aidge_export_cpp/static/include/network/typedefs.hpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/aidge_export_cpp/static/include/network/typedefs.hpp b/aidge_export_cpp/static/include/network/typedefs.hpp index acece91..9b83602 100644 --- a/aidge_export_cpp/static/include/network/typedefs.hpp +++ b/aidge_export_cpp/static/include/network/typedefs.hpp @@ -19,7 +19,8 @@ typedef enum { typedef enum { Add, Sub, - Mul + Mul, + Div } ElemWise_T; typedef enum { -- GitLab From 9fc76615f2efcb283fd5d2c3ceb1a2170b5ab665 Mon Sep 17 00:00:00 2001 From: Matthew Newson <matthew.newson@cea.fr> Date: Tue, 1 Apr 2025 07:44:24 +0000 Subject: [PATCH 18/22] Delete debug includes --- aidge_export_cpp/kernels/add.hpp | 3 --- aidge_export_cpp/kernels/batchnorm.hpp | 1 - aidge_export_cpp/kernels/convolution_groups.hpp | 1 - aidge_export_cpp/kernels/erf.hpp | 7 +++---- aidge_export_cpp/kernels/matmul.hpp | 2 +- 5 files changed, 4 insertions(+), 10 deletions(-) diff --git a/aidge_export_cpp/kernels/add.hpp b/aidge_export_cpp/kernels/add.hpp index 03ba2c5..eb8a93b 100644 --- a/aidge_export_cpp/kernels/add.hpp +++ b/aidge_export_cpp/kernels/add.hpp @@ -3,9 +3,6 @@ #include "network/typedefs.hpp" #include "kernels/activation.hpp" -#include <iostream> -#include <cassert> - template<int NB_ELTS, diff --git a/aidge_export_cpp/kernels/batchnorm.hpp b/aidge_export_cpp/kernels/batchnorm.hpp index 0ed5080..201ef16 100644 --- a/aidge_export_cpp/kernels/batchnorm.hpp +++ b/aidge_export_cpp/kernels/batchnorm.hpp @@ -5,7 +5,6 @@ #include "kernels/rescaling.hpp" #include "kernels/activation.hpp" #include <math.h> -#include <iostream> // WARNING: this kernel only works for 32-bits floating point values diff --git a/aidge_export_cpp/kernels/convolution_groups.hpp b/aidge_export_cpp/kernels/convolution_groups.hpp index 7d73f79..321ffc7 100644 --- a/aidge_export_cpp/kernels/convolution_groups.hpp +++ b/aidge_export_cpp/kernels/convolution_groups.hpp @@ -7,7 +7,6 @@ #include "kernels/macs.hpp" #include "kernels/activation.hpp" #include <omp.h> -#include <iostream> // Weights index en NHWC constexpr int inds_pos(int n, int c, int h, int w, int N, int C, int H, int W) { diff --git a/aidge_export_cpp/kernels/erf.hpp b/aidge_export_cpp/kernels/erf.hpp index b509133..768f3b9 100644 --- a/aidge_export_cpp/kernels/erf.hpp +++ b/aidge_export_cpp/kernels/erf.hpp @@ -1,10 +1,9 @@ -#ifndef __AIDGE_EXPORT_CPP_KERNELS_ERP__ -#define __AIDGE_EXPORT_CPP_KERNELS_ERP__ +#ifndef __AIDGE_EXPORT_CPP_KERNELS_ERF__ +#define __AIDGE_EXPORT_CPP_KERNELS_ERF__ #include "network/typedefs.hpp" #include <cmath> #include <math.h> -#include <iostream> template<int _NB_ELTS, typename Input_T, typename Output_T> @@ -37,4 +36,4 @@ void erf_forward ( } -#endif // __AIDGE_EXPORT_CPP_KERNELS_ERP_ \ No newline at end of file +#endif // __AIDGE_EXPORT_CPP_KERNELS_ERF_ \ No newline at end of file diff --git a/aidge_export_cpp/kernels/matmul.hpp b/aidge_export_cpp/kernels/matmul.hpp index b284214..1403a01 100644 --- a/aidge_export_cpp/kernels/matmul.hpp +++ b/aidge_export_cpp/kernels/matmul.hpp @@ -3,7 +3,7 @@ #include "network/typedefs.hpp" #include "kernels/activation.hpp" -#include <iostream> + // Generic function for matmul and activation template<int INPUT_A_DIMS[], int INPUT_B_DIMS[], int OUTPUT_DIMS[], -- GitLab From 
4a4517821e3d9c70f6844f74b581e0c34304920a Mon Sep 17 00:00:00 2001 From: Matthew Newson <matthew.newson@cea.fr> Date: Tue, 1 Apr 2025 07:52:41 +0000 Subject: [PATCH 19/22] Add missing files --- .../templates/configuration/batchnorm_config.jinja | 2 +- .../templates/configuration/matmul_config.jinja | 6 ------ .../templates/kernel_forward/batchnorm_forward.jinja | 2 +- 3 files changed, 2 insertions(+), 8 deletions(-) diff --git a/aidge_export_cpp/templates/configuration/batchnorm_config.jinja b/aidge_export_cpp/templates/configuration/batchnorm_config.jinja index 701ba7c..3706ee6 100644 --- a/aidge_export_cpp/templates/configuration/batchnorm_config.jinja +++ b/aidge_export_cpp/templates/configuration/batchnorm_config.jinja @@ -8,4 +8,4 @@ #define {{ name|upper }}_ACTIVATION {{ activation }} #define {{ name|upper }}_EPSILON {{ epsilon }} -#endif /* {{ name|upper }}_LAYER_H */ +#endif /* {{ name|upper }}_LAYER_H */ \ No newline at end of file diff --git a/aidge_export_cpp/templates/configuration/matmul_config.jinja b/aidge_export_cpp/templates/configuration/matmul_config.jinja index 4e380e5..6ef27fe 100644 --- a/aidge_export_cpp/templates/configuration/matmul_config.jinja +++ b/aidge_export_cpp/templates/configuration/matmul_config.jinja @@ -7,7 +7,6 @@ {% include "./_meminfo.jinja" %} {# For layer configuration -#} -<<<<<<< HEAD {% include "./_def_io.jinja" %} {% include "./_meminfo.jinja" %} #define {{ name|upper }}_B {{ in_dims[0][0]}} @@ -24,11 +23,6 @@ int {{name|upper}}_OUTPUT_DIMS[] = { {{ out_dims[0]|join(", ") }} }; int {{name|upper}}_INPUT_A_DIMS[] = { {{ in_dims[0]|join(", ") }} }; int {{name|upper}}_INPUT_B_DIMS[] = { {{ in_dims[1]|join(", ") }} }; -======= -#define {{ name|upper }}_M {{ in_dims[0][0] }} -#define {{ name|upper }}_K {{ in_dims[0][1] }} -#define {{ name|upper }}_N {{ in_dims[1][1] }} ->>>>>>> origin/dev #define {{ name|upper }}_ACTIVATION {{ activation }} {#- Calculate sizes #} diff --git a/aidge_export_cpp/templates/kernel_forward/batchnorm_forward.jinja b/aidge_export_cpp/templates/kernel_forward/batchnorm_forward.jinja index 5a759b8..a18e3a7 100644 --- a/aidge_export_cpp/templates/kernel_forward/batchnorm_forward.jinja +++ b/aidge_export_cpp/templates/kernel_forward/batchnorm_forward.jinja @@ -6,4 +6,4 @@ batchnorm_forward<{{ out_name[0]|upper }}_NB_OUTPUTS, {{name|upper}}_ACTIVATION> ({{in_name[0]}}, {{out_name[0]}}, {{in_name[1]}}, {{in_name[2]}}, {{in_name[3]}}, {{in_name[4]}}, {{name|upper}}_EPSILON); {% include "./_save_outputs.jinja" %} -{% endfilter %} +{% endfilter %} \ No newline at end of file -- GitLab From b40da6dbf9c19b2bffb8aafb8e533dc59ffc1118 Mon Sep 17 00:00:00 2001 From: Matthew Newson <matthew.newson@cea.fr> Date: Tue, 1 Apr 2025 07:53:16 +0000 Subject: [PATCH 20/22] Corrected typo --- aidge_export_cpp/operators.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aidge_export_cpp/operators.py b/aidge_export_cpp/operators.py index 5ee9992..18089fd 100644 --- a/aidge_export_cpp/operators.py +++ b/aidge_export_cpp/operators.py @@ -232,7 +232,7 @@ class MulCPP(ExportNodeCpp): _setup_elemwise_op(self, "Mul") @ExportLibCpp.register("Div", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.float32))) -class MulCPP(ExportNodeCpp): +class DivCPP(ExportNodeCpp): def __init__(self, node, mem_info): super().__init__(node, mem_info) -- GitLab From dc0225cb2a571a9b995bcc31c151c3820f889f05 Mon Sep 17 00:00:00 2001 From: Matthew Newson <matthew.newson@cea.fr> Date: Wed, 2 Apr 2025 14:25:23 +0000 Subject: [PATCH 21/22] Delete debug includes 
--- aidge_export_cpp/kernels/div.hpp | 4 ---- aidge_export_cpp/kernels/elemwise.hpp | 3 --- aidge_export_cpp/kernels/mul.hpp | 4 ---- aidge_export_cpp/kernels/pooling.hpp | 2 -- aidge_export_cpp/kernels/sub.hpp | 3 --- aidge_export_cpp/kernels/transpose_diff.hpp | 7 ++----- 6 files changed, 2 insertions(+), 21 deletions(-) diff --git a/aidge_export_cpp/kernels/div.hpp b/aidge_export_cpp/kernels/div.hpp index f1ff7d0..44640aa 100644 --- a/aidge_export_cpp/kernels/div.hpp +++ b/aidge_export_cpp/kernels/div.hpp @@ -3,10 +3,6 @@ #include "network/typedefs.hpp" #include "kernels/activation.hpp" -#include <iostream> -#include <cassert> - - template<int NB_ELTS, int INPUT_A_DIMS[], int INPUT_B_DIMS[], int OUTPUT_DIMS[], diff --git a/aidge_export_cpp/kernels/elemwise.hpp b/aidge_export_cpp/kernels/elemwise.hpp index 1b45c59..9b97959 100644 --- a/aidge_export_cpp/kernels/elemwise.hpp +++ b/aidge_export_cpp/kernels/elemwise.hpp @@ -3,9 +3,6 @@ #include "network/typedefs.hpp" #include "kernels/activation.hpp" -#include <iostream> -#include <cassert> - template<int NB_ELTS, ElemWise_T ELEM_OP, diff --git a/aidge_export_cpp/kernels/mul.hpp b/aidge_export_cpp/kernels/mul.hpp index cbed0f6..b3ff9e1 100644 --- a/aidge_export_cpp/kernels/mul.hpp +++ b/aidge_export_cpp/kernels/mul.hpp @@ -3,10 +3,6 @@ #include "network/typedefs.hpp" #include "kernels/activation.hpp" -#include <iostream> -#include <cassert> - - template<int NB_ELTS, int INPUT_A_DIMS[], int INPUT_B_DIMS[], int OUTPUT_DIMS[], diff --git a/aidge_export_cpp/kernels/pooling.hpp b/aidge_export_cpp/kernels/pooling.hpp index 14a2473..8f6de40 100644 --- a/aidge_export_cpp/kernels/pooling.hpp +++ b/aidge_export_cpp/kernels/pooling.hpp @@ -5,8 +5,6 @@ #include "network/utils.hpp" #include <limits> #include <stdexcept> -#include <iostream> - void reorder_NCHW_NHWC_pool(const float* input, float* output, int N, int C, int H, int W, bool direct = true) { diff --git a/aidge_export_cpp/kernels/sub.hpp b/aidge_export_cpp/kernels/sub.hpp index 07637cd..2576edc 100644 --- a/aidge_export_cpp/kernels/sub.hpp +++ b/aidge_export_cpp/kernels/sub.hpp @@ -3,9 +3,6 @@ #include "network/typedefs.hpp" #include "kernels/activation.hpp" -#include <iostream> -#include <cassert> - template<int NB_ELTS, diff --git a/aidge_export_cpp/kernels/transpose_diff.hpp b/aidge_export_cpp/kernels/transpose_diff.hpp index 298e5d9..712d9b4 100644 --- a/aidge_export_cpp/kernels/transpose_diff.hpp +++ b/aidge_export_cpp/kernels/transpose_diff.hpp @@ -2,9 +2,7 @@ #define __AIDGE_EXPORT_CPP_KERNELS_TRANSPOSE__ #include "network/typedefs.hpp" -#include <cstring> -#include <cstdio> -#include <iostream> + using namespace std; @@ -27,10 +25,9 @@ void transpose_forward ( int indices[SIZE_OUTPUT_DIMS]; for (int i = 0; i<SIZE_OUTPUT_DIMS;++i){indices[i] = 0;} -// #pragma omp parallel for + for (int i = 0; i < SIZE; ++i) { int idx = 0; -// #pragma omp parallel for for (int j = SIZE_OUTPUT_DIMS -1; j >=0; --j) { idx += indices[PERM[j]] * newStrides[j]; } -- GitLab From f74ec2c3a86cfb5783036d9abee25c7baa9289c7 Mon Sep 17 00:00:00 2001 From: Matthew Newson <matthew.newson@cea.fr> Date: Thu, 3 Apr 2025 12:10:41 +0000 Subject: [PATCH 22/22] Add ifdef pragma or delete unneeded pragma --- aidge_export_cpp/kernels/add.hpp | 2 -- aidge_export_cpp/kernels/batchnorm.hpp | 6 ++++++ aidge_export_cpp/kernels/convolution_groups.hpp | 14 ++++++++------ aidge_export_cpp/kernels/erf.hpp | 3 ++- aidge_export_cpp/kernels/mul.hpp | 2 -- 5 files changed, 16 insertions(+), 11 deletions(-) diff --git 
a/aidge_export_cpp/kernels/add.hpp b/aidge_export_cpp/kernels/add.hpp index eb8a93b..52b58f5 100644 --- a/aidge_export_cpp/kernels/add.hpp +++ b/aidge_export_cpp/kernels/add.hpp @@ -55,12 +55,10 @@ void add_forward ( if (contiguousidx > 0) { stride_post0[contiguousidx - 1] = 1; stride_post1[contiguousidx - 1] = 1; - #pragma omp parallel for for (int i = contiguousidx -2; i != -1; --i) { stride_post0[i] = stride_post0[i+1]*ndim_a[i+1]; stride_post1[i] = stride_post1[i+1]*ndim_b[i+1]; } - #pragma omp parallel for for (int i = 0; i < contiguousidx ; ++i) { stride_step0[i] = (ndim_a[i] == 1) ? 1 - stride_post0[i] : 1; stride_step1[i] = (ndim_b[i] == 1) ? 1 - stride_post1[i] : 1; diff --git a/aidge_export_cpp/kernels/batchnorm.hpp b/aidge_export_cpp/kernels/batchnorm.hpp index 201ef16..4100e6d 100644 --- a/aidge_export_cpp/kernels/batchnorm.hpp +++ b/aidge_export_cpp/kernels/batchnorm.hpp @@ -25,15 +25,21 @@ void batchnorm_forward ( { int featureMapSize = OUTPUTS_HEIGHT * OUTPUTS_WIDTH; +#ifdef _OPENMP #pragma omp parallel for +#endif for (int ch = 0; ch < NB_OUTPUTS; ++ch) { int ioIndex = ch * featureMapSize; +#ifdef _OPENMP #pragma omp parallel for +#endif for (int i = ioIndex; i < ioIndex + featureMapSize; i++) { outputs[i] = biases[ch]; } float var = sqrt(variances[ch] + epsilon); +#ifdef _OPENMP #pragma omp parallel for +#endif for (int feature = 0; feature < featureMapSize; ++feature) { outputs[ioIndex + feature] += (scales[ch] * (inputs[ioIndex + feature] - means[ch]) / var); } diff --git a/aidge_export_cpp/kernels/convolution_groups.hpp b/aidge_export_cpp/kernels/convolution_groups.hpp index 321ffc7..17cb1bf 100644 --- a/aidge_export_cpp/kernels/convolution_groups.hpp +++ b/aidge_export_cpp/kernels/convolution_groups.hpp @@ -6,7 +6,6 @@ #include "network/utils.hpp" #include "kernels/macs.hpp" #include "kernels/activation.hpp" -#include <omp.h> // Weights index en NHWC constexpr int inds_pos(int n, int c, int h, int w, int N, int C, int H, int W) { @@ -52,19 +51,22 @@ void convolution_forward( int c_in_g = NB_CHANNELS / GROUPS; int c_out_g = NB_OUTPUTS / GROUPS; - #pragma omp parallel for +#ifdef _OPENMP + #pragma omp parallel for collapse(3) +#endif for (int oc = 0; oc < NB_OUTPUTS; oc++) { - int g_oc = oc / c_out_g; - #pragma omp parallel for for (int i = 0; i < OUT_HEIGHT; ++i) { - #pragma omp parallel for for (int j = 0; j < OUT_WIDTH; ++j) { + int g_oc = oc / c_out_g; Output_T value = biases[oc]; - #pragma omp parallel for for (int ic = g_oc * c_in_g; ic < (g_oc + 1) * c_in_g; ++ic) { +#ifdef _OPENMP #pragma omp parallel for +#endif for (int m = 0; m < KERNEL_HEIGHT; ++m) { +#ifdef _OPENMP #pragma omp parallel for +#endif for (int n = 0; n < KERNEL_WIDTH; ++n) { int i_p = i * STRIDE_X - PADDING_X + m * DILATION_X; int j_p = j * STRIDE_Y - PADDING_Y + n * DILATION_Y; diff --git a/aidge_export_cpp/kernels/erf.hpp b/aidge_export_cpp/kernels/erf.hpp index 768f3b9..88aafe2 100644 --- a/aidge_export_cpp/kernels/erf.hpp +++ b/aidge_export_cpp/kernels/erf.hpp @@ -19,8 +19,9 @@ void erf_forward ( double a5 = 1.061405429; double p = 0.3275911; - +#ifdef _OPENMP #pragma omp parallel for +#endif for (int i = 0; i < _NB_ELTS; ++i) { int sign = 1; if (inputs[i] < 0) diff --git a/aidge_export_cpp/kernels/mul.hpp b/aidge_export_cpp/kernels/mul.hpp index b3ff9e1..5c1ba62 100644 --- a/aidge_export_cpp/kernels/mul.hpp +++ b/aidge_export_cpp/kernels/mul.hpp @@ -54,12 +54,10 @@ void mul_forward ( if (contiguousidx > 0) { stride_post0[contiguousidx - 1] = 1; stride_post1[contiguousidx - 1] = 1; - #pragma 
omp parallel for for (int i = contiguousidx -2; i != -1; --i) { stride_post0[i] = stride_post0[i+1]*ndim_a[i+1]; stride_post1[i] = stride_post1[i+1]*ndim_b[i+1]; } - #pragma omp parallel for for (int i = 0; i < contiguousidx ; ++i) { stride_step0[i] = (ndim_a[i] == 1) ? 1 - stride_post0[i] : 1; stride_step1[i] = (ndim_b[i] == 1) ? 1 - stride_post1[i] : 1; -- GitLab
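The guarded-pragma pattern adopted in this final patch keeps the kernels compilable without -fopenmp, while collapse(3) fuses the three output loops into one parallel region when OpenMP is enabled. A minimal standalone sketch of that pattern is shown below; the loop bounds are illustrative and not taken from the export.

#include <cstdio>

int main() {
    constexpr int kOutC = 4, kOutH = 8, kOutW = 8;
    static float outputs[kOutC * kOutH * kOutW];

#ifdef _OPENMP
#pragma omp parallel for collapse(3)
#endif
    for (int oc = 0; oc < kOutC; ++oc) {
        for (int i = 0; i < kOutH; ++i) {
            for (int j = 0; j < kOutW; ++j) {
                // Perfectly nested loops, so collapse(3) can fuse them;
                // without -fopenmp the guarded pragma is simply skipped.
                outputs[(oc * kOutH + i) * kOutW + j] = float(oc + i + j);
            }
        }
    }

    std::printf("first=%f last=%f\n",
                outputs[0], outputs[kOutC * kOutH * kOutW - 1]);
    return 0;
}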