From b071317e07d5ab0a712b4d3b9898e7e3ea03d5bb Mon Sep 17 00:00:00 2001 From: Wissam Boussella <wissam.boussella@cea.fr> Date: Thu, 27 Feb 2025 13:58:28 +0100 Subject: [PATCH 1/3] Add transpose function for 4D tensors and related templates --- aidge_export_cpp/kernels/transpose.hpp | 78 +++++++++++++++++++ aidge_export_cpp/operators.py | 17 +++- .../configuration/transpose_4D_config.jinja | 13 ++++ .../kernel_forward/transpose_4D_forward.jinja | 7 ++ 4 files changed, 113 insertions(+), 2 deletions(-) create mode 100644 aidge_export_cpp/kernels/transpose.hpp create mode 100644 aidge_export_cpp/templates/configuration/transpose_4D_config.jinja create mode 100644 aidge_export_cpp/templates/kernel_forward/transpose_4D_forward.jinja diff --git a/aidge_export_cpp/kernels/transpose.hpp b/aidge_export_cpp/kernels/transpose.hpp new file mode 100644 index 0000000..4273d20 --- /dev/null +++ b/aidge_export_cpp/kernels/transpose.hpp @@ -0,0 +1,78 @@ +#ifndef __AIDGE_EXPORT_CPP_KERNELS_TRANSPOSE__ +#define __AIDGE_EXPORT_CPP_KERNELS_TRANSPOSE__ + +#include "network/typedefs.hpp" + +/** + * @brief Transposes a 4-dimensional tensor based on the specified permutation. + * + * This function rearranges the dimensions of a 4D tensor according to the + * permutation array provided. The input tensor is expected to have dimensions + * [in_dim1][in_dim2][in_dim3][in_dim4], and the output tensor will have + * dimensions reordered as specified by the permute array. + * + * @tparam T Data type of the tensor elements (e.g., float, double). + * @param[in] inputs Pointer to the input tensor data stored in contiguous memory. + * @param[out] outputs Pointer to the pre-allocated memory for the transposed tensor. + * Ensure this memory is appropriately sized to hold the transposed data. + * @param[in] in_dim1 Size of the first dimension of the input tensor. + * @param[in] in_dim2 Size of the second dimension of the input tensor. + * @param[in] in_dim3 Size of the third dimension of the input tensor. 
+ * @param[in] in_dim4 Size of the fourth dimension of the input tensor. + * @param[in] permute Array of four unsigned integers specifying the desired permutation + * of dimensions. Each value should be in the range [0, 3], defining + * the new order of dimensions for the output tensor. + */ +template <typename T> +void transpose_4D_forward(const T* inputs, + T* outputs, + unsigned int in_dim1, + unsigned int in_dim2, + unsigned int in_dim3, + unsigned int in_dim4, + const unsigned int* permute) +{ + unsigned int in_dims[4] = {in_dim1, in_dim2, in_dim3, in_dim4}; + unsigned int out_dims[4]; + for (unsigned int i = 0; i < 4; ++i) { + out_dims[i] = in_dims[permute[i]]; + } + + unsigned int in_strides[4] = { + in_dim2 * in_dim3 * in_dim4, + in_dim3 * in_dim4, + in_dim4, + 1 + }; + + unsigned int out_strides[4] = { + out_dims[1] * out_dims[2] * out_dims[3], + out_dims[2] * out_dims[3], + out_dims[3], + 1 + }; + + for (unsigned int i = 0; i < in_dim1; ++i) { + for (unsigned int j = 0; j < in_dim2; ++j) { + for (unsigned int k = 0; k < in_dim3; ++k) { + for (unsigned int l = 0; l < in_dim4; ++l) { + // Compute the linear index in the input tensor + unsigned int input_index = i * in_strides[0] + + j * in_strides[1] + + k * in_strides[2] + + l * in_strides[3]; + + unsigned int in_idx[4] = {i, j, k, l}; + unsigned int output_index = 0; + for (unsigned int m = 0; m < 4; ++m) { + output_index += in_idx[permute[m]] * out_strides[m]; + } + + outputs[output_index] = inputs[input_index]; + } + } + } + } +} + +#endif // __AIDGE_EXPORT_CPP_KERNELS_TRANSPOSE__ \ No newline at end of file diff --git a/aidge_export_cpp/operators.py b/aidge_export_cpp/operators.py index f04dbb3..fcc57ed 100644 --- a/aidge_export_cpp/operators.py +++ b/aidge_export_cpp/operators.py @@ -56,8 +56,8 @@ class ProducerCPP(ExportNode): super().__init__(node, mem_info) self.values = np.array(self.operator.get_output(0)) - if len(self.values.shape) == 4: # Note: export in HWC - self.values = 
np.transpose(self.values, (0, 2, 3, 1)) + # if len(self.values.shape) == 4: # Note: export in HWC + # self.values = np.transpose(self.values, (0, 2, 3, 1)) def export(self, export_folder: Path): header_path = f"include/parameters/{self.attributes['name']}.h" @@ -295,3 +295,16 @@ class FcCPP(ExportNodeCpp): str(ROOT / "kernels" / "activation.hpp"), str(ROOT / "kernels" / "rescaling.hpp") ] + +@ExportLibCpp.register("Transpose", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.float32))) +class TransposeCPP(ExportNodeCpp): + def __init__(self, node, mem_info): + super().__init__(node, mem_info) + self.config_template = str( + ROOT / "templates" / "configuration" / "transpose_4D_config.jinja") + self.forward_template = str( + ROOT / "templates" / "kernel_forward" / "transpose_4D_forward.jinja") + self.include_list = [] + self.kernels_to_copy = [ + str(ROOT / "kernels" / "transpose.hpp") + ] \ No newline at end of file diff --git a/aidge_export_cpp/templates/configuration/transpose_4D_config.jinja b/aidge_export_cpp/templates/configuration/transpose_4D_config.jinja new file mode 100644 index 0000000..62c5ac7 --- /dev/null +++ b/aidge_export_cpp/templates/configuration/transpose_4D_config.jinja @@ -0,0 +1,13 @@ +{#- For name header -#} +#ifndef {{ name|upper }}_LAYER_H +#define {{ name|upper }}_LAYER_H + +{# For layer configuration -#} +{% include "./_def_io.jinja" %} +{% include "./_meminfo.jinja" %} +{# 4D kernels are not supported yet that's why define NAME_DIM1 #} +#define {{ name|upper }}_DIM1 1 + +static const unsigned int {{ name|upper }}_PERMUTE[] = { {{ output_dims_order | join(', ') }} }; + +#endif /* {{ name|upper }}_LAYER_H */ diff --git a/aidge_export_cpp/templates/kernel_forward/transpose_4D_forward.jinja b/aidge_export_cpp/templates/kernel_forward/transpose_4D_forward.jinja new file mode 100644 index 0000000..db04ac5 --- /dev/null +++ b/aidge_export_cpp/templates/kernel_forward/transpose_4D_forward.jinja @@ -0,0 +1,7 @@ 
+transpose_4D_forward({{in_name[0]}}, + {{out_name[0]}}, + {{name|upper}}_DIM1, + {{ in_name[0]|upper }}_NB_CHANNELS, + {{ in_name[0]|upper }}_IN_HEIGHT, + {{ in_name[0]|upper }}_IN_WIDTH, + {{name|upper}}_PERMUTE); \ No newline at end of file -- GitLab From ac47d3e6968688ef2c9ccf524e6f13e717f253f7 Mon Sep 17 00:00:00 2001 From: Wissam Boussella <wissam.boussella@cea.fr> Date: Tue, 4 Mar 2025 11:35:32 +0100 Subject: [PATCH 2/3] Keep transpose in NHWC in operators.py, but this needs to be changed. New method for transpose.hpp that now handles the multi-dimensional case. Change config and forward templates to be compatible with the new transpose. --- aidge_export_cpp/kernels/transpose.hpp | 128 ++++++++++-------- aidge_export_cpp/operators.py | 10 +- .../configuration/transpose_4D_config.jinja | 13 -- .../configuration/transpose_ND_config.jinja | 15 ++ .../kernel_forward/transpose_4D_forward.jinja | 7 - .../kernel_forward/transpose_ND_forward.jinja | 1 + 6 files changed, 90 insertions(+), 84 deletions(-) delete mode 100644 aidge_export_cpp/templates/configuration/transpose_4D_config.jinja create mode 100644 aidge_export_cpp/templates/configuration/transpose_ND_config.jinja delete mode 100644 aidge_export_cpp/templates/kernel_forward/transpose_4D_forward.jinja create mode 100644 aidge_export_cpp/templates/kernel_forward/transpose_ND_forward.jinja diff --git a/aidge_export_cpp/kernels/transpose.hpp b/aidge_export_cpp/kernels/transpose.hpp index 4273d20..082d738 100644 --- a/aidge_export_cpp/kernels/transpose.hpp +++ b/aidge_export_cpp/kernels/transpose.hpp @@ -1,78 +1,88 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. 
+ * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + #ifndef __AIDGE_EXPORT_CPP_KERNELS_TRANSPOSE__ #define __AIDGE_EXPORT_CPP_KERNELS_TRANSPOSE__ -#include "network/typedefs.hpp" - /** - * @brief Transposes a 4-dimensional tensor based on the specified permutation. + * @brief Transposes an N-dimensional tensor based on the specified permutation. + * + * This function rearranges the dimensions of an N-dimensional tensor according to the + * permutation array provided. The input tensor is expected to have dimensions specified + * by `in_dims`, and the output tensor will have dimensions reordered as specified by the + * `permute` array. * - * This function rearranges the dimensions of a 4D tensor according to the - * permutation array provided. The input tensor is expected to have dimensions - * [in_dim1][in_dim2][in_dim3][in_dim4], and the output tensor will have - * dimensions reordered as specified by the permute array. + * Based on Tensor::copyTranspose from aidge.aidge_core * - * @tparam T Data type of the tensor elements (e.g., float, double). - * @param[in] inputs Pointer to the input tensor data stored in contiguous memory. - * @param[out] outputs Pointer to the pre-allocated memory for the transposed tensor. - * Ensure this memory is appropriately sized to hold the transposed data. - * @param[in] in_dim1 Size of the first dimension of the input tensor. - * @param[in] in_dim2 Size of the second dimension of the input tensor. - * @param[in] in_dim3 Size of the third dimension of the input tensor. - * @param[in] in_dim4 Size of the fourth dimension of the input tensor. - * @param[in] permute Array of four unsigned integers specifying the desired permutation - * of dimensions. Each value should be in the range [0, 3], defining - * the new order of dimensions for the output tensor. + * @tparam T Data type of the tensor elements. 
+ * @param[in] inputs Pointer to the input tensor data stored in contiguous memory. + * @param[in] in_dims Array containing the size of each dimension of the input tensor. + * @param[in] nb_dims Number of dimensions of the input tensor. + * @param[in] permute Array of unsigned integers specifying the desired permutation + * of dimensions. Each value should be in the range [0, nb_dims-1], + * defining the new order of dimensions for the output tensor. + * @param[in] total_size Total number of elements in the input/output tensor. + * @param[out] outputs Pointer to the pre-allocated memory for the transposed tensor. + * Ensure this memory is appropriately sized to hold the transposed data. */ template <typename T> -void transpose_4D_forward(const T* inputs, - T* outputs, - unsigned int in_dim1, - unsigned int in_dim2, - unsigned int in_dim3, - unsigned int in_dim4, - const unsigned int* permute) +void transpose_ND_forward(const T *__restrict inputs, + const unsigned int *in_dims, + const unsigned int nb_dims, + const unsigned int *permute, + const unsigned int total_size, + T *__restrict outputs) { - unsigned int in_dims[4] = {in_dim1, in_dim2, in_dim3, in_dim4}; - unsigned int out_dims[4]; - for (unsigned int i = 0; i < 4; ++i) { - out_dims[i] = in_dims[permute[i]]; + // Compute strides for input tensor + unsigned int in_strides[nb_dims]; + in_strides[nb_dims - 1] = 1; + for (int i = nb_dims - 2; i >= 0; --i) + { + in_strides[i] = in_strides[i + 1] * in_dims[i + 1]; } - unsigned int in_strides[4] = { - in_dim2 * in_dim3 * in_dim4, - in_dim3 * in_dim4, - in_dim4, - 1 - }; + // Compute dimensions and strides for output tensor + unsigned int out_dims[nb_dims]; + unsigned int out_strides[nb_dims]; + out_strides[nb_dims - 1] = 1; + for (unsigned int i = 0; i < nb_dims; ++i) + { + out_dims[i] = in_dims[permute[i]]; + } + for (int i = nb_dims - 2; i >= 0; --i) + { + out_strides[i] = out_strides[i + 1] * out_dims[i + 1]; + } - unsigned int out_strides[4] = { - out_dims[1] * 
out_dims[2] * out_dims[3], - out_dims[2] * out_dims[3], - out_dims[3], - 1 - }; + unsigned int current_idx[nb_dims]; - for (unsigned int i = 0; i < in_dim1; ++i) { - for (unsigned int j = 0; j < in_dim2; ++j) { - for (unsigned int k = 0; k < in_dim3; ++k) { - for (unsigned int l = 0; l < in_dim4; ++l) { - // Compute the linear index in the input tensor - unsigned int input_index = i * in_strides[0] + - j * in_strides[1] + - k * in_strides[2] + - l * in_strides[3]; + // Iterate over all elements in the input tensor + for (unsigned int idx = 0; idx < total_size; ++idx) + { - unsigned int in_idx[4] = {i, j, k, l}; - unsigned int output_index = 0; - for (unsigned int m = 0; m < 4; ++m) { - output_index += in_idx[permute[m]] * out_strides[m]; - } + unsigned int remaining = idx; + for (unsigned int i = 0; i < nb_dims; ++i) + { + current_idx[i] = remaining / in_strides[i]; + remaining = remaining % in_strides[i]; + } - outputs[output_index] = inputs[input_index]; - } - } + unsigned int output_index = 0; + for (unsigned int i = 0; i < nb_dims; ++i) + { + output_index += current_idx[permute[i]] * out_strides[i]; } + + outputs[output_index] = inputs[idx]; } } -#endif // __AIDGE_EXPORT_CPP_KERNELS_TRANSPOSE__ \ No newline at end of file +#endif // __AIDGE_EXPORT_CPP_KERNELS_TRANSPOSE__ diff --git a/aidge_export_cpp/operators.py b/aidge_export_cpp/operators.py index fcc57ed..54c3805 100644 --- a/aidge_export_cpp/operators.py +++ b/aidge_export_cpp/operators.py @@ -56,8 +56,8 @@ class ProducerCPP(ExportNode): super().__init__(node, mem_info) self.values = np.array(self.operator.get_output(0)) - # if len(self.values.shape) == 4: # Note: export in HWC - # self.values = np.transpose(self.values, (0, 2, 3, 1)) + if len(self.values.shape) == 4: # Note: export in HWC + self.values = np.transpose(self.values, (0, 2, 3, 1)) def export(self, export_folder: Path): header_path = f"include/parameters/{self.attributes['name']}.h" @@ -296,14 +296,14 @@ class FcCPP(ExportNodeCpp): str(ROOT / 
"kernels" / "rescaling.hpp") ] -@ExportLibCpp.register("Transpose", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.float32))) +@ExportLibCpp.register("Transpose", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.any))) class TransposeCPP(ExportNodeCpp): def __init__(self, node, mem_info): super().__init__(node, mem_info) self.config_template = str( - ROOT / "templates" / "configuration" / "transpose_4D_config.jinja") + ROOT / "templates" / "configuration" / "transpose_ND_config.jinja") self.forward_template = str( - ROOT / "templates" / "kernel_forward" / "transpose_4D_forward.jinja") + ROOT / "templates" / "kernel_forward" / "transpose_ND_forward.jinja") self.include_list = [] self.kernels_to_copy = [ str(ROOT / "kernels" / "transpose.hpp") diff --git a/aidge_export_cpp/templates/configuration/transpose_4D_config.jinja b/aidge_export_cpp/templates/configuration/transpose_4D_config.jinja deleted file mode 100644 index 62c5ac7..0000000 --- a/aidge_export_cpp/templates/configuration/transpose_4D_config.jinja +++ /dev/null @@ -1,13 +0,0 @@ -{#- For name header -#} -#ifndef {{ name|upper }}_LAYER_H -#define {{ name|upper }}_LAYER_H - -{# For layer configuration -#} -{% include "./_def_io.jinja" %} -{% include "./_meminfo.jinja" %} -{# 4D kernels are not supported yet that's why define NAME_DIM1 #} -#define {{ name|upper }}_DIM1 1 - -static const unsigned int {{ name|upper }}_PERMUTE[] = { {{ output_dims_order | join(', ') }} }; - -#endif /* {{ name|upper }}_LAYER_H */ diff --git a/aidge_export_cpp/templates/configuration/transpose_ND_config.jinja b/aidge_export_cpp/templates/configuration/transpose_ND_config.jinja new file mode 100644 index 0000000..e5ef4ff --- /dev/null +++ b/aidge_export_cpp/templates/configuration/transpose_ND_config.jinja @@ -0,0 +1,15 @@ +{#- For name header -#} +#ifndef {{ name|upper }}_LAYER_H +#define {{ name|upper }}_LAYER_H + +{# For layer configuration -#} +{% include "./_def_io.jinja" %} +{% include "./_meminfo.jinja" %} +{# 
Export suppose that batchsize = 1#} +#define {{ name|upper }}_NB_DIMS {{ in_dims[0] | length }} + +static constexpr unsigned int {{ name|upper }}_PERMUTE[] = { {{ output_dims_order | join(', ') }} }; +static constexpr unsigned int {{ name|upper }}_DIMS[] = { {{ in_dims[0] | join(', ') }}}; + + +#endif /* {{ name|upper }}_LAYER_H */ \ No newline at end of file diff --git a/aidge_export_cpp/templates/kernel_forward/transpose_4D_forward.jinja b/aidge_export_cpp/templates/kernel_forward/transpose_4D_forward.jinja deleted file mode 100644 index db04ac5..0000000 --- a/aidge_export_cpp/templates/kernel_forward/transpose_4D_forward.jinja +++ /dev/null @@ -1,7 +0,0 @@ -transpose_4D_forward({{in_name[0]}}, - {{out_name[0]}}, - {{name|upper}}_DIM1, - {{ in_name[0]|upper }}_NB_CHANNELS, - {{ in_name[0]|upper }}_IN_HEIGHT, - {{ in_name[0]|upper }}_IN_WIDTH, - {{name|upper}}_PERMUTE); \ No newline at end of file diff --git a/aidge_export_cpp/templates/kernel_forward/transpose_ND_forward.jinja b/aidge_export_cpp/templates/kernel_forward/transpose_ND_forward.jinja new file mode 100644 index 0000000..8f39fbc --- /dev/null +++ b/aidge_export_cpp/templates/kernel_forward/transpose_ND_forward.jinja @@ -0,0 +1 @@ +transpose_ND_forward<{{in_cdtype[0]}}>({{in_name[0]}},{{name|upper}}_DIMS,{{name|upper}}_NB_DIMS,{{name|upper}}_PERMUTE,{{ out_name[0]|upper }}_SIZE,{{out_name[0]}}); \ No newline at end of file -- GitLab From b7b395f31fa63e8c8139960a34ee9839ef8ea8dc Mon Sep 17 00:00:00 2001 From: Wissam Boussella <wissam.boussella@cea.fr> Date: Tue, 4 Mar 2025 12:09:17 +0100 Subject: [PATCH 3/3] NB_DIMS is now a template parameter --- aidge_export_cpp/kernels/transpose.hpp | 29 +++++++++---------- .../kernel_forward/transpose_ND_forward.jinja | 2 +- 2 files changed, 15 insertions(+), 16 deletions(-) diff --git a/aidge_export_cpp/kernels/transpose.hpp b/aidge_export_cpp/kernels/transpose.hpp index 082d738..31c9e27 100644 --- a/aidge_export_cpp/kernels/transpose.hpp +++ 
b/aidge_export_cpp/kernels/transpose.hpp @@ -23,60 +23,59 @@ * Based on Tensor::copyTranspose from aidge.aidge_core * * @tparam T Data type of the tensor elements. + * @tparam NB_DIMS Number of dimensions of the input tensor. * @param[in] inputs Pointer to the input tensor data stored in contiguous memory. * @param[in] in_dims Array containing the size of each dimension of the input tensor. - * @param[in] nb_dims Number of dimensions of the input tensor. * @param[in] permute Array of unsigned integers specifying the desired permutation - * of dimensions. Each value should be in the range [0, nb_dims-1], + * of dimensions. Each value should be in the range [0, NB_DIMS-1], * defining the new order of dimensions for the output tensor. * @param[in] total_size Total number of elements in the input/output tensor. * @param[out] outputs Pointer to the pre-allocated memory for the transposed tensor. * Ensure this memory is appropriately sized to hold the transposed data. */ -template <typename T> +template <typename T,unsigned int NB_DIMS> void transpose_ND_forward(const T *__restrict inputs, const unsigned int *in_dims, - const unsigned int nb_dims, const unsigned int *permute, const unsigned int total_size, T *__restrict outputs) { // Compute strides for input tensor - unsigned int in_strides[nb_dims]; - in_strides[nb_dims - 1] = 1; - for (int i = nb_dims - 2; i >= 0; --i) + unsigned int in_strides[NB_DIMS]; + in_strides[NB_DIMS - 1] = 1; + for (int i = NB_DIMS - 2; i >= 0; --i) { in_strides[i] = in_strides[i + 1] * in_dims[i + 1]; } // Compute dimensions and strides for output tensor - unsigned int out_dims[nb_dims]; - unsigned int out_strides[nb_dims]; - out_strides[nb_dims - 1] = 1; - for (unsigned int i = 0; i < nb_dims; ++i) + unsigned int out_dims[NB_DIMS]; + unsigned int out_strides[NB_DIMS]; + out_strides[NB_DIMS - 1] = 1; + for (unsigned int i = 0; i < NB_DIMS; ++i) { out_dims[i] = in_dims[permute[i]]; } - for (int i = nb_dims - 2; i >= 0; --i) + for (int i = 
NB_DIMS - 2; i >= 0; --i) { out_strides[i] = out_strides[i + 1] * out_dims[i + 1]; } - unsigned int current_idx[nb_dims]; + unsigned int current_idx[NB_DIMS]; // Iterate over all elements in the input tensor for (unsigned int idx = 0; idx < total_size; ++idx) { unsigned int remaining = idx; - for (unsigned int i = 0; i < nb_dims; ++i) + for (unsigned int i = 0; i < NB_DIMS; ++i) { current_idx[i] = remaining / in_strides[i]; remaining = remaining % in_strides[i]; } unsigned int output_index = 0; - for (unsigned int i = 0; i < nb_dims; ++i) + for (unsigned int i = 0; i < NB_DIMS; ++i) { output_index += current_idx[permute[i]] * out_strides[i]; } diff --git a/aidge_export_cpp/templates/kernel_forward/transpose_ND_forward.jinja b/aidge_export_cpp/templates/kernel_forward/transpose_ND_forward.jinja index 8f39fbc..25af5bd 100644 --- a/aidge_export_cpp/templates/kernel_forward/transpose_ND_forward.jinja +++ b/aidge_export_cpp/templates/kernel_forward/transpose_ND_forward.jinja @@ -1 +1 @@ -transpose_ND_forward<{{in_cdtype[0]}}>({{in_name[0]}},{{name|upper}}_DIMS,{{name|upper}}_NB_DIMS,{{name|upper}}_PERMUTE,{{ out_name[0]|upper }}_SIZE,{{out_name[0]}}); \ No newline at end of file +transpose_ND_forward<{{in_cdtype[0]}},{{name|upper}}_NB_DIMS>({{in_name[0]}},{{name|upper}}_DIMS,{{name|upper}}_PERMUTE,{{ out_name[0]|upper }}_SIZE,{{out_name[0]}}); \ No newline at end of file -- GitLab