diff --git a/aidge_export_cpp/kernels/transpose.hpp b/aidge_export_cpp/kernels/transpose.hpp new file mode 100644 index 0000000000000000000000000000000000000000..31c9e27869c5e2fde701f6700fd4964ea4cefd29 --- /dev/null +++ b/aidge_export_cpp/kernels/transpose.hpp @@ -0,0 +1,87 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. + * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#ifndef __AIDGE_EXPORT_CPP_KERNELS_TRANSPOSE__ +#define __AIDGE_EXPORT_CPP_KERNELS_TRANSPOSE__ + +/** + * @brief Transposes an N-dimensional tensor based on the specified permutation. + * + * This function rearranges the dimensions of an N-dimensional tensor according to the + * permutation array provided. The input tensor is expected to have dimensions specified + * by `in_dims`, and the output tensor will have dimensions reordered as specified by the + * `permute` array. + * + * Based on Tensor::copyTranspose from aidge.aidge_core + * + * @tparam T Data type of the tensor elements. + * @tparam NB_DIMS Number of dimensions of the input tensor. + * @param[in] inputs Pointer to the input tensor data stored in contiguous memory. + * @param[in] in_dims Array containing the size of each dimension of the input tensor. + * @param[in] permute Array of unsigned integers specifying the desired permutation + * of dimensions. Each value should be in the range [0, NB_DIMS-1], + * defining the new order of dimensions for the output tensor. + * @param[in] total_size Total number of elements in the input/output tensor. + * @param[out] outputs Pointer to the pre-allocated memory for the transposed tensor. + * Ensure this memory is appropriately sized to hold the transposed data. 
/**
 * @brief Transposes an N-dimensional tensor according to a permutation of its axes.
 *
 * Input and output are both addressed as contiguous buffers in which the last
 * dimension has stride 1. Output dimension `i` takes the extent of input
 * dimension `permute[i]`; every input element is scattered to the output
 * position obtained by reordering its coordinates the same way.
 *
 * @tparam T        Data type of the tensor elements.
 * @tparam NB_DIMS  Number of dimensions of the tensor. Must be at least 1
 *                  (enforced at compile time).
 * @param[in]  inputs      Input tensor data (read-only, `total_size` elements).
 * @param[in]  in_dims     Extent of each input dimension (`NB_DIMS` entries).
 * @param[in]  permute     Permutation of the axes (`NB_DIMS` entries, each in
 *                         the range [0, NB_DIMS-1]).
 * @param[in]  total_size  Number of elements to copy from `inputs`.
 * @param[out] outputs     Pre-allocated destination buffer. It is declared
 *                         `__restrict`, so it must not overlap `inputs`.
 */
template <typename T, unsigned int NB_DIMS>
void transpose_ND_forward(const T *__restrict inputs,
                          const unsigned int *in_dims,
                          const unsigned int *permute,
                          const unsigned int total_size,
                          T *__restrict outputs)
{
    // A rank-0 instantiation would declare zero-length arrays and write to
    // index NB_DIMS - 1 == UINT_MAX; reject it when the template is instantiated.
    static_assert(NB_DIMS > 0, "transpose_ND_forward requires NB_DIMS >= 1");

    // Extents of the output tensor: axis d of the output is axis permute[d] of the input.
    unsigned int out_dims[NB_DIMS];
    for (unsigned int d = 0; d < NB_DIMS; ++d)
    {
        out_dims[d] = in_dims[permute[d]];
    }

    // Strides of both tensors, innermost dimension first. The counter stays
    // unsigned and stops at 1, so nothing relies on converting a wrapped
    // unsigned value to a negative int (the NB_DIMS == 1 case simply skips the loop).
    unsigned int in_strides[NB_DIMS];
    unsigned int out_strides[NB_DIMS];
    in_strides[NB_DIMS - 1] = 1;
    out_strides[NB_DIMS - 1] = 1;
    for (unsigned int d = NB_DIMS - 1; d > 0; --d)
    {
        in_strides[d - 1] = in_strides[d] * in_dims[d];
        out_strides[d - 1] = out_strides[d] * out_dims[d];
    }

    unsigned int coords[NB_DIMS];

    // Walk the input linearly and scatter each element to its transposed position.
    for (unsigned int idx = 0; idx < total_size; ++idx)
    {
        // Decompose the flat input index into per-dimension coordinates.
        unsigned int remaining = idx;
        for (unsigned int d = 0; d < NB_DIMS; ++d)
        {
            coords[d] = remaining / in_strides[d];
            remaining %= in_strides[d];
        }

        // Recompose the flat output index from the permuted coordinates.
        unsigned int output_index = 0;
        for (unsigned int d = 0; d < NB_DIMS; ++d)
        {
            output_index += coords[permute[d]] * out_strides[d];
        }

        outputs[output_index] = inputs[idx];
    }
}
@ExportLibCpp.register("Transpose", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.any)))
class TransposeCPP(ExportNodeCpp):
    """Export node registered for the "Transpose" operator.

    Selects the N-D transpose configuration and forward-call templates and
    lists the kernel header that has to be copied with the generated code.
    """

    def __init__(self, node, mem_info):
        super().__init__(node, mem_info)

        templates_dir = ROOT / "templates"
        kernels_dir = ROOT / "kernels"

        # Jinja templates: layer configuration header and forward call.
        self.config_template = str(
            templates_dir / "configuration" / "transpose_ND_config.jinja")
        self.forward_template = str(
            templates_dir / "kernel_forward" / "transpose_ND_forward.jinja")

        # The include list is left empty for this operator.
        self.include_list = []

        # Only the transpose kernel header is copied.
        self.kernels_to_copy = [str(kernels_dir / "transpose.hpp")]
+{# Emits the call to transpose_ND_forward: template arguments are in_cdtype[0] and the layer's <NAME>_NB_DIMS macro; call arguments are the first input name, the <NAME>_DIMS and <NAME>_PERMUTE arrays from the layer configuration, the <OUT_NAME>_SIZE macro (presumably provided by the included meminfo template - confirm) and the first output name. #}transpose_ND_forward<{{in_cdtype[0]}},{{name|upper}}_NB_DIMS>({{in_name[0]}},{{name|upper}}_DIMS,{{name|upper}}_PERMUTE,{{ out_name[0]|upper }}_SIZE,{{out_name[0]}}); \ No newline at end of file