Skip to content
Snippets Groups Projects
Commit ac47d3e6 authored by Wissam Boussella's avatar Wissam Boussella
Browse files

Keep transpose in NHWC in operator.py, but it needs to be changed

New method for transpose.hpp; it now handles multi-dimensional tensors.
Changed config and forward to be compatible with the new transpose.
parent b071317e
No related branches found
No related tags found
2 merge requests!39Update 0.2.1 -> 0.3.0,!30Add transpose function for 4D tensors and related templates
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef __AIDGE_EXPORT_CPP_KERNELS_TRANSPOSE__
#define __AIDGE_EXPORT_CPP_KERNELS_TRANSPOSE__

/**
 * @brief Transposes an N-dimensional tensor based on the specified permutation.
 *
 * This function rearranges the dimensions of an N-dimensional tensor according
 * to the permutation array provided. The input tensor is expected to have
 * dimensions specified by `in_dims`, and the output tensor will have
 * dimensions reordered as specified by the `permute` array.
 *
 * Based on Tensor::copyTranspose from aidge.aidge_core.
 *
 * @tparam T Data type of the tensor elements.
 * @param[in] inputs Pointer to the input tensor data stored in contiguous
 *                   (row-major) memory.
 * @param[in] in_dims Array containing the size of each dimension of the input
 *                    tensor.
 * @param[in] nb_dims Number of dimensions of the input tensor
 *                    (must be <= MAX_DIMS).
 * @param[in] permute Array of unsigned integers specifying the desired
 *                    permutation of dimensions. Each value should be in the
 *                    range [0, nb_dims-1], defining the new order of
 *                    dimensions for the output tensor.
 * @param[in] total_size Total number of elements in the input/output tensor.
 * @param[out] outputs Pointer to the pre-allocated memory for the transposed
 *                     tensor. Ensure this memory is appropriately sized to
 *                     hold the transposed data.
 */
template <typename T>
void transpose_ND_forward(const T *__restrict inputs,
                          const unsigned int *in_dims,
                          const unsigned int nb_dims,
                          const unsigned int *permute,
                          const unsigned int total_size,
                          T *__restrict outputs)
{
    // Fixed upper bound on the tensor rank: variable-length arrays
    // (e.g. `unsigned int in_strides[nb_dims]`) are a compiler extension,
    // not standard C++.
    constexpr unsigned int MAX_DIMS = 16;

    // Nothing to do for empty tensors; also guards the index arithmetic
    // below against nb_dims == 0.
    if (nb_dims == 0 || total_size == 0) {
        return;
    }

    // Row-major strides of the input tensor.
    // Counting down with `i > 0` avoids the unsigned underflow that
    // `int i = nb_dims - 2` would cause when nb_dims < 2.
    unsigned int in_strides[MAX_DIMS];
    in_strides[nb_dims - 1] = 1;
    for (unsigned int i = nb_dims - 1; i > 0; --i) {
        in_strides[i - 1] = in_strides[i] * in_dims[i];
    }

    // Dimensions and row-major strides of the output tensor:
    // output dimension i is input dimension permute[i].
    unsigned int out_dims[MAX_DIMS];
    for (unsigned int i = 0; i < nb_dims; ++i) {
        out_dims[i] = in_dims[permute[i]];
    }
    unsigned int out_strides[MAX_DIMS];
    out_strides[nb_dims - 1] = 1;
    for (unsigned int i = nb_dims - 1; i > 0; --i) {
        out_strides[i - 1] = out_strides[i] * out_dims[i];
    }

    unsigned int current_idx[MAX_DIMS];

    // Iterate over all elements of the input tensor in linear order.
    for (unsigned int idx = 0; idx < total_size; ++idx) {
        // Decompose the linear input index into a multi-dimensional index.
        unsigned int remaining = idx;
        for (unsigned int i = 0; i < nb_dims; ++i) {
            current_idx[i] = remaining / in_strides[i];
            remaining %= in_strides[i];
        }

        // Recompose the linear output index: output coordinate i is the
        // input coordinate along dimension permute[i].
        unsigned int output_index = 0;
        for (unsigned int i = 0; i < nb_dims; ++i) {
            output_index += current_idx[permute[i]] * out_strides[i];
        }

        outputs[output_index] = inputs[idx];
    }
}

#endif // __AIDGE_EXPORT_CPP_KERNELS_TRANSPOSE__
\ No newline at end of file
...@@ -56,8 +56,8 @@ class ProducerCPP(ExportNode): ...@@ -56,8 +56,8 @@ class ProducerCPP(ExportNode):
super().__init__(node, mem_info) super().__init__(node, mem_info)
self.values = np.array(self.operator.get_output(0)) self.values = np.array(self.operator.get_output(0))
# if len(self.values.shape) == 4: # Note: export in HWC if len(self.values.shape) == 4: # Note: export in HWC
# self.values = np.transpose(self.values, (0, 2, 3, 1)) self.values = np.transpose(self.values, (0, 2, 3, 1))
def export(self, export_folder: Path): def export(self, export_folder: Path):
header_path = f"include/parameters/{self.attributes['name']}.h" header_path = f"include/parameters/{self.attributes['name']}.h"
...@@ -296,14 +296,14 @@ class FcCPP(ExportNodeCpp): ...@@ -296,14 +296,14 @@ class FcCPP(ExportNodeCpp):
str(ROOT / "kernels" / "rescaling.hpp") str(ROOT / "kernels" / "rescaling.hpp")
] ]
@ExportLibCpp.register("Transpose", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.any)))
class TransposeCPP(ExportNodeCpp):
    """Export node for the Transpose operator.

    Registered for any dtype: the generated C++ kernel is a template, so it
    does not depend on a specific element type. Wires the N-dimensional
    transpose configuration/forward Jinja templates and copies the
    transpose.hpp kernel into the export.
    """

    def __init__(self, node, mem_info):
        super().__init__(node, mem_info)
        self.config_template = str(
            ROOT / "templates" / "configuration" / "transpose_ND_config.jinja")
        self.forward_template = str(
            ROOT / "templates" / "kernel_forward" / "transpose_ND_forward.jinja")
        # No extra includes needed beyond the copied kernel header.
        self.include_list = []
        self.kernels_to_copy = [
            str(ROOT / "kernels" / "transpose.hpp")
        ]
......
{# For layer configuration -#}
{% include "./_def_io.jinja" %}
{% include "./_meminfo.jinja" %}
{# Export assumes that batch size = 1 #}
#define {{ name|upper }}_NB_DIMS {{ in_dims[0] | length }}
static constexpr unsigned int {{ name|upper }}_PERMUTE[] = { {{ output_dims_order | join(', ') }} };
static constexpr unsigned int {{ name|upper }}_DIMS[] = { {{ in_dims[0] | join(', ') }} };

#endif /* {{ name|upper }}_LAYER_H */
{#- Emit the call to the N-dimensional transpose kernel.
    The element type comes from the input's C dtype; dims, rank and the
    permutation come from the generated configuration header. -#}
transpose_ND_forward<{{ in_cdtype[0] }}>({{ in_name[0] }},
                                         {{ name|upper }}_DIMS,
                                         {{ name|upper }}_NB_DIMS,
                                         {{ name|upper }}_PERMUTE,
                                         {{ out_name[0]|upper }}_SIZE,
                                         {{ out_name[0] }});
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment