Skip to content
Snippets Groups Projects
Commit ac47d3e6 authored by Wissam Boussella's avatar Wissam Boussella
Browse files

Keep transpose in NHWC in operator.py; this still needs to be changed.

New method for transpose.hpp, which now handles the multi-dimensional case.
Changed config and forward templates to be compatible with the new transpose.
parent b071317e
No related branches found
No related tags found
2 merge requests!39Update 0.2.1 -> 0.3.0,!30Add transpose function for 4D tensors and related templates
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef __AIDGE_EXPORT_CPP_KERNELS_TRANSPOSE__
#define __AIDGE_EXPORT_CPP_KERNELS_TRANSPOSE__
#include "network/typedefs.hpp"
/**
* @brief Transposes a 4-dimensional tensor based on the specified permutation.
* @brief Transposes an N-dimensional tensor based on the specified permutation.
*
* This function rearranges the dimensions of an N-dimensional tensor according to the
* permutation array provided. The input tensor is expected to have dimensions specified
* by `in_dims`, and the output tensor will have dimensions reordered as specified by the
* `permute` array.
*
* This function rearranges the dimensions of a 4D tensor according to the
* permutation array provided. The input tensor is expected to have dimensions
* [in_dim1][in_dim2][in_dim3][in_dim4], and the output tensor will have
* dimensions reordered as specified by the permute array.
* Based on Tensor::copyTranspose from aidge.aidge_core
*
* @tparam T Data type of the tensor elements (e.g., float, double).
* @param[in] inputs Pointer to the input tensor data stored in contiguous memory.
* @param[out] outputs Pointer to the pre-allocated memory for the transposed tensor.
* Ensure this memory is appropriately sized to hold the transposed data.
* @param[in] in_dim1 Size of the first dimension of the input tensor.
* @param[in] in_dim2 Size of the second dimension of the input tensor.
* @param[in] in_dim3 Size of the third dimension of the input tensor.
* @param[in] in_dim4 Size of the fourth dimension of the input tensor.
* @param[in] permute Array of four unsigned integers specifying the desired permutation
* of dimensions. Each value should be in the range [0, 3], defining
* the new order of dimensions for the output tensor.
* @tparam T Data type of the tensor elements.
* @param[in] inputs Pointer to the input tensor data stored in contiguous memory.
* @param[in] in_dims Array containing the size of each dimension of the input tensor.
* @param[in] nb_dims Number of dimensions of the input tensor.
* @param[in] permute Array of unsigned integers specifying the desired permutation
* of dimensions. Each value should be in the range [0, nb_dims-1],
* defining the new order of dimensions for the output tensor.
* @param[in] total_size Total number of elements in the input/output tensor.
* @param[out] outputs Pointer to the pre-allocated memory for the transposed tensor.
* Ensure this memory is appropriately sized to hold the transposed data.
*/
template <typename T>
void transpose_4D_forward(const T* inputs,
T* outputs,
unsigned int in_dim1,
unsigned int in_dim2,
unsigned int in_dim3,
unsigned int in_dim4,
const unsigned int* permute)
void transpose_ND_forward(const T *__restrict inputs,
const unsigned int *in_dims,
const unsigned int nb_dims,
const unsigned int *permute,
const unsigned int total_size,
T *__restrict outputs)
{
unsigned int in_dims[4] = {in_dim1, in_dim2, in_dim3, in_dim4};
unsigned int out_dims[4];
for (unsigned int i = 0; i < 4; ++i) {
out_dims[i] = in_dims[permute[i]];
// Compute strides for input tensor
unsigned int in_strides[nb_dims];
in_strides[nb_dims - 1] = 1;
for (int i = nb_dims - 2; i >= 0; --i)
{
in_strides[i] = in_strides[i + 1] * in_dims[i + 1];
}
unsigned int in_strides[4] = {
in_dim2 * in_dim3 * in_dim4,
in_dim3 * in_dim4,
in_dim4,
1
};
// Compute dimensions and strides for output tensor
unsigned int out_dims[nb_dims];
unsigned int out_strides[nb_dims];
out_strides[nb_dims - 1] = 1;
for (unsigned int i = 0; i < nb_dims; ++i)
{
out_dims[i] = in_dims[permute[i]];
}
for (int i = nb_dims - 2; i >= 0; --i)
{
out_strides[i] = out_strides[i + 1] * out_dims[i + 1];
}
unsigned int out_strides[4] = {
out_dims[1] * out_dims[2] * out_dims[3],
out_dims[2] * out_dims[3],
out_dims[3],
1
};
unsigned int current_idx[nb_dims];
for (unsigned int i = 0; i < in_dim1; ++i) {
for (unsigned int j = 0; j < in_dim2; ++j) {
for (unsigned int k = 0; k < in_dim3; ++k) {
for (unsigned int l = 0; l < in_dim4; ++l) {
// Compute the linear index in the input tensor
unsigned int input_index = i * in_strides[0] +
j * in_strides[1] +
k * in_strides[2] +
l * in_strides[3];
// Iterate over all elements in the input tensor
for (unsigned int idx = 0; idx < total_size; ++idx)
{
unsigned int in_idx[4] = {i, j, k, l};
unsigned int output_index = 0;
for (unsigned int m = 0; m < 4; ++m) {
output_index += in_idx[permute[m]] * out_strides[m];
}
unsigned int remaining = idx;
for (unsigned int i = 0; i < nb_dims; ++i)
{
current_idx[i] = remaining / in_strides[i];
remaining = remaining % in_strides[i];
}
outputs[output_index] = inputs[input_index];
}
}
unsigned int output_index = 0;
for (unsigned int i = 0; i < nb_dims; ++i)
{
output_index += current_idx[permute[i]] * out_strides[i];
}
outputs[output_index] = inputs[idx];
}
}
#endif // __AIDGE_EXPORT_CPP_KERNELS_TRANSPOSE__
\ No newline at end of file
#endif // __AIDGE_EXPORT_CPP_KERNELS_TRANSPOSE__
......@@ -56,8 +56,8 @@ class ProducerCPP(ExportNode):
super().__init__(node, mem_info)
self.values = np.array(self.operator.get_output(0))
# if len(self.values.shape) == 4: # Note: export in HWC
# self.values = np.transpose(self.values, (0, 2, 3, 1))
if len(self.values.shape) == 4: # Note: export in HWC
self.values = np.transpose(self.values, (0, 2, 3, 1))
def export(self, export_folder: Path):
header_path = f"include/parameters/{self.attributes['name']}.h"
......@@ -296,14 +296,14 @@ class FcCPP(ExportNodeCpp):
str(ROOT / "kernels" / "rescaling.hpp")
]
@ExportLibCpp.register("Transpose", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.float32)))
@ExportLibCpp.register("Transpose", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.any)))
class TransposeCPP(ExportNodeCpp):
def __init__(self, node, mem_info):
super().__init__(node, mem_info)
self.config_template = str(
ROOT / "templates" / "configuration" / "transpose_4D_config.jinja")
ROOT / "templates" / "configuration" / "transpose_ND_config.jinja")
self.forward_template = str(
ROOT / "templates" / "kernel_forward" / "transpose_4D_forward.jinja")
ROOT / "templates" / "kernel_forward" / "transpose_ND_forward.jinja")
self.include_list = []
self.kernels_to_copy = [
str(ROOT / "kernels" / "transpose.hpp")
......
......@@ -5,9 +5,11 @@
{# For layer configuration -#}
{% include "./_def_io.jinja" %}
{% include "./_meminfo.jinja" %}
{# 4D kernels are not supported yet that's why define NAME_DIM1 #}
#define {{ name|upper }}_DIM1 1
{# Export suppose that batchsize = 1#}
#define {{ name|upper }}_NB_DIMS {{ in_dims[0] | length }}
static const unsigned int {{ name|upper }}_PERMUTE[] = { {{ output_dims_order | join(', ') }} };
static constexpr unsigned int {{ name|upper }}_PERMUTE[] = { {{ output_dims_order | join(', ') }} };
static constexpr unsigned int {{ name|upper }}_DIMS[] = { {{ in_dims[0] | join(', ') }}};
#endif /* {{ name|upper }}_LAYER_H */
#endif /* {{ name|upper }}_LAYER_H */
\ No newline at end of file
transpose_4D_forward({{in_name[0]}},
{{out_name[0]}},
{{name|upper}}_DIM1,
{{ in_name[0]|upper }}_NB_CHANNELS,
{{ in_name[0]|upper }}_IN_HEIGHT,
{{ in_name[0]|upper }}_IN_WIDTH,
{{name|upper}}_PERMUTE);
\ No newline at end of file
transpose_ND_forward<{{in_cdtype[0]}}>({{in_name[0]}},{{name|upper}}_DIMS,{{name|upper}}_NB_DIMS,{{name|upper}}_PERMUTE,{{ out_name[0]|upper }}_SIZE,{{out_name[0]}});
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment