Skip to content
Snippets Groups Projects
Commit 61f42977 authored by Olivier BICHLER
Browse files

Improved transpose

parent a7953380
No related branches found
No related tags found
2 merge requests: !71 (0.4.0), !56 (Improved transpose)
Pipeline #76429 passed with warnings
......@@ -25,59 +25,28 @@
* @tparam T Data type of the tensor elements.
* @tparam NB_DIMS Number of dimensions of the input tensor.
* @param[in] inputs Pointer to the input tensor data stored in contiguous memory.
* @param[in] in_dims Array containing the size of each dimension of the input tensor.
* @param[in] permute Array of unsigned integers specifying the desired permutation
* of dimensions. Each value should be in the range [0, NB_DIMS-1],
* defining the new order of dimensions for the output tensor.
* @param[in] total_size Total number of elements in the input/output tensor.
* @param[out] outputs Pointer to the pre-allocated memory for the transposed tensor.
* Ensure this memory is appropriately sized to hold the transposed data.
*/
template <typename T,unsigned int NB_DIMS>
template <typename T, unsigned int NB_DIMS, unsigned int NB_ELTS,
const int PERMUTE[], const int IN_STRIDE[], const int OUT_STRIDE[]>
__attribute__((always_inline)) inline
void transpose_ND_forward(const T *__restrict inputs,
const unsigned int *in_dims,
const unsigned int *permute,
const unsigned int total_size,
T *__restrict outputs)
{
// Compute strides for input tensor
unsigned int in_strides[NB_DIMS];
in_strides[NB_DIMS - 1] = 1;
for (int i = NB_DIMS - 2; i >= 0; --i)
{
in_strides[i] = in_strides[i + 1] * in_dims[i + 1];
}
// Compute dimensions and strides for output tensor
unsigned int out_dims[NB_DIMS];
unsigned int out_strides[NB_DIMS];
out_strides[NB_DIMS - 1] = 1;
for (unsigned int i = 0; i < NB_DIMS; ++i)
{
out_dims[i] = in_dims[permute[i]];
}
for (int i = NB_DIMS - 2; i >= 0; --i)
{
out_strides[i] = out_strides[i + 1] * out_dims[i + 1];
}
unsigned int current_idx[NB_DIMS];
// Iterate over all elements in the input tensor
for (unsigned int idx = 0; idx < total_size; ++idx)
{
unsigned int remaining = idx;
for (unsigned int i = 0; i < NB_DIMS; ++i)
{
current_idx[i] = remaining / in_strides[i];
remaining = remaining % in_strides[i];
for (unsigned int idx = 0; idx < NB_ELTS; ++idx) {
unsigned int input_index = idx;
for (unsigned int i = 0; i < NB_DIMS; ++i) {
current_idx[i] = input_index / IN_STRIDE[i];
input_index %= IN_STRIDE[i];
}
unsigned int output_index = 0;
for (unsigned int i = 0; i < NB_DIMS; ++i)
{
output_index += current_idx[permute[i]] * out_strides[i];
for (unsigned int i = 0; i < NB_DIMS; ++i) {
output_index += current_idx[PERMUTE[i]] * OUT_STRIDE[i];
}
outputs[output_index] = inputs[idx];
......
......@@ -7,6 +7,27 @@ from aidge_export_cpp import ExportLibCpp
class TransposeCPP(ExportNodeCpp):
def __init__(self, node, mem_info):
super().__init__(node, mem_info)
nbdims = len(self.attributes["in_dims"][0])
# Compute input strides
in_strides = [0] * nbdims
in_strides[nbdims - 1] = 1
for i in range(nbdims - 2, -1, -1):
in_strides[i] = in_strides[i + 1] * self.attributes["in_dims"][0][i + 1]
# Compute output dimensions based on permutation
out_dims = [self.attributes["in_dims"][0][self.attributes["output_dims_order"][i]] for i in range(nbdims)]
# Compute output strides
out_strides = [0] * nbdims
out_strides[nbdims - 1] = 1
for i in range(nbdims - 2, -1, -1):
out_strides[i] = out_strides[i + 1] * out_dims[i + 1]
self.attributes["in_strides"] = in_strides
self.attributes["out_strides"] = out_strides
self.config_template = str(
ROOT / "templates" / "configuration" / "transpose_ND_config.jinja")
self.forward_template = str(
......
......@@ -8,8 +8,10 @@
{# Export suppose that batchsize = 1#}
#define {{ name|upper }}_NB_ELTS {{ in_dims[0]|join('*') }}
#define {{ name|upper }}_NB_DIMS {{ in_dims[0] | length }}
constexpr int {{ name|upper }}_IN_STRIDES[] = { {{ in_strides | join(', ') }} };
constexpr int {{ name|upper }}_OUT_STRIDES[] = { {{ out_strides | join(', ') }} };
static constexpr unsigned int {{ name|upper }}_PERMUTE[] = { {{ output_dims_order | join(', ') }} };
static constexpr unsigned int {{ name|upper }}_DIMS[] = { {{ in_dims[0] | join(', ') }}};
constexpr int {{ name|upper }}_PERMUTE[] = { {{ output_dims_order | join(', ') }} };
constexpr int {{ name|upper }}_DIMS[] = { {{ in_dims[0] | join(', ') }} };
#endif /* {{ name|upper }}_LAYER_H */
\ No newline at end of file
{% filter indent(width=4, first=False) %}
{% include "./_mem_offset.jinja" %}
transpose_ND_forward<{{in_cdtype[0]}},
{{name|upper}}_NB_DIMS>
({{in_name[0]}},
{{name|upper}}_DIMS,
{{name|upper}}_PERMUTE,
{{name|upper}}_NB_DIMS,
{{name|upper}}_NB_ELTS,
{{name|upper}}_PERMUTE,
{{name|upper}}_IN_STRIDES,
{{name|upper}}_OUT_STRIDES>
({{in_name[0]}},
{{out_name[0]}});
{% include "./_save_outputs.jinja" %}
{% include "./_aidge_cmp.jinja" %}
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment