Improved transpose

61f42977 · Olivier BICHLER · a7953380
Commit 61f42977 authored 1 month ago by Olivier BICHLER
--- a/aidge_export_cpp/kernels/transpose.hpp
+++ b/aidge_export_cpp/kernels/transpose.hpp
@@ -25,59 +25,28 @@
 * @tparam T        Data type of the tensor elements.
 * @tparam NB_DIMS  Number of dimensions of the input tensor.
 * @param[in]  inputs      Pointer to the input tensor data stored in contiguous memory.
- * @param[in]  in_dims     Array containing the size of each dimension of the input tensor.
- * @param[in]  permute     Array of unsigned integers specifying the desired permutation
- *                         of dimensions. Each value should be in the range [0, NB_DIMS-1],
- *                         defining the new order of dimensions for the output tensor.
- * @param[in]  total_size  Total number of elements in the input/output tensor.
 * @param[out] outputs     Pointer to the pre-allocated memory for the transposed tensor.
 *                         Ensure this memory is appropriately sized to hold the transposed data.
 */
-template <typename T,unsigned int NB_DIMS>
+template <typename T, unsigned int NB_DIMS, unsigned int NB_ELTS,
+          const int PERMUTE[], const int IN_STRIDE[], const int OUT_STRIDE[]>
+__attribute__((always_inline)) inline
 void transpose_ND_forward(const T *__restrict inputs,
-                          const unsigned int *in_dims,
-                          const unsigned int *permute,
-                          const unsigned int total_size,
                          T *__restrict outputs)
 {
-    // Compute strides for input tensor
-    unsigned int in_strides[NB_DIMS];
-    in_strides[NB_DIMS - 1] = 1;
-    for (int i = NB_DIMS - 2; i >= 0; --i)
-    {
-        in_strides[i] = in_strides[i + 1] * in_dims[i + 1];
-    }
-
-    // Compute dimensions and strides for output tensor
-    unsigned int out_dims[NB_DIMS];
-    unsigned int out_strides[NB_DIMS];
-    out_strides[NB_DIMS - 1] = 1;
-    for (unsigned int i = 0; i < NB_DIMS; ++i)
-    {
-        out_dims[i] = in_dims[permute[i]];
-    }
-    for (int i = NB_DIMS - 2; i >= 0; --i)
-    {
-        out_strides[i] = out_strides[i + 1] * out_dims[i + 1];
-    }
-
    unsigned int current_idx[NB_DIMS];

    // Iterate over all elements in the input tensor
-    for (unsigned int idx = 0; idx < total_size; ++idx)
-    {
-
-        unsigned int remaining = idx;
-        for (unsigned int i = 0; i < NB_DIMS; ++i)
-        {
-            current_idx[i] = remaining / in_strides[i];
-            remaining = remaining % in_strides[i];
+    for (unsigned int idx = 0; idx < NB_ELTS; ++idx) {
+        unsigned int input_index = idx;
+        for (unsigned int i = 0; i < NB_DIMS; ++i) {
+            current_idx[i] = input_index / IN_STRIDE[i];
+            input_index %= IN_STRIDE[i];
        }

        unsigned int output_index = 0;
-        for (unsigned int i = 0; i < NB_DIMS; ++i)
-        {
-            output_index += current_idx[permute[i]] * out_strides[i];
+        for (unsigned int i = 0; i < NB_DIMS; ++i) {
+            output_index += current_idx[PERMUTE[i]] * OUT_STRIDE[i];
        }

        outputs[output_index] = inputs[idx];

--- a/aidge_export_cpp/operators/Transpose.py
+++ b/aidge_export_cpp/operators/Transpose.py
@@ -7,6 +7,27 @@ from aidge_export_cpp import ExportLibCpp
 class TransposeCPP(ExportNodeCpp):
    def __init__(self, node, mem_info):
        super().__init__(node, mem_info)
+
+        nbdims = len(self.attributes["in_dims"][0])
+
+        # Compute input strides
+        in_strides = [0] * nbdims
+        in_strides[nbdims - 1] = 1
+        for i in range(nbdims - 2, -1, -1):
+            in_strides[i] = in_strides[i + 1] * self.attributes["in_dims"][0][i + 1]
+
+        # Compute output dimensions based on permutation
+        out_dims = [self.attributes["in_dims"][0][self.attributes["output_dims_order"][i]] for i in range(nbdims)]
+
+        # Compute output strides
+        out_strides = [0] * nbdims
+        out_strides[nbdims - 1] = 1
+        for i in range(nbdims - 2, -1, -1):
+            out_strides[i] = out_strides[i + 1] * out_dims[i + 1]
+
+        self.attributes["in_strides"] = in_strides
+        self.attributes["out_strides"] = out_strides
+
        self.config_template = str(
            ROOT / "templates" / "configuration" / "transpose_ND_config.jinja")
        self.forward_template = str(

--- a/aidge_export_cpp/templates/configuration/transpose_ND_config.jinja
+++ b/aidge_export_cpp/templates/configuration/transpose_ND_config.jinja
@@ -8,8 +8,10 @@
 {# Export suppose that batchsize = 1#}
 #define {{ name|upper }}_NB_ELTS {{ in_dims[0]|join('*') }}
 #define {{ name|upper }}_NB_DIMS {{ in_dims[0] | length }}
+constexpr int {{ name|upper }}_IN_STRIDES[] = { {{ in_strides | join(', ') }} };
+constexpr int {{ name|upper }}_OUT_STRIDES[] = { {{ out_strides | join(', ') }} };

-static constexpr unsigned int {{ name|upper }}_PERMUTE[] = { {{ output_dims_order | join(', ') }} };
-static constexpr unsigned int {{ name|upper }}_DIMS[] = { {{ in_dims[0] | join(', ') }}};
+constexpr int {{ name|upper }}_PERMUTE[] = { {{ output_dims_order | join(', ') }} };
+constexpr int {{ name|upper }}_DIMS[] = { {{ in_dims[0] | join(', ') }} };

 #endif /* {{ name|upper }}_LAYER_H */
\ No newline at end of file
--- a/aidge_export_cpp/templates/kernel_forward/transpose_ND_forward.jinja
+++ b/aidge_export_cpp/templates/kernel_forward/transpose_ND_forward.jinja
 {% filter indent(width=4, first=False) %}
 {% include "./_mem_offset.jinja" %}
 transpose_ND_forward<{{in_cdtype[0]}},
-                     {{name|upper}}_NB_DIMS>
-                    ({{in_name[0]}},
-                     {{name|upper}}_DIMS,
-                     {{name|upper}}_PERMUTE,
+                     {{name|upper}}_NB_DIMS,
                     {{name|upper}}_NB_ELTS,
+                     {{name|upper}}_PERMUTE,
+                     {{name|upper}}_IN_STRIDES,
+                     {{name|upper}}_OUT_STRIDES>
+                    ({{in_name[0]}},
                     {{out_name[0]}});
 {% include "./_save_outputs.jinja" %}
 {% include "./_aidge_cmp.jinja" %}