Skip to content
Snippets Groups Projects
Commit ac47d3e6 authored by Wissam Boussella's avatar Wissam Boussella
Browse files

Keep transpose in NHWC in operator.py; this still needs to be changed.

New method for transpose.hpp, which now handles the multi-dimensional case.
Changed config and forward templates to be compatible with the new transpose.
parent b071317e
No related branches found
No related tags found
2 merge requests!39Update 0.2.1 -> 0.3.0,!30Add transpose function for 4D tensors and related templates
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef __AIDGE_EXPORT_CPP_KERNELS_TRANSPOSE__
#define __AIDGE_EXPORT_CPP_KERNELS_TRANSPOSE__
#include "network/typedefs.hpp"
/**
* @brief Transposes a 4-dimensional tensor based on the specified permutation.
* @brief Transposes an N-dimensional tensor based on the specified permutation.
*
* This function rearranges the dimensions of an N-dimensional tensor according to the
* permutation array provided. The input tensor is expected to have dimensions specified
* by `in_dims`, and the output tensor will have dimensions reordered as specified by the
* `permute` array.
*
* This function rearranges the dimensions of a 4D tensor according to the
* permutation array provided. The input tensor is expected to have dimensions
* [in_dim1][in_dim2][in_dim3][in_dim4], and the output tensor will have
* dimensions reordered as specified by the permute array.
* Based on Tensor::copyTranspose from aidge.aidge_core
*
* @tparam T Data type of the tensor elements (e.g., float, double).
* @param[in] inputs Pointer to the input tensor data stored in contiguous memory.
* @param[out] outputs Pointer to the pre-allocated memory for the transposed tensor.
* Ensure this memory is appropriately sized to hold the transposed data.
* @param[in] in_dim1 Size of the first dimension of the input tensor.
* @param[in] in_dim2 Size of the second dimension of the input tensor.
* @param[in] in_dim3 Size of the third dimension of the input tensor.
* @param[in] in_dim4 Size of the fourth dimension of the input tensor.
* @param[in] permute Array of four unsigned integers specifying the desired permutation
* of dimensions. Each value should be in the range [0, 3], defining
* the new order of dimensions for the output tensor.
* @tparam T Data type of the tensor elements.
* @param[in] inputs Pointer to the input tensor data stored in contiguous memory.
* @param[in] in_dims Array containing the size of each dimension of the input tensor.
* @param[in] nb_dims Number of dimensions of the input tensor.
* @param[in] permute Array of unsigned integers specifying the desired permutation
* of dimensions. Each value should be in the range [0, nb_dims-1],
* defining the new order of dimensions for the output tensor.
* @param[in] total_size Total number of elements in the input/output tensor.
* @param[out] outputs Pointer to the pre-allocated memory for the transposed tensor.
* Ensure this memory is appropriately sized to hold the transposed data.
*/
template <typename T>
void transpose_4D_forward(const T* inputs,
T* outputs,
unsigned int in_dim1,
unsigned int in_dim2,
unsigned int in_dim3,
unsigned int in_dim4,
const unsigned int* permute)
void transpose_ND_forward(const T *__restrict inputs,
const unsigned int *in_dims,
const unsigned int nb_dims,
const unsigned int *permute,
const unsigned int total_size,
T *__restrict outputs)
{
unsigned int in_dims[4] = {in_dim1, in_dim2, in_dim3, in_dim4};
unsigned int out_dims[4];
for (unsigned int i = 0; i < 4; ++i) {
out_dims[i] = in_dims[permute[i]];
// Compute strides for input tensor
unsigned int in_strides[nb_dims];
in_strides[nb_dims - 1] = 1;
for (int i = nb_dims - 2; i >= 0; --i)
{
in_strides[i] = in_strides[i + 1] * in_dims[i + 1];
}
unsigned int in_strides[4] = {
in_dim2 * in_dim3 * in_dim4,
in_dim3 * in_dim4,
in_dim4,
1
};
// Compute dimensions and strides for output tensor
unsigned int out_dims[nb_dims];
unsigned int out_strides[nb_dims];
out_strides[nb_dims - 1] = 1;
for (unsigned int i = 0; i < nb_dims; ++i)
{
out_dims[i] = in_dims[permute[i]];
}
for (int i = nb_dims - 2; i >= 0; --i)
{
out_strides[i] = out_strides[i + 1] * out_dims[i + 1];
}
unsigned int out_strides[4] = {
out_dims[1] * out_dims[2] * out_dims[3],
out_dims[2] * out_dims[3],
out_dims[3],
1
};
unsigned int current_idx[nb_dims];
for (unsigned int i = 0; i < in_dim1; ++i) {
for (unsigned int j = 0; j < in_dim2; ++j) {
for (unsigned int k = 0; k < in_dim3; ++k) {
for (unsigned int l = 0; l < in_dim4; ++l) {
// Compute the linear index in the input tensor
unsigned int input_index = i * in_strides[0] +
j * in_strides[1] +
k * in_strides[2] +
l * in_strides[3];
// Iterate over all elements in the input tensor
for (unsigned int idx = 0; idx < total_size; ++idx)
{
unsigned int in_idx[4] = {i, j, k, l};
unsigned int output_index = 0;
for (unsigned int m = 0; m < 4; ++m) {
output_index += in_idx[permute[m]] * out_strides[m];
}
unsigned int remaining = idx;
for (unsigned int i = 0; i < nb_dims; ++i)
{
current_idx[i] = remaining / in_strides[i];
remaining = remaining % in_strides[i];
}
outputs[output_index] = inputs[input_index];
}
}
unsigned int output_index = 0;
for (unsigned int i = 0; i < nb_dims; ++i)
{
output_index += current_idx[permute[i]] * out_strides[i];
}
outputs[output_index] = inputs[idx];
}
}
#endif // __AIDGE_EXPORT_CPP_KERNELS_TRANSPOSE__
\ No newline at end of file
#endif // __AIDGE_EXPORT_CPP_KERNELS_TRANSPOSE__
......@@ -56,8 +56,8 @@ class ProducerCPP(ExportNode):
super().__init__(node, mem_info)
self.values = np.array(self.operator.get_output(0))
# if len(self.values.shape) == 4: # Note: export in HWC
# self.values = np.transpose(self.values, (0, 2, 3, 1))
if len(self.values.shape) == 4: # Note: export in HWC
self.values = np.transpose(self.values, (0, 2, 3, 1))
def export(self, export_folder: Path):
header_path = f"include/parameters/{self.attributes['name']}.h"
......@@ -296,14 +296,14 @@ class FcCPP(ExportNodeCpp):
str(ROOT / "kernels" / "rescaling.hpp")
]
@ExportLibCpp.register("Transpose", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.float32)))
@ExportLibCpp.register("Transpose", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.any)))
class TransposeCPP(ExportNodeCpp):
def __init__(self, node, mem_info):
super().__init__(node, mem_info)
self.config_template = str(
ROOT / "templates" / "configuration" / "transpose_4D_config.jinja")
ROOT / "templates" / "configuration" / "transpose_ND_config.jinja")
self.forward_template = str(
ROOT / "templates" / "kernel_forward" / "transpose_4D_forward.jinja")
ROOT / "templates" / "kernel_forward" / "transpose_ND_forward.jinja")
self.include_list = []
self.kernels_to_copy = [
str(ROOT / "kernels" / "transpose.hpp")
......
......@@ -5,9 +5,11 @@
{# For layer configuration -#}
{% include "./_def_io.jinja" %}
{% include "./_meminfo.jinja" %}
{# 4D kernels are not supported yet that's why define NAME_DIM1 #}
#define {{ name|upper }}_DIM1 1
{# Export suppose that batchsize = 1#}
#define {{ name|upper }}_NB_DIMS {{ in_dims[0] | length }}
static const unsigned int {{ name|upper }}_PERMUTE[] = { {{ output_dims_order | join(', ') }} };
static constexpr unsigned int {{ name|upper }}_PERMUTE[] = { {{ output_dims_order | join(', ') }} };
static constexpr unsigned int {{ name|upper }}_DIMS[] = { {{ in_dims[0] | join(', ') }}};
#endif /* {{ name|upper }}_LAYER_H */
#endif /* {{ name|upper }}_LAYER_H */
\ No newline at end of file
transpose_4D_forward({{in_name[0]}},
{{out_name[0]}},
{{name|upper}}_DIM1,
{{ in_name[0]|upper }}_NB_CHANNELS,
{{ in_name[0]|upper }}_IN_HEIGHT,
{{ in_name[0]|upper }}_IN_WIDTH,
{{name|upper}}_PERMUTE);
\ No newline at end of file
transpose_ND_forward<{{in_cdtype[0]}}>({{in_name[0]}},{{name|upper}}_DIMS,{{name|upper}}_NB_DIMS,{{name|upper}}_PERMUTE,{{ out_name[0]|upper }}_SIZE,{{out_name[0]}});
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment