Add transpose function for 4D tensors and related templates

b071317e · Wissam Boussella · 8cb3a2c1 · b071317e · b071317e · b071317e
Commit b071317e authored 4 months ago by Wissam Boussella
--- a/aidge_export_cpp/kernels/transpose.hpp
+++ b/aidge_export_cpp/kernels/transpose.hpp
+#ifndef __AIDGE_EXPORT_CPP_KERNELS_TRANSPOSE__
+#define __AIDGE_EXPORT_CPP_KERNELS_TRANSPOSE__
+
+#include "network/typedefs.hpp"
+
+/**
+ * @brief Transposes a 4-dimensional tensor according to a permutation of its axes.
+ *
+ * The input is read as a contiguous, row-major tensor of shape
+ * [in_dim1][in_dim2][in_dim3][in_dim4]. Axis m of the output corresponds to
+ * axis permute[m] of the input, so the output extents are
+ * { in_dims[permute[0]], in_dims[permute[1]], in_dims[permute[2]], in_dims[permute[3]] }
+ * and the output is written in row-major order as well.
+ *
+ * Example: with permute = {0, 2, 3, 1}, an input of shape [N][C][H][W] is
+ * written as an output of shape [N][H][W][C].
+ *
+ * No validation is performed: permute must contain each of 0, 1, 2 and 3
+ * exactly once. The transpose is not done in place, so inputs and outputs
+ * must be distinct buffers. All index arithmetic is done in unsigned int.
+ *
+ * @tparam T        Data type of the tensor elements (e.g., float, double).
+ * @param[in]  inputs   Pointer to the input tensor data stored in contiguous memory.
+ * @param[out] outputs  Pointer to the pre-allocated memory for the transposed tensor.
+ *                      It must hold in_dim1 * in_dim2 * in_dim3 * in_dim4 elements.
+ * @param[in]  in_dim1  Size of the first dimension of the input tensor.
+ * @param[in]  in_dim2  Size of the second dimension of the input tensor.
+ * @param[in]  in_dim3  Size of the third dimension of the input tensor.
+ * @param[in]  in_dim4  Size of the fourth dimension of the input tensor.
+ * @param[in]  permute  Array of four unsigned integers specifying the desired permutation
+ *                      of dimensions. permute[m] is the input axis that becomes output
+ *                      axis m; each value must be in the range [0, 3].
+ */
+template <typename T>
+void transpose_4D_forward(const T* inputs,
+                          T* outputs,
+                          unsigned int in_dim1,
+                          unsigned int in_dim2,
+                          unsigned int in_dim3,
+                          unsigned int in_dim4,
+                          const unsigned int* permute)
+{
+    const unsigned int in_dims[4] = {in_dim1, in_dim2, in_dim3, in_dim4};
+
+    // Output axis m takes the extent of input axis permute[m].
+    unsigned int out_dims[4];
+    for (unsigned int m = 0; m < 4; ++m) {
+        out_dims[m] = in_dims[permute[m]];
+    }
+
+    // Row-major strides of the output tensor.
+    const unsigned int out_strides[4] = {
+        out_dims[1] * out_dims[2] * out_dims[3],
+        out_dims[2] * out_dims[3],
+        out_dims[3],
+        1
+    };
+
+    // out_step[d] is the output offset covered by one step along input axis d.
+    // It only depends on the permutation, so it is computed once here instead
+    // of being re-derived for every element.
+    unsigned int out_step[4] = {0, 0, 0, 0};
+    for (unsigned int m = 0; m < 4; ++m) {
+        out_step[permute[m]] = out_strides[m];
+    }
+
+    // The loops visit the input in row-major order, so its linear index is
+    // simply a running counter.
+    unsigned int input_index = 0;
+    for (unsigned int i = 0; i < in_dim1; ++i) {
+        const unsigned int base_i = i * out_step[0];
+        for (unsigned int j = 0; j < in_dim2; ++j) {
+            const unsigned int base_j = base_i + j * out_step[1];
+            for (unsigned int k = 0; k < in_dim3; ++k) {
+                const unsigned int base_k = base_j + k * out_step[2];
+                for (unsigned int l = 0; l < in_dim4; ++l) {
+                    outputs[base_k + l * out_step[3]] = inputs[input_index];
+                    ++input_index;
+                }
+            }
+        }
+    }
+}
+
+#endif // __AIDGE_EXPORT_CPP_KERNELS_TRANSPOSE__
\ No newline at end of file
--- a/aidge_export_cpp/operators.py
+++ b/aidge_export_cpp/operators.py
@@ -56,8 +56,8 @@ class ProducerCPP(ExportNode):
        super().__init__(node, mem_info)
        self.values = np.array(self.operator.get_output(0))

-        if len(self.values.shape) == 4:  # Note: export in HWC
-            self.values = np.transpose(self.values, (0, 2, 3, 1))
+        # if len(self.values.shape) == 4:  # Note: export in HWC
+        #     self.values =  np.transpose(self.values, (0, 2, 3, 1))

    def export(self, export_folder: Path):
        header_path = f"include/parameters/{self.attributes['name']}.h"
@@ -295,3 +295,16 @@ class FcCPP(ExportNodeCpp):
            str(ROOT / "kernels" / "activation.hpp"),
            str(ROOT / "kernels" / "rescaling.hpp")
        ]
+
+@ExportLibCpp.register("Transpose", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.float32)))
+class TransposeCPP(ExportNodeCpp):
+    """C++ export node registered for the "Transpose" operator with a float32 IOSpec.
+
+    Points the export at the 4D transpose configuration and forward templates
+    and lists ``kernels/transpose.hpp`` as the kernel file to copy.
+    """
+
+    def __init__(self, node, mem_info):
+        super().__init__(node, mem_info)
+
+        templates_dir = ROOT / "templates"
+        self.config_template = str(templates_dir / "configuration" / "transpose_4D_config.jinja")
+        self.forward_template = str(templates_dir / "kernel_forward" / "transpose_4D_forward.jinja")
+
+        # No additional includes are listed for this node.
+        self.include_list = []
+        self.kernels_to_copy = [str(ROOT / "kernels" / "transpose.hpp")]
\ No newline at end of file
--- a/aidge_export_cpp/templates/configuration/transpose_4D_config.jinja
+++ b/aidge_export_cpp/templates/configuration/transpose_4D_config.jinja
+{#- For name header -#}
+#ifndef {{ name|upper }}_LAYER_H
+#define {{ name|upper }}_LAYER_H
+
+{# For layer configuration -#}
+{% include "./_def_io.jinja" %}
+{% include "./_meminfo.jinja" %}
+{# 4D kernels are not supported yet, which is why NAME_DIM1 (the first dimension given to the transpose kernel) is defined here and fixed to 1. #}
+#define {{ name|upper }}_DIM1 1
+
+static const unsigned int {{ name|upper }}_PERMUTE[] = { {{ output_dims_order | join(', ') }} };
+
+#endif /* {{ name|upper }}_LAYER_H */
--- a/aidge_export_cpp/templates/kernel_forward/transpose_4D_forward.jinja
+++ b/aidge_export_cpp/templates/kernel_forward/transpose_4D_forward.jinja
+{#- Emits the transpose_4D_forward call on the first input and output: the dimensions passed are NAME_DIM1, then the input's NB_CHANNELS, IN_HEIGHT and IN_WIDTH macros, followed by the NAME_PERMUTE array. -#}
+transpose_4D_forward({{in_name[0]}},
+                    {{out_name[0]}},
+                    {{name|upper}}_DIM1,
+                    {{ in_name[0]|upper }}_NB_CHANNELS,
+                    {{ in_name[0]|upper }}_IN_HEIGHT,
+                    {{ in_name[0]|upper }}_IN_WIDTH,
+                    {{name|upper}}_PERMUTE);
\ No newline at end of file