From b071317e07d5ab0a712b4d3b9898e7e3ea03d5bb Mon Sep 17 00:00:00 2001
From: Wissam Boussella <wissam.boussella@cea.fr>
Date: Thu, 27 Feb 2025 13:58:28 +0100
Subject: [PATCH 1/3] Add transpose function for 4D tensors and related
 templates

---
 aidge_export_cpp/kernels/transpose.hpp        | 78 +++++++++++++++++++
 aidge_export_cpp/operators.py                 | 17 +++-
 .../configuration/transpose_4D_config.jinja   | 13 ++++
 .../kernel_forward/transpose_4D_forward.jinja |  7 ++
 4 files changed, 113 insertions(+), 2 deletions(-)
 create mode 100644 aidge_export_cpp/kernels/transpose.hpp
 create mode 100644 aidge_export_cpp/templates/configuration/transpose_4D_config.jinja
 create mode 100644 aidge_export_cpp/templates/kernel_forward/transpose_4D_forward.jinja

diff --git a/aidge_export_cpp/kernels/transpose.hpp b/aidge_export_cpp/kernels/transpose.hpp
new file mode 100644
index 0000000..4273d20
--- /dev/null
+++ b/aidge_export_cpp/kernels/transpose.hpp
@@ -0,0 +1,78 @@
+#ifndef __AIDGE_EXPORT_CPP_KERNELS_TRANSPOSE__
+#define __AIDGE_EXPORT_CPP_KERNELS_TRANSPOSE__
+
+#include "network/typedefs.hpp"
+
+/**
+ * @brief Transposes a 4-dimensional tensor based on the specified permutation.
+ *
+ * This function rearranges the dimensions of a 4D tensor according to the
+ * permutation array provided. The input tensor is expected to have dimensions
+ * [in_dim1][in_dim2][in_dim3][in_dim4], and the output tensor will have
+ * dimensions reordered as specified by the permute array.
+ *
+ * @tparam T        Data type of the tensor elements (e.g., float, double).
+ * @param[in]  inputs   Pointer to the input tensor data stored in contiguous memory.
+ * @param[out] outputs  Pointer to the pre-allocated memory for the transposed tensor.
+ *                      Ensure this memory is appropriately sized to hold the transposed data.
+ * @param[in]  in_dim1  Size of the first dimension of the input tensor.
+ * @param[in]  in_dim2  Size of the second dimension of the input tensor.
+ * @param[in]  in_dim3  Size of the third dimension of the input tensor.
+ * @param[in]  in_dim4  Size of the fourth dimension of the input tensor.
+ * @param[in]  permute  Array of four unsigned integers specifying the desired permutation
+ *                      of dimensions. Each value should be in the range [0, 3], defining
+ *                      the new order of dimensions for the output tensor.
+ */
+template <typename T>
+void transpose_4D_forward(const T* inputs,
+                          T* outputs,
+                          unsigned int in_dim1,
+                          unsigned int in_dim2,
+                          unsigned int in_dim3,
+                          unsigned int in_dim4,
+                          const unsigned int* permute)
+{
+    unsigned int in_dims[4] = {in_dim1, in_dim2, in_dim3, in_dim4};
+    unsigned int out_dims[4];
+    for (unsigned int i = 0; i < 4; ++i) {
+        out_dims[i] = in_dims[permute[i]];
+    }
+
+    unsigned int in_strides[4] = {
+        in_dim2 * in_dim3 * in_dim4,
+        in_dim3 * in_dim4,
+        in_dim4,
+        1
+    };
+
+    unsigned int out_strides[4] = {
+        out_dims[1] * out_dims[2] * out_dims[3],
+        out_dims[2] * out_dims[3],
+        out_dims[3],
+        1
+    };
+
+    for (unsigned int i = 0; i < in_dim1; ++i) {
+        for (unsigned int j = 0; j < in_dim2; ++j) {
+            for (unsigned int k = 0; k < in_dim3; ++k) {
+                for (unsigned int l = 0; l < in_dim4; ++l) {
+                    // Compute the linear index in the input tensor
+                    unsigned int input_index = i * in_strides[0] +
+                                               j * in_strides[1] +
+                                               k * in_strides[2] +
+                                               l * in_strides[3];
+
+                    unsigned int in_idx[4] = {i, j, k, l};
+                    unsigned int output_index = 0;
+                    for (unsigned int m = 0; m < 4; ++m) {
+                        output_index += in_idx[permute[m]] * out_strides[m];
+                    }
+
+                    outputs[output_index] = inputs[input_index];
+                }
+            }
+        }
+    }
+}
+
+#endif // __AIDGE_EXPORT_CPP_KERNELS_TRANSPOSE__
\ No newline at end of file
diff --git a/aidge_export_cpp/operators.py b/aidge_export_cpp/operators.py
index f04dbb3..fcc57ed 100644
--- a/aidge_export_cpp/operators.py
+++ b/aidge_export_cpp/operators.py
@@ -56,8 +56,8 @@ class ProducerCPP(ExportNode):
         super().__init__(node, mem_info)
         self.values = np.array(self.operator.get_output(0))
 
-        if len(self.values.shape) == 4:  # Note: export in HWC
-            self.values = np.transpose(self.values, (0, 2, 3, 1))
+        # if len(self.values.shape) == 4:  # Note: export in HWC
+        #     self.values =  np.transpose(self.values, (0, 2, 3, 1))
 
     def export(self, export_folder: Path):
         header_path = f"include/parameters/{self.attributes['name']}.h"
@@ -295,3 +295,16 @@ class FcCPP(ExportNodeCpp):
             str(ROOT / "kernels" / "activation.hpp"),
             str(ROOT / "kernels" / "rescaling.hpp")
         ]
+
+@ExportLibCpp.register("Transpose", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.float32)))
+class TransposeCPP(ExportNodeCpp):
+    def __init__(self, node, mem_info):
+        super().__init__(node, mem_info)
+        self.config_template = str(
+            ROOT / "templates" / "configuration" / "transpose_4D_config.jinja")
+        self.forward_template = str(
+            ROOT / "templates" / "kernel_forward" / "transpose_4D_forward.jinja")
+        self.include_list = []
+        self.kernels_to_copy = [
+            str(ROOT / "kernels" / "transpose.hpp")
+        ]
\ No newline at end of file
diff --git a/aidge_export_cpp/templates/configuration/transpose_4D_config.jinja b/aidge_export_cpp/templates/configuration/transpose_4D_config.jinja
new file mode 100644
index 0000000..62c5ac7
--- /dev/null
+++ b/aidge_export_cpp/templates/configuration/transpose_4D_config.jinja
@@ -0,0 +1,13 @@
+{#- For name header -#}
+#ifndef {{ name|upper }}_LAYER_H
+#define {{ name|upper }}_LAYER_H
+
+{# For layer configuration -#}
+{% include "./_def_io.jinja" %}
+{% include "./_meminfo.jinja" %}
+{# 4D kernels are not supported yet that's why define NAME_DIM1 #}
+#define {{ name|upper }}_DIM1 1
+
+static const unsigned int {{ name|upper }}_PERMUTE[] = { {{ output_dims_order | join(', ') }} };
+
+#endif /* {{ name|upper }}_LAYER_H */
diff --git a/aidge_export_cpp/templates/kernel_forward/transpose_4D_forward.jinja b/aidge_export_cpp/templates/kernel_forward/transpose_4D_forward.jinja
new file mode 100644
index 0000000..db04ac5
--- /dev/null
+++ b/aidge_export_cpp/templates/kernel_forward/transpose_4D_forward.jinja
@@ -0,0 +1,7 @@
+transpose_4D_forward({{in_name[0]}},
+                    {{out_name[0]}},
+                    {{name|upper}}_DIM1,
+                    {{ in_name[0]|upper }}_NB_CHANNELS,
+                    {{ in_name[0]|upper }}_IN_HEIGHT,
+                    {{ in_name[0]|upper }}_IN_WIDTH,
+                    {{name|upper}}_PERMUTE);
\ No newline at end of file
-- 
GitLab


From ac47d3e6968688ef2c9ccf524e6f13e717f253f7 Mon Sep 17 00:00:00 2001
From: Wissam Boussella <wissam.boussella@cea.fr>
Date: Tue, 4 Mar 2025 11:35:32 +0100
Subject: [PATCH 2/3] Keep transpose in NHWC in operators.py (still needs to be
 changed); new method in transpose.hpp now handles the multi-dimensional case;
 change config and forward to be compatible with the new transpose

---
 aidge_export_cpp/kernels/transpose.hpp        | 128 ++++++++++--------
 aidge_export_cpp/operators.py                 |  10 +-
 .../configuration/transpose_4D_config.jinja   |  13 --
 .../configuration/transpose_ND_config.jinja   |  15 ++
 .../kernel_forward/transpose_4D_forward.jinja |   7 -
 .../kernel_forward/transpose_ND_forward.jinja |   1 +
 6 files changed, 90 insertions(+), 84 deletions(-)
 delete mode 100644 aidge_export_cpp/templates/configuration/transpose_4D_config.jinja
 create mode 100644 aidge_export_cpp/templates/configuration/transpose_ND_config.jinja
 delete mode 100644 aidge_export_cpp/templates/kernel_forward/transpose_4D_forward.jinja
 create mode 100644 aidge_export_cpp/templates/kernel_forward/transpose_ND_forward.jinja

diff --git a/aidge_export_cpp/kernels/transpose.hpp b/aidge_export_cpp/kernels/transpose.hpp
index 4273d20..082d738 100644
--- a/aidge_export_cpp/kernels/transpose.hpp
+++ b/aidge_export_cpp/kernels/transpose.hpp
@@ -1,78 +1,88 @@
+/********************************************************************************
+ * Copyright (c) 2023 CEA-List
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License 2.0 which is available at
+ * http://www.eclipse.org/legal/epl-2.0.
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ ********************************************************************************/
+
 #ifndef __AIDGE_EXPORT_CPP_KERNELS_TRANSPOSE__
 #define __AIDGE_EXPORT_CPP_KERNELS_TRANSPOSE__
 
-#include "network/typedefs.hpp"
-
 /**
- * @brief Transposes a 4-dimensional tensor based on the specified permutation.
+ * @brief Transposes an N-dimensional tensor based on the specified permutation.
+ *
+ * This function rearranges the dimensions of an N-dimensional tensor according to the
+ * permutation array provided. The input tensor is expected to have dimensions specified
+ * by `in_dims`, and the output tensor will have dimensions reordered as specified by the
+ * `permute` array.
  *
- * This function rearranges the dimensions of a 4D tensor according to the
- * permutation array provided. The input tensor is expected to have dimensions
- * [in_dim1][in_dim2][in_dim3][in_dim4], and the output tensor will have
- * dimensions reordered as specified by the permute array.
+ * Based on Tensor::copyTranspose from aidge.aidge_core
  *
- * @tparam T        Data type of the tensor elements (e.g., float, double).
- * @param[in]  inputs   Pointer to the input tensor data stored in contiguous memory.
- * @param[out] outputs  Pointer to the pre-allocated memory for the transposed tensor.
- *                      Ensure this memory is appropriately sized to hold the transposed data.
- * @param[in]  in_dim1  Size of the first dimension of the input tensor.
- * @param[in]  in_dim2  Size of the second dimension of the input tensor.
- * @param[in]  in_dim3  Size of the third dimension of the input tensor.
- * @param[in]  in_dim4  Size of the fourth dimension of the input tensor.
- * @param[in]  permute  Array of four unsigned integers specifying the desired permutation
- *                      of dimensions. Each value should be in the range [0, 3], defining
- *                      the new order of dimensions for the output tensor.
+ * @tparam T        Data type of the tensor elements.
+ * @param[in]  inputs      Pointer to the input tensor data stored in contiguous memory.
+ * @param[in]  in_dims     Array containing the size of each dimension of the input tensor.
+ * @param[in]  nb_dims     Number of dimensions of the input tensor.
+ * @param[in]  permute     Array of unsigned integers specifying the desired permutation
+ *                         of dimensions. Each value should be in the range [0, nb_dims-1],
+ *                         defining the new order of dimensions for the output tensor.
+ * @param[in]  total_size  Total number of elements in the input/output tensor.
+ * @param[out] outputs     Pointer to the pre-allocated memory for the transposed tensor.
+ *                         Ensure this memory is appropriately sized to hold the transposed data.
  */
 template <typename T>
-void transpose_4D_forward(const T* inputs,
-                          T* outputs,
-                          unsigned int in_dim1,
-                          unsigned int in_dim2,
-                          unsigned int in_dim3,
-                          unsigned int in_dim4,
-                          const unsigned int* permute)
+void transpose_ND_forward(const T *__restrict inputs,
+                          const unsigned int *in_dims,
+                          const unsigned int nb_dims,
+                          const unsigned int *permute,
+                          const unsigned int total_size,
+                          T *__restrict outputs)
 {
-    unsigned int in_dims[4] = {in_dim1, in_dim2, in_dim3, in_dim4};
-    unsigned int out_dims[4];
-    for (unsigned int i = 0; i < 4; ++i) {
-        out_dims[i] = in_dims[permute[i]];
+    // Compute strides for input tensor
+    unsigned int in_strides[nb_dims];
+    in_strides[nb_dims - 1] = 1;
+    for (int i = nb_dims - 2; i >= 0; --i)
+    {
+        in_strides[i] = in_strides[i + 1] * in_dims[i + 1];
     }
 
-    unsigned int in_strides[4] = {
-        in_dim2 * in_dim3 * in_dim4,
-        in_dim3 * in_dim4,
-        in_dim4,
-        1
-    };
+    // Compute dimensions and strides for output tensor
+    unsigned int out_dims[nb_dims];
+    unsigned int out_strides[nb_dims];
+    out_strides[nb_dims - 1] = 1;
+    for (unsigned int i = 0; i < nb_dims; ++i)
+    {
+        out_dims[i] = in_dims[permute[i]];
+    }
+    for (int i = nb_dims - 2; i >= 0; --i)
+    {
+        out_strides[i] = out_strides[i + 1] * out_dims[i + 1];
+    }
 
-    unsigned int out_strides[4] = {
-        out_dims[1] * out_dims[2] * out_dims[3],
-        out_dims[2] * out_dims[3],
-        out_dims[3],
-        1
-    };
+    unsigned int current_idx[nb_dims];
 
-    for (unsigned int i = 0; i < in_dim1; ++i) {
-        for (unsigned int j = 0; j < in_dim2; ++j) {
-            for (unsigned int k = 0; k < in_dim3; ++k) {
-                for (unsigned int l = 0; l < in_dim4; ++l) {
-                    // Compute the linear index in the input tensor
-                    unsigned int input_index = i * in_strides[0] +
-                                               j * in_strides[1] +
-                                               k * in_strides[2] +
-                                               l * in_strides[3];
+    // Iterate over all elements in the input tensor
+    for (unsigned int idx = 0; idx < total_size; ++idx)
+    {
 
-                    unsigned int in_idx[4] = {i, j, k, l};
-                    unsigned int output_index = 0;
-                    for (unsigned int m = 0; m < 4; ++m) {
-                        output_index += in_idx[permute[m]] * out_strides[m];
-                    }
+        unsigned int remaining = idx;
+        for (unsigned int i = 0; i < nb_dims; ++i)
+        {
+            current_idx[i] = remaining / in_strides[i];
+            remaining = remaining % in_strides[i];
+        }
 
-                    outputs[output_index] = inputs[input_index];
-                }
-            }
+        unsigned int output_index = 0;
+        for (unsigned int i = 0; i < nb_dims; ++i)
+        {
+            output_index += current_idx[permute[i]] * out_strides[i];
         }
+
+        outputs[output_index] = inputs[idx];
     }
 }
 
-#endif // __AIDGE_EXPORT_CPP_KERNELS_TRANSPOSE__
\ No newline at end of file
+#endif // __AIDGE_EXPORT_CPP_KERNELS_TRANSPOSE__
diff --git a/aidge_export_cpp/operators.py b/aidge_export_cpp/operators.py
index fcc57ed..54c3805 100644
--- a/aidge_export_cpp/operators.py
+++ b/aidge_export_cpp/operators.py
@@ -56,8 +56,8 @@ class ProducerCPP(ExportNode):
         super().__init__(node, mem_info)
         self.values = np.array(self.operator.get_output(0))
 
-        # if len(self.values.shape) == 4:  # Note: export in HWC
-        #     self.values =  np.transpose(self.values, (0, 2, 3, 1))
+        if len(self.values.shape) == 4:  # Note: export in HWC
+            self.values =  np.transpose(self.values, (0, 2, 3, 1))
 
     def export(self, export_folder: Path):
         header_path = f"include/parameters/{self.attributes['name']}.h"
@@ -296,14 +296,14 @@ class FcCPP(ExportNodeCpp):
             str(ROOT / "kernels" / "rescaling.hpp")
         ]
 
-@ExportLibCpp.register("Transpose", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.float32)))
+@ExportLibCpp.register("Transpose", aidge_core.ImplSpec(aidge_core.IOSpec(aidge_core.dtype.any)))
 class TransposeCPP(ExportNodeCpp):
     def __init__(self, node, mem_info):
         super().__init__(node, mem_info)
         self.config_template = str(
-            ROOT / "templates" / "configuration" / "transpose_4D_config.jinja")
+            ROOT / "templates" / "configuration" / "transpose_ND_config.jinja")
         self.forward_template = str(
-            ROOT / "templates" / "kernel_forward" / "transpose_4D_forward.jinja")
+            ROOT / "templates" / "kernel_forward" / "transpose_ND_forward.jinja")
         self.include_list = []
         self.kernels_to_copy = [
             str(ROOT / "kernels" / "transpose.hpp")
diff --git a/aidge_export_cpp/templates/configuration/transpose_4D_config.jinja b/aidge_export_cpp/templates/configuration/transpose_4D_config.jinja
deleted file mode 100644
index 62c5ac7..0000000
--- a/aidge_export_cpp/templates/configuration/transpose_4D_config.jinja
+++ /dev/null
@@ -1,13 +0,0 @@
-{#- For name header -#}
-#ifndef {{ name|upper }}_LAYER_H
-#define {{ name|upper }}_LAYER_H
-
-{# For layer configuration -#}
-{% include "./_def_io.jinja" %}
-{% include "./_meminfo.jinja" %}
-{# 4D kernels are not supported yet that's why define NAME_DIM1 #}
-#define {{ name|upper }}_DIM1 1
-
-static const unsigned int {{ name|upper }}_PERMUTE[] = { {{ output_dims_order | join(', ') }} };
-
-#endif /* {{ name|upper }}_LAYER_H */
diff --git a/aidge_export_cpp/templates/configuration/transpose_ND_config.jinja b/aidge_export_cpp/templates/configuration/transpose_ND_config.jinja
new file mode 100644
index 0000000..e5ef4ff
--- /dev/null
+++ b/aidge_export_cpp/templates/configuration/transpose_ND_config.jinja
@@ -0,0 +1,15 @@
+{#- For name header -#}
+#ifndef {{ name|upper }}_LAYER_H
+#define {{ name|upper }}_LAYER_H
+
+{# For layer configuration -#}
+{% include "./_def_io.jinja" %}
+{% include "./_meminfo.jinja" %}
+{# The export assumes that the batch size is 1 #}
+#define {{ name|upper }}_NB_DIMS {{ in_dims[0] | length }}
+
+static constexpr unsigned int {{ name|upper }}_PERMUTE[] = { {{ output_dims_order | join(', ') }} };
+static constexpr unsigned int {{ name|upper }}_DIMS[] = { {{ in_dims[0] | join(', ') }}};
+
+
+#endif /* {{ name|upper }}_LAYER_H */
\ No newline at end of file
diff --git a/aidge_export_cpp/templates/kernel_forward/transpose_4D_forward.jinja b/aidge_export_cpp/templates/kernel_forward/transpose_4D_forward.jinja
deleted file mode 100644
index db04ac5..0000000
--- a/aidge_export_cpp/templates/kernel_forward/transpose_4D_forward.jinja
+++ /dev/null
@@ -1,7 +0,0 @@
-transpose_4D_forward({{in_name[0]}},
-                    {{out_name[0]}},
-                    {{name|upper}}_DIM1,
-                    {{ in_name[0]|upper }}_NB_CHANNELS,
-                    {{ in_name[0]|upper }}_IN_HEIGHT,
-                    {{ in_name[0]|upper }}_IN_WIDTH,
-                    {{name|upper}}_PERMUTE);
\ No newline at end of file
diff --git a/aidge_export_cpp/templates/kernel_forward/transpose_ND_forward.jinja b/aidge_export_cpp/templates/kernel_forward/transpose_ND_forward.jinja
new file mode 100644
index 0000000..8f39fbc
--- /dev/null
+++ b/aidge_export_cpp/templates/kernel_forward/transpose_ND_forward.jinja
@@ -0,0 +1 @@
+transpose_ND_forward<{{in_cdtype[0]}}>({{in_name[0]}},{{name|upper}}_DIMS,{{name|upper}}_NB_DIMS,{{name|upper}}_PERMUTE,{{ out_name[0]|upper }}_SIZE,{{out_name[0]}});
\ No newline at end of file
-- 
GitLab


From b7b395f31fa63e8c8139960a34ee9839ef8ea8dc Mon Sep 17 00:00:00 2001
From: Wissam Boussella <wissam.boussella@cea.fr>
Date: Tue, 4 Mar 2025 12:09:17 +0100
Subject: [PATCH 3/3] NB_DIMS is now a template parameter

---
 aidge_export_cpp/kernels/transpose.hpp        | 29 +++++++++----------
 .../kernel_forward/transpose_ND_forward.jinja |  2 +-
 2 files changed, 15 insertions(+), 16 deletions(-)

diff --git a/aidge_export_cpp/kernels/transpose.hpp b/aidge_export_cpp/kernels/transpose.hpp
index 082d738..31c9e27 100644
--- a/aidge_export_cpp/kernels/transpose.hpp
+++ b/aidge_export_cpp/kernels/transpose.hpp
@@ -23,60 +23,59 @@
  * Based on Tensor::copyTranspose from aidge.aidge_core
  *
  * @tparam T        Data type of the tensor elements.
+ * @tparam NB_DIMS  Number of dimensions of the input tensor.
  * @param[in]  inputs      Pointer to the input tensor data stored in contiguous memory.
  * @param[in]  in_dims     Array containing the size of each dimension of the input tensor.
- * @param[in]  nb_dims     Number of dimensions of the input tensor.
  * @param[in]  permute     Array of unsigned integers specifying the desired permutation
- *                         of dimensions. Each value should be in the range [0, nb_dims-1],
+ *                         of dimensions. Each value should be in the range [0, NB_DIMS-1],
  *                         defining the new order of dimensions for the output tensor.
  * @param[in]  total_size  Total number of elements in the input/output tensor.
  * @param[out] outputs     Pointer to the pre-allocated memory for the transposed tensor.
  *                         Ensure this memory is appropriately sized to hold the transposed data.
  */
-template <typename T>
+template <typename T,unsigned int NB_DIMS>
 void transpose_ND_forward(const T *__restrict inputs,
                           const unsigned int *in_dims,
-                          const unsigned int nb_dims,
                           const unsigned int *permute,
                           const unsigned int total_size,
                           T *__restrict outputs)
 {
     // Compute strides for input tensor
-    unsigned int in_strides[nb_dims];
-    in_strides[nb_dims - 1] = 1;
-    for (int i = nb_dims - 2; i >= 0; --i)
+    unsigned int in_strides[NB_DIMS];
+    in_strides[NB_DIMS - 1] = 1;
+    for (int i = NB_DIMS - 2; i >= 0; --i)
     {
         in_strides[i] = in_strides[i + 1] * in_dims[i + 1];
     }
 
     // Compute dimensions and strides for output tensor
-    unsigned int out_dims[nb_dims];
-    unsigned int out_strides[nb_dims];
-    out_strides[nb_dims - 1] = 1;
-    for (unsigned int i = 0; i < nb_dims; ++i)
+    unsigned int out_dims[NB_DIMS];
+    unsigned int out_strides[NB_DIMS];
+    out_strides[NB_DIMS - 1] = 1;
+    for (unsigned int i = 0; i < NB_DIMS; ++i)
     {
         out_dims[i] = in_dims[permute[i]];
     }
-    for (int i = nb_dims - 2; i >= 0; --i)
+    for (int i = NB_DIMS - 2; i >= 0; --i)
     {
         out_strides[i] = out_strides[i + 1] * out_dims[i + 1];
     }
 
-    unsigned int current_idx[nb_dims];
+    unsigned int current_idx[NB_DIMS];
 
     // Iterate over all elements in the input tensor
     for (unsigned int idx = 0; idx < total_size; ++idx)
     {
 
         unsigned int remaining = idx;
-        for (unsigned int i = 0; i < nb_dims; ++i)
+        for (unsigned int i = 0; i < NB_DIMS; ++i)
         {
             current_idx[i] = remaining / in_strides[i];
             remaining = remaining % in_strides[i];
         }
 
         unsigned int output_index = 0;
-        for (unsigned int i = 0; i < nb_dims; ++i)
+        for (unsigned int i = 0; i < NB_DIMS; ++i)
         {
             output_index += current_idx[permute[i]] * out_strides[i];
         }
diff --git a/aidge_export_cpp/templates/kernel_forward/transpose_ND_forward.jinja b/aidge_export_cpp/templates/kernel_forward/transpose_ND_forward.jinja
index 8f39fbc..25af5bd 100644
--- a/aidge_export_cpp/templates/kernel_forward/transpose_ND_forward.jinja
+++ b/aidge_export_cpp/templates/kernel_forward/transpose_ND_forward.jinja
@@ -1 +1 @@
-transpose_ND_forward<{{in_cdtype[0]}}>({{in_name[0]}},{{name|upper}}_DIMS,{{name|upper}}_NB_DIMS,{{name|upper}}_PERMUTE,{{ out_name[0]|upper }}_SIZE,{{out_name[0]}});
\ No newline at end of file
+transpose_ND_forward<{{in_cdtype[0]}},{{name|upper}}_NB_DIMS>({{in_name[0]}},{{name|upper}}_DIMS,{{name|upper}}_PERMUTE,{{ out_name[0]|upper }}_SIZE,{{out_name[0]}});
\ No newline at end of file
-- 
GitLab