diff --git a/src/operator/Stack.cpp b/src/operator/Stack.cpp
index d64e27dac30bbf80b66e6cdfbfbe3b7a2dba5d73..8833cac3c8d142042a792edcd9e5280c41bc38ad 100644
--- a/src/operator/Stack.cpp
+++ b/src/operator/Stack.cpp
@@ -29,9 +29,7 @@ Aidge::Elts_t Aidge::StackProdConso::getRequiredMemory(
 
     const StackOp &op = dynamic_cast<const StackOp &>(mOp);
     // The produced data after one forward pass is simply the input size,
-    // we do not produced the whole output tensor everytime.
-    // The output tensor it set to its max dimensions just to some dimensions
-    // to forward.
+    // we do not produce the whole output tensor every time.
     return Elts_t::DataElts(op.getInput(inputIdx)->size());
 }
 
@@ -41,7 +39,7 @@ void StackOpImpl::forward() {
     const StackOp &op = dynamic_cast<const StackOp &>(mOp);
     AIDGE_ASSERT(op.getInput(0), "missing input #0");
     AIDGE_ASSERT((op.forwardStep() < op.maxElements()),
-                 "cannot forward anymore, number of cycles exceeded");
+                 "cannot forward anymore, maximum number of elements to stack exceeded");
 
     op.getOutput(0)->getImpl()->copy(
         op.getInput(0)->getImpl()->rawPtr(),
@@ -98,7 +96,7 @@ std::set<std::string> StackOp::getAvailableBackends() const {
 }
 
 void StackOp::forward() {
-    Log::info("fw step {}", forwardStep());
     Operator::forward();
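+    // One more input has been stacked; forward() is refused once maxElements is reached.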
     ++forwardStep();
 }
diff --git a/unit_tests/operator/Test_StackImpl.cpp b/unit_tests/operator/Test_StackImpl.cpp
index 90ef4a2dfc965db488d6c2e1bd5a248237a38749..521e365d718a491dbc9c12972d67ee3c8045c3eb 100644
--- a/unit_tests/operator/Test_StackImpl.cpp
+++ b/unit_tests/operator/Test_StackImpl.cpp
@@ -14,7 +14,11 @@
 #include <catch2/matchers/catch_matchers_string.hpp>
+#include <algorithm>
 #include <cstddef>
+#include <functional>
 #include <memory>
+#include <numeric>
 #include <random>
+#include <stdexcept>
 #include <vector>
 
 #include "aidge/data/Tensor.hpp"
@@ -23,28 +27,24 @@
 
 using Catch::Matchers::Equals;
 
+namespace Aidge {
 
-namespace Aidge 
-{
-
-TEST_CASE("[core/operator] Stack(forward)", "[Stack]") 
-{
+TEST_CASE("[core/operator] Stack(forward)", "[Stack]") {
     constexpr auto nbTrials = 10;
     auto rd = Catch::Generators::Detail::getSeed;
     std::mt19937 gen(rd());
     std::uniform_int_distribution<std::size_t> nbDist(1, 100);
     std::uniform_int_distribution<std::size_t> dimsDist(1, 10);
-    std::uniform_int_distribution<std::size_t> nbDimsDist(1, 5);
+    std::uniform_int_distribution<std::size_t> nbDimsDist(2, 5);
     std::uniform_real_distribution<float> valueDist(0.1f, 1.1f);
 
+    const std::size_t nbDimsTensor = nbDimsDist(gen);
+    std::vector<std::size_t> dimsIn(nbDimsTensor);
     std::shared_ptr<Tensor> t1 = std::make_shared<Tensor>();
 
-
-    SECTION("Constructors")
-    {
+    SECTION("Constructors") {
         // Valid arguments
-        for(auto i = 0; i < nbTrials; ++i)
-        {
+        for (auto i = 0; i < nbTrials; ++i) {
             auto maxElements = nbDist(gen);
             REQUIRE_NOTHROW(StackOp(maxElements));
 
@@ -62,11 +62,8 @@
         REQUIRE_THROWS_AS(StackOp(0), std::invalid_argument);
     }
 
-
-    SECTION("forwardDims") 
-    {
-        for(auto i = 0; i < nbTrials; ++i)
-        {
+    SECTION("forwardDims") {
+        for (auto i = 0; i < nbTrials; ++i) {
             auto maxElements = nbDist(gen);
             auto op = StackOp(maxElements);
 
@@ -77,12 +74,103 @@
             }
             t1->resize(dims);
 
-
-            REQUIRE_THROWS_WITH(op.forwardDims(),Equals("Stack: input #0 should be associated with a Tensor"));
+            REQUIRE_THROWS_WITH(
+                op.forwardDims(),
+                Equals("Stack: input #0 should be associated with a Tensor"));
             op.associateInput(0, t1);
-            REQUIRE_NOTHROW(op.forwardDims())       ;
+            REQUIRE_NOTHROW(op.forwardDims());
             REQUIRE(op.getOutput(0)->dims()[0] == maxElements);
         }
     }
+
+    SECTION("forward") {
+
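+        // Draw random dimensions for the stacked tensors and derive the element count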
+        std::generate(dimsIn.begin(), dimsIn.end(), [&gen, &dimsDist]() {
+            return dimsDist(gen);
+        });
+        const std::size_t nbElems =
+            std::accumulate(dimsIn.cbegin(),
+                            dimsIn.cend(),
+                            std::size_t{1},
+                            std::multiplies<std::size_t>());
+
+        std::uniform_int_distribution<std::size_t> numTensorsDist(2, 10);
+        const std::size_t numTensors = numTensorsDist(gen);
+
+        std::vector<std::shared_ptr<Tensor>> tensors(numTensors);
+        std::vector<float *> arrays(numTensors);
+
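+        // Allocate a raw buffer per input tensor and fill it with random values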
+        for (std::size_t i = 0; i < numTensors; ++i) {
+            arrays[i] = new float[nbElems];
+            for (std::size_t j = 0; j < nbElems; ++j) {
+                arrays[i][j] = valueDist(gen);
+            }
+            tensors[i] = std::make_shared<Tensor>();
+            tensors[i]->resize(dimsIn);
+            tensors[i]->setBackend("cpu");
+            tensors[i]->setDataType(DataType::Float32);
+            tensors[i]->getImpl()->setRawPtr(arrays[i], nbElems);
+        }
+
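+        // Create a Stack node that accepts exactly numTensors forward passes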
+        auto myStack = stack(numTensors);
+        myStack->getOperator()->setBackend("cpu");
+        myStack->getOperator()->setDataType(DataType::Float32);
+        auto op =
+            std::static_pointer_cast<OperatorTensor>(myStack->getOperator());
+
+        op->associateInput(0, tensors[0]);
+        op->forwardDims();
+        op->getOutput(0)->zeros();
+
+        // Perform forward passes for each tensor
+        for (std::size_t i = 0; i < numTensors; ++i) {
+            if (i > 0) {
+                op->associateInput(0, tensors[i]);
+            }
+            op->forward();
+        }
+
+        auto output = op->getOutput(0);
+
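+        // Stacking prepends a dimension equal to the number of stacked tensors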
+        std::vector<DimSize_t> expectedDims = dimsIn;
+        expectedDims.insert(expectedDims.begin(), numTensors);
+
+        REQUIRE(output->dims() == expectedDims);
+
+        // Compare output slices with input tensors
+        for (std::size_t i = 0; i < numTensors; ++i) {
+            Tensor outputTensor = output->extract({i});
+            Tensor inputTensor(DataType::Float32);
+            inputTensor.resize(dimsIn);
+            inputTensor.setBackend("cpu");
+            inputTensor.getImpl()->setRawPtr(arrays[i], nbElems);
+
+            REQUIRE(approxEq<float>(outputTensor, inputTensor));
+        }
+
+        // Attempt to exceed maxElements
+        std::shared_ptr<Tensor> extraTensor = std::make_shared<Tensor>();
+        extraTensor->resize(dimsIn);
+        extraTensor->setBackend("cpu");
+        extraTensor->setDataType(DataType::Float32);
+        float* extraArray = new float[nbElems];
+        for (std::size_t j = 0; j < nbElems; ++j) {
+            extraArray[j] = valueDist(gen);
+        }
+        extraTensor->getImpl()->setRawPtr(extraArray, nbElems);
+
+        op->associateInput(0, extraTensor);
+        REQUIRE_THROWS_AS(op->forward(), std::runtime_error);
+
+        // Clean up
+        delete[] extraArray;
+        for (std::size_t i = 0; i < numTensors; ++i) {
+            delete[] arrays[i];
+        }
+    }
 }
 } // namespace Aidge