diff --git a/include/aidge/backend/cpu/operator/GlobalAveragePoolingImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/GlobalAveragePoolingImpl_forward_kernels.hpp
index d541480d87512e529cd62173d61a16f3c8c928c5..cab1dc24155e89a23bca3043231fd815badb05d1 100644
--- a/include/aidge/backend/cpu/operator/GlobalAveragePoolingImpl_forward_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/GlobalAveragePoolingImpl_forward_kernels.hpp
@@ -53,12 +53,11 @@ void GlobalAveragePoolingImpl_cpu_forward_kernel(
       const I *filter_start = std::next(
           input, batch * in_batch_nb_elems + (channel * in_channel_nb_elems));
       // I sum = std::accumulate(&filter_start[0],
-      //                         &filter_start[in_batch_nb_elems + 1], 0);
+      //                         &filter_start[in_batch_nb_elems + 1], 0.f);
       I sum = 0;
       for (size_t i = 0; i < in_channel_nb_elems; ++i) {
         sum += filter_start[i];
       }
-
       output[batch * out_batch_nb_elems + channel] =
           sum / static_cast<I>(in_channel_nb_elems);
     }
diff --git a/unit_tests/operator/Test_GlobalAveragePoolingImpl.cpp b/unit_tests/operator/Test_GlobalAveragePoolingImpl.cpp
index 7ed65d1894240f92f12f49c7768ee76a186556c2..bb45927c4881f45fc29064b5861803158561c888 100644
--- a/unit_tests/operator/Test_GlobalAveragePoolingImpl.cpp
+++ b/unit_tests/operator/Test_GlobalAveragePoolingImpl.cpp
@@ -25,6 +25,17 @@
 #include "aidge/operator/GlobalAveragePooling.hpp"
 #include "aidge/utils/TensorUtils.hpp"
 
+// Debug helper: writes the tensor's dimensions, then calls its print().
+void print_tensor(Aidge::Tensor &T) {
+  std::cout << "Tensor : size =  [";
+  const auto &extents = T.dims();
+  for (auto it = extents.begin(); it != extents.end(); ++it) {
+    std::cout << *it << " , "; // every entry, last included, gets " , "
+  }
+  std::cout << "]" << std::endl;
+  T.print();
+}
+
 namespace Aidge {
 TEST_CASE("[cpu/operator] GlobalAveragePooling",
           "[GlobalAveragePooling][CPU]") {
@@ -80,7 +91,6 @@ TEST_CASE("[cpu/operator] GlobalAveragePooling",
           std::accumulate(dims.cbegin(), dims.cend(), std::size_t(1),
                           std::multiplies<std::size_t>());
 
-      // without broadcasting
       float *array0 = new float[nb_elements];
       float *result = new float[nb_elements];
 
@@ -107,6 +117,10 @@ TEST_CASE("[cpu/operator] GlobalAveragePooling",
         const DimSize_t in_batch_nb_elems = in_nb_elems / dims_in[0];
         const DimSize_t in_channel_nb_elems = in_batch_nb_elems / dims_in[1];
 
+        number_of_operation +=
+            in_nb_elems +
+            dims_in[1]; //  averaging per channel : 1 addition per element in
+                        //  the channel + 1 division this for every batch
         // create out nb_elems
         std::vector<std::size_t> dims_out{dims_in[0], dims_in[1]};
         const std::size_t out_nb_elems =
@@ -118,7 +132,6 @@ TEST_CASE("[cpu/operator] GlobalAveragePooling",
         float *array0 = new float[in_nb_elems];
         float *result = new float[out_nb_elems];
         float val = valueDist(gen);
-        std::cout << "val = " << val << std::endl;
         for (std::size_t batch = 0; batch < dims_in[0]; ++batch) {
           for (std::size_t channel = 0; channel < dims_in[1]; ++channel) {
             for (std::size_t i = 0; i < in_channel_nb_elems; ++i)
@@ -146,42 +159,404 @@ TEST_CASE("[cpu/operator] GlobalAveragePooling",
         duration +=
             std::chrono::duration_cast<std::chrono::microseconds>(end - start);
 
-        // Print tensors
-        std::cout << "input : size =  [";
-        for (auto &dim : op->getInput(0)->dims()) {
-          std::cout << dim << " , ";
-        }
-        std::cout << "]" << std::endl;
-        // T0->print();
-
-        std::cout << "output : size =  [";
-        for (auto &dim : op->getOutput(0)->dims()) {
-          std::cout << dim << " , ";
-        }
-        std::cout << "]" << std::endl;
-        op->getOutput(0)->print();
-
-        std::cout << "ref Tres : size = output size if no error occurred"
-                  << std::endl;
-        std::cout << "ref Tres: size =  [";
-        for (auto &dim : Tres->dims()) {
-          std::cout << dim << " , ";
-        }
-        std::cout << "]" << std::endl;
-
         CHECK(Tres->nbDims() == op->getOutput(0)->nbDims());
         for (DimSize_t i = 0; i < op->getOutput(0)->nbDims(); ++i) {
           CHECK(Tres->dims().at(i) == op->getOutput(0)->dims().at(i));
         }
-        Tres->print();
 
         CHECK(approxEq<float>(*(op->getOutput(0)), *Tres));
 
         delete[] array0;
         delete[] result;
       }
-      SECTION("Using result from a pytorch function as groundtruth") {}
-      SECTION("random testing") {}
+
+      SECTION("random testing") {
+        // Each trial draws a random shape and random values, then compares
+        // forward() with per-channel means computed while filling the input.
+        for (int trial = 0; trial < NBTRIALS; ++trial) {
+          // NOTE(review): dims_in[0] and dims_in[1] are read below;
+          // presumably nbHighDimsDist always yields at least 2 - confirm.
+          const std::size_t nbDims = nbHighDimsDist(gen);
+          std::vector<std::size_t> dims_in;
+          for (std::size_t i = 0; i < nbDims; ++i) {
+            dims_in.push_back(dimSizeDist(gen));
+          }
+          // element counts: whole input, one batch, one channel
+          const std::size_t in_nb_elems =
+              std::accumulate(dims_in.cbegin(), dims_in.cend(), std::size_t(1),
+                              std::multiplies<std::size_t>());
+          const DimSize_t in_batch_nb_elems = in_nb_elems / dims_in[0];
+          const DimSize_t in_channel_nb_elems = in_batch_nb_elems / dims_in[1];
+          // Operation count: one addition per input element plus one
+          // division per output value, i.e. per (batch, channel) pair.
+          number_of_operation += in_nb_elems + dims_in[0] * dims_in[1];
+
+          // the output keeps only the first two dimensions of the input
+          std::vector<std::size_t> dims_out{dims_in[0], dims_in[1]};
+          const std::size_t out_nb_elems =
+              std::accumulate(dims_out.cbegin(), dims_out.cend(),
+                              std::size_t(1), std::multiplies<std::size_t>());
+          const DimSize_t out_batch_nb_elems = out_nb_elems / dims_out[0];
+
+          // fill the input and build the reference mean of every channel
+          float *array0 = new float[in_nb_elems];
+          float *result = new float[out_nb_elems];
+          for (std::size_t batch = 0; batch < dims_in[0]; ++batch) {
+            for (std::size_t channel = 0; channel < dims_in[1]; ++channel) {
+              float channel_sum = 0;
+              for (std::size_t i = 0; i < in_channel_nb_elems; ++i) {
+                float val = valueDist(gen);
+                array0[batch * in_batch_nb_elems +
+                       channel * in_channel_nb_elems + i] = val;
+                channel_sum += val;
+              }
+              result[batch * out_batch_nb_elems + channel] =
+                  channel_sum / in_channel_nb_elems;
+            }
+          }
+
+          // input0
+          T0->resize(dims_in);
+          T0->getImpl()->setRawPtr(array0, in_nb_elems);
+
+          // results
+          Tres->resize(dims_out);
+          Tres->getImpl()->setRawPtr(result, out_nb_elems);
+
+          op->computeOutputDims();
+          start = std::chrono::system_clock::now();
+          REQUIRE_NOTHROW(globAvgPool->forward());
+          end = std::chrono::system_clock::now();
+          duration += std::chrono::duration_cast<std::chrono::microseconds>(
+              end - start);
+
+          CHECK(Tres->nbDims() == op->getOutput(0)->nbDims());
+          for (DimSize_t i = 0; i < op->getOutput(0)->nbDims(); ++i) {
+            CHECK(Tres->dims().at(i) == op->getOutput(0)->dims().at(i));
+          }
+
+          CHECK(approxEq<float>(*(op->getOutput(0)), *Tres));
+
+          delete[] array0;
+          delete[] result;
+        }
+      }
+      SECTION("Using result from a pytorch function as groundtruth") {
+        DimSize_t batch_size = 2;
+        DimSize_t channels = 3;
+        DimSize_t height = 4;
+        DimSize_t width = 3;
+        DimSize_t depth = 2;
+
+        SECTION("2D_img") {
+          // Fixed 4-D input checked against hard-coded per-channel means.
+          const std::vector<DimSize_t> in_dims{batch_size, channels, height,
+                                               width};
+          const std::vector<DimSize_t> out_dims{batch_size, channels};
+          DimSize_t in_nb_elems = batch_size * channels * height * width;
+          DimSize_t out_nb_elems = batch_size * channels;
+          // Operation count: one addition per input element plus one
+          // division per output value, i.e. per (batch, channel) pair.
+          number_of_operation += in_nb_elems + out_nb_elems;
+          auto input = new float[in_nb_elems];
+          auto result = new float[out_nb_elems];
+          input[0] = 0.1807716;
+          input[1] = -0.0699881;
+          input[2] = -0.3596235;
+          input[3] = -0.9152045;
+          input[4] = 0.6257653;
+          input[5] = 0.0255099;
+          input[6] = 0.9545137;
+          input[7] = 0.0643485;
+          input[8] = 0.3611506;
+          input[9] = 1.1678782;
+          input[10] = -1.3498932;
+          input[11] = -0.5101767;
+          input[12] = 0.2359577;
+          input[13] = -0.2397784;
+          input[14] = -0.9211147;
+          input[15] = 1.5432971;
+          input[16] = 1.3488258;
+          input[17] = -0.1396417;
+          input[18] = 0.2857972;
+          input[19] = 0.9651205;
+          input[20] = -2.0371499;
+          input[21] = 0.4931363;
+          input[22] = 1.4869986;
+          input[23] = 0.5910330;
+          input[24] = 0.1260297;
+          input[25] = -1.5626874;
+          input[26] = -1.1601028;
+          input[27] = -0.3348408;
+          input[28] = 0.4477722;
+          input[29] = -0.8016447;
+          input[30] = 1.5236114;
+          input[31] = 2.5085869;
+          input[32] = -0.6630959;
+          input[33] = -0.2512752;
+          input[34] = 1.0101448;
+          input[35] = 0.1215468;
+          input[36] = 0.1583993;
+          input[37] = 1.1340188;
+          input[38] = -1.1538976;
+          input[39] = -0.2983968;
+          input[40] = -0.5075365;
+          input[41] = -0.9239212;
+          input[42] = 0.5467061;
+          input[43] = -1.4947776;
+          input[44] = -1.2057148;
+          input[45] = 0.5718198;
+          input[46] = -0.5973545;
+          input[47] = -0.6936757;
+          input[48] = 1.6455388;
+          input[49] = -0.8029931;
+          input[50] = 1.3514109;
+          input[51] = -0.2759193;
+          input[52] = -1.5108346;
+          input[53] = 2.1047730;
+          input[54] = 2.7629590;
+          input[55] = -1.7465292;
+          input[56] = 0.8353187;
+          input[57] = -1.9560477;
+          input[58] = -0.8002653;
+          input[59] = -0.5044988;
+          input[60] = -0.0711742;
+          input[61] = -0.5130699;
+          input[62] = -1.0307810;
+          input[63] = 0.9154347;
+          input[64] = -0.2282317;
+          input[65] = -0.6884708;
+          input[66] = 0.1832259;
+          input[67] = 0.6003584;
+          input[68] = -1.5429375;
+          input[69] = -0.3465560;
+          input[70] = -0.1476223;
+          input[71] = 0.6469797;
+          // expected means, first batch (one value per channel)
+          result[0] = 0.0145876;
+          result[1] = 0.3010401;
+          result[2] = 0.0803371;
+          // expected means, second batch
+          result[3] = -0.3720275;
+          result[4] = 0.0919094;
+          result[5] = -0.1852371;
+
+          // input0
+          T0->resize(in_dims);
+          T0->getImpl()->setRawPtr(input, in_nb_elems);
+
+          // results
+          Tres->resize(out_dims);
+          Tres->getImpl()->setRawPtr(result, out_nb_elems);
+          op->computeOutputDims();
+          start = std::chrono::system_clock::now();
+          REQUIRE_NOTHROW(globAvgPool->forward());
+          end = std::chrono::system_clock::now();
+          duration += std::chrono::duration_cast<std::chrono::microseconds>(
+              end - start);
+
+          CHECK(Tres->nbDims() == op->getOutput(0)->nbDims());
+          for (DimSize_t i = 0; i < op->getOutput(0)->nbDims(); ++i) {
+            CHECK(Tres->dims().at(i) == op->getOutput(0)->dims().at(i));
+          }
+          CHECK(approxEq<float>(*(op->getOutput(0)), *Tres));
+          delete[] input;
+          delete[] result;
+        }
+        SECTION("3D_img") {
+          // Fixed 5-D input checked against hard-coded per-channel means.
+          const std::vector<DimSize_t> in_dims{batch_size, channels, height,
+                                               width, depth};
+          const std::vector<DimSize_t> out_dims{batch_size, channels};
+          DimSize_t in_nb_elems =
+              batch_size * channels * height * width * depth;
+          DimSize_t out_nb_elems = batch_size * channels;
+          // Operation count: one addition per input element plus one
+          // division per output value, i.e. per (batch, channel) pair.
+          number_of_operation += in_nb_elems + out_nb_elems;
+          auto input = new float[in_nb_elems];
+          auto result = new float[out_nb_elems];
+          input[0] = 0.0061403;
+          input[1] = -0.9665052;
+          input[2] = 0.3582928;
+          input[3] = 0.1072854;
+          input[4] = 1.2463317;
+          input[5] = 1.2460036;
+          input[6] = 0.3534451;
+          input[7] = 0.9425349;
+          input[8] = -0.2103887;
+          input[9] = -0.7959853;
+          input[10] = 0.1297970;
+          input[11] = -1.9445597;
+          input[12] = 0.0609514;
+          input[13] = -0.2379328;
+          input[14] = 1.9020044;
+          input[15] = -1.1762751;
+          input[16] = 0.3404147;
+          input[17] = 1.1685153;
+          input[18] = -0.6526139;
+          input[19] = 0.3767620;
+          input[20] = 0.1887376;
+          input[21] = 0.5154487;
+          input[22] = 0.6371427;
+          input[23] = -0.3948864;
+          input[24] = -1.1571540;
+          input[25] = 0.2896117;
+          input[26] = 0.6163548;
+          input[27] = -0.4370409;
+          input[28] = 0.6589766;
+          input[29] = 0.6587803;
+          input[30] = -1.3702172;
+          input[31] = -1.6210355;
+          input[32] = 0.5872851;
+          input[33] = 0.2860694;
+          input[34] = 0.0082870;
+          input[35] = -0.2523253;
+          input[36] = -1.3247224;
+          input[37] = 0.1891782;
+          input[38] = 0.0211001;
+          input[39] = 0.9404197;
+          input[40] = -0.5576900;
+          input[41] = -0.6939272;
+          input[42] = -0.3252473;
+          input[43] = 1.2439330;
+          input[44] = -1.1671864;
+          input[45] = -0.4091243;
+          input[46] = 1.2600617;
+          input[47] = -1.5630058;
+          input[48] = 1.1346143;
+          input[49] = -0.0823837;
+          input[50] = 0.2893163;
+          input[51] = 0.8357732;
+          input[52] = -0.2449911;
+          input[53] = 0.2712233;
+          input[54] = 0.0936364;
+          input[55] = -0.8834321;
+          input[56] = -0.3274170;
+          input[57] = 0.0783938;
+          input[58] = -0.3807656;
+          input[59] = 0.3775077;
+          input[60] = 0.1119123;
+          input[61] = 2.3142793;
+          input[62] = -0.7989057;
+          input[63] = -0.5643027;
+          input[64] = -1.1346605;
+          input[65] = 0.1705271;
+          input[66] = 0.9946650;
+          input[67] = 1.2625724;
+          input[68] = 1.6218156;
+          input[69] = 1.0774711;
+          input[70] = 0.5947813;
+          input[71] = -1.5290873;
+          input[72] = 2.0437069;
+          input[73] = -0.1656267;
+          input[74] = 0.0870704;
+          input[75] = -0.5276564;
+          input[76] = -0.1002882;
+          input[77] = 1.0539219;
+          input[78] = -0.6230739;
+          input[79] = -1.5905718;
+          input[80] = -0.9741858;
+          input[81] = -0.1869211;
+          input[82] = 0.5816050;
+          input[83] = -2.6339815;
+          input[84] = -1.0764544;
+          input[85] = 2.5903966;
+          input[86] = 0.4940658;
+          input[87] = 0.4671729;
+          input[88] = 0.6588292;
+          input[89] = -0.7257792;
+          input[90] = 1.4280071;
+          input[91] = -1.2187740;
+          input[92] = 0.7380729;
+          input[93] = -1.1599953;
+          input[94] = -1.4355115;
+          input[95] = -1.5304037;
+          input[96] = 0.8474578;
+          input[97] = 0.0774260;
+          input[98] = 0.5433396;
+          input[99] = -0.8438400;
+          input[100] = -0.1089903;
+          input[101] = -0.6354192;
+          input[102] = 0.8772392;
+          input[103] = 0.2844733;
+          input[104] = 0.0975270;
+          input[105] = -0.9785872;
+          input[106] = -0.4320499;
+          input[107] = -1.4937501;
+          input[108] = -2.0644901;
+          input[109] = 0.0851217;
+          input[110] = 0.6644159;
+          input[111] = 0.4168026;
+          input[112] = 0.0958830;
+          input[113] = -1.5699565;
+          input[114] = 0.3739572;
+          input[115] = -0.1420672;
+          input[116] = -0.7864021;
+          input[117] = 0.2443752;
+          input[118] = -0.9811850;
+          input[119] = -0.0698569;
+          input[120] = 0.1463890;
+          input[121] = 0.2536245;
+          input[122] = 0.2136150;
+          input[123] = 0.3113698;
+          input[124] = 1.8353856;
+          input[125] = 1.4473228;
+          input[126] = -0.7373698;
+          input[127] = 0.2485314;
+          input[128] = -0.4789796;
+          input[129] = -0.3396149;
+          input[130] = 0.6438198;
+          input[131] = 0.7287521;
+          input[132] = -1.5119252;
+          input[133] = -0.1006494;
+          input[134] = 1.8955028;
+          input[135] = 1.0871323;
+          input[136] = 0.3620502;
+          input[137] = -0.8826663;
+          input[138] = 1.2220223;
+          input[139] = -1.2817260;
+          input[140] = 1.4153577;
+          input[141] = 0.4148015;
+          input[142] = 1.3458617;
+          input[143] = 1.9718349;
+          // expected means: three channels of the first batch, then the second
+          result[0] = 0.1333608;
+          result[1] = -0.1716091;
+          result[2] = 0.2201060;
+          result[3] = -0.1585989;
+          result[4] = -0.2291074;
+          result[5] = 0.4254351;
+
+          // input0
+          T0->resize(in_dims);
+          T0->getImpl()->setRawPtr(input, in_nb_elems);
+
+          // results
+          Tres->resize(out_dims);
+          Tres->getImpl()->setRawPtr(result, out_nb_elems);
+          op->computeOutputDims();
+          start = std::chrono::system_clock::now();
+          REQUIRE_NOTHROW(globAvgPool->forward());
+          end = std::chrono::system_clock::now();
+          duration += std::chrono::duration_cast<std::chrono::microseconds>(
+              end - start);
+
+          CHECK(Tres->nbDims() == op->getOutput(0)->nbDims());
+          for (DimSize_t i = 0; i < op->getOutput(0)->nbDims(); ++i) {
+            CHECK(Tres->dims().at(i) == op->getOutput(0)->dims().at(i));
+          }
+          CHECK(approxEq<float>(*(op->getOutput(0)), *Tres));
+          delete[] input;
+          delete[] result;
+        }
+      }
+      std::cout << "GlobalAveragePooling total execution time : "
+                << duration.count() << "µs" << std::endl;
+      std::cout << "Number of operations : " << number_of_operation
+                << std::endl;
+      std::cout << "Operation / µs = " << number_of_operation / duration.count()
+                << std::endl;
     }
   }
 }