diff --git a/include/aidge/backend/cpu/operator/GlobalAveragePoolingImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/GlobalAveragePoolingImpl_forward_kernels.hpp index d541480d87512e529cd62173d61a16f3c8c928c5..cab1dc24155e89a23bca3043231fd815badb05d1 100644 --- a/include/aidge/backend/cpu/operator/GlobalAveragePoolingImpl_forward_kernels.hpp +++ b/include/aidge/backend/cpu/operator/GlobalAveragePoolingImpl_forward_kernels.hpp @@ -53,12 +53,11 @@ void GlobalAveragePoolingImpl_cpu_forward_kernel( const I *filter_start = std::next( input, batch * in_batch_nb_elems + (channel * in_channel_nb_elems)); // I sum = std::accumulate(&filter_start[0], - // &filter_start[in_batch_nb_elems + 1], 0); + // &filter_start[in_batch_nb_elems + 1], 0.f); I sum = 0; for (size_t i = 0; i < in_channel_nb_elems; ++i) { sum += filter_start[i]; } - output[batch * out_batch_nb_elems + channel] = sum / static_cast<I>(in_channel_nb_elems); } diff --git a/unit_tests/operator/Test_GlobalAveragePoolingImpl.cpp b/unit_tests/operator/Test_GlobalAveragePoolingImpl.cpp index 7ed65d1894240f92f12f49c7768ee76a186556c2..bb45927c4881f45fc29064b5861803158561c888 100644 --- a/unit_tests/operator/Test_GlobalAveragePoolingImpl.cpp +++ b/unit_tests/operator/Test_GlobalAveragePoolingImpl.cpp @@ -25,6 +25,17 @@ #include "aidge/operator/GlobalAveragePooling.hpp" #include "aidge/utils/TensorUtils.hpp" +// debug print function +void print_tensor(Aidge::Tensor &T) { + // Print tensors + std::cout << "Tensor : size =  ["; + for (auto &dim : T.dims()) { + std::cout << dim << " , "; + } + std::cout << "]" << std::endl; + T.print(); +} + namespace Aidge { TEST_CASE("[cpu/operator] GlobalAveragePooling", "[GlobalAveragePooling][CPU]") { @@ -80,7 +91,6 @@ TEST_CASE("[cpu/operator] GlobalAveragePooling", std::accumulate(dims.cbegin(), dims.cend(), std::size_t(1), std::multiplies<std::size_t>()); - // without broadcasting float *array0 = new float[nb_elements]; float *result = new float[nb_elements]; @@ -107,6 +117,10 @@ TEST_CASE("[cpu/operator] GlobalAveragePooling", const DimSize_t in_batch_nb_elems = in_nb_elems / dims_in[0]; const DimSize_t in_channel_nb_elems = in_batch_nb_elems / dims_in[1]; + number_of_operation += + in_nb_elems + + dims_in[1]; // averaging per channel : 1 addition per element in + // the channel + 1 division this for every batch // create out nb_elems std::vector<std::size_t> dims_out{dims_in[0], dims_in[1]}; const std::size_t out_nb_elems = @@ -118,7 +132,6 @@ TEST_CASE("[cpu/operator] GlobalAveragePooling", float *array0 = new float[in_nb_elems]; float *result = new float[out_nb_elems]; float val = valueDist(gen); - std::cout << "val = " << val << std::endl; for (std::size_t batch = 0; batch < dims_in[0]; ++batch) { for (std::size_t channel = 0; channel < dims_in[1]; ++channel) { for (std::size_t i = 0; i < in_channel_nb_elems; ++i) @@ -146,42 +159,404 @@ TEST_CASE("[cpu/operator] GlobalAveragePooling", duration += std::chrono::duration_cast<std::chrono::microseconds>(end - start); - // Print tensors - std::cout << "input : size =  ["; - for (auto &dim : op->getInput(0)->dims()) { - std::cout << dim << " , "; - } - std::cout << "]" << std::endl; - // T0->print(); - - std::cout << "output : size =  ["; - for (auto &dim : op->getOutput(0)->dims()) { - std::cout << dim << " , "; - } - std::cout << "]" << std::endl; - op->getOutput(0)->print(); - - std::cout << "ref Tres : size = output size if no error occurred" - << std::endl; - std::cout << "ref Tres: size =  ["; - for (auto &dim : Tres->dims()) { - std::cout << dim << " , "; - } - std::cout << "]" << std::endl; - CHECK(Tres->nbDims() == op->getOutput(0)->nbDims()); for (DimSize_t i = 0; i < op->getOutput(0)->nbDims(); ++i) { CHECK(Tres->dims().at(i) == op->getOutput(0)->dims().at(i)); } - Tres->print(); CHECK(approxEq<float>(*(op->getOutput(0)), *Tres)); delete[] array0; delete[] result; } - SECTION("Using result from a pytorch function as groundtruth") {} - SECTION("random testing") {} + + SECTION("random testing") { + for (int trial = 0; trial < NBTRIALS; ++trial) { + // generate the tensor + const std::size_t nbDims = nbHighDimsDist(gen); + std::vector<std::size_t> dims_in; + for (std::size_t i = 0; i < nbDims; ++i) { + dims_in.push_back(dimSizeDist(gen)); + } + // create in nb_elems + const std::size_t in_nb_elems = + std::accumulate(dims_in.cbegin(), dims_in.cend(), std::size_t(1), + std::multiplies<std::size_t>()); + const DimSize_t in_batch_nb_elems = in_nb_elems / dims_in[0]; + const DimSize_t in_channel_nb_elems = in_batch_nb_elems / dims_in[1]; + number_of_operation += + in_nb_elems + + dims_in[1]; // averaging per channel : 1 addition per element in + // the channel + 1 division this for every batch + + // create out nb_elems + std::vector<std::size_t> dims_out{dims_in[0], dims_in[1]}; + const std::size_t out_nb_elems = + std::accumulate(dims_out.cbegin(), dims_out.cend(), + std::size_t(1), std::multiplies<std::size_t>()); + const DimSize_t out_batch_nb_elems = out_nb_elems / dims_out[0]; + + // iterate over each batch/channel + float *array0 = new float[in_nb_elems]; + float *result = new float[out_nb_elems]; + for (std::size_t batch = 0; batch < dims_in[0]; ++batch) { + for (std::size_t channel = 0; channel < dims_in[1]; ++channel) { + float channel_sum = 0; + for (std::size_t i = 0; i < in_channel_nb_elems; ++i) + + { + float val = valueDist(gen); + array0[batch * in_batch_nb_elems + + channel * in_channel_nb_elems + i] = val; + channel_sum += val; + } + result[batch * out_batch_nb_elems + channel] = + channel_sum / in_channel_nb_elems; + } + } + + // input0 + T0->resize(dims_in); + T0->getImpl()->setRawPtr(array0, in_nb_elems); + + // results + Tres->resize(dims_out); + Tres->getImpl()->setRawPtr(result, out_nb_elems); + + op->computeOutputDims(); + start = std::chrono::system_clock::now(); + REQUIRE_NOTHROW(globAvgPool->forward()); + end = std::chrono::system_clock::now(); + duration += std::chrono::duration_cast<std::chrono::microseconds>( + end - start); + + CHECK(Tres->nbDims() == op->getOutput(0)->nbDims()); + for (DimSize_t i = 0; i < op->getOutput(0)->nbDims(); ++i) { + CHECK(Tres->dims().at(i) == op->getOutput(0)->dims().at(i)); + } + + CHECK(approxEq<float>(*(op->getOutput(0)), *Tres)); + + delete[] array0; + delete[] result; + } + } + SECTION("Using result from a pytorch function as groundtruth") { + DimSize_t batch_size = 2; + DimSize_t channels = 3; + DimSize_t height = 4; + DimSize_t width = 3; + DimSize_t depth = 2; + + SECTION("2D_img") { + const std::vector<DimSize_t> in_dims{batch_size, channels, height, + width}; + const std::vector<DimSize_t> out_dims{batch_size, channels}; + DimSize_t in_nb_elems = batch_size * channels * height * width; + DimSize_t out_nb_elems = batch_size * channels; + number_of_operation += + in_nb_elems + + channels; // averaging per channel : 1 addition per element in + // the channel + 1 division this for every batch + auto input = new float[in_nb_elems]; + auto result = new float[out_nb_elems]; + input[0] = 0.1807716; + input[1] = -0.0699881; + input[2] = -0.3596235; + input[3] = -0.9152045; + input[4] = 0.6257653; + input[5] = 0.0255099; + input[6] = 0.9545137; + input[7] = 0.0643485; + input[8] = 0.3611506; + input[9] = 1.1678782; + input[10] = -1.3498932; + input[11] = -0.5101767; + input[12] = 0.2359577; + input[13] = -0.2397784; + input[14] = -0.9211147; + input[15] = 1.5432971; + input[16] = 1.3488258; + input[17] = -0.1396417; + input[18] = 0.2857972; + input[19] = 0.9651205; + input[20] = -2.0371499; + input[21] = 0.4931363; + input[22] = 1.4869986; + input[23] = 0.5910330; + input[24] = 0.1260297; + input[25] = -1.5626874; + input[26] = -1.1601028; + input[27] = -0.3348408; + input[28] = 0.4477722; + input[29] = -0.8016447; + input[30] = 1.5236114; + input[31] = 2.5085869; + input[32] = -0.6630959; + input[33] = -0.2512752; + input[34] = 1.0101448; + input[35] = 0.1215468; + input[36] = 0.1583993; + input[37] = 1.1340188; + input[38] = -1.1538976; + input[39] = -0.2983968; + input[40] = -0.5075365; + input[41] = -0.9239212; + input[42] = 0.5467061; + input[43] = -1.4947776; + input[44] = -1.2057148; + input[45] = 0.5718198; + input[46] = -0.5973545; + input[47] = -0.6936757; + input[48] = 1.6455388; + input[49] = -0.8029931; + input[50] = 1.3514109; + input[51] = -0.2759193; + input[52] = -1.5108346; + input[53] = 2.1047730; + input[54] = 2.7629590; + input[55] = -1.7465292; + input[56] = 0.8353187; + input[57] = -1.9560477; + input[58] = -0.8002653; + input[59] = -0.5044988; + input[60] = -0.0711742; + input[61] = -0.5130699; + input[62] = -1.0307810; + input[63] = 0.9154347; + input[64] = -0.2282317; + input[65] = -0.6884708; + input[66] = 0.1832259; + input[67] = 0.6003584; + input[68] = -1.5429375; + input[69] = -0.3465560; + input[70] = -0.1476223; + input[71] = 0.6469797; + + result[0] = 0.0145876; + result[1] = 0.3010401; + result[2] = 0.0803371; + + result[3] = -0.3720275; + result[4] = 0.0919094; + result[5] = -0.1852371; + + // input0 + T0->resize(in_dims); + T0->getImpl()->setRawPtr(input, in_nb_elems); + + // results + Tres->resize(out_dims); + Tres->getImpl()->setRawPtr(result, out_nb_elems); + op->computeOutputDims(); + start = std::chrono::system_clock::now(); + REQUIRE_NOTHROW(globAvgPool->forward()); + end = std::chrono::system_clock::now(); + duration += std::chrono::duration_cast<std::chrono::microseconds>( + end - start); + + CHECK(Tres->nbDims() == op->getOutput(0)->nbDims()); + for (DimSize_t i = 0; i < op->getOutput(0)->nbDims(); ++i) { + CHECK(Tres->dims().at(i) == op->getOutput(0)->dims().at(i)); + } + CHECK(approxEq<float>(*(op->getOutput(0)), *Tres)); + delete[] input; + delete[] result; + } + SECTION("3D_img") { + const std::vector<DimSize_t> in_dims{batch_size, channels, height, + width, depth}; + const std::vector<DimSize_t> out_dims{batch_size, channels}; + DimSize_t in_nb_elems = + batch_size * channels * height * width * depth; + number_of_operation += + in_nb_elems + + channels; // averaging per channel : 1 addition per element in + // the channel + 1 division this for every batch + DimSize_t out_nb_elems = batch_size * channels; + auto input = new float[in_nb_elems]; + auto result = new float[out_nb_elems]; + input[0] = 0.0061403; + input[1] = -0.9665052; + input[2] = 0.3582928; + input[3] = 0.1072854; + input[4] = 1.2463317; + input[5] = 1.2460036; + input[6] = 0.3534451; + input[7] = 0.9425349; + input[8] = -0.2103887; + input[9] = -0.7959853; + input[10] = 0.1297970; + input[11] = -1.9445597; + input[12] = 0.0609514; + input[13] = -0.2379328; + input[14] = 1.9020044; + input[15] = -1.1762751; + input[16] = 0.3404147; + input[17] = 1.1685153; + input[18] = -0.6526139; + input[19] = 0.3767620; + input[20] = 0.1887376; + input[21] = 0.5154487; + input[22] = 0.6371427; + input[23] = -0.3948864; + input[24] = -1.1571540; + input[25] = 0.2896117; + input[26] = 0.6163548; + input[27] = -0.4370409; + input[28] = 0.6589766; + input[29] = 0.6587803; + input[30] = -1.3702172; + input[31] = -1.6210355; + input[32] = 0.5872851; + input[33] = 0.2860694; + input[34] = 0.0082870; + input[35] = -0.2523253; + input[36] = -1.3247224; + input[37] = 0.1891782; + input[38] = 0.0211001; + input[39] = 0.9404197; + input[40] = -0.5576900; + input[41] = -0.6939272; + input[42] = -0.3252473; + input[43] = 1.2439330; + input[44] = -1.1671864; + input[45] = -0.4091243; + input[46] = 1.2600617; + input[47] = -1.5630058; + input[48] = 1.1346143; + input[49] = -0.0823837; + input[50] = 0.2893163; + input[51] = 0.8357732; + input[52] = -0.2449911; + input[53] = 0.2712233; + input[54] = 0.0936364; + input[55] = -0.8834321; + input[56] = -0.3274170; + input[57] = 0.0783938; + input[58] = -0.3807656; + input[59] = 0.3775077; + input[60] = 0.1119123; + input[61] = 2.3142793; + input[62] = -0.7989057; + input[63] = -0.5643027; + input[64] = -1.1346605; + input[65] = 0.1705271; + input[66] = 0.9946650; + input[67] = 1.2625724; + input[68] = 1.6218156; + input[69] = 1.0774711; + input[70] = 0.5947813; + input[71] = -1.5290873; + input[72] = 2.0437069; + input[73] = -0.1656267; + input[74] = 0.0870704; + input[75] = -0.5276564; + input[76] = -0.1002882; + input[77] = 1.0539219; + input[78] = -0.6230739; + input[79] = -1.5905718; + input[80] = -0.9741858; + input[81] = -0.1869211; + input[82] = 0.5816050; + input[83] = -2.6339815; + input[84] = -1.0764544; + input[85] = 2.5903966; + input[86] = 0.4940658; + input[87] = 0.4671729; + input[88] = 0.6588292; + input[89] = -0.7257792; + input[90] = 1.4280071; + input[91] = -1.2187740; + input[92] = 0.7380729; + input[93] = -1.1599953; + input[94] = -1.4355115; + input[95] = -1.5304037; + input[96] = 0.8474578; + input[97] = 0.0774260; + input[98] = 0.5433396; + input[99] = -0.8438400; + input[100] = -0.1089903; + input[101] = -0.6354192; + input[102] = 0.8772392; + input[103] = 0.2844733; + input[104] = 0.0975270; + input[105] = -0.9785872; + input[106] = -0.4320499; + input[107] = -1.4937501; + input[108] = -2.0644901; + input[109] = 0.0851217; + input[110] = 0.6644159; + input[111] = 0.4168026; + input[112] = 0.0958830; + input[113] = -1.5699565; + input[114] = 0.3739572; + input[115] = -0.1420672; + input[116] = -0.7864021; + input[117] = 0.2443752; + input[118] = -0.9811850; + input[119] = -0.0698569; + input[120] = 0.1463890; + input[121] = 0.2536245; + input[122] = 0.2136150; + input[123] = 0.3113698; + input[124] = 1.8353856; + input[125] = 1.4473228; + input[126] = -0.7373698; + input[127] = 0.2485314; + input[128] = -0.4789796; + input[129] = -0.3396149; + input[130] = 0.6438198; + input[131] = 0.7287521; + input[132] = -1.5119252; + input[133] = -0.1006494; + input[134] = 1.8955028; + input[135] = 1.0871323; + input[136] = 0.3620502; + input[137] = -0.8826663; + input[138] = 1.2220223; + input[139] = -1.2817260; + input[140] = 1.4153577; + input[141] = 0.4148015; + input[142] = 1.3458617; + input[143] = 1.9718349; + + result[0] = 0.1333608; + result[1] = -0.1716091; + result[2] = 0.2201060; + result[3] = -0.1585989; + result[4] = -0.2291074; + result[5] = 0.4254351; + + // input0 + T0->resize(in_dims); + T0->getImpl()->setRawPtr(input, in_nb_elems); + + // results + Tres->resize(out_dims); + Tres->getImpl()->setRawPtr(result, out_nb_elems); + op->computeOutputDims(); + start = std::chrono::system_clock::now(); + REQUIRE_NOTHROW(globAvgPool->forward()); + end = std::chrono::system_clock::now(); + duration += std::chrono::duration_cast<std::chrono::microseconds>( + end - start); + + CHECK(Tres->nbDims() == op->getOutput(0)->nbDims()); + for (DimSize_t i = 0; i < op->getOutput(0)->nbDims(); ++i) { + CHECK(Tres->dims().at(i) == op->getOutput(0)->dims().at(i)); + } + CHECK(approxEq<float>(*(op->getOutput(0)), *Tres)); + delete[] input; + delete[] result; + } + } + std::cout << "GlobalAveragePooling total execution time : " + << duration.count() << "µs" << std::endl; + std::cout << "Number of operations : " << number_of_operation + << std::endl; + std::cout << "Operation / µs = " << number_of_operation / duration.count() + << std::endl; } } }