/********************************************************************************
 * Copyright (c) 2023 CEA-List
 *
 * This program and the accompanying materials are made available under the
 * terms of the Eclipse Public License 2.0 which is available at
 * http://www.eclipse.org/legal/epl-2.0.
 *
 * SPDX-License-Identifier: EPL-2.0
 *
 ********************************************************************************/

#include <aidge/utils/Types.h>
#include <catch2/catch_test_macros.hpp>
#include <chrono>
#include <cmath>
#include <cstddef>    // std::size_t
#include <cstdint>    // std::uint16_t
#include <functional> // std::multiplies
#include <iostream>
#include <memory>
#include <numeric> // std::accumulate
#include <ostream>
#include <random> // std::random_device, std::mt19937, std::uniform_real_distribution
#include <vector>

#include "aidge/data/Tensor.hpp"
#include "aidge/operator/GlobalAveragePooling.hpp"
#include "aidge/utils/TensorUtils.hpp"

// Debug helper: prints the dimensions of T to std::cout, then calls T.print().
void print_tensor(Aidge::Tensor &T) {
  std::cout << "Tensor : size = [";
  for (auto &dim : T.dims()) {
    std::cout << dim << " , ";
  }
  std::cout << "]" << std::endl;
  T.print();
}

namespace Aidge {

// Checks the CPU forward pass of GlobalAveragePooling:
//  - inputs with 1 or 2 dimensions must make forward() throw;
//  - inputs with 3 to 7 dimensions must produce a {batch, channel} output that
//    holds the mean of every channel (constant input, random input, and two
//    reference cases whose expected values were generated with PyTorch).
TEST_CASE("[cpu/operator] GlobalAveragePooling",
          "[GlobalAveragePooling][CPU]") {
  constexpr std::uint16_t NBTRIALS = 10;

  // Create a random number generator
  std::random_device rd;
  std::mt19937 gen(rd());
  // Random float values drawn from [0.1, 1.1)
  std::uniform_real_distribution<float> valueDist(0.1f, 1.1f);
  std::uniform_int_distribution<std::size_t> dimSizeDist(std::size_t(2),
                                                         std::size_t(10));
  std::uniform_int_distribution<std::size_t> nbLowDimsDist(std::size_t(1),
                                                           std::size_t(2));
  std::uniform_int_distribution<std::size_t> nbHighDimsDist(std::size_t(3),
                                                            std::size_t(7));

  // Create the GlobalAveragePooling operator
  std::shared_ptr<Node> globAvgPool = GlobalAveragePooling();
  auto op =
      std::static_pointer_cast<OperatorTensor>(globAvgPool->getOperator());
  op->setDataType(DataType::Float32);
  op->setBackend("cpu");

  // Create the input Tensor
  std::shared_ptr<Tensor> T0 = std::make_shared<Tensor>();
  op->associateInput(0, T0);
  T0->setDataType(DataType::Float32);
  T0->setBackend("cpu");

  // Create the Tensor holding the expected results
  std::shared_ptr<Tensor> Tres = std::make_shared<Tensor>();
  Tres->setDataType(DataType::Float32);
  Tres->setBackend("cpu");

  // Accumulated time spent in forward() and matching operation count, used
  // only for the informational report printed at the end of "3+Dim".
  std::chrono::duration<double, std::micro> duration{};
  std::size_t number_of_operation{0};

  // Draws a random number of dimensions from nbDimsDist, then a random size
  // for each of them.
  const auto randomDims =
      [&gen, &dimSizeDist](
          std::uniform_int_distribution<std::size_t> &nbDimsDist) {
        const std::size_t nbDims = nbDimsDist(gen);
        std::vector<DimSize_t> dims;
        dims.reserve(nbDims);
        for (std::size_t i = 0; i < nbDims; ++i) {
          dims.push_back(dimSizeDist(gen));
        }
        return dims;
      };

  // Number of elements of a tensor with the given dimensions.
  const auto countElements = [](const std::vector<DimSize_t> &dims) {
    return std::accumulate(dims.cbegin(), dims.cend(), std::size_t(1),
                           std::multiplies<std::size_t>());
  };

  // Binds `input` to T0 and `expected` to Tres, runs (and times) forward(),
  // then requires the operator output to match Tres in shape and, through
  // approxEq, in values. The tensors only borrow the buffers through
  // setRawPtr(), so both vectors must stay alive for the duration of the call.
  const auto checkForward = [&](const std::vector<DimSize_t> &inDims,
                                std::vector<float> &input,
                                const std::vector<DimSize_t> &outDims,
                                std::vector<float> &expected) {
    // input0
    T0->resize(inDims);
    T0->getImpl()->setRawPtr(input.data(), input.size());

    // results
    Tres->resize(outDims);
    Tres->getImpl()->setRawPtr(expected.data(), expected.size());

    op->computeOutputDims();
    // steady_clock is monotonic, which is what interval measurement needs.
    const auto start = std::chrono::steady_clock::now();
    REQUIRE_NOTHROW(globAvgPool->forward());
    const auto end = std::chrono::steady_clock::now();
    duration += end - start;

    const std::shared_ptr<Tensor> output = op->getOutput(0);
    REQUIRE(Tres->nbDims() == output->nbDims());
    for (DimSize_t i = 0; i < output->nbDims(); ++i) {
      REQUIRE(Tres->dims().at(i) == output->dims().at(i));
    }
    REQUIRE(approxEq<float>(*output, *Tres));
  };

  SECTION("GlobalAveragePoolingImpl_cpu::forward()") {
    SECTION(
        "1-2Dim > not enough dimensions leads to function throwing an error") {
      // generate a random tensor with too few dimensions
      const std::vector<DimSize_t> dims = randomDims(nbLowDimsDist);
      // std::vector owns the buffer, so nothing leaks if the REQUIRE fails.
      std::vector<float> array0(countElements(dims));
      for (float &value : array0) {
        value = valueDist(gen);
      }

      // input0
      T0->resize(dims);
      T0->getImpl()->setRawPtr(array0.data(), array0.size());

      REQUIRE_THROWS(globAvgPool->forward());
    }

    SECTION("3+Dim") {
      SECTION("Fill a tensor with all values set as N will result with every "
              "output being N") {
        // generate the tensor
        const std::vector<DimSize_t> dims_in = randomDims(nbHighDimsDist);
        const std::vector<DimSize_t> dims_out{dims_in[0], dims_in[1]};
        const std::size_t in_nb_elems = countElements(dims_in);
        const std::size_t out_nb_elems = countElements(dims_out);

        // averaging per channel: 1 addition per input element, plus
        // 1 division per (batch, channel) pair
        number_of_operation += in_nb_elems + out_nb_elems;

        // The mean of a constant tensor is that constant.
        const float val = valueDist(gen);
        std::vector<float> array0(in_nb_elems, val);
        std::vector<float> result(out_nb_elems, val);

        checkForward(dims_in, array0, dims_out, result);
      }

      SECTION("random testing") {
        for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) {
          // generate the tensor
          const std::vector<DimSize_t> dims_in = randomDims(nbHighDimsDist);
          const std::vector<DimSize_t> dims_out{dims_in[0], dims_in[1]};
          const std::size_t in_nb_elems = countElements(dims_in);
          const std::size_t out_nb_elems = countElements(dims_out);
          // elements averaged together to produce one output value
          const std::size_t in_channel_nb_elems = in_nb_elems / out_nb_elems;

          // averaging per channel: 1 addition per input element, plus
          // 1 division per (batch, channel) pair
          number_of_operation += in_nb_elems + out_nb_elems;

          std::vector<float> array0(in_nb_elems);
          std::vector<float> result(out_nb_elems);
          // Output index `out_idx` is batch * nb_channels + channel; the
          // elements of that channel are the `in_channel_nb_elems`
          // consecutive inputs starting at out_idx * in_channel_nb_elems.
          for (std::size_t out_idx = 0; out_idx < out_nb_elems; ++out_idx) {
            float channel_sum = 0;
            for (std::size_t i = 0; i < in_channel_nb_elems; ++i) {
              const float val = valueDist(gen);
              array0[out_idx * in_channel_nb_elems + i] = val;
              channel_sum += val;
            }
            result[out_idx] = channel_sum / in_channel_nb_elems;
          }

          checkForward(dims_in, array0, dims_out, result);
        }
      }

      SECTION("Using result from a pytorch function as groundtruth") {
        const DimSize_t batch_size = 2;
        const DimSize_t channels = 3;
        const DimSize_t height = 4;
        const DimSize_t width = 3;
        const DimSize_t depth = 2;

        SECTION("2D_img") {
          const std::vector<DimSize_t> in_dims{batch_size, channels, height,
                                               width};
          const std::vector<DimSize_t> out_dims{batch_size, channels};
          const DimSize_t in_nb_elems = batch_size * channels * height * width;
          const DimSize_t out_nb_elems = batch_size * channels;

          // averaging per channel: 1 addition per input element, plus
          // 1 division per (batch, channel) pair
          number_of_operation += in_nb_elems + out_nb_elems;

          // Two lines (12 values) per channel.
          std::vector<float> input{
              0.1807716,  -0.0699881, -0.3596235, -0.9152045, 0.6257653,  0.0255099,
              0.9545137,  0.0643485,  0.3611506,  1.1678782,  -1.3498932, -0.5101767,
              0.2359577,  -0.2397784, -0.9211147, 1.5432971,  1.3488258,  -0.1396417,
              0.2857972,  0.9651205,  -2.0371499, 0.4931363,  1.4869986,  0.5910330,
              0.1260297,  -1.5626874, -1.1601028, -0.3348408, 0.4477722,  -0.8016447,
              1.5236114,  2.5085869,  -0.6630959, -0.2512752, 1.0101448,  0.1215468,
              0.1583993,  1.1340188,  -1.1538976, -0.2983968, -0.5075365, -0.9239212,
              0.5467061,  -1.4947776, -1.2057148, 0.5718198,  -0.5973545, -0.6936757,
              1.6455388,  -0.8029931, 1.3514109,  -0.2759193, -1.5108346, 2.1047730,
              2.7629590,  -1.7465292, 0.8353187,  -1.9560477, -0.8002653, -0.5044988,
              -0.0711742, -0.5130699, -1.0307810, 0.9154347,  -0.2282317, -0.6884708,
              0.1832259,  0.6003584,  -1.5429375, -0.3465560, -0.1476223, 0.6469797};
          std::vector<float> result{0.0145876,  0.3010401, 0.0803371,
                                    -0.3720275, 0.0919094, -0.1852371};
          // Guard against a mistyped reference table.
          REQUIRE(input.size() == in_nb_elems);
          REQUIRE(result.size() == out_nb_elems);

          checkForward(in_dims, input, out_dims, result);
        }

        SECTION("3D_img") {
          const std::vector<DimSize_t> in_dims{batch_size, channels, height,
                                               width, depth};
          const std::vector<DimSize_t> out_dims{batch_size, channels};
          const DimSize_t in_nb_elems =
              batch_size * channels * height * width * depth;
          const DimSize_t out_nb_elems = batch_size * channels;

          // averaging per channel: 1 addition per input element, plus
          // 1 division per (batch, channel) pair
          number_of_operation += in_nb_elems + out_nb_elems;

          // Four lines (24 values) per channel.
          std::vector<float> input{
              0.0061403,  -0.9665052, 0.3582928,  0.1072854,  1.2463317,  1.2460036,
              0.3534451,  0.9425349,  -0.2103887, -0.7959853, 0.1297970,  -1.9445597,
              0.0609514,  -0.2379328, 1.9020044,  -1.1762751, 0.3404147,  1.1685153,
              -0.6526139, 0.3767620,  0.1887376,  0.5154487,  0.6371427,  -0.3948864,
              -1.1571540, 0.2896117,  0.6163548,  -0.4370409, 0.6589766,  0.6587803,
              -1.3702172, -1.6210355, 0.5872851,  0.2860694,  0.0082870,  -0.2523253,
              -1.3247224, 0.1891782,  0.0211001,  0.9404197,  -0.5576900, -0.6939272,
              -0.3252473, 1.2439330,  -1.1671864, -0.4091243, 1.2600617,  -1.5630058,
              1.1346143,  -0.0823837, 0.2893163,  0.8357732,  -0.2449911, 0.2712233,
              0.0936364,  -0.8834321, -0.3274170, 0.0783938,  -0.3807656, 0.3775077,
              0.1119123,  2.3142793,  -0.7989057, -0.5643027, -1.1346605, 0.1705271,
              0.9946650,  1.2625724,  1.6218156,  1.0774711,  0.5947813,  -1.5290873,
              2.0437069,  -0.1656267, 0.0870704,  -0.5276564, -0.1002882, 1.0539219,
              -0.6230739, -1.5905718, -0.9741858, -0.1869211, 0.5816050,  -2.6339815,
              -1.0764544, 2.5903966,  0.4940658,  0.4671729,  0.6588292,  -0.7257792,
              1.4280071,  -1.2187740, 0.7380729,  -1.1599953, -1.4355115, -1.5304037,
              0.8474578,  0.0774260,  0.5433396,  -0.8438400, -0.1089903, -0.6354192,
              0.8772392,  0.2844733,  0.0975270,  -0.9785872, -0.4320499, -1.4937501,
              -2.0644901, 0.0851217,  0.6644159,  0.4168026,  0.0958830,  -1.5699565,
              0.3739572,  -0.1420672, -0.7864021, 0.2443752,  -0.9811850, -0.0698569,
              0.1463890,  0.2536245,  0.2136150,  0.3113698,  1.8353856,  1.4473228,
              -0.7373698, 0.2485314,  -0.4789796, -0.3396149, 0.6438198,  0.7287521,
              -1.5119252, -0.1006494, 1.8955028,  1.0871323,  0.3620502,  -0.8826663,
              1.2220223,  -1.2817260, 1.4153577,  0.4148015,  1.3458617,  1.9718349};
          std::vector<float> result{0.1333608,  -0.1716091, 0.2201060,
                                    -0.1585989, -0.2291074, 0.4254351};
          // Guard against a mistyped reference table.
          REQUIRE(input.size() == in_nb_elems);
          REQUIRE(result.size() == out_nb_elems);

          checkForward(in_dims, input, out_dims, result);
        }
      }

      // Informational throughput report for whichever "3+Dim" leaf just ran.
      std::cout << "GlobalAveragePooling total execution time : "
                << duration.count() << "µs" << std::endl;
      std::cout << "Number of operations : " << number_of_operation
                << std::endl;
      std::cout << "Operation / µs = " << number_of_operation / duration.count()
                << std::endl;
    }
  }
}
} // namespace Aidge