diff --git a/CMakeLists.txt b/CMakeLists.txt
index 51a6ebe10d7b8d03fcb94898de55734dbabf9b0c..229110d9c1a5b8b202a6811a0a2276f91ba6b73a 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -7,10 +7,11 @@ file(READ "${CMAKE_SOURCE_DIR}/project_name.txt" project)
 
 message(STATUS "Project name: ${project}")
 message(STATUS "Project version: ${version}")
 
-# Note : project name is {project} and python module name is also {project}
+# Note : project name is {project} and python module name is also {project}
 set(module_name _${project}) # target name
 project(${project})
+set(CMAKE_CXX_STANDARD 14)
 
 ##############################################
 # Define options
@@ -18,6 +19,7 @@ option(PYBIND "python binding" ON)
 option(WERROR "Warning as error" OFF)
 option(TEST "Enable tests" ON)
 option(COVERAGE "Enable coverage" OFF)
+option(ENABLE_ASAN "Enable ASan (AddressSanitizer) for runtime analysis of memory use (over/underflow, memory leak, ...)" OFF)
 
 ##############################################
 # Import utils CMakeLists
@@ -34,7 +36,6 @@ find_package(aidge_core REQUIRED)
 
 ##############################################
 # Create target and set properties
-
 
 file(GLOB_RECURSE src_files "src/*.cpp")
 file(GLOB_RECURSE inc_files "include/*.hpp")
@@ -43,9 +44,23 @@ target_link_libraries(${module_name}
     PUBLIC
         _aidge_core # _ is added because we link the target not the project
 )
+
 #Set target properties
 set_property(TARGET ${module_name} PROPERTY POSITION_INDEPENDENT_CODE ON)
 
+if( ${ENABLE_ASAN} )
+    message("Building ${module_name} with ASAN.")
+    set(SANITIZE_FLAGS -fsanitize=address -fno-omit-frame-pointer)
+    target_link_libraries(${module_name}
+        PUBLIC
+            -fsanitize=address
+    )
+    target_compile_options(${module_name}
+        PRIVATE
+            ${SANITIZE_FLAGS}
+    )
+endif()
+
 target_include_directories(${module_name}
     PUBLIC
         $<INSTALL_INTERFACE:include>
@@ -60,7 +75,7 @@ if (PYBIND)
 
     # Handles Python + pybind11 headers dependencies
     target_link_libraries(${module_name}
-        PUBLIC 
+        PUBLIC
             pybind11::pybind11
         PRIVATE
            Python::Python
@@ -99,8 +114,8 @@ install(DIRECTORY include/ DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
 install(EXPORT ${project}-targets
     FILE "${project}-targets.cmake"
     DESTINATION ${INSTALL_CONFIGDIR}
-    COMPONENT ${module_name} 
-)
+    COMPONENT ${module_name}
+)
 
 #Create a ConfigVersion.cmake file
 include(CMakePackageConfigHelpers)
diff --git a/include/aidge/backend/cpu.hpp b/include/aidge/backend/cpu.hpp
index 78a317281475bd05ee317127b02cfeddcfd07e49..6b8b7b9208abd95f312ee53e5909f7de2b163624 100644
--- a/include/aidge/backend/cpu.hpp
+++ b/include/aidge/backend/cpu.hpp
@@ -23,6 +23,7 @@
 #include "aidge/backend/cpu/operator/ErfImpl.hpp"
 #include "aidge/backend/cpu/operator/FCImpl.hpp"
 #include "aidge/backend/cpu/operator/GatherImpl.hpp"
+#include "aidge/backend/cpu/operator/GlobalAveragePoolingImpl.hpp"
 #include "aidge/backend/cpu/operator/LeakyReLUImpl.hpp"
 #include "aidge/backend/cpu/operator/MatMulImpl.hpp"
 #include "aidge/backend/cpu/operator/MemorizeImpl.hpp"
diff --git a/include/aidge/backend/cpu/operator/FCImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/FCImpl_forward_kernels.hpp
index 91e2558a7ef1079cbc9fb11f78fab53ef4246149..64f3b3e18f7255b74decad5137cbb5ccd6966123 100644
--- a/include/aidge/backend/cpu/operator/FCImpl_forward_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/FCImpl_forward_kernels.hpp
@@ -12,10 +12,10 @@
 #ifndef AIDGE_CPU_OPERATOR_FCIMPL_FORWARD_KERNEL_H_
 #define AIDGE_CPU_OPERATOR_FCIMPL_FORWARD_KERNEL_H_
 
-#include "aidge/utils/Registrar.hpp"
 #include <algorithm>
 #include "aidge/backend/cpu/operator/FCImpl.hpp"
+#include "aidge/utils/Registrar.hpp"
 
 namespace Aidge {
 // template <class I, class W, class B, class O>
diff --git a/include/aidge/backend/cpu/operator/GlobalAveragePoolingImpl.hpp b/include/aidge/backend/cpu/operator/GlobalAveragePoolingImpl.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..758535de4cc506b8de4adf7004afbbfdd8185941
--- /dev/null
+++ b/include/aidge/backend/cpu/operator/GlobalAveragePoolingImpl.hpp
@@ -0,0 +1,55 @@
+/********************************************************************************
+ * Copyright (c) 2023 CEA-List
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License 2.0 which is available at
+ * http://www.eclipse.org/legal/epl-2.0.
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ ********************************************************************************/
+
+#ifndef AIDGE_CPU_OPERATOR_GLOBALAVERAGEPOOLINGIMPL_H_
+#define AIDGE_CPU_OPERATOR_GLOBALAVERAGEPOOLINGIMPL_H_
+
+#include <memory>
+#include <vector>
+
+#include "aidge/backend/OperatorImpl.hpp"
+#include "aidge/operator/GlobalAveragePooling.hpp"
+#include "aidge/utils/Registrar.hpp"
+#include "aidge/utils/Types.h"
+
+namespace Aidge {
+// class GlobalAveragePooling_Op;
+
+class GlobalAveragePoolingImplForward_cpu
+    : public Registrable<
+          GlobalAveragePoolingImplForward_cpu, std::tuple<DataType, DataType>,
+          void(const std::vector<DimSize_t> &, const void *, void *)> {};
+
+class GlobalAveragePoolingImplBackward_cpu
+    : public Registrable<
+          GlobalAveragePoolingImplBackward_cpu, std::tuple<DataType, DataType>,
+          void(const std::vector<DimSize_t> &, const void *, void *)> {};
+
+class GlobalAveragePoolingImpl_cpu : public OperatorImpl {
+public:
+  GlobalAveragePoolingImpl_cpu(const GlobalAveragePooling_Op &op)
+      : OperatorImpl(op, "cpu") {}
+
+  static std::unique_ptr<GlobalAveragePoolingImpl_cpu>
+  create(const GlobalAveragePooling_Op &op) {
+    return std::make_unique<GlobalAveragePoolingImpl_cpu>(op);
+  }
+
+  void forward() override;
+};
+
+namespace {
+static Registrar<GlobalAveragePooling_Op> registrarGlobalAveragePoolingImpl_cpu(
+    "cpu", Aidge::GlobalAveragePoolingImpl_cpu::create);
+}
+} // namespace Aidge
+
+#endif /* AIDGE_CPU_OPERATOR_GLOBALAVERAGEPOOLINGIMPL_H_ */
diff --git a/include/aidge/backend/cpu/operator/GlobalAveragePoolingImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/GlobalAveragePoolingImpl_forward_kernels.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..2bb78b2f4ccacfa1080203efcbc6f9896e464661
--- /dev/null
+++ b/include/aidge/backend/cpu/operator/GlobalAveragePoolingImpl_forward_kernels.hpp
@@ -0,0 +1,79 @@
+/********************************************************************************
+ * Copyright (c) 2023 CEA-List
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License 2.0 which is available at
+ * http://www.eclipse.org/legal/epl-2.0.
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ ********************************************************************************/
+
+#ifndef AIDGE_CPU_OPERATOR_GLOBALAVERAGEPOOLINGIMPL_FORWARD_KERNEL_H_
+#define AIDGE_CPU_OPERATOR_GLOBALAVERAGEPOOLINGIMPL_FORWARD_KERNEL_H_
+
+#include <cstddef>
+#include <functional> // std::multiplies
+#include <numeric>    // std::accumulate
+#include <vector>
+
+#include "aidge/backend/cpu/operator/GlobalAveragePoolingImpl.hpp"
+#include "aidge/data/Data.hpp"
+#include "aidge/utils/ErrorHandling.hpp"
+#include "aidge/utils/Registrar.hpp"
+#include "aidge/utils/Types.h"
+
+
+namespace Aidge {
+template <class I, class O>
+void GlobalAveragePoolingImpl_cpu_forward_kernel(
+    const std::vector<DimSize_t> &dims, const void *input_, void *output_) {
+  // error checking
+  AIDGE_ASSERT(dims.size() >= 3, "GlobalAveragePool needs at least a 3-dimensional "
+               "input, number of input dimensions: {}", dims.size());
+
+  // computation
+  const I *input = static_cast<const I *>(input_);
+  O *output = static_cast<O *>(output_);
+
+  DimSize_t nb_elems = std::accumulate(dims.begin(), dims.end(), std::size_t(1),
+                                       std::multiplies<std::size_t>());
+
+  const DimSize_t in_batch_nb_elems{nb_elems / dims[0]};
+  const DimSize_t in_channel_nb_elems{in_batch_nb_elems / dims[1]};
+  const DimSize_t out_batch_nb_elems{dims[1]};
+  // parse channel by channel and fill each output with the average of the
+  // values in the channel
+  for (DimSize_t batch = 0; batch < dims[0]; ++batch) {
+    for (DimSize_t channel = 0; channel < dims[1]; ++channel) {
+      const I *filter_start = std::next(
+          input, (batch * in_batch_nb_elems) + (channel * in_channel_nb_elems));
+      I sum = 0;
+      for (size_t i = 0; i < in_channel_nb_elems; ++i) {
+        sum += filter_start[i];
+      }
+      output[batch * out_batch_nb_elems + channel] =
+          sum / static_cast<I>(in_channel_nb_elems);
+    }
+  }
+}
+
+// Then we add the Registrar declaration for different input/output types
+namespace {
+static Registrar<GlobalAveragePoolingImplForward_cpu>
+    registrarGlobalAveragePoolingImplForward_cpu_Float32(
+        {DataType::Float32, DataType::Float32},
+        Aidge::GlobalAveragePoolingImpl_cpu_forward_kernel<float, float>);
+static Registrar<GlobalAveragePoolingImplForward_cpu>
+    registrarGlobalAveragePoolingImplForward_cpu_Int32(
+        {DataType::Int32, DataType::Int32},
+        Aidge::GlobalAveragePoolingImpl_cpu_forward_kernel<int, int>);
+static Registrar<GlobalAveragePoolingImplForward_cpu>
+    registrarGlobalAveragePoolingImplForward_cpu_Float64(
+        {DataType::Float64, DataType::Float64},
+        Aidge::GlobalAveragePoolingImpl_cpu_forward_kernel<double, double>);
+} // namespace
+} // namespace Aidge
+
+#endif /* AIDGE_CPU_OPERATOR_GLOBALAVERAGEPOOLINGIMPL_FORWARD_KERNEL_H_ */
diff --git a/src/operator/GlobalAveragePoolingImpl.cpp b/src/operator/GlobalAveragePoolingImpl.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..f7280360a4486fe5db6c4dfdd4c492bbe6ba302b
--- /dev/null
+++ b/src/operator/GlobalAveragePoolingImpl.cpp
@@ -0,0 +1,41 @@
+/********************************************************************************
+ * Copyright (c) 2024 CEA-List
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License 2.0 which is available at
+ * http://www.eclipse.org/legal/epl-2.0.
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ ********************************************************************************/
+
+#include "aidge/backend/cpu/operator/GlobalAveragePoolingImpl.hpp"
+
+#include <functional>
+#include <memory>
+#include <vector>
+
+#include "aidge/backend/cpu/operator/GlobalAveragePoolingImpl_forward_kernels.hpp"
+#include "aidge/data/Data.hpp"
+#include "aidge/data/Tensor.hpp"
+#include "aidge/operator/GlobalAveragePooling.hpp"
+#include "aidge/utils/ErrorHandling.hpp"
+#include "aidge/utils/Registrar.hpp"
+#include "aidge/utils/Types.h"
+
+
+void Aidge::GlobalAveragePoolingImpl_cpu::forward()
+{
+    const GlobalAveragePooling_Op& op_ = static_cast<const GlobalAveragePooling_Op&>(mOp);
+    // Check if input is provided
+    AIDGE_ASSERT(op_.getInput(0), "missing input 0");
+
+    // Create the forward kernel with the requested data types
+    auto kernelFunc = Registrar<GlobalAveragePoolingImplForward_cpu>::create({op_.getInput(0)->dataType(),
+                                                                              op_.getOutput(0)->dataType()});
+
+    // Call kernel
+    kernelFunc(op_.getInput(0)->dims(),
+               op_.getInput(0)->getImpl()->rawPtr(),
+               op_.getOutput(0)->getImpl()->rawPtr());
+}
\ No newline at end of file
diff --git a/unit_tests/operator/Test_GlobalAveragePoolingImpl.cpp b/unit_tests/operator/Test_GlobalAveragePoolingImpl.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..c1db6c5eebcef13df970ec7e9fc415b5cba187a2
--- /dev/null
+++ b/unit_tests/operator/Test_GlobalAveragePoolingImpl.cpp
@@ -0,0 +1,565 @@
+/********************************************************************************
+ * Copyright (c) 2023 CEA-List
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License 2.0 which is available at
+ * http://www.eclipse.org/legal/epl-2.0.
+ * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#include <aidge/utils/Types.h> +#include <catch2/catch_test_macros.hpp> +#include <chrono> +#include <cmath> +#include <cstddef> // std::size_t +#include <cstdint> // std::uint16_t +#include <iostream> +#include <memory> +#include <numeric> // std::accumulate +#include <ostream> +#include <random> // std::random_device, std::mt19937, std::uniform_real_distribution + +#include "aidge/data/Tensor.hpp" +#include "aidge/operator/GlobalAveragePooling.hpp" +#include "aidge/utils/TensorUtils.hpp" + +// debug print function +void print_tensor(Aidge::Tensor &T) { + // Print tensors + std::cout << "Tensor : size =  ["; + for (auto &dim : T.dims()) { + std::cout << dim << " , "; + } + std::cout << "]" << std::endl; + T.print(); +} + +namespace Aidge { +TEST_CASE("[cpu/operator] GlobalAveragePooling", + "[GlobalAveragePooling][CPU]") { + constexpr std::uint16_t NBTRIALS = 10; + // Create a random number generator + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_real_distribution<float> valueDist( + 0.1f, 1.1f); // Random float distribution between 0 and 1 + std::uniform_int_distribution<std::size_t> dimSizeDist(std::size_t(2), + std::size_t(10)); + + std::uniform_int_distribution<std::size_t> nbLowDimsDist(std::size_t(1), + std::size_t(2)); + std::uniform_int_distribution<std::size_t> nbHighDimsDist(std::size_t(3), + std::size_t(7)); + + // Create MatGlobalAveragePooling Operator + std::shared_ptr<Node> globAvgPool = GlobalAveragePooling(); + auto op = + std::static_pointer_cast<OperatorTensor>(globAvgPool->getOperator()); + op->setDataType(DataType::Float32); + op->setBackend("cpu"); + + // Create the input Tensor + std::shared_ptr<Tensor> T0 = std::make_shared<Tensor>(); + op->associateInput(0, T0); + T0->setDataType(DataType::Float32); + T0->setBackend("cpu"); + + // Create results Tensor + std::shared_ptr<Tensor> Tres = std::make_shared<Tensor>(); + Tres->setDataType(DataType::Float32); + Tres->setBackend("cpu"); + + // To measure execution time of 'MatGlobalAveragePooling_Op::forward()' member + // function call + std::chrono::time_point<std::chrono::system_clock> start; + std::chrono::time_point<std::chrono::system_clock> end; + std::chrono::duration<double, std::micro> duration{}; + int number_of_operation{0}; + + SECTION("GlobalAveragePoolingImpl_cpu::forward()") { + SECTION( + "1-2Dim > not enough dimensions leads to function throwing an error") { + // generate a random tensors + const std::size_t nbDims = nbLowDimsDist(gen); + std::vector<std::size_t> dims; + for (std::size_t i = 0; i < nbDims; ++i) { + dims.push_back(dimSizeDist(gen)); + } + const std::size_t nb_elements = + std::accumulate(dims.cbegin(), dims.cend(), std::size_t(1), + std::multiplies<std::size_t>()); + + float *array0 = new float[nb_elements]; + for (std::size_t i = 0; i < nb_elements; ++i) { + array0[i] = valueDist(gen); + } + // input0 + T0->resize(dims); + T0->getImpl()->setRawPtr(array0, nb_elements); + + REQUIRE_THROWS(globAvgPool->forward()); + delete[] array0; + } + + SECTION("3+Dim") { + SECTION("Fill a tensor with all values set as N will result with every " + "output being N") { + // generate the tensor + const std::size_t nbDims = nbHighDimsDist(gen); + std::vector<std::size_t> dims_in; + for (std::size_t i = 0; i < nbDims; ++i) { + dims_in.push_back(dimSizeDist(gen)); + } + // create in nb_elems + const std::size_t in_nb_elems = + std::accumulate(dims_in.cbegin(), 
dims_in.cend(), std::size_t(1), + std::multiplies<std::size_t>()); + const DimSize_t in_batch_nb_elems = in_nb_elems / dims_in[0]; + const DimSize_t in_channel_nb_elems = in_batch_nb_elems / dims_in[1]; + + number_of_operation += + in_nb_elems + + dims_in[1]; // averaging per channel : 1 addition per element in + // the channel + 1 division this for every batch + // create out nb_elems + std::vector<std::size_t> dims_out{dims_in[0], dims_in[1]}; + const std::size_t out_nb_elems = + std::accumulate(dims_out.cbegin(), dims_out.cend(), std::size_t(1), + std::multiplies<std::size_t>()); + const DimSize_t out_batch_nb_elems = out_nb_elems / dims_out[0]; + + // iterate over each batch/channel + float *array0 = new float[in_nb_elems]; + float *result = new float[out_nb_elems]; + float val = valueDist(gen); + for (std::size_t batch = 0; batch < dims_in[0]; ++batch) { + for (std::size_t channel = 0; channel < dims_in[1]; ++channel) { + for (std::size_t i = 0; i < in_channel_nb_elems; ++i) + + { + array0[batch * in_batch_nb_elems + channel * in_channel_nb_elems + + i] = val; + } + result[batch * out_batch_nb_elems + channel] = val; + } + } + + // input0 + T0->resize(dims_in); + T0->getImpl()->setRawPtr(array0, in_nb_elems); + + // results + Tres->resize(dims_out); + Tres->getImpl()->setRawPtr(result, out_nb_elems); + + op->computeOutputDims(); + start = std::chrono::system_clock::now(); + REQUIRE_NOTHROW(globAvgPool->forward()); + end = std::chrono::system_clock::now(); + duration += + std::chrono::duration_cast<std::chrono::microseconds>(end - start); + + REQUIRE(Tres->nbDims() == op->getOutput(0)->nbDims()); + for (DimSize_t i = 0; i < op->getOutput(0)->nbDims(); ++i) { + REQUIRE(Tres->dims().at(i) == op->getOutput(0)->dims().at(i)); + } + + REQUIRE(approxEq<float>(*(op->getOutput(0)), *Tres)); + + delete[] array0; + delete[] result; + } + + SECTION("random testing") { + for (int trial = 0; trial < NBTRIALS; ++trial) { + // generate the tensor + const std::size_t nbDims = nbHighDimsDist(gen); + std::vector<std::size_t> dims_in; + for (std::size_t i = 0; i < nbDims; ++i) { + dims_in.push_back(dimSizeDist(gen)); + } + // create in nb_elems + const std::size_t in_nb_elems = + std::accumulate(dims_in.cbegin(), dims_in.cend(), std::size_t(1), + std::multiplies<std::size_t>()); + const DimSize_t in_batch_nb_elems = in_nb_elems / dims_in[0]; + const DimSize_t in_channel_nb_elems = in_batch_nb_elems / dims_in[1]; + number_of_operation += + in_nb_elems + + dims_in[1]; // averaging per channel : 1 addition per element in + // the channel + 1 division this for every batch + + // create out nb_elems + std::vector<std::size_t> dims_out{dims_in[0], dims_in[1]}; + const std::size_t out_nb_elems = + std::accumulate(dims_out.cbegin(), dims_out.cend(), + std::size_t(1), std::multiplies<std::size_t>()); + const DimSize_t out_batch_nb_elems = out_nb_elems / dims_out[0]; + + // iterate over each batch/channel + float *array0 = new float[in_nb_elems]; + float *result = new float[out_nb_elems]; + for (std::size_t batch = 0; batch < dims_in[0]; ++batch) { + for (std::size_t channel = 0; channel < dims_in[1]; ++channel) { + float channel_sum = 0; + for (std::size_t i = 0; i < in_channel_nb_elems; ++i) + + { + float val = valueDist(gen); + array0[batch * in_batch_nb_elems + + channel * in_channel_nb_elems + i] = val; + channel_sum += val; + } + result[batch * out_batch_nb_elems + channel] = + channel_sum / in_channel_nb_elems; + } + } + + // input0 + T0->resize(dims_in); + T0->getImpl()->setRawPtr(array0, in_nb_elems); + + 
// results + Tres->resize(dims_out); + Tres->getImpl()->setRawPtr(result, out_nb_elems); + + op->computeOutputDims(); + start = std::chrono::system_clock::now(); + REQUIRE_NOTHROW(globAvgPool->forward()); + end = std::chrono::system_clock::now(); + duration += std::chrono::duration_cast<std::chrono::microseconds>( + end - start); + + REQUIRE(Tres->nbDims() == op->getOutput(0)->nbDims()); + for (DimSize_t i = 0; i < op->getOutput(0)->nbDims(); ++i) { + REQUIRE(Tres->dims().at(i) == op->getOutput(0)->dims().at(i)); + } + + REQUIRE(approxEq<float>(*(op->getOutput(0)), *Tres)); + + delete[] array0; + delete[] result; + } + } + SECTION("Using result from a pytorch function as groundtruth") { + DimSize_t batch_size = 2; + DimSize_t channels = 3; + DimSize_t height = 4; + DimSize_t width = 3; + DimSize_t depth = 2; + + SECTION("2D_img") { + const std::vector<DimSize_t> in_dims{batch_size, channels, height, + width}; + const std::vector<DimSize_t> out_dims{batch_size, channels}; + DimSize_t in_nb_elems = batch_size * channels * height * width; + DimSize_t out_nb_elems = batch_size * channels; + number_of_operation += + in_nb_elems + + channels; // averaging per channel : 1 addition per element in + // the channel + 1 division this for every batch + auto input = new float[in_nb_elems]; + auto result = new float[out_nb_elems]; + input[0] = 0.1807716; + input[1] = -0.0699881; + input[2] = -0.3596235; + input[3] = -0.9152045; + input[4] = 0.6257653; + input[5] = 0.0255099; + input[6] = 0.9545137; + input[7] = 0.0643485; + input[8] = 0.3611506; + input[9] = 1.1678782; + input[10] = -1.3498932; + input[11] = -0.5101767; + input[12] = 0.2359577; + input[13] = -0.2397784; + input[14] = -0.9211147; + input[15] = 1.5432971; + input[16] = 1.3488258; + input[17] = -0.1396417; + input[18] = 0.2857972; + input[19] = 0.9651205; + input[20] = -2.0371499; + input[21] = 0.4931363; + input[22] = 1.4869986; + input[23] = 0.5910330; + input[24] = 0.1260297; + input[25] = -1.5626874; + input[26] = -1.1601028; + input[27] = -0.3348408; + input[28] = 0.4477722; + input[29] = -0.8016447; + input[30] = 1.5236114; + input[31] = 2.5085869; + input[32] = -0.6630959; + input[33] = -0.2512752; + input[34] = 1.0101448; + input[35] = 0.1215468; + input[36] = 0.1583993; + input[37] = 1.1340188; + input[38] = -1.1538976; + input[39] = -0.2983968; + input[40] = -0.5075365; + input[41] = -0.9239212; + input[42] = 0.5467061; + input[43] = -1.4947776; + input[44] = -1.2057148; + input[45] = 0.5718198; + input[46] = -0.5973545; + input[47] = -0.6936757; + input[48] = 1.6455388; + input[49] = -0.8029931; + input[50] = 1.3514109; + input[51] = -0.2759193; + input[52] = -1.5108346; + input[53] = 2.1047730; + input[54] = 2.7629590; + input[55] = -1.7465292; + input[56] = 0.8353187; + input[57] = -1.9560477; + input[58] = -0.8002653; + input[59] = -0.5044988; + input[60] = -0.0711742; + input[61] = -0.5130699; + input[62] = -1.0307810; + input[63] = 0.9154347; + input[64] = -0.2282317; + input[65] = -0.6884708; + input[66] = 0.1832259; + input[67] = 0.6003584; + input[68] = -1.5429375; + input[69] = -0.3465560; + input[70] = -0.1476223; + input[71] = 0.6469797; + + result[0] = 0.0145876; + result[1] = 0.3010401; + result[2] = 0.0803371; + + result[3] = -0.3720275; + result[4] = 0.0919094; + result[5] = -0.1852371; + + // input0 + T0->resize(in_dims); + T0->getImpl()->setRawPtr(input, in_nb_elems); + + // results + Tres->resize(out_dims); + Tres->getImpl()->setRawPtr(result, out_nb_elems); + op->computeOutputDims(); + start = 
std::chrono::system_clock::now(); + REQUIRE_NOTHROW(globAvgPool->forward()); + end = std::chrono::system_clock::now(); + duration += std::chrono::duration_cast<std::chrono::microseconds>( + end - start); + + REQUIRE(Tres->nbDims() == op->getOutput(0)->nbDims()); + for (DimSize_t i = 0; i < op->getOutput(0)->nbDims(); ++i) { + REQUIRE(Tres->dims().at(i) == op->getOutput(0)->dims().at(i)); + } + REQUIRE(approxEq<float>(*(op->getOutput(0)), *Tres)); + delete[] input; + delete[] result; + } + SECTION("3D_img") { + const std::vector<DimSize_t> in_dims{batch_size, channels, height, + width, depth}; + const std::vector<DimSize_t> out_dims{batch_size, channels}; + DimSize_t in_nb_elems = + batch_size * channels * height * width * depth; + number_of_operation += + in_nb_elems + + channels; // averaging per channel : 1 addition per element in + // the channel + 1 division this for every batch + DimSize_t out_nb_elems = batch_size * channels; + auto input = new float[in_nb_elems]; + auto result = new float[out_nb_elems]; + input[0] = 0.0061403; + input[1] = -0.9665052; + input[2] = 0.3582928; + input[3] = 0.1072854; + input[4] = 1.2463317; + input[5] = 1.2460036; + input[6] = 0.3534451; + input[7] = 0.9425349; + input[8] = -0.2103887; + input[9] = -0.7959853; + input[10] = 0.1297970; + input[11] = -1.9445597; + input[12] = 0.0609514; + input[13] = -0.2379328; + input[14] = 1.9020044; + input[15] = -1.1762751; + input[16] = 0.3404147; + input[17] = 1.1685153; + input[18] = -0.6526139; + input[19] = 0.3767620; + input[20] = 0.1887376; + input[21] = 0.5154487; + input[22] = 0.6371427; + input[23] = -0.3948864; + input[24] = -1.1571540; + input[25] = 0.2896117; + input[26] = 0.6163548; + input[27] = -0.4370409; + input[28] = 0.6589766; + input[29] = 0.6587803; + input[30] = -1.3702172; + input[31] = -1.6210355; + input[32] = 0.5872851; + input[33] = 0.2860694; + input[34] = 0.0082870; + input[35] = -0.2523253; + input[36] = -1.3247224; + input[37] = 0.1891782; + input[38] = 0.0211001; + input[39] = 0.9404197; + input[40] = -0.5576900; + input[41] = -0.6939272; + input[42] = -0.3252473; + input[43] = 1.2439330; + input[44] = -1.1671864; + input[45] = -0.4091243; + input[46] = 1.2600617; + input[47] = -1.5630058; + input[48] = 1.1346143; + input[49] = -0.0823837; + input[50] = 0.2893163; + input[51] = 0.8357732; + input[52] = -0.2449911; + input[53] = 0.2712233; + input[54] = 0.0936364; + input[55] = -0.8834321; + input[56] = -0.3274170; + input[57] = 0.0783938; + input[58] = -0.3807656; + input[59] = 0.3775077; + input[60] = 0.1119123; + input[61] = 2.3142793; + input[62] = -0.7989057; + input[63] = -0.5643027; + input[64] = -1.1346605; + input[65] = 0.1705271; + input[66] = 0.9946650; + input[67] = 1.2625724; + input[68] = 1.6218156; + input[69] = 1.0774711; + input[70] = 0.5947813; + input[71] = -1.5290873; + input[72] = 2.0437069; + input[73] = -0.1656267; + input[74] = 0.0870704; + input[75] = -0.5276564; + input[76] = -0.1002882; + input[77] = 1.0539219; + input[78] = -0.6230739; + input[79] = -1.5905718; + input[80] = -0.9741858; + input[81] = -0.1869211; + input[82] = 0.5816050; + input[83] = -2.6339815; + input[84] = -1.0764544; + input[85] = 2.5903966; + input[86] = 0.4940658; + input[87] = 0.4671729; + input[88] = 0.6588292; + input[89] = -0.7257792; + input[90] = 1.4280071; + input[91] = -1.2187740; + input[92] = 0.7380729; + input[93] = -1.1599953; + input[94] = -1.4355115; + input[95] = -1.5304037; + input[96] = 0.8474578; + input[97] = 0.0774260; + input[98] = 0.5433396; + input[99] = 
-0.8438400; + input[100] = -0.1089903; + input[101] = -0.6354192; + input[102] = 0.8772392; + input[103] = 0.2844733; + input[104] = 0.0975270; + input[105] = -0.9785872; + input[106] = -0.4320499; + input[107] = -1.4937501; + input[108] = -2.0644901; + input[109] = 0.0851217; + input[110] = 0.6644159; + input[111] = 0.4168026; + input[112] = 0.0958830; + input[113] = -1.5699565; + input[114] = 0.3739572; + input[115] = -0.1420672; + input[116] = -0.7864021; + input[117] = 0.2443752; + input[118] = -0.9811850; + input[119] = -0.0698569; + input[120] = 0.1463890; + input[121] = 0.2536245; + input[122] = 0.2136150; + input[123] = 0.3113698; + input[124] = 1.8353856; + input[125] = 1.4473228; + input[126] = -0.7373698; + input[127] = 0.2485314; + input[128] = -0.4789796; + input[129] = -0.3396149; + input[130] = 0.6438198; + input[131] = 0.7287521; + input[132] = -1.5119252; + input[133] = -0.1006494; + input[134] = 1.8955028; + input[135] = 1.0871323; + input[136] = 0.3620502; + input[137] = -0.8826663; + input[138] = 1.2220223; + input[139] = -1.2817260; + input[140] = 1.4153577; + input[141] = 0.4148015; + input[142] = 1.3458617; + input[143] = 1.9718349; + + result[0] = 0.1333608; + result[1] = -0.1716091; + result[2] = 0.2201060; + result[3] = -0.1585989; + result[4] = -0.2291074; + result[5] = 0.4254351; + + // input0 + T0->resize(in_dims); + T0->getImpl()->setRawPtr(input, in_nb_elems); + + // results + Tres->resize(out_dims); + Tres->getImpl()->setRawPtr(result, out_nb_elems); + op->computeOutputDims(); + start = std::chrono::system_clock::now(); + REQUIRE_NOTHROW(globAvgPool->forward()); + end = std::chrono::system_clock::now(); + duration += std::chrono::duration_cast<std::chrono::microseconds>( + end - start); + + REQUIRE(Tres->nbDims() == op->getOutput(0)->nbDims()); + for (DimSize_t i = 0; i < op->getOutput(0)->nbDims(); ++i) { + REQUIRE(Tres->dims().at(i) == op->getOutput(0)->dims().at(i)); + } + REQUIRE(approxEq<float>(*(op->getOutput(0)), *Tres)); + delete[] input; + delete[] result; + } + } + std::cout << "GlobalAveragePooling total execution time : " + << duration.count() << "µs" << std::endl; + std::cout << "Number of operations : " << number_of_operation + << std::endl; + std::cout << "Operation / µs = " << number_of_operation / duration.count() + << std::endl; + } + } +} +} // namespace Aidge
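
Reviewer note on the new `ENABLE_ASAN` option added in CMakeLists.txt above: an instrumented build can be configured with something like `cmake -DENABLE_ASAN=ON ..` before building as usual (exact invocation depends on your build setup). As written, the compile flags (`-fsanitize=address -fno-omit-frame-pointer`) are PRIVATE to the `${module_name}` target, while the `-fsanitize=address` link flag is PUBLIC, so consumers of the target will also link against ASan.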
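For reviewers who want to exercise the new GlobalAveragePooling CPU implementation outside of Catch2, here is a minimal sketch (not part of the patch). It follows the exact call sequence used by `Test_GlobalAveragePoolingImpl.cpp` above; `main()` and the include paths for `Node.hpp`/`OperatorTensor.hpp` are my additions and may need adjusting to the aidge_core version in use.

```cpp
// Minimal usage sketch, mirroring the calls made in the unit test above.
#include <memory>

#include "aidge/backend/cpu.hpp"                    // pulls in GlobalAveragePoolingImpl.hpp
#include "aidge/data/Tensor.hpp"
#include "aidge/graph/Node.hpp"                     // assumed include path
#include "aidge/operator/GlobalAveragePooling.hpp"
#include "aidge/operator/OperatorTensor.hpp"        // assumed include path

int main() {
    // Build the node and grab its tensor operator, as in the unit test.
    std::shared_ptr<Aidge::Node> pool = Aidge::GlobalAveragePooling();
    auto op = std::static_pointer_cast<Aidge::OperatorTensor>(pool->getOperator());
    op->setDataType(Aidge::DataType::Float32);
    op->setBackend("cpu");

    // Input of shape (N, C, H, W) = (1, 2, 2, 2): channel 0 holds 1.0f, channel 1 holds 2.0f.
    float data[8] = {1.f, 1.f, 1.f, 1.f, 2.f, 2.f, 2.f, 2.f};
    auto input = std::make_shared<Aidge::Tensor>();
    input->setDataType(Aidge::DataType::Float32);
    input->setBackend("cpu");
    input->resize({1, 2, 2, 2});
    input->getImpl()->setRawPtr(data, 8);   // non-owning, same pattern as the test

    op->associateInput(0, input);
    op->computeOutputDims();                // output dims become (N, C) = (1, 2)
    pool->forward();                        // dispatches to GlobalAveragePoolingImpl_cpu

    // Expected output: {1.f, 2.f}, i.e. one average per channel.
    op->getOutput(0)->print();
    return 0;
}
```

The implementation is registered through the anonymous-namespace `Registrar` objects in the new headers, so beyond including `aidge/backend/cpu.hpp` and calling `setBackend("cpu")`, no explicit wiring is needed.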