diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 651acd6fe1a58edc0b6f2c446e48e4bc4e4a8750..3efb308fa0f78dce35973ccb47d1303d7c8634af 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -10,9 +10,12 @@ stages: - build # Unit test stage - test + # Code coverage + - coverage include: - local: '/.gitlab/ci/_global.gitlab-ci.yml' - local: '/.gitlab/ci/static_analysis.gitlab-ci.yml' - local: '/.gitlab/ci/build.gitlab-ci.yml' - local: '/.gitlab/ci/test.gitlab-ci.yml' + - local: '/.gitlab/ci/coverage.gitlab-ci.yml' diff --git a/.gitlab/ci/_global.gitlab-ci.yml b/.gitlab/ci/_global.gitlab-ci.yml index 6f34fe701df035e68ce49825fde0ff88449a9637..1615b8974db11d93cb3305ce800e46cf5377bc33 100644 --- a/.gitlab/ci/_global.gitlab-ci.yml +++ b/.gitlab/ci/_global.gitlab-ci.yml @@ -9,5 +9,8 @@ variables: GIT_SSL_NO_VERIFY: 1 DEBIAN_FRONTEND: noninteractive - -image: n2d2-ci/ubuntu20.04/cpu:latest \ No newline at end of file +default: + image: nvidia/cuda:12.2.0-devel-ubuntu22.04 + before_script: + - apt update + - apt install -y cmake cppcheck python-is-python3 pip git gcovr unzip curl diff --git a/.gitlab/ci/build.gitlab-ci.yml b/.gitlab/ci/build.gitlab-ci.yml index 620bc325977dc6f5dd2372f6f48ed8cc688b3388..68fcb6b4bc0dac08c4f0029ec1f2d3404226c1c2 100644 --- a/.gitlab/ci/build.gitlab-ci.yml +++ b/.gitlab/ci/build.gitlab-ci.yml @@ -1,61 +1,86 @@ build:ubuntu_cpp: stage: build + needs: [] tags: - docker - image: n2d2-ci/ubuntu20.04/cpu:latest - script: - - INSTALL_PATH="$CI_PROJECT_DIR/install_cpp" - - mkdir -p $INSTALL_PATH - - mkdir -p build_cpp + # Download dependencies + # aidge_core + - 'curl --location --output build_artifacts.zip "https://gitlab.eclipse.org/api/v4/projects/5139/jobs/artifacts/main/download?job=build:ubuntu_cpp"' + - unzip -o build_artifacts.zip -d . + - rm -rf build_cpp - # Clone and compile dependencies - - MODULE_NAME="aidge_core" - - BASE_URL=`echo $CI_REPOSITORY_URL | sed "s;\/*$CI_PROJECT_PATH.*;;"` - - REPO_URL="$BASE_URL/aidge/$MODULE_NAME.git" - - git clone $REPO_URL $MODULE_NAME - - mkdir -p $MODULE_NAME/build - - cd $MODULE_NAME/build - - cmake -DCMAKE_INSTALL_PREFIX:PATH=$INSTALL_PATH -DCMAKE_BUILD_TYPE=Debug -DWERROR=ON .. - - make -j4 all install - - cd ../.. - # Build current module + - export CMAKE_PREFIX_PATH=../install_cpp + - mkdir -p build_cpp - cd build_cpp - - cmake -DCMAKE_INSTALL_PREFIX:PATH=$INSTALL_PATH -DCMAKE_BUILD_TYPE=Debug -DWERROR=ON .. + - cmake -DCMAKE_INSTALL_PREFIX:PATH=../install_cpp -DCMAKE_BUILD_TYPE=Debug -DWERROR=ON -DCOVERAGE=ON .. - make -j4 all install artifacts: + expire_in: 1 week paths: - build_cpp/ - install_cpp/ build:ubuntu_python: stage: build + needs: [] tags: - docker - image: n2d2-ci/ubuntu20.04/cpu:latest - script: - - export AIDGE_INSTALL=`pwd`/install + # Download dependencies + # aidge_core (CPP) + - 'curl --location --output build_artifacts.zip "https://gitlab.eclipse.org/api/v4/projects/5139/jobs/artifacts/main/download?job=build:ubuntu_cpp"' + - unzip -o build_artifacts.zip -d . + - rm -rf build_cpp + # aidge_core (Python) + - 'curl --location --output build_artifacts.zip "https://gitlab.eclipse.org/api/v4/projects/5139/jobs/artifacts/main/download?job=build:ubuntu_python"' + - unzip -o build_artifacts.zip -d . - # Create virtaul env - python3 -m pip install virtualenv - virtualenv venv - source venv/bin/activate + - export AIDGE_INSTALL=`pwd`/install + - export CMAKE_PREFIX_PATH=../install_cpp + - python3 -m pip install . 
+  artifacts:
+    expire_in: 1 week
+    paths:
+      - venv/
-    # Clone dependencies
-    - MODULE_NAME="aidge_core"
-    - BASE_URL=`echo $CI_REPOSITORY_URL | sed "s;\/*$CI_PROJECT_PATH.*;;"`
-    - REPO_URL="$BASE_URL/aidge/$MODULE_NAME.git"
-    - git clone $REPO_URL $MODULE_NAME
 
+build:windows_cpp:
+  stage: build
+  needs: []
+  tags:
+    - windows
+
+  image: buildtools
+  before_script:
+    # Install Chocolatey
+    - Set-ExecutionPolicy Bypass -Scope Process -Force; [System.Net.ServicePointManager]::SecurityProtocol = [System.Net.ServicePointManager]::SecurityProtocol -bor 3072; iex ((New-Object System.Net.WebClient).DownloadString('https://community.chocolatey.org/install.ps1'))
+    # Install dependencies
+    - choco install cmake.install --installargs '"ADD_CMAKE_TO_PATH=System"' -Y
+    - choco install git -Y
+    - choco install python -Y
+    # Update PATH
+    - $env:Path = [System.Environment]::GetEnvironmentVariable("Path","Machine") + ";" + [System.Environment]::GetEnvironmentVariable("Path","User")
+  script:
+    # Download dependencies
+    # aidge_core
+    - 'curl "https://gitlab.eclipse.org/api/v4/projects/5139/jobs/artifacts/main/download?job=build:windows_cpp" -o build_artifacts.zip'
+    - Expand-Archive -Path .\build_artifacts.zip -DestinationPath . -Force
+    - Remove-Item .\build_cpp\ -Recurse
-    # Pip install dependancy
-    - cd $MODULE_NAME
-    - python3 -m pip install . -v
+    - $env:CMAKE_PREFIX_PATH = '../install_cpp'
+    - mkdir -p build_cpp
+    - cd build_cpp
+    - cmake -DCMAKE_INSTALL_PREFIX:PATH=../install_cpp -DCMAKE_BUILD_TYPE=Debug ..
+    - cmake --build . -j2
+    - cmake --install . --config Debug
-    - cd ..
-    - python3 -m pip install . -v
   artifacts:
+    expire_in: 1 week
     paths:
-      - venv/
\ No newline at end of file
+      - build_cpp/
+      - install_cpp/
diff --git a/.gitlab/ci/coverage.gitlab-ci.yml b/.gitlab/ci/coverage.gitlab-ci.yml
new file mode 100644
index 0000000000000000000000000000000000000000..33547fc3f52771c456fba3d34a6e8d96eebafd8a
--- /dev/null
+++ b/.gitlab/ci/coverage.gitlab-ci.yml
@@ -0,0 +1,41 @@
+coverage:ubuntu_cpp:
+  stage: coverage
+  needs: ["build:ubuntu_cpp"]
+  tags:
+    - docker
+  script:
+    - cd build_cpp
+    - ctest --output-on-failure
+    # HTML report for visualization
+    - gcovr --html-details --exclude-unreachable-branches -o coverage.html --root ${CI_PROJECT_DIR} --filter '\.\./include/' --filter '\.\./src/'
+    # Cobertura XML report for GitLab integration
+    - gcovr --xml-pretty --exclude-unreachable-branches --print-summary -o coverage.xml --root ${CI_PROJECT_DIR} --filter '\.\./include/' --filter '\.\./src/'
+  coverage: /^\s*lines:\s*\d+.\d+\%/
+  artifacts:
+    name: ${CI_JOB_NAME}-${CI_COMMIT_REF_NAME}-${CI_COMMIT_SHA}
+    expire_in: 2 days
+    reports:
+      coverage_report:
+        coverage_format: cobertura
+        path: build_cpp/coverage.xml
+
+coverage:ubuntu_python:
+  stage: coverage
+  needs: ["build:ubuntu_python"]
+  tags:
+    - docker
+  script:
+    - source venv/bin/activate
+    - python3 -m pip install numpy coverage
+    - cd ${CI_PROJECT_NAME}
+    # Retrieve the installation path of the module, since it is installed with pip.
+    - export MODULE_LOCATION=`python -c "import ${CI_PROJECT_NAME} as _; print(_.__path__[0])"`
+    - python3 -m coverage run --source=$MODULE_LOCATION -m unittest discover -s unit_tests/ -v -b
+    - python3 -m coverage report
+    - python3 -m coverage xml
+  coverage: '/(?i)total.*? (100(?:\.0+)?\%|[1-9]?\d(?:\.\d+)?\%)$/'
+  artifacts:
+    reports:
+      coverage_report:
+        coverage_format: cobertura
+        path: ${CI_PROJECT_NAME}/coverage.xml
diff --git a/.gitlab/ci/static_analysis.gitlab-ci.yml b/.gitlab/ci/static_analysis.gitlab-ci.yml
index 7490b5af51ca970c1b892408ac0023d8cd945cfa..0ea9b711885442e7f260ae86e313464b592127a0 100644
--- a/.gitlab/ci/static_analysis.gitlab-ci.yml
+++ b/.gitlab/ci/static_analysis.gitlab-ci.yml
@@ -26,8 +26,8 @@ static_analysis:python:
   script:
     - pip install pylint
     - pip install pylint-gitlab
-    - pylint --rcfile=.pylintrc --exit-zero --output-format=pylint_gitlab.GitlabCodeClimateReporter aidge_backend_cpu/ > codeclimate.json
-    - pylint --rcfile=.pylintrc --exit-zero --output-format=pylint_gitlab.GitlabPagesHtmlReporter aidge_backend_cpu/ > pylint.html
+    - pylint --rcfile=.pylintrc --exit-zero --output-format=pylint_gitlab.GitlabCodeClimateReporter ${CI_PROJECT_NAME}/ > codeclimate.json
+    - pylint --rcfile=.pylintrc --exit-zero --output-format=pylint_gitlab.GitlabPagesHtmlReporter ${CI_PROJECT_NAME}/ > pylint.html
     - mkdir -p public/python/$CI_COMMIT_REF_NAME
     - mv pylint.html public/python/$CI_COMMIT_REF_NAME/
   artifacts:
diff --git a/.gitlab/ci/test.gitlab-ci.yml b/.gitlab/ci/test.gitlab-ci.yml
index 2ad635dff79715f2e3a487b0a41d6f1b132ff641..05f567dd7430b0d3a801612ca5353a39288285d2 100644
--- a/.gitlab/ci/test.gitlab-ci.yml
+++ b/.gitlab/ci/test.gitlab-ci.yml
@@ -3,22 +3,47 @@ test:ubuntu_cpp:
   needs: ["build:ubuntu_cpp"]
   tags:
     - docker
-  image: n2d2-ci/ubuntu20.04/cpu:latest
   script:
     - cd build_cpp
-    - ctest --output-on-failure
+    - ctest --output-junit ctest-results.xml --output-on-failure
+  artifacts:
+    reports:
+      junit: build_cpp/ctest-results.xml
 
 test:ubuntu_python:
   stage: test
   needs: ["build:ubuntu_python"]
   tags:
     - docker
-  image: n2d2-ci/ubuntu20.04/cpu:latest
   script:
     - source venv/bin/activate
-    - cd aidge_backend_cpu
-    - python3 -m pip install numpy
+    - cd ${CI_PROJECT_NAME}
+    - python3 -m pip install numpy unittest-xml-reporting
     - python3 -m pip list
     # Run on discovery all tests located in core/unit_tests/python and discard the stdout
     # only to show the errors/warnings and the results of the tests
-    - python3 -m unittest discover -s unit_tests/ -v -b 1> /dev/null
+    - python3 -m xmlrunner discover -s unit_tests/ -v -b --output-file xmlrunner-results.xml
+  artifacts:
+    reports:
+      junit: ${CI_PROJECT_NAME}/xmlrunner-results.xml
+
+test:windows_cpp:
+  stage: test
+  needs: ["build:windows_cpp"]
+  tags:
+    - windows
+  image: buildtools
+  before_script:
+    # Install Chocolatey
+    - Set-ExecutionPolicy Bypass -Scope Process -Force; [System.Net.ServicePointManager]::SecurityProtocol = [System.Net.ServicePointManager]::SecurityProtocol -bor 3072; iex ((New-Object System.Net.WebClient).DownloadString('https://community.chocolatey.org/install.ps1'))
+    # Install dependencies
+    - choco install cmake.install --installargs '"ADD_CMAKE_TO_PATH=System"' -Y
+    - choco install python -Y
+    # Update PATH
+    - $env:Path = [System.Environment]::GetEnvironmentVariable("Path","Machine") + ";" + [System.Environment]::GetEnvironmentVariable("Path","User")
+  script:
+    - cd build_cpp
+    - ctest --output-junit ctest-results.xml --output-on-failure
+  artifacts:
+    reports:
+      junit: build_cpp/ctest-results.xml
diff --git a/CMakeLists.txt b/CMakeLists.txt
index d973a04ec5136347c3ddc7fc92989e65b0f34a42..51ee1f6d5df771fcccd1b05a45861eb2f1d3bbbe 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -21,6 +21,17 @@ include(PybindModuleCreation)
 # Define options
 option(PYBIND "python
binding" ON) option(WERROR "Warning as error" OFF) +option(TEST "Enable tests" ON) +option(COVERAGE "Enable coverage" OFF) + +############################################## +# Import utils CMakeLists +set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake") +include(PybindModuleCreation) + +if(CMAKE_COMPILER_IS_GNUCXX AND COVERAGE) + Include(CodeCoverage) +endif() ############################################## # Find system dependencies @@ -49,9 +60,9 @@ target_include_directories(${module_name} ) # PYTHON BINDING -generate_python_binding(${project} ${module_name}) - if (PYBIND) + generate_python_binding(${project} ${module_name}) + # Handles Python + pybind11 headers dependencies target_link_libraries(${module_name} PUBLIC @@ -63,20 +74,15 @@ endif() target_compile_features(${module_name} PRIVATE cxx_std_14) -if(WERROR) - target_compile_options(${module_name} PRIVATE +target_compile_options(${module_name} PRIVATE $<$<OR:$<CXX_COMPILER_ID:Clang>,$<CXX_COMPILER_ID:AppleClang>,$<CXX_COMPILER_ID:GNU>>: - -Wall -Wextra -Wold-style-cast -Winline -pedantic -Werror=narrowing -Wshadow -Werror>) - target_compile_options(${module_name} PRIVATE + -Wall -Wextra -Wold-style-cast -Winline -pedantic -Werror=narrowing -Wshadow $<$<BOOL:${WERROR}>:-Werror>>) +target_compile_options(${module_name} PRIVATE $<$<CXX_COMPILER_ID:MSVC>: /W4>) -else() - target_compile_options(${module_name} PRIVATE - $<$<OR:$<CXX_COMPILER_ID:Clang>,$<CXX_COMPILER_ID:AppleClang>,$<CXX_COMPILER_ID:GNU>>: - -Wall -Wextra -Wold-style-cast -Winline -pedantic -Werror=narrowing -Wshadow -Wpedantic>) - target_compile_options(${module_name} PRIVATE - $<$<CXX_COMPILER_ID:MSVC>: - /W4>) + +if(CMAKE_COMPILER_IS_GNUCXX AND COVERAGE) + append_coverage_compiler_flags() endif() ############################################## @@ -129,6 +135,7 @@ export(EXPORT ${project}-targets ############################################## ## Add test -enable_testing() -add_subdirectory(unit_tests) - +if(TEST) + enable_testing() + add_subdirectory(unit_tests) +endif() diff --git a/README.md b/README.md index 0a0fe37f8672fde09055d3356951579bd1b56d6c..74eb50826bf6f88a0ded363138adba04827390d0 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,5 @@ +   + # Aidge CPU library You can find in this folder the library that implements the CPU operators. 
<br> diff --git a/aidge_backend_cpu/unit_tests/test_scheduler.py b/aidge_backend_cpu/unit_tests/test_scheduler.py index bc76620390b7563f0088f4c600b612bbe827b170..d8cf3e164da4bd34273905b0b0e156cf057635a5 100644 --- a/aidge_backend_cpu/unit_tests/test_scheduler.py +++ b/aidge_backend_cpu/unit_tests/test_scheduler.py @@ -36,7 +36,54 @@ class test_scheduler(unittest.TestCase): for i in range(len(expected_out)): self.assertEqual(expected_out[i], out_tensor[i]) + def test_sequential_scheduling(self): + input_data = np.array([]).astype(np.float32) + input_tensor = aidge_core.Tensor(input_data) + input_node = aidge_core.Producer(input_tensor, "X") + + graph_view = aidge_core.sequential([ + aidge_core.FC(50, name='0'), + aidge_core.FC(50, name='1'), + aidge_core.FC(10, name='2'), + ]) + EXPECTED_SCHEDULE = ['0', '1', '2'] + + input_node.add_child(graph_view) + input_node.get_operator().set_datatype(aidge_core.DataType.Float32) + input_node.get_operator().set_backend("cpu") + graph_view.set_datatype(aidge_core.DataType.Float32) + graph_view.set_backend("cpu") + + scheduler = aidge_core.SequentialScheduler(graph_view) + scheduler.generate_scheduling() + + self.assertListEqual([i.name() for i in scheduler.get_static_scheduling()], EXPECTED_SCHEDULE) + + + def test_parallel_scheduling(self): + input_data = np.array([]).astype(np.float32) + input_tensor = aidge_core.Tensor(input_data) + + input_node = aidge_core.Producer(input_tensor, "X") + graph_view = aidge_core.sequential([ + aidge_core.FC(50, name='0'), + aidge_core.parallel([aidge_core.FC(50, name='1'), aidge_core.FC(50, name='3')]), + aidge_core.Add(name='2'), + ]) + + EXPECTED_SCHEDULE = [['0', '1', '3', '2'], ['0', '3', '1', '2']] # Both scheduling are valid ! + + input_node.add_child(graph_view) + input_node.get_operator().set_datatype(aidge_core.DataType.Float32) + input_node.get_operator().set_backend("cpu") + graph_view.set_datatype(aidge_core.DataType.Float32) + graph_view.set_backend("cpu") + + scheduler = aidge_core.SequentialScheduler(graph_view) + scheduler.generate_scheduling() + + self.assertTrue([i.name() for i in scheduler.get_static_scheduling()] in EXPECTED_SCHEDULE) if __name__ == '__main__': unittest.main() diff --git a/cmake/CodeCoverage.cmake b/cmake/CodeCoverage.cmake new file mode 100644 index 0000000000000000000000000000000000000000..d4a039fd0e511238df1c0e0502c7588409099289 --- /dev/null +++ b/cmake/CodeCoverage.cmake @@ -0,0 +1,742 @@ +# Copyright (c) 2012 - 2017, Lars Bilke +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without modification, +# are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# 3. Neither the name of the copyright holder nor the names of its contributors +# may be used to endorse or promote products derived from this software without +# specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR +# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +# ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# CHANGES: +# +# 2012-01-31, Lars Bilke +# - Enable Code Coverage +# +# 2013-09-17, Joakim Söderberg +# - Added support for Clang. +# - Some additional usage instructions. +# +# 2016-02-03, Lars Bilke +# - Refactored functions to use named parameters +# +# 2017-06-02, Lars Bilke +# - Merged with modified version from github.com/ufz/ogs +# +# 2019-05-06, Anatolii Kurotych +# - Remove unnecessary --coverage flag +# +# 2019-12-13, FeRD (Frank Dana) +# - Deprecate COVERAGE_LCOVR_EXCLUDES and COVERAGE_GCOVR_EXCLUDES lists in favor +# of tool-agnostic COVERAGE_EXCLUDES variable, or EXCLUDE setup arguments. +# - CMake 3.4+: All excludes can be specified relative to BASE_DIRECTORY +# - All setup functions: accept BASE_DIRECTORY, EXCLUDE list +# - Set lcov basedir with -b argument +# - Add automatic --demangle-cpp in lcovr, if 'c++filt' is available (can be +# overridden with NO_DEMANGLE option in setup_target_for_coverage_lcovr().) +# - Delete output dir, .info file on 'make clean' +# - Remove Python detection, since version mismatches will break gcovr +# - Minor cleanup (lowercase function names, update examples...) +# +# 2019-12-19, FeRD (Frank Dana) +# - Rename Lcov outputs, make filtered file canonical, fix cleanup for targets +# +# 2020-01-19, Bob Apthorpe +# - Added gfortran support +# +# 2020-02-17, FeRD (Frank Dana) +# - Make all add_custom_target()s VERBATIM to auto-escape wildcard characters +# in EXCLUDEs, and remove manual escaping from gcovr targets +# +# 2021-01-19, Robin Mueller +# - Add CODE_COVERAGE_VERBOSE option which will allow to print out commands which are run +# - Added the option for users to set the GCOVR_ADDITIONAL_ARGS variable to supply additional +# flags to the gcovr command +# +# 2020-05-04, Mihchael Davis +# - Add -fprofile-abs-path to make gcno files contain absolute paths +# - Fix BASE_DIRECTORY not working when defined +# - Change BYPRODUCT from folder to index.html to stop ninja from complaining about double defines +# +# 2021-05-10, Martin Stump +# - Check if the generator is multi-config before warning about non-Debug builds +# +# 2022-02-22, Marko Wehle +# - Change gcovr output from -o <filename> for --xml <filename> and --html <filename> output respectively. +# This will allow for Multiple Output Formats at the same time by making use of GCOVR_ADDITIONAL_ARGS, e.g. GCOVR_ADDITIONAL_ARGS "--txt". +# +# 2022-09-28, Sebastian Mueller +# - fix append_coverage_compiler_flags_to_target to correctly add flags +# - replace "-fprofile-arcs -ftest-coverage" with "--coverage" (equivalent) +# +# USAGE: +# +# 1. Copy this file into your cmake modules path. +# +# 2. Add the following line to your CMakeLists.txt (best inside an if-condition +# using a CMake option() to enable it just optionally): +# include(CodeCoverage) +# +# 3. 
Append necessary compiler flags for all supported source files: +# append_coverage_compiler_flags() +# Or for specific target: +# append_coverage_compiler_flags_to_target(YOUR_TARGET_NAME) +# +# 3.a (OPTIONAL) Set appropriate optimization flags, e.g. -O0, -O1 or -Og +# +# 4. If you need to exclude additional directories from the report, specify them +# using full paths in the COVERAGE_EXCLUDES variable before calling +# setup_target_for_coverage_*(). +# Example: +# set(COVERAGE_EXCLUDES +# '${PROJECT_SOURCE_DIR}/src/dir1/*' +# '/path/to/my/src/dir2/*') +# Or, use the EXCLUDE argument to setup_target_for_coverage_*(). +# Example: +# setup_target_for_coverage_lcov( +# NAME coverage +# EXECUTABLE testrunner +# EXCLUDE "${PROJECT_SOURCE_DIR}/src/dir1/*" "/path/to/my/src/dir2/*") +# +# 4.a NOTE: With CMake 3.4+, COVERAGE_EXCLUDES or EXCLUDE can also be set +# relative to the BASE_DIRECTORY (default: PROJECT_SOURCE_DIR) +# Example: +# set(COVERAGE_EXCLUDES "dir1/*") +# setup_target_for_coverage_gcovr_html( +# NAME coverage +# EXECUTABLE testrunner +# BASE_DIRECTORY "${PROJECT_SOURCE_DIR}/src" +# EXCLUDE "dir2/*") +# +# 5. Use the functions described below to create a custom make target which +# runs your test executable and produces a code coverage report. +# +# 6. Build a Debug build: +# cmake -DCMAKE_BUILD_TYPE=Debug .. +# make +# make my_coverage_target +# + +include(CMakeParseArguments) + +option(CODE_COVERAGE_VERBOSE "Verbose information" FALSE) + +# Check prereqs +find_program( GCOV_PATH gcov ) +find_program( LCOV_PATH NAMES lcov lcov.bat lcov.exe lcov.perl) +find_program( FASTCOV_PATH NAMES fastcov fastcov.py ) +find_program( GENHTML_PATH NAMES genhtml genhtml.perl genhtml.bat ) +find_program( GCOVR_PATH gcovr PATHS ${CMAKE_SOURCE_DIR}/scripts/test) +find_program( CPPFILT_PATH NAMES c++filt ) + +if(NOT GCOV_PATH) + message(FATAL_ERROR "gcov not found! Aborting...") +endif() # NOT GCOV_PATH + +# Check supported compiler (Clang, GNU and Flang) +get_property(LANGUAGES GLOBAL PROPERTY ENABLED_LANGUAGES) +foreach(LANG ${LANGUAGES}) + if("${CMAKE_${LANG}_COMPILER_ID}" MATCHES "(Apple)?[Cc]lang") + if("${CMAKE_${LANG}_COMPILER_VERSION}" VERSION_LESS 3) + message(FATAL_ERROR "Clang version must be 3.0.0 or greater! Aborting...") + endif() + elseif(NOT "${CMAKE_${LANG}_COMPILER_ID}" MATCHES "GNU" + AND NOT "${CMAKE_${LANG}_COMPILER_ID}" MATCHES "(LLVM)?[Ff]lang") + message(FATAL_ERROR "Compiler is not GNU or Flang! Aborting...") + endif() +endforeach() + +set(COVERAGE_COMPILER_FLAGS "-g --coverage" + CACHE INTERNAL "") +if(CMAKE_CXX_COMPILER_ID MATCHES "(GNU|Clang)") + include(CheckCXXCompilerFlag) + check_cxx_compiler_flag(-fprofile-abs-path HAVE_fprofile_abs_path) + if(HAVE_fprofile_abs_path) + set(COVERAGE_COMPILER_FLAGS "${COVERAGE_COMPILER_FLAGS} -fprofile-abs-path") + endif() +endif() + +set(CMAKE_Fortran_FLAGS_COVERAGE + ${COVERAGE_COMPILER_FLAGS} + CACHE STRING "Flags used by the Fortran compiler during coverage builds." + FORCE ) +set(CMAKE_CXX_FLAGS_COVERAGE + ${COVERAGE_COMPILER_FLAGS} + CACHE STRING "Flags used by the C++ compiler during coverage builds." + FORCE ) +set(CMAKE_C_FLAGS_COVERAGE + ${COVERAGE_COMPILER_FLAGS} + CACHE STRING "Flags used by the C compiler during coverage builds." + FORCE ) +set(CMAKE_EXE_LINKER_FLAGS_COVERAGE + "" + CACHE STRING "Flags used for linking binaries during coverage builds." + FORCE ) +set(CMAKE_SHARED_LINKER_FLAGS_COVERAGE + "" + CACHE STRING "Flags used by the shared libraries linker during coverage builds." 
+ FORCE ) +mark_as_advanced( + CMAKE_Fortran_FLAGS_COVERAGE + CMAKE_CXX_FLAGS_COVERAGE + CMAKE_C_FLAGS_COVERAGE + CMAKE_EXE_LINKER_FLAGS_COVERAGE + CMAKE_SHARED_LINKER_FLAGS_COVERAGE ) + +get_property(GENERATOR_IS_MULTI_CONFIG GLOBAL PROPERTY GENERATOR_IS_MULTI_CONFIG) +if(NOT (CMAKE_BUILD_TYPE STREQUAL "Debug" OR GENERATOR_IS_MULTI_CONFIG)) + message(WARNING "Code coverage results with an optimised (non-Debug) build may be misleading") +endif() # NOT (CMAKE_BUILD_TYPE STREQUAL "Debug" OR GENERATOR_IS_MULTI_CONFIG) + +if(CMAKE_C_COMPILER_ID STREQUAL "GNU" OR CMAKE_Fortran_COMPILER_ID STREQUAL "GNU") + link_libraries(gcov) +endif() + +# Defines a target for running and collection code coverage information +# Builds dependencies, runs the given executable and outputs reports. +# NOTE! The executable should always have a ZERO as exit code otherwise +# the coverage generation will not complete. +# +# setup_target_for_coverage_lcov( +# NAME testrunner_coverage # New target name +# EXECUTABLE testrunner -j ${PROCESSOR_COUNT} # Executable in PROJECT_BINARY_DIR +# DEPENDENCIES testrunner # Dependencies to build first +# BASE_DIRECTORY "../" # Base directory for report +# # (defaults to PROJECT_SOURCE_DIR) +# EXCLUDE "src/dir1/*" "src/dir2/*" # Patterns to exclude (can be relative +# # to BASE_DIRECTORY, with CMake 3.4+) +# NO_DEMANGLE # Don't demangle C++ symbols +# # even if c++filt is found +# ) +function(setup_target_for_coverage_lcov) + + set(options NO_DEMANGLE SONARQUBE) + set(oneValueArgs BASE_DIRECTORY NAME) + set(multiValueArgs EXCLUDE EXECUTABLE EXECUTABLE_ARGS DEPENDENCIES LCOV_ARGS GENHTML_ARGS) + cmake_parse_arguments(Coverage "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + + if(NOT LCOV_PATH) + message(FATAL_ERROR "lcov not found! Aborting...") + endif() # NOT LCOV_PATH + + if(NOT GENHTML_PATH) + message(FATAL_ERROR "genhtml not found! Aborting...") + endif() # NOT GENHTML_PATH + + # Set base directory (as absolute path), or default to PROJECT_SOURCE_DIR + if(DEFINED Coverage_BASE_DIRECTORY) + get_filename_component(BASEDIR ${Coverage_BASE_DIRECTORY} ABSOLUTE) + else() + set(BASEDIR ${PROJECT_SOURCE_DIR}) + endif() + + # Collect excludes (CMake 3.4+: Also compute absolute paths) + set(LCOV_EXCLUDES "") + foreach(EXCLUDE ${Coverage_EXCLUDE} ${COVERAGE_EXCLUDES} ${COVERAGE_LCOV_EXCLUDES}) + if(CMAKE_VERSION VERSION_GREATER 3.4) + get_filename_component(EXCLUDE ${EXCLUDE} ABSOLUTE BASE_DIR ${BASEDIR}) + endif() + list(APPEND LCOV_EXCLUDES "${EXCLUDE}") + endforeach() + list(REMOVE_DUPLICATES LCOV_EXCLUDES) + + # Conditional arguments + if(CPPFILT_PATH AND NOT ${Coverage_NO_DEMANGLE}) + set(GENHTML_EXTRA_ARGS "--demangle-cpp") + endif() + + # Setting up commands which will be run to generate coverage data. + # Cleanup lcov + set(LCOV_CLEAN_CMD + ${LCOV_PATH} ${Coverage_LCOV_ARGS} --gcov-tool ${GCOV_PATH} -directory . + -b ${BASEDIR} --zerocounters + ) + # Create baseline to make sure untouched files show up in the report + set(LCOV_BASELINE_CMD + ${LCOV_PATH} ${Coverage_LCOV_ARGS} --gcov-tool ${GCOV_PATH} -c -i -d . -b + ${BASEDIR} -o ${Coverage_NAME}.base + ) + # Run tests + set(LCOV_EXEC_TESTS_CMD + ${Coverage_EXECUTABLE} ${Coverage_EXECUTABLE_ARGS} + ) + # Capturing lcov counters and generating report + set(LCOV_CAPTURE_CMD + ${LCOV_PATH} ${Coverage_LCOV_ARGS} --gcov-tool ${GCOV_PATH} --directory . 
-b + ${BASEDIR} --capture --output-file ${Coverage_NAME}.capture + ) + # add baseline counters + set(LCOV_BASELINE_COUNT_CMD + ${LCOV_PATH} ${Coverage_LCOV_ARGS} --gcov-tool ${GCOV_PATH} -a ${Coverage_NAME}.base + -a ${Coverage_NAME}.capture --output-file ${Coverage_NAME}.total + ) + # filter collected data to final coverage report + set(LCOV_FILTER_CMD + ${LCOV_PATH} ${Coverage_LCOV_ARGS} --gcov-tool ${GCOV_PATH} --remove + ${Coverage_NAME}.total ${LCOV_EXCLUDES} --output-file ${Coverage_NAME}.info + ) + # Generate HTML output + set(LCOV_GEN_HTML_CMD + ${GENHTML_PATH} ${GENHTML_EXTRA_ARGS} ${Coverage_GENHTML_ARGS} -o + ${Coverage_NAME} ${Coverage_NAME}.info + ) + if(${Coverage_SONARQUBE}) + # Generate SonarQube output + set(GCOVR_XML_CMD + ${GCOVR_PATH} --sonarqube ${Coverage_NAME}_sonarqube.xml -r ${BASEDIR} ${GCOVR_ADDITIONAL_ARGS} + ${GCOVR_EXCLUDE_ARGS} --object-directory=${PROJECT_BINARY_DIR} + ) + set(GCOVR_XML_CMD_COMMAND + COMMAND ${GCOVR_XML_CMD} + ) + set(GCOVR_XML_CMD_BYPRODUCTS ${Coverage_NAME}_sonarqube.xml) + set(GCOVR_XML_CMD_COMMENT COMMENT "SonarQube code coverage info report saved in ${Coverage_NAME}_sonarqube.xml.") + endif() + + + if(CODE_COVERAGE_VERBOSE) + message(STATUS "Executed command report") + message(STATUS "Command to clean up lcov: ") + string(REPLACE ";" " " LCOV_CLEAN_CMD_SPACED "${LCOV_CLEAN_CMD}") + message(STATUS "${LCOV_CLEAN_CMD_SPACED}") + + message(STATUS "Command to create baseline: ") + string(REPLACE ";" " " LCOV_BASELINE_CMD_SPACED "${LCOV_BASELINE_CMD}") + message(STATUS "${LCOV_BASELINE_CMD_SPACED}") + + message(STATUS "Command to run the tests: ") + string(REPLACE ";" " " LCOV_EXEC_TESTS_CMD_SPACED "${LCOV_EXEC_TESTS_CMD}") + message(STATUS "${LCOV_EXEC_TESTS_CMD_SPACED}") + + message(STATUS "Command to capture counters and generate report: ") + string(REPLACE ";" " " LCOV_CAPTURE_CMD_SPACED "${LCOV_CAPTURE_CMD}") + message(STATUS "${LCOV_CAPTURE_CMD_SPACED}") + + message(STATUS "Command to add baseline counters: ") + string(REPLACE ";" " " LCOV_BASELINE_COUNT_CMD_SPACED "${LCOV_BASELINE_COUNT_CMD}") + message(STATUS "${LCOV_BASELINE_COUNT_CMD_SPACED}") + + message(STATUS "Command to filter collected data: ") + string(REPLACE ";" " " LCOV_FILTER_CMD_SPACED "${LCOV_FILTER_CMD}") + message(STATUS "${LCOV_FILTER_CMD_SPACED}") + + message(STATUS "Command to generate lcov HTML output: ") + string(REPLACE ";" " " LCOV_GEN_HTML_CMD_SPACED "${LCOV_GEN_HTML_CMD}") + message(STATUS "${LCOV_GEN_HTML_CMD_SPACED}") + + if(${Coverage_SONARQUBE}) + message(STATUS "Command to generate SonarQube XML output: ") + string(REPLACE ";" " " GCOVR_XML_CMD_SPACED "${GCOVR_XML_CMD}") + message(STATUS "${GCOVR_XML_CMD_SPACED}") + endif() + endif() + + # Setup target + add_custom_target(${Coverage_NAME} + COMMAND ${LCOV_CLEAN_CMD} + COMMAND ${LCOV_BASELINE_CMD} + COMMAND ${LCOV_EXEC_TESTS_CMD} + COMMAND ${LCOV_CAPTURE_CMD} + COMMAND ${LCOV_BASELINE_COUNT_CMD} + COMMAND ${LCOV_FILTER_CMD} + COMMAND ${LCOV_GEN_HTML_CMD} + ${GCOVR_XML_CMD_COMMAND} + + # Set output files as GENERATED (will be removed on 'make clean') + BYPRODUCTS + ${Coverage_NAME}.base + ${Coverage_NAME}.capture + ${Coverage_NAME}.total + ${Coverage_NAME}.info + ${GCOVR_XML_CMD_BYPRODUCTS} + ${Coverage_NAME}/index.html + WORKING_DIRECTORY ${PROJECT_BINARY_DIR} + DEPENDS ${Coverage_DEPENDENCIES} + VERBATIM # Protect arguments to commands + COMMENT "Resetting code coverage counters to zero.\nProcessing code coverage counters and generating report." 
+ ) + + # Show where to find the lcov info report + add_custom_command(TARGET ${Coverage_NAME} POST_BUILD + COMMAND ; + COMMENT "Lcov code coverage info report saved in ${Coverage_NAME}.info." + ${GCOVR_XML_CMD_COMMENT} + ) + + # Show info where to find the report + add_custom_command(TARGET ${Coverage_NAME} POST_BUILD + COMMAND ; + COMMENT "Open ./${Coverage_NAME}/index.html in your browser to view the coverage report." + ) + +endfunction() # setup_target_for_coverage_lcov + +# Defines a target for running and collection code coverage information +# Builds dependencies, runs the given executable and outputs reports. +# NOTE! The executable should always have a ZERO as exit code otherwise +# the coverage generation will not complete. +# +# setup_target_for_coverage_gcovr_xml( +# NAME ctest_coverage # New target name +# EXECUTABLE ctest -j ${PROCESSOR_COUNT} # Executable in PROJECT_BINARY_DIR +# DEPENDENCIES executable_target # Dependencies to build first +# BASE_DIRECTORY "../" # Base directory for report +# # (defaults to PROJECT_SOURCE_DIR) +# EXCLUDE "src/dir1/*" "src/dir2/*" # Patterns to exclude (can be relative +# # to BASE_DIRECTORY, with CMake 3.4+) +# ) +# The user can set the variable GCOVR_ADDITIONAL_ARGS to supply additional flags to the +# GCVOR command. +function(setup_target_for_coverage_gcovr_xml) + + set(options NONE) + set(oneValueArgs BASE_DIRECTORY NAME) + set(multiValueArgs EXCLUDE EXECUTABLE EXECUTABLE_ARGS DEPENDENCIES) + cmake_parse_arguments(Coverage "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + + if(NOT GCOVR_PATH) + message(FATAL_ERROR "gcovr not found! Aborting...") + endif() # NOT GCOVR_PATH + + # Set base directory (as absolute path), or default to PROJECT_SOURCE_DIR + if(DEFINED Coverage_BASE_DIRECTORY) + get_filename_component(BASEDIR ${Coverage_BASE_DIRECTORY} ABSOLUTE) + else() + set(BASEDIR ${PROJECT_SOURCE_DIR}) + endif() + + # Collect excludes (CMake 3.4+: Also compute absolute paths) + set(GCOVR_EXCLUDES "") + foreach(EXCLUDE ${Coverage_EXCLUDE} ${COVERAGE_EXCLUDES} ${COVERAGE_GCOVR_EXCLUDES}) + if(CMAKE_VERSION VERSION_GREATER 3.4) + get_filename_component(EXCLUDE ${EXCLUDE} ABSOLUTE BASE_DIR ${BASEDIR}) + endif() + list(APPEND GCOVR_EXCLUDES "${EXCLUDE}") + endforeach() + list(REMOVE_DUPLICATES GCOVR_EXCLUDES) + + # Combine excludes to several -e arguments + set(GCOVR_EXCLUDE_ARGS "") + foreach(EXCLUDE ${GCOVR_EXCLUDES}) + list(APPEND GCOVR_EXCLUDE_ARGS "-e") + list(APPEND GCOVR_EXCLUDE_ARGS "${EXCLUDE}") + endforeach() + + # Set up commands which will be run to generate coverage data + # Run tests + set(GCOVR_XML_EXEC_TESTS_CMD + ${Coverage_EXECUTABLE} ${Coverage_EXECUTABLE_ARGS} + ) + # Running gcovr + set(GCOVR_XML_CMD + ${GCOVR_PATH} --xml ${Coverage_NAME}.xml -r ${BASEDIR} ${GCOVR_ADDITIONAL_ARGS} + ${GCOVR_EXCLUDE_ARGS} --object-directory=${PROJECT_BINARY_DIR} + ) + + if(CODE_COVERAGE_VERBOSE) + message(STATUS "Executed command report") + + message(STATUS "Command to run tests: ") + string(REPLACE ";" " " GCOVR_XML_EXEC_TESTS_CMD_SPACED "${GCOVR_XML_EXEC_TESTS_CMD}") + message(STATUS "${GCOVR_XML_EXEC_TESTS_CMD_SPACED}") + + message(STATUS "Command to generate gcovr XML coverage data: ") + string(REPLACE ";" " " GCOVR_XML_CMD_SPACED "${GCOVR_XML_CMD}") + message(STATUS "${GCOVR_XML_CMD_SPACED}") + endif() + + add_custom_target(${Coverage_NAME} + COMMAND ${GCOVR_XML_EXEC_TESTS_CMD} + COMMAND ${GCOVR_XML_CMD} + + BYPRODUCTS ${Coverage_NAME}.xml + WORKING_DIRECTORY ${PROJECT_BINARY_DIR} + DEPENDS ${Coverage_DEPENDENCIES} + VERBATIM 
# Protect arguments to commands + COMMENT "Running gcovr to produce Cobertura code coverage report." + ) + + # Show info where to find the report + add_custom_command(TARGET ${Coverage_NAME} POST_BUILD + COMMAND ; + COMMENT "Cobertura code coverage report saved in ${Coverage_NAME}.xml." + ) +endfunction() # setup_target_for_coverage_gcovr_xml + +# Defines a target for running and collection code coverage information +# Builds dependencies, runs the given executable and outputs reports. +# NOTE! The executable should always have a ZERO as exit code otherwise +# the coverage generation will not complete. +# +# setup_target_for_coverage_gcovr_html( +# NAME ctest_coverage # New target name +# EXECUTABLE ctest -j ${PROCESSOR_COUNT} # Executable in PROJECT_BINARY_DIR +# DEPENDENCIES executable_target # Dependencies to build first +# BASE_DIRECTORY "../" # Base directory for report +# # (defaults to PROJECT_SOURCE_DIR) +# EXCLUDE "src/dir1/*" "src/dir2/*" # Patterns to exclude (can be relative +# # to BASE_DIRECTORY, with CMake 3.4+) +# ) +# The user can set the variable GCOVR_ADDITIONAL_ARGS to supply additional flags to the +# GCVOR command. +function(setup_target_for_coverage_gcovr_html) + + set(options NONE) + set(oneValueArgs BASE_DIRECTORY NAME) + set(multiValueArgs EXCLUDE EXECUTABLE EXECUTABLE_ARGS DEPENDENCIES) + cmake_parse_arguments(Coverage "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + + if(NOT GCOVR_PATH) + message(FATAL_ERROR "gcovr not found! Aborting...") + endif() # NOT GCOVR_PATH + + # Set base directory (as absolute path), or default to PROJECT_SOURCE_DIR + if(DEFINED Coverage_BASE_DIRECTORY) + get_filename_component(BASEDIR ${Coverage_BASE_DIRECTORY} ABSOLUTE) + else() + set(BASEDIR ${PROJECT_SOURCE_DIR}) + endif() + + # Collect excludes (CMake 3.4+: Also compute absolute paths) + set(GCOVR_EXCLUDES "") + foreach(EXCLUDE ${Coverage_EXCLUDE} ${COVERAGE_EXCLUDES} ${COVERAGE_GCOVR_EXCLUDES}) + if(CMAKE_VERSION VERSION_GREATER 3.4) + get_filename_component(EXCLUDE ${EXCLUDE} ABSOLUTE BASE_DIR ${BASEDIR}) + endif() + list(APPEND GCOVR_EXCLUDES "${EXCLUDE}") + endforeach() + list(REMOVE_DUPLICATES GCOVR_EXCLUDES) + + # Combine excludes to several -e arguments + set(GCOVR_EXCLUDE_ARGS "") + foreach(EXCLUDE ${GCOVR_EXCLUDES}) + list(APPEND GCOVR_EXCLUDE_ARGS "-e") + list(APPEND GCOVR_EXCLUDE_ARGS "${EXCLUDE}") + endforeach() + + # Set up commands which will be run to generate coverage data + # Run tests + set(GCOVR_HTML_EXEC_TESTS_CMD + ${Coverage_EXECUTABLE} ${Coverage_EXECUTABLE_ARGS} + ) + # Create folder + set(GCOVR_HTML_FOLDER_CMD + ${CMAKE_COMMAND} -E make_directory ${PROJECT_BINARY_DIR}/${Coverage_NAME} + ) + # Running gcovr + set(GCOVR_HTML_CMD + ${GCOVR_PATH} --html ${Coverage_NAME}/index.html --html-details -r ${BASEDIR} ${GCOVR_ADDITIONAL_ARGS} + ${GCOVR_EXCLUDE_ARGS} --object-directory=${PROJECT_BINARY_DIR} + ) + + if(CODE_COVERAGE_VERBOSE) + message(STATUS "Executed command report") + + message(STATUS "Command to run tests: ") + string(REPLACE ";" " " GCOVR_HTML_EXEC_TESTS_CMD_SPACED "${GCOVR_HTML_EXEC_TESTS_CMD}") + message(STATUS "${GCOVR_HTML_EXEC_TESTS_CMD_SPACED}") + + message(STATUS "Command to create a folder: ") + string(REPLACE ";" " " GCOVR_HTML_FOLDER_CMD_SPACED "${GCOVR_HTML_FOLDER_CMD}") + message(STATUS "${GCOVR_HTML_FOLDER_CMD_SPACED}") + + message(STATUS "Command to generate gcovr HTML coverage data: ") + string(REPLACE ";" " " GCOVR_HTML_CMD_SPACED "${GCOVR_HTML_CMD}") + message(STATUS "${GCOVR_HTML_CMD_SPACED}") + endif() + + 
add_custom_target(${Coverage_NAME} + COMMAND ${GCOVR_HTML_EXEC_TESTS_CMD} + COMMAND ${GCOVR_HTML_FOLDER_CMD} + COMMAND ${GCOVR_HTML_CMD} + + BYPRODUCTS ${PROJECT_BINARY_DIR}/${Coverage_NAME}/index.html # report directory + WORKING_DIRECTORY ${PROJECT_BINARY_DIR} + DEPENDS ${Coverage_DEPENDENCIES} + VERBATIM # Protect arguments to commands + COMMENT "Running gcovr to produce HTML code coverage report." + ) + + # Show info where to find the report + add_custom_command(TARGET ${Coverage_NAME} POST_BUILD + COMMAND ; + COMMENT "Open ./${Coverage_NAME}/index.html in your browser to view the coverage report." + ) + +endfunction() # setup_target_for_coverage_gcovr_html + +# Defines a target for running and collection code coverage information +# Builds dependencies, runs the given executable and outputs reports. +# NOTE! The executable should always have a ZERO as exit code otherwise +# the coverage generation will not complete. +# +# setup_target_for_coverage_fastcov( +# NAME testrunner_coverage # New target name +# EXECUTABLE testrunner -j ${PROCESSOR_COUNT} # Executable in PROJECT_BINARY_DIR +# DEPENDENCIES testrunner # Dependencies to build first +# BASE_DIRECTORY "../" # Base directory for report +# # (defaults to PROJECT_SOURCE_DIR) +# EXCLUDE "src/dir1/" "src/dir2/" # Patterns to exclude. +# NO_DEMANGLE # Don't demangle C++ symbols +# # even if c++filt is found +# SKIP_HTML # Don't create html report +# POST_CMD perl -i -pe s!${PROJECT_SOURCE_DIR}/!!g ctest_coverage.json # E.g. for stripping source dir from file paths +# ) +function(setup_target_for_coverage_fastcov) + + set(options NO_DEMANGLE SKIP_HTML) + set(oneValueArgs BASE_DIRECTORY NAME) + set(multiValueArgs EXCLUDE EXECUTABLE EXECUTABLE_ARGS DEPENDENCIES FASTCOV_ARGS GENHTML_ARGS POST_CMD) + cmake_parse_arguments(Coverage "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + + if(NOT FASTCOV_PATH) + message(FATAL_ERROR "fastcov not found! Aborting...") + endif() + + if(NOT Coverage_SKIP_HTML AND NOT GENHTML_PATH) + message(FATAL_ERROR "genhtml not found! 
Aborting...") + endif() + + # Set base directory (as absolute path), or default to PROJECT_SOURCE_DIR + if(Coverage_BASE_DIRECTORY) + get_filename_component(BASEDIR ${Coverage_BASE_DIRECTORY} ABSOLUTE) + else() + set(BASEDIR ${PROJECT_SOURCE_DIR}) + endif() + + # Collect excludes (Patterns, not paths, for fastcov) + set(FASTCOV_EXCLUDES "") + foreach(EXCLUDE ${Coverage_EXCLUDE} ${COVERAGE_EXCLUDES} ${COVERAGE_FASTCOV_EXCLUDES}) + list(APPEND FASTCOV_EXCLUDES "${EXCLUDE}") + endforeach() + list(REMOVE_DUPLICATES FASTCOV_EXCLUDES) + + # Conditional arguments + if(CPPFILT_PATH AND NOT ${Coverage_NO_DEMANGLE}) + set(GENHTML_EXTRA_ARGS "--demangle-cpp") + endif() + + # Set up commands which will be run to generate coverage data + set(FASTCOV_EXEC_TESTS_CMD ${Coverage_EXECUTABLE} ${Coverage_EXECUTABLE_ARGS}) + + set(FASTCOV_CAPTURE_CMD ${FASTCOV_PATH} ${Coverage_FASTCOV_ARGS} --gcov ${GCOV_PATH} + --search-directory ${BASEDIR} + --process-gcno + --output ${Coverage_NAME}.json + --exclude ${FASTCOV_EXCLUDES} + ) + + set(FASTCOV_CONVERT_CMD ${FASTCOV_PATH} + -C ${Coverage_NAME}.json --lcov --output ${Coverage_NAME}.info + ) + + if(Coverage_SKIP_HTML) + set(FASTCOV_HTML_CMD ";") + else() + set(FASTCOV_HTML_CMD ${GENHTML_PATH} ${GENHTML_EXTRA_ARGS} ${Coverage_GENHTML_ARGS} + -o ${Coverage_NAME} ${Coverage_NAME}.info + ) + endif() + + set(FASTCOV_POST_CMD ";") + if(Coverage_POST_CMD) + set(FASTCOV_POST_CMD ${Coverage_POST_CMD}) + endif() + + if(CODE_COVERAGE_VERBOSE) + message(STATUS "Code coverage commands for target ${Coverage_NAME} (fastcov):") + + message(" Running tests:") + string(REPLACE ";" " " FASTCOV_EXEC_TESTS_CMD_SPACED "${FASTCOV_EXEC_TESTS_CMD}") + message(" ${FASTCOV_EXEC_TESTS_CMD_SPACED}") + + message(" Capturing fastcov counters and generating report:") + string(REPLACE ";" " " FASTCOV_CAPTURE_CMD_SPACED "${FASTCOV_CAPTURE_CMD}") + message(" ${FASTCOV_CAPTURE_CMD_SPACED}") + + message(" Converting fastcov .json to lcov .info:") + string(REPLACE ";" " " FASTCOV_CONVERT_CMD_SPACED "${FASTCOV_CONVERT_CMD}") + message(" ${FASTCOV_CONVERT_CMD_SPACED}") + + if(NOT Coverage_SKIP_HTML) + message(" Generating HTML report: ") + string(REPLACE ";" " " FASTCOV_HTML_CMD_SPACED "${FASTCOV_HTML_CMD}") + message(" ${FASTCOV_HTML_CMD_SPACED}") + endif() + if(Coverage_POST_CMD) + message(" Running post command: ") + string(REPLACE ";" " " FASTCOV_POST_CMD_SPACED "${FASTCOV_POST_CMD}") + message(" ${FASTCOV_POST_CMD_SPACED}") + endif() + endif() + + # Setup target + add_custom_target(${Coverage_NAME} + + # Cleanup fastcov + COMMAND ${FASTCOV_PATH} ${Coverage_FASTCOV_ARGS} --gcov ${GCOV_PATH} + --search-directory ${BASEDIR} + --zerocounters + + COMMAND ${FASTCOV_EXEC_TESTS_CMD} + COMMAND ${FASTCOV_CAPTURE_CMD} + COMMAND ${FASTCOV_CONVERT_CMD} + COMMAND ${FASTCOV_HTML_CMD} + COMMAND ${FASTCOV_POST_CMD} + + # Set output files as GENERATED (will be removed on 'make clean') + BYPRODUCTS + ${Coverage_NAME}.info + ${Coverage_NAME}.json + ${Coverage_NAME}/index.html # report directory + + WORKING_DIRECTORY ${PROJECT_BINARY_DIR} + DEPENDS ${Coverage_DEPENDENCIES} + VERBATIM # Protect arguments to commands + COMMENT "Resetting code coverage counters to zero. Processing code coverage counters and generating report." 
+ ) + + set(INFO_MSG "fastcov code coverage info report saved in ${Coverage_NAME}.info and ${Coverage_NAME}.json.") + if(NOT Coverage_SKIP_HTML) + string(APPEND INFO_MSG " Open ${PROJECT_BINARY_DIR}/${Coverage_NAME}/index.html in your browser to view the coverage report.") + endif() + # Show where to find the fastcov info report + add_custom_command(TARGET ${Coverage_NAME} POST_BUILD + COMMAND ${CMAKE_COMMAND} -E echo ${INFO_MSG} + ) + +endfunction() # setup_target_for_coverage_fastcov + +function(append_coverage_compiler_flags) + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${COVERAGE_COMPILER_FLAGS}" PARENT_SCOPE) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${COVERAGE_COMPILER_FLAGS}" PARENT_SCOPE) + set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} ${COVERAGE_COMPILER_FLAGS}" PARENT_SCOPE) + message(STATUS "Appending code coverage compiler flags: ${COVERAGE_COMPILER_FLAGS}") +endfunction() # append_coverage_compiler_flags + +# Setup coverage for specific library +function(append_coverage_compiler_flags_to_target name) + separate_arguments(_flag_list NATIVE_COMMAND "${COVERAGE_COMPILER_FLAGS}") + target_compile_options(${name} PRIVATE ${_flag_list}) + if(CMAKE_C_COMPILER_ID STREQUAL "GNU" OR CMAKE_Fortran_COMPILER_ID STREQUAL "GNU") + target_link_libraries(${name} PRIVATE gcov) + endif() +endfunction() diff --git a/cmake/PybindModuleCreation.cmake b/cmake/PybindModuleCreation.cmake index 18f4abc38e2537c3f4d949f08772a57b90758cb0..87e70fc38c9e4ec4ddb44cbe5d7fb2a31c2e94d6 100644 --- a/cmake/PybindModuleCreation.cmake +++ b/cmake/PybindModuleCreation.cmake @@ -1,23 +1,21 @@ function(generate_python_binding name target_to_bind) - if (PYBIND) - add_definitions(-DPYBIND) - Include(FetchContent) + add_definitions(-DPYBIND) + Include(FetchContent) - FetchContent_Declare( - PyBind11 - GIT_REPOSITORY https://github.com/pybind/pybind11.git - GIT_TAG v2.10.4 # or a later release - ) + FetchContent_Declare( + PyBind11 + GIT_REPOSITORY https://github.com/pybind/pybind11.git + GIT_TAG v2.10.4 # or a later release + ) - # Use the New FindPython mode, recommanded. Requires CMake 3.15+ - find_package(Python COMPONENTS Interpreter Development) - FetchContent_MakeAvailable(PyBind11) + # Use the New FindPython mode, recommanded. 
Requires CMake 3.15+ + find_package(Python COMPONENTS Interpreter Development) + FetchContent_MakeAvailable(PyBind11) - message(STATUS "Creating binding for module ${name}") - file(GLOB_RECURSE pybind_src_files "python_binding/*.cpp") + message(STATUS "Creating binding for module ${name}") + file(GLOB_RECURSE pybind_src_files "python_binding/*.cpp") - pybind11_add_module(${name} MODULE ${pybind_src_files} "NO_EXTRAS") # NO EXTRA recquired for pip install - target_include_directories(${name} PUBLIC "python_binding") - target_link_libraries(${name} PUBLIC ${target_to_bind}) - endif() + pybind11_add_module(${name} MODULE ${pybind_src_files} "NO_EXTRAS") # NO EXTRA recquired for pip install + target_include_directories(${name} PUBLIC "python_binding") + target_link_libraries(${name} PUBLIC ${target_to_bind}) endfunction() diff --git a/include/aidge/aidge_backend_cpu.hpp b/include/aidge/aidge_backend_cpu.hpp deleted file mode 100644 index ce723a528fef7a1e62851a854b06feba34525f09..0000000000000000000000000000000000000000 --- a/include/aidge/aidge_backend_cpu.hpp +++ /dev/null @@ -1,27 +0,0 @@ -/******************************************************************************** - * Copyright (c) 2023 CEA-List - * - * This program and the accompanying materials are made available under the - * terms of the Eclipse Public License 2.0 which is available at - * http://www.eclipse.org/legal/epl-2.0. - * - * SPDX-License-Identifier: EPL-2.0 - * - ********************************************************************************/ - -#ifndef __AIDGE_CPU_IMPORTS_H__ -#define __AIDGE_CPU_IMPORTS_H__ - -#include "aidge/data/TensorImpl.hpp" -#include "aidge/operator/AddImpl.hpp" -#include "aidge/operator/AvgPoolingImpl.hpp" -#include "aidge/operator/BatchNormImpl.hpp" -#include "aidge/operator/ConvDepthWiseImpl.hpp" -#include "aidge/operator/ConvImpl.hpp" -#include "aidge/operator/FCImpl.hpp" -#include "aidge/operator/LeakyReLUImpl.hpp" -#include "aidge/operator/ProducerImpl.hpp" -#include "aidge/operator/ReLUImpl.hpp" -#include "aidge/operator/SoftmaxImpl.hpp" - -#endif /* __AIDGE_CPU_IMPORTS_H__ */ \ No newline at end of file diff --git a/include/aidge/backend/cpu.hpp b/include/aidge/backend/cpu.hpp new file mode 100644 index 0000000000000000000000000000000000000000..95b2f7b8e2ff70c9b9224bea1137ad74e469ffb8 --- /dev/null +++ b/include/aidge/backend/cpu.hpp @@ -0,0 +1,27 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. 
+ * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#ifndef AIDGE_CPU_IMPORTS_H_ +#define AIDGE_CPU_IMPORTS_H_ + +#include "aidge/backend/cpu/data/TensorImpl.hpp" +#include "aidge/backend/cpu/operator/AddImpl.hpp" +#include "aidge/backend/cpu/operator/AvgPoolingImpl.hpp" +#include "aidge/backend/cpu/operator/BatchNormImpl.hpp" +#include "aidge/backend/cpu/operator/ConvDepthWiseImpl.hpp" +#include "aidge/backend/cpu/operator/ConvImpl.hpp" +#include "aidge/backend/cpu/operator/FCImpl.hpp" +#include "aidge/backend/cpu/operator/LeakyReLUImpl.hpp" +#include "aidge/backend/cpu/operator/ProducerImpl.hpp" +#include "aidge/backend/cpu/operator/ReLUImpl.hpp" +#include "aidge/backend/cpu/operator/SoftmaxImpl.hpp" + +#endif /* AIDGE_CPU_IMPORTS_H_ */ \ No newline at end of file diff --git a/include/aidge/data/TensorImpl.hpp b/include/aidge/backend/cpu/data/TensorImpl.hpp similarity index 91% rename from include/aidge/data/TensorImpl.hpp rename to include/aidge/backend/cpu/data/TensorImpl.hpp index c3cc19547f49d4200af3244c67daba33b5d6618a..014939e106e5891c86b007f4bd3905c765ec8754 100644 --- a/include/aidge/data/TensorImpl.hpp +++ b/include/aidge/backend/cpu/data/TensorImpl.hpp @@ -1,75 +1,74 @@ -#ifndef __AIDGE_CPU_DATA_TENSORIMPL_H__ -#define __AIDGE_CPU_DATA_TENSORIMPL_H__ - -#include "aidge/backend/TensorImpl.hpp" -#include "aidge/data/Tensor.hpp" -#include "aidge/utils/Registrar.hpp" -#include "aidge/utils/Types.h" - -namespace Aidge { -template <class T> -class TensorImpl_cpu : public TensorImpl { - private: - const Tensor &mTensor; // Impl needs to access Tensor information, but is not - // supposed to change it! - std::vector<T> mData; - - public: - static constexpr const char *Backend = "cpu"; - - TensorImpl_cpu(const Tensor &tensor) : TensorImpl(Backend), mTensor(tensor) {} - - bool operator==(const TensorImpl &otherImpl) const override final { - std::size_t i = 0; - for (; i < mTensor.size() && - mData[i] == reinterpret_cast<const TensorImpl_cpu<T> &>(otherImpl).data()[i]; - ++i) { - } - return i == mTensor.size(); - } - - static std::unique_ptr<TensorImpl_cpu> create(const Tensor &tensor) { - return std::make_unique<TensorImpl_cpu<T>>(tensor); - } - - // native interface - const std::vector<T> &data() const { return mData; } - - std::size_t scalarSize() const override { return sizeof(T); } - - void copy(const void *src, NbElts_t length, std::size_t offset = 0) override { - std::copy(static_cast<const T *>(src), static_cast<const T *>(src) + length, - static_cast<T *>(rawPtr())+offset); - } - - void *rawPtr() override { - lazyInit(mData); - return mData.data(); - }; - - virtual ~TensorImpl_cpu() = default; - - void setRawPtr(void *ptr) override final { - T *newPtr = static_cast<T *>(ptr); - mData = std::vector<T>(newPtr, newPtr + mTensor.size()); - }; - - private: - void lazyInit(std::vector<T> &data) { - assert(mTensor.dataType() == NativeType<T>::type); - - if (data.size() != mTensor.size()) data.resize(mTensor.size()); - } -}; - -namespace { -static Registrar<Tensor> registrarTensorImpl_cpu_Float64( - {"cpu", DataType::Float64}, Aidge::TensorImpl_cpu<double>::create); -static Registrar<Tensor> registrarTensorImpl_cpu_Float32( - {"cpu", DataType::Float32}, Aidge::TensorImpl_cpu<float>::create); -static Registrar<Tensor> registrarTensorImpl_cpu_Int32( - {"cpu", DataType::Int32}, Aidge::TensorImpl_cpu<int>::create); -} // namespace -} // namespace Aidge - -#endif /* __AIDGE_CPU_DATA_TENSORIMPL_H__ */ +#ifndef 
AIDGE_CPU_DATA_TENSORIMPL_H_ +#define AIDGE_CPU_DATA_TENSORIMPL_H_ + +#include "aidge/backend/TensorImpl.hpp" +#include "aidge/data/Tensor.hpp" +#include "aidge/utils/Registrar.hpp" +#include "aidge/utils/Types.h" + +namespace Aidge { +template <class T> +class TensorImpl_cpu : public TensorImpl { + private: + const Tensor &mTensor; // Impl needs to access Tensor information, but is not + // supposed to change it! + std::vector<T> mData; + + public: + static constexpr const char *Backend = "cpu"; + + TensorImpl_cpu(const Tensor &tensor) : TensorImpl(Backend), mTensor(tensor) {} + + bool operator==(const TensorImpl &otherImpl) const override final { + std::size_t i = 0; + for (; i < mTensor.size() && + mData[i] == reinterpret_cast<const TensorImpl_cpu<T> &>(otherImpl).data()[i]; + ++i) { + } + return i == mTensor.size(); + } + + static std::unique_ptr<TensorImpl_cpu> create(const Tensor &tensor) { + return std::make_unique<TensorImpl_cpu<T>>(tensor); + } + + // native interface + const std::vector<T> &data() const { return mData; } + + std::size_t scalarSize() const override { return sizeof(T); } + + void copy(const void *src, NbElts_t length, std::size_t offset = 0) override { + std::copy(static_cast<const T *>(src), static_cast<const T *>(src) + length, + static_cast<T *>(rawPtr())+offset); + } + + void *rawPtr() override { + lazyInit(mData); + return mData.data(); + }; + + virtual ~TensorImpl_cpu() = default; + + void setRawPtr(void *ptr) override final { + T *newPtr = static_cast<T *>(ptr); + mData = std::vector<T>(newPtr, newPtr + mTensor.size()); + }; + + private: + void lazyInit(std::vector<T> &data) { + assert(mTensor.dataType() == NativeType<T>::type); + + if (data.size() != mTensor.size()) data.resize(mTensor.size()); + } +}; +namespace { +static Registrar<Tensor> registrarTensorImpl_cpu_Float64( + {"cpu", DataType::Float64}, Aidge::TensorImpl_cpu<double>::create); +static Registrar<Tensor> registrarTensorImpl_cpu_Float32( + {"cpu", DataType::Float32}, Aidge::TensorImpl_cpu<float>::create); +static Registrar<Tensor> registrarTensorImpl_cpu_Int32( + {"cpu", DataType::Int32}, Aidge::TensorImpl_cpu<int>::create); +} // namespace +} // namespace Aidge + +#endif /* AIDGE_CPU_DATA_TENSORIMPL_H_ */ diff --git a/include/aidge/operator/AddImpl.hpp b/include/aidge/backend/cpu/operator/AddImpl.hpp similarity index 88% rename from include/aidge/operator/AddImpl.hpp rename to include/aidge/backend/cpu/operator/AddImpl.hpp index 8bd954c0d1dba40fe666e5aad7be47a65033e607..6e1cd03a3af81ee85f4f9e0e212af7c02089734e 100644 --- a/include/aidge/operator/AddImpl.hpp +++ b/include/aidge/backend/cpu/operator/AddImpl.hpp @@ -9,8 +9,8 @@ * ********************************************************************************/ -#ifndef __AIDGE_CPU_OPERATOR_ADDIMPL_H__ -#define __AIDGE_CPU_OPERATOR_ADDIMPL_H__ +#ifndef AIDGE_CPU_OPERATOR_ADDIMPL_H_ +#define AIDGE_CPU_OPERATOR_ADDIMPL_H_ #include "aidge/backend/OperatorImpl.hpp" #include "aidge/operator/Add.hpp" @@ -79,9 +79,10 @@ class AddImpl_cpu : public OperatorImpl { return 0; } - NbElts_t getRequiredMemory(__attribute__((unused)) const IOIndex_t outputIdx, const std::vector<DimSize_t>& inputsSize) const override final { + NbElts_t getRequiredMemory(const IOIndex_t outputIdx, const std::vector<DimSize_t>& inputsSize) const override final { // Requires the whole tensors, regardless of available data on inputs assert(outputIdx == 0 && "operator has only one output"); + (void) outputIdx; const auto& outputDims = 
std::static_pointer_cast<Tensor>(mOp.getOutput(0))->dims(); return std::accumulate(outputDims.begin(), outputDims.end(), NbElts_t(1), std::multiplies<NbElts_t>()); @@ -96,6 +97,7 @@ class AddImpl_cpu : public OperatorImpl { assert(outputIdx < mNbProducedData.size()); return mNbProducedData[outputIdx]; } + void updateConsummerProducer() override final; void forward() { // nothing @@ -123,12 +125,13 @@ class AddImpl_cpu<1> : public OperatorImpl { NbElts_t getNbRequiredProtected(const IOIndex_t /*inputIdx*/) const override final; - NbElts_t getRequiredMemory(__attribute__((unused)) const IOIndex_t outputIdx, - __attribute__((unused)) const std::vector<DimSize_t> &inputsSize) const override final; + NbElts_t getRequiredMemory(const IOIndex_t /*outputIdx*/, + const std::vector<DimSize_t> &/*inputsSize*/) const override final; NbElts_t getNbConsumedData(const IOIndex_t /*inputIdx*/) const override final; NbElts_t getNbProducedData(const IOIndex_t /*outputIdx*/) const override final; + void updateConsummerProducer() override final; void forward(); @@ -154,12 +157,13 @@ class AddImpl_cpu<2> : public OperatorImpl { NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final; - NbElts_t getRequiredMemory(__attribute__((unused)) const IOIndex_t outputIdx, - __attribute__((unused)) const std::vector<DimSize_t>& inputsSize) const override final; + NbElts_t getRequiredMemory(const IOIndex_t /*outputIdx*/, + const std::vector<DimSize_t>& /*inputsSize*/) const override final; NbElts_t getNbConsumedData(const IOIndex_t inputIdx) const override final; NbElts_t getNbProducedData(const IOIndex_t /*outputIdx*/) const override final; + void updateConsummerProducer() override final; void forward(); @@ -185,11 +189,12 @@ class AddImpl_cpu<3> : public OperatorImpl { NbElts_t getNbRequiredProtected(const IOIndex_t /*inputIdx*/) const override final; - NbElts_t getRequiredMemory(__attribute__((unused)) const IOIndex_t outputIdx, const std::vector<DimSize_t>& /*inputsSize*/) const override final; + NbElts_t getRequiredMemory(const IOIndex_t /*outputIdx*/, const std::vector<DimSize_t>& /*inputsSize*/) const override final; NbElts_t getNbConsumedData(const IOIndex_t inputIdx) const override final; NbElts_t getNbProducedData(const IOIndex_t outputIdx) const override final; + void updateConsummerProducer() override final; void forward(); @@ -203,4 +208,4 @@ static Registrar<Add_Op<3>> registrarAddImpl3I_cpu("cpu", Aidge::AddImpl_cpu<3>: } // namespace } // namespace Aidge -#endif /* __AIDGE_CPU_OPERATOR_ADDIMPL_H__ */ \ No newline at end of file +#endif /* AIDGE_CPU_OPERATOR_ADDIMPL_H_ */ diff --git a/include/aidge/operator/AddImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/AddImpl_forward_kernels.hpp similarity index 92% rename from include/aidge/operator/AddImpl_forward_kernels.hpp rename to include/aidge/backend/cpu/operator/AddImpl_forward_kernels.hpp index f968f94b5b5f5f7708a9f753a7d0a02e6274cb98..490598599aedf24b26865ce6a1ddb3fe32044b1b 100644 --- a/include/aidge/operator/AddImpl_forward_kernels.hpp +++ b/include/aidge/backend/cpu/operator/AddImpl_forward_kernels.hpp @@ -1,87 +1,87 @@ -/******************************************************************************** - * Copyright (c) 2023 CEA-List - * - * This program and the accompanying materials are made available under the - * terms of the Eclipse Public License 2.0 which is available at - * http://www.eclipse.org/legal/epl-2.0. 
- * - * SPDX-License-Identifier: EPL-2.0 - * - ********************************************************************************/ - -#ifndef __AIDGE_CPU_OPERATOR_ADDIMPL_FORWARD_KERNEL_H__ -#define __AIDGE_CPU_OPERATOR_ADDIMPL_FORWARD_KERNEL_H__ - -#include "aidge/utils/Registrar.hpp" - -#include "aidge/operator/AddImpl.hpp" - -namespace Aidge { - -template <class I1, class O> -void AddImpl1I_cpu_forward_kernel(const std::size_t inputLength, const void* input1_, void* output_) { - // FIXME: missing Add parameters as arguments - const I1* input1 = static_cast<const I1*>(input1_); - O* output = static_cast<O*>(output_); - - for (std::size_t oIndex = 0; oIndex < inputLength; ++oIndex) { - output[oIndex] = input1[oIndex]; - } -} - -template <class I1, class I2, class O> -void AddImpl2I_cpu_forward_kernel(const std::size_t inputLength, const void* input1_, const void* input2_, - void* output_) { - // FIXME: missing Add parameters as arguments - const I1* input1 = static_cast<const I1*>(input1_); - const I2* input2 = static_cast<const I2*>(input2_); - O* output = static_cast<O*>(output_); - - for (std::size_t oIndex = 0; oIndex < inputLength; ++oIndex) { - output[oIndex] = input1[oIndex] + input2[oIndex]; - } -} - -template <class I1, class I2, class I3, class O> -void AddImpl3I_cpu_forward_kernel(const std::size_t inputLength, const void* input1_, const void* input2_, - const void* input3_, void* output_) { - // FIXME: missing Add parameters as arguments - const I1* input1 = static_cast<const I1*>(input1_); - const I2* input2 = static_cast<const I2*>(input2_); - const I3* input3 = static_cast<const I3*>(input3_); - O* output = static_cast<O*>(output_); - - for (std::size_t oIndex = 0; oIndex < inputLength; ++oIndex) { - output[oIndex] = input1[oIndex] + input2[oIndex] + input3[oIndex]; - } -} - -namespace { -static Registrar<AddImplForward_cpu<1>> registrarAddImpl1IForward_cpu_Float32( - {DataType::Float32, DataType::Float32}, Aidge::AddImpl1I_cpu_forward_kernel<float, float>); -static Registrar<AddImplForward_cpu<1>> registrarAddImpl1IForward_cpu_Int32( - {DataType::Int32, DataType::Int32}, Aidge::AddImpl1I_cpu_forward_kernel<int, int>); -static Registrar<AddImplForward_cpu<1>> registrarAddImpl1IForward_cpu_Float64( - {DataType::Float64, DataType::Float64}, Aidge::AddImpl1I_cpu_forward_kernel<double, double>); - -static Registrar<AddImplForward_cpu<2>> registrarAddImpl2IForward_cpu_Float32( - {DataType::Float32, DataType::Float32, DataType::Float32}, - Aidge::AddImpl2I_cpu_forward_kernel<float, float, float>); -static Registrar<AddImplForward_cpu<2>> registrarAddImpl2IForward_cpu_Int32( - {DataType::Int32, DataType::Int32, DataType::Int32}, Aidge::AddImpl2I_cpu_forward_kernel<int, int, int>); -static Registrar<AddImplForward_cpu<2>> registrarAddImpl2IForward_cpu_Float64( - {DataType::Float64, DataType::Float64, DataType::Float64}, Aidge::AddImpl2I_cpu_forward_kernel<double, double, double>); - -static Registrar<AddImplForward_cpu<3>> registrarAddImpl3IForward_cpu_Float32( - {DataType::Float32, DataType::Float32, DataType::Float32, DataType::Float32}, - Aidge::AddImpl3I_cpu_forward_kernel<float, float, float, float>); -static Registrar<AddImplForward_cpu<3>> registrarAddImpl3IForward_cpu_Int32( - {DataType::Int32, DataType::Int32, DataType::Int32, DataType::Int32}, - Aidge::AddImpl3I_cpu_forward_kernel<int, int, int, int>); -static Registrar<AddImplForward_cpu<3>> registrarAddImpl3IForward_cpu_Float64( - {DataType::Float64, DataType::Float64, DataType::Float64, DataType::Float64}, - 
Aidge::AddImpl3I_cpu_forward_kernel<double, double, double, double>); -} // namespace -} // namespace Aidge - -#endif /* __AIDGE_CPU_OPERATOR_ADDIMPL_CPU_FORWARD_KERNEL_H__ */ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. + * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#ifndef AIDGE_CPU_OPERATOR_ADDIMPL_FORWARD_KERNEL_H_ +#define AIDGE_CPU_OPERATOR_ADDIMPL_FORWARD_KERNEL_H_ + +#include "aidge/utils/Registrar.hpp" + +#include "aidge/backend/cpu/operator/AddImpl.hpp" + +namespace Aidge { + +template <class I1, class O> +void AddImpl1I_cpu_forward_kernel(const std::size_t inputLength, const void* input1_, void* output_) { + // FIXME: missing Add parameters as arguments + const I1* input1 = static_cast<const I1*>(input1_); + O* output = static_cast<O*>(output_); + + for (std::size_t oIndex = 0; oIndex < inputLength; ++oIndex) { + output[oIndex] = input1[oIndex]; + } +} + +template <class I1, class I2, class O> +void AddImpl2I_cpu_forward_kernel(const std::size_t inputLength, const void* input1_, const void* input2_, + void* output_) { + // FIXME: missing Add parameters as arguments + const I1* input1 = static_cast<const I1*>(input1_); + const I2* input2 = static_cast<const I2*>(input2_); + O* output = static_cast<O*>(output_); + + for (std::size_t oIndex = 0; oIndex < inputLength; ++oIndex) { + output[oIndex] = input1[oIndex] + input2[oIndex]; + } +} + +template <class I1, class I2, class I3, class O> +void AddImpl3I_cpu_forward_kernel(const std::size_t inputLength, const void* input1_, const void* input2_, + const void* input3_, void* output_) { + // FIXME: missing Add parameters as arguments + const I1* input1 = static_cast<const I1*>(input1_); + const I2* input2 = static_cast<const I2*>(input2_); + const I3* input3 = static_cast<const I3*>(input3_); + O* output = static_cast<O*>(output_); + + for (std::size_t oIndex = 0; oIndex < inputLength; ++oIndex) { + output[oIndex] = input1[oIndex] + input2[oIndex] + input3[oIndex]; + } +} + +namespace { +static Registrar<AddImplForward_cpu<1>> registrarAddImpl1IForward_cpu_Float32( + {DataType::Float32, DataType::Float32}, Aidge::AddImpl1I_cpu_forward_kernel<float, float>); +static Registrar<AddImplForward_cpu<1>> registrarAddImpl1IForward_cpu_Int32( + {DataType::Int32, DataType::Int32}, Aidge::AddImpl1I_cpu_forward_kernel<int, int>); +static Registrar<AddImplForward_cpu<1>> registrarAddImpl1IForward_cpu_Float64( + {DataType::Float64, DataType::Float64}, Aidge::AddImpl1I_cpu_forward_kernel<double, double>); + +static Registrar<AddImplForward_cpu<2>> registrarAddImpl2IForward_cpu_Float32( + {DataType::Float32, DataType::Float32, DataType::Float32}, + Aidge::AddImpl2I_cpu_forward_kernel<float, float, float>); +static Registrar<AddImplForward_cpu<2>> registrarAddImpl2IForward_cpu_Int32( + {DataType::Int32, DataType::Int32, DataType::Int32}, Aidge::AddImpl2I_cpu_forward_kernel<int, int, int>); +static Registrar<AddImplForward_cpu<2>> registrarAddImpl2IForward_cpu_Float64( + {DataType::Float64, DataType::Float64, DataType::Float64}, Aidge::AddImpl2I_cpu_forward_kernel<double, double, double>); + +static Registrar<AddImplForward_cpu<3>> registrarAddImpl3IForward_cpu_Float32( + {DataType::Float32, DataType::Float32, 
DataType::Float32, DataType::Float32}, + Aidge::AddImpl3I_cpu_forward_kernel<float, float, float, float>); +static Registrar<AddImplForward_cpu<3>> registrarAddImpl3IForward_cpu_Int32( + {DataType::Int32, DataType::Int32, DataType::Int32, DataType::Int32}, + Aidge::AddImpl3I_cpu_forward_kernel<int, int, int, int>); +static Registrar<AddImplForward_cpu<3>> registrarAddImpl3IForward_cpu_Float64( + {DataType::Float64, DataType::Float64, DataType::Float64, DataType::Float64}, + Aidge::AddImpl3I_cpu_forward_kernel<double, double, double, double>); +} // namespace +} // namespace Aidge + +#endif /* AIDGE_CPU_OPERATOR_ADDIMPL_CPU_FORWARD_KERNEL_H_ */ diff --git a/include/aidge/operator/AvgPoolingImpl.hpp b/include/aidge/backend/cpu/operator/AvgPoolingImpl.hpp similarity index 88% rename from include/aidge/operator/AvgPoolingImpl.hpp rename to include/aidge/backend/cpu/operator/AvgPoolingImpl.hpp index 5cde8bbd7b482a70b234f988cb3f54178a2c50ee..8373cb84a550efd8741a2dbc04c1e94ad37fe611 100644 --- a/include/aidge/operator/AvgPoolingImpl.hpp +++ b/include/aidge/backend/cpu/operator/AvgPoolingImpl.hpp @@ -9,8 +9,8 @@ * ********************************************************************************/ -#ifndef __AIDGE_CPU_OPERATOR_AVGPOOLINGIMPL_H__ -#define __AIDGE_CPU_OPERATOR_AVGPOOLINGIMPL_H__ +#ifndef AIDGE_CPU_OPERATOR_AVGPOOLINGIMPL_H_ +#define AIDGE_CPU_OPERATOR_AVGPOOLINGIMPL_H_ #include <array> #include <memory> @@ -51,9 +51,10 @@ class AvgPoolingImpl2D_cpu : public OperatorImpl { public: NbElts_t getNbRequiredData(const IOIndex_t inputIdx) const override final; NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final; - NbElts_t getRequiredMemory(__attribute__((unused)) const IOIndex_t outputIdx, const std::vector<DimSize_t> &inputsSize) const override final; + NbElts_t getRequiredMemory(const IOIndex_t /*outputIdx*/, const std::vector<DimSize_t> &inputsSize) const override final; NbElts_t getNbConsumedData(const IOIndex_t inputIdx) const override final; NbElts_t getNbProducedData(const IOIndex_t outputIdx) const override final; + void updateConsummerProducer() override final; void forward(); @@ -66,4 +67,4 @@ static Registrar<AvgPooling_Op<2>> registrarAvgPoolingImpl2D_cpu("cpu", Aidge::A } // namespace } // namespace Aidge -#endif /* __AIDGE_CPU_OPERATOR_AVGPOOLINGIMPL_H__ */ \ No newline at end of file +#endif /* AIDGE_CPU_OPERATOR_AVGPOOLINGIMPL_H_ */ diff --git a/include/aidge/operator/AvgPoolingImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/AvgPoolingImpl_forward_kernels.hpp similarity index 89% rename from include/aidge/operator/AvgPoolingImpl_forward_kernels.hpp rename to include/aidge/backend/cpu/operator/AvgPoolingImpl_forward_kernels.hpp index cf6cd0e6ec016239bb357510766ac199de418377..776e020f1a20056db345c8e845fd73bb31b4138b 100644 --- a/include/aidge/operator/AvgPoolingImpl_forward_kernels.hpp +++ b/include/aidge/backend/cpu/operator/AvgPoolingImpl_forward_kernels.hpp @@ -1,114 +1,114 @@ -/******************************************************************************** - * Copyright (c) 2023 CEA-List - * - * This program and the accompanying materials are made available under the - * terms of the Eclipse Public License 2.0 which is available at - * http://www.eclipse.org/legal/epl-2.0. 
- * - * SPDX-License-Identifier: EPL-2.0 - * - ********************************************************************************/ - -#ifndef __AIDGE_CPU_OPERATOR_AVGPOOLINGIMPL_FORWARD_KERNEL_H__ -#define __AIDGE_CPU_OPERATOR_AVGPOOLINGIMPL_FORWARD_KERNEL_H__ - -#include "aidge/utils/Registrar.hpp" - -#include "aidge/operator/AvgPoolingImpl.hpp" -#include "aidge/utils/Types.h" -#include "aidge/data/Data.hpp" -#include <array> -#include <tuple> -#include <cmath> - -namespace Aidge { -/** - * @brief Forward kernel for 2D AvgPoolingolution on CPU backend. - * @tparam I Input data type. - * @tparam O Output data type. - * @param params tuple of Parameters from the Operator - * @param dims Array of input dimensions. - * @param input_ const input Tensor. - * @param output_ Output Tensor. - */ -template <class I, class O> -void AvgPoolingImpl2D_cpu_forward_kernel(const AvgPooling_Op<2>::Parameters ¶ms, - const std::array<DimSize_t, 4> &dims, - const void *input_, - void *output_) { - // FIXME: missing convolution parameters as arguments - const I *input = static_cast<const I *>(input_); - O *output = static_cast<O *>(output_); - - - // output H size - const std::size_t oxSize = - static_cast<std::size_t>(std::floor(static_cast<float>(dims[2] + std::get<2>(params)[0] + std::get<2>(params)[2] - std::get<1>(params)[0] + std::get<0>(params)[0]) / - static_cast<float>(std::get<0>(params)[0]))); - // output W size - const std::size_t oySize = - static_cast<std::size_t>(std::floor(static_cast<float>(dims[3] + std::get<2>(params)[1] + std::get<2>(params)[3] - std::get<1>(params)[1] + std::get<0>(params)[1]) / - static_cast<float>(std::get<0>(params)[1]))); - - // TODO: kernel computation - // output (batch, outCh, Xout, Yout) - // input (batch, ch, Xin, Yin) - // weight (outCh, ch, kernelX, kernelY) - // does not take Dilation parameter into account - using signedsize = std::make_signed<std::size_t>::type; - for (std::size_t batch = 0; batch < dims[0]; ++batch) { - for (std::size_t ch = 0; ch < dims[1]; ++ch) { - const std::size_t oIndex = (ch + batch*dims[1]) * oxSize * oySize; - const std::size_t iIndex = (ch + batch*dims[1]) * dims[2] * dims[3]; - for (std::size_t ox = 0; ox < oxSize; ++ox) { - const signedsize difx = static_cast<signedsize>(std::get<2>(params)[0] - ox * std::get<0>(params)[0]); - const std::size_t sxMin = static_cast<std::size_t>(std::max(difx, signedsize(0))); - const std::size_t sxMax = (dims[2] + difx) < 0 ? 0 : ((dims[2] + difx) > std::get<1>(params)[0] ? std::get<1>(params)[0] : dims[2] + difx); - for (std::size_t oy = 0; oy < oySize; ++oy) { - const signedsize dify = static_cast<signedsize>(std::get<2>(params)[1] - oy * std::get<0>(params)[1]); - const std::size_t syMin = static_cast<std::size_t>(std::max(dify, signedsize(0))); - const std::size_t syMax = (dims[3] + dify) < 0 ? 0 : ((dims[3] + dify) > std::get<1>(params)[1] ? 
std::get<1>(params)[1] : dims[3] + dify); - const std::size_t oIndexFull = oIndex + ox*oySize + oy; - const std::size_t ix = ox * std::get<0>(params)[0]; - const std::size_t iy = oy * std::get<0>(params)[1]; - - if (sxMin == 0 && syMin == 0 && sxMax == 3 && syMax == 3) { - output[oIndexFull] += static_cast<O>( - input[iIndex + (ix+0)*dims[3] + (iy+0)] + - input[iIndex + (ix+0)*dims[3] + (iy+1)] + - input[iIndex + (ix+0)*dims[3] + (iy+2)] + - input[iIndex + (ix+1)*dims[3] + (iy+0)] + - input[iIndex + (ix+1)*dims[3] + (iy+1)] + - input[iIndex + (ix+1)*dims[3] + (iy+2)] + - input[iIndex + (ix+2)*dims[3] + (iy+0)] + - input[iIndex + (ix+2)*dims[3] + (iy+1)] + - input[iIndex + (ix+2)*dims[3] + (iy+2)]) / O(9); - } else { - for (std::size_t sx = sxMin; sx < sxMax; ++sx) { - for (std::size_t sy = syMin; sy < syMax; ++sy) { - output[oIndexFull] += input[iIndex + (ix+sx)*dims[3] + (iy+sy)]; - } - } - // padding not used - output[oIndexFull] /= (sxMax - sxMin) * (syMax - syMin); - } - } - } - } - } -} - -namespace { -static Registrar<AvgPoolingImpl2DForward_cpu> registrarAvgPoolingImpl2DForward_cpu_Float32( - std::tuple<DataType, DataType>({DataType::Float32, DataType::Float32}), - Aidge::AvgPoolingImpl2D_cpu_forward_kernel<float, float>); -static Registrar<AvgPoolingImpl2DForward_cpu> registrarAvgPoolingImpl2DForward_cpu_Int32( - {DataType::Int32, DataType::Int32}, - Aidge::AvgPoolingImpl2D_cpu_forward_kernel<int, int>); -static Registrar<AvgPoolingImpl2DForward_cpu> registrarAvgPoolingImpl2DForward_cpu_Float64( - {DataType::Float64, DataType::Float64}, - Aidge::AvgPoolingImpl2D_cpu_forward_kernel<double, double>); -} // namespace -} // namespace Aidge - -#endif /* __AIDGE_CPU_OPERATOR_AVGPOOLINGIMPL_FORWARD_KERNEL_H__ */ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. + * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#ifndef AIDGE_CPU_OPERATOR_AVGPOOLINGIMPL_FORWARD_KERNEL_H_ +#define AIDGE_CPU_OPERATOR_AVGPOOLINGIMPL_FORWARD_KERNEL_H_ + +#include "aidge/utils/Registrar.hpp" + +#include "aidge/backend/cpu/operator/AvgPoolingImpl.hpp" +#include "aidge/utils/Types.h" +#include "aidge/data/Data.hpp" +#include <array> +#include <tuple> +#include <cmath> + +namespace Aidge { +/** + * @brief Forward kernel for 2D AvgPoolingolution on CPU backend. + * @tparam I Input data type. + * @tparam O Output data type. + * @param params tuple of Parameters from the Operator + * @param dims Array of input dimensions. + * @param input_ const input Tensor. + * @param output_ Output Tensor. 
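+ * @note Worked example of the output-size arithmetic used below (a sketch only, reading the
+ *       parameter tuple the way this kernel indexes it: std::get<0> = stride dims,
+ *       std::get<1> = kernel dims, std::get<2> = padding):
+ *         oxSize = floor((H + pad[0] + pad[2] - kernelH + strideH) / strideH)
+ *         oySize = floor((W + pad[1] + pad[3] - kernelW + strideW) / strideW)
+ *       e.g. H = W = 32, a 3x3 kernel, stride 1 and no padding give oxSize = oySize = 30.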
+ */ +template <class I, class O> +void AvgPoolingImpl2D_cpu_forward_kernel(const AvgPooling_Op<2>::Parameters ¶ms, + const std::array<DimSize_t, 4> &dims, + const void *input_, + void *output_) { + // FIXME: missing convolution parameters as arguments + const I *input = static_cast<const I *>(input_); + O *output = static_cast<O *>(output_); + + + // output H size + const std::size_t oxSize = + static_cast<std::size_t>(std::floor(static_cast<float>(dims[2] + std::get<2>(params)[0] + std::get<2>(params)[2] - std::get<1>(params)[0] + std::get<0>(params)[0]) / + static_cast<float>(std::get<0>(params)[0]))); + // output W size + const std::size_t oySize = + static_cast<std::size_t>(std::floor(static_cast<float>(dims[3] + std::get<2>(params)[1] + std::get<2>(params)[3] - std::get<1>(params)[1] + std::get<0>(params)[1]) / + static_cast<float>(std::get<0>(params)[1]))); + + // TODO: kernel computation + // output (batch, outCh, Xout, Yout) + // input (batch, ch, Xin, Yin) + // weight (outCh, ch, kernelX, kernelY) + // does not take Dilation parameter into account + using signedsize = std::make_signed<std::size_t>::type; + for (std::size_t batch = 0; batch < dims[0]; ++batch) { + for (std::size_t ch = 0; ch < dims[1]; ++ch) { + const std::size_t oIndex = (ch + batch*dims[1]) * oxSize * oySize; + const std::size_t iIndex = (ch + batch*dims[1]) * dims[2] * dims[3]; + for (std::size_t ox = 0; ox < oxSize; ++ox) { + const signedsize difx = static_cast<signedsize>(std::get<2>(params)[0] - ox * std::get<0>(params)[0]); + const std::size_t sxMin = static_cast<std::size_t>(std::max(difx, signedsize(0))); + const std::size_t sxMax = (static_cast<signedsize>(dims[2]) + difx) < 0 ? 0 : ((dims[2] + difx) > std::get<1>(params)[0] ? std::get<1>(params)[0] : dims[2] + difx); + for (std::size_t oy = 0; oy < oySize; ++oy) { + const signedsize dify = static_cast<signedsize>(std::get<2>(params)[1] - oy * std::get<0>(params)[1]); + const std::size_t syMin = static_cast<std::size_t>(std::max(dify, signedsize(0))); + const std::size_t syMax = (static_cast<signedsize>(dims[3]) + dify) < 0 ? 0 : ((dims[3] + dify) > std::get<1>(params)[1] ? 
std::get<1>(params)[1] : dims[3] + dify); + const std::size_t oIndexFull = oIndex + ox*oySize + oy; + const std::size_t ix = ox * std::get<0>(params)[0]; + const std::size_t iy = oy * std::get<0>(params)[1]; + + if (sxMin == 0 && syMin == 0 && sxMax == 3 && syMax == 3) { + output[oIndexFull] += static_cast<O>( + input[iIndex + (ix+0)*dims[3] + (iy+0)] + + input[iIndex + (ix+0)*dims[3] + (iy+1)] + + input[iIndex + (ix+0)*dims[3] + (iy+2)] + + input[iIndex + (ix+1)*dims[3] + (iy+0)] + + input[iIndex + (ix+1)*dims[3] + (iy+1)] + + input[iIndex + (ix+1)*dims[3] + (iy+2)] + + input[iIndex + (ix+2)*dims[3] + (iy+0)] + + input[iIndex + (ix+2)*dims[3] + (iy+1)] + + input[iIndex + (ix+2)*dims[3] + (iy+2)]) / O(9); + } else { + for (std::size_t sx = sxMin; sx < sxMax; ++sx) { + for (std::size_t sy = syMin; sy < syMax; ++sy) { + output[oIndexFull] += input[iIndex + (ix+sx)*dims[3] + (iy+sy)]; + } + } + // padding not used + output[oIndexFull] /= (sxMax - sxMin) * (syMax - syMin); + } + } + } + } + } +} + +namespace { +static Registrar<AvgPoolingImpl2DForward_cpu> registrarAvgPoolingImpl2DForward_cpu_Float32( + std::tuple<DataType, DataType>({DataType::Float32, DataType::Float32}), + Aidge::AvgPoolingImpl2D_cpu_forward_kernel<float, float>); +static Registrar<AvgPoolingImpl2DForward_cpu> registrarAvgPoolingImpl2DForward_cpu_Int32( + {DataType::Int32, DataType::Int32}, + Aidge::AvgPoolingImpl2D_cpu_forward_kernel<int, int>); +static Registrar<AvgPoolingImpl2DForward_cpu> registrarAvgPoolingImpl2DForward_cpu_Float64( + {DataType::Float64, DataType::Float64}, + Aidge::AvgPoolingImpl2D_cpu_forward_kernel<double, double>); +} // namespace +} // namespace Aidge + +#endif /* AIDGE_CPU_OPERATOR_AVGPOOLINGIMPL_FORWARD_KERNEL_H_ */ diff --git a/include/aidge/operator/BatchNormImpl.hpp b/include/aidge/backend/cpu/operator/BatchNormImpl.hpp similarity index 90% rename from include/aidge/operator/BatchNormImpl.hpp rename to include/aidge/backend/cpu/operator/BatchNormImpl.hpp index 37d644f00f4a53b0f0b5c64928ec5c77e719ceb5..d9f25b4a8e38510f82fc5afe9ed4b656197a47d5 100644 --- a/include/aidge/operator/BatchNormImpl.hpp +++ b/include/aidge/backend/cpu/operator/BatchNormImpl.hpp @@ -9,8 +9,8 @@ * ********************************************************************************/ -#ifndef __AIDGE_CPU_OPERATOR_BATCHNORMIMPL_H__ -#define __AIDGE_CPU_OPERATOR_BATCHNORMIMPL_H__ +#ifndef AIDGE_CPU_OPERATOR_BATCHNORMIMPL_H_ +#define AIDGE_CPU_OPERATOR_BATCHNORMIMPL_H_ #include <array> #include <memory> @@ -66,9 +66,10 @@ class BatchNormImpl2D_cpu : public OperatorImpl { public: NbElts_t getNbRequiredData(const IOIndex_t inputIdx) const override final; NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final; - NbElts_t getRequiredMemory(__attribute__((unused)) const IOIndex_t outputIdx, const std::vector<DimSize_t> &inputsSize) const override final; + NbElts_t getRequiredMemory(const IOIndex_t /*outputIdx*/, const std::vector<DimSize_t> &inputsSize) const override final; NbElts_t getNbConsumedData(const IOIndex_t inputIdx) const override final; NbElts_t getNbProducedData(const IOIndex_t outputIdx) const override final; + void updateConsummerProducer() override final; void forward(); @@ -81,4 +82,4 @@ static Registrar<BatchNorm_Op<2>> registrarBatchNormImpl2D_cpu("cpu", Aidge::Bat } // namespace } // namespace Aidge -#endif /* __AIDGE_CPU_OPERATOR_BATCHNORMIMPL_H__ */ \ No newline at end of file +#endif /* AIDGE_CPU_OPERATOR_BATCHNORMIMPL_H_ */ diff --git 
a/include/aidge/operator/BatchNormImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/BatchNormImpl_forward_kernels.hpp similarity index 92% rename from include/aidge/operator/BatchNormImpl_forward_kernels.hpp rename to include/aidge/backend/cpu/operator/BatchNormImpl_forward_kernels.hpp index 77a8f0aa12c3b5c450dfd765626acbe7e6dfe995..eedb80bde60d65b53bac70cc33ca83eb4f0121e7 100644 --- a/include/aidge/operator/BatchNormImpl_forward_kernels.hpp +++ b/include/aidge/backend/cpu/operator/BatchNormImpl_forward_kernels.hpp @@ -1,109 +1,109 @@ -/******************************************************************************** - * Copyright (c) 2023 CEA-List - * - * This program and the accompanying materials are made available under the - * terms of the Eclipse Public License 2.0 which is available at - * http://www.eclipse.org/legal/epl-2.0. - * - * SPDX-License-Identifier: EPL-2.0 - * - ********************************************************************************/ - -#ifndef __AIDGE_CPU_OPERATOR_BATCHNORMIMPL_FORWARD_KERNEL_H__ -#define __AIDGE_CPU_OPERATOR_BATCHNORMIMPL_FORWARD_KERNEL_H__ - -#include "aidge/utils/Registrar.hpp" - -#include "aidge/operator/BatchNormImpl.hpp" -#include "aidge/utils/Types.h" -#include <array> -#include <cmath> -#include <algorithm> - -namespace Aidge { -/** - * @brief Forward kernel for 2D BatchNormolution on CPU backend. - * @tparam I Input data type. - * @tparam W Weight data type. - * @tparam B Bias data type. - * @tparam O Output data type. - * @param params tuple of Parameters from the Operator - * @param dims Array of input dimensions. - * @param input_ const input Tensor. - * @param scale_ const scale Tensor. - * @param shift_ const shift Tensor. - * @param batchMean_ const mean Tensor. - * @param batchVar_ const variance Tensor. - * @param output_ Output Tensor. 
- */ -template <class I, class P, class O> -void BatchNormImpl2D_cpu_forward_kernel(const BatchNorm_Op<2>::Parameters ¶ms, const std::array<DimSize_t, 4> &dims, - const void *input_, const void *scale_, const void *shift_, void *batchMean_, void *batchVar_, void *output_, const bool freeze) { - // FIXME: missing convolution parameters as arguments - const I *input = static_cast<const I *>(input_); - const P *scale = static_cast<const P *>(scale_); - const P *shift = static_cast<const P *>(shift_); - P *batchMean = static_cast<P *>(batchMean_); - P *batchVar = static_cast<P *>(batchVar_); - O *output = static_cast<O *>(output_); - - const DimSize_t nbBatch = dims[0]; - const DimSize_t nbChannels = dims[1]; - const DimSize_t featureMapSize = dims[2]*dims[3]; - - - if ((freeze == true) || (std::get<1>(params) == 0.0f)) { - for (std::size_t batch = 0; batch < nbBatch; ++batch) { - for (std::size_t ch = 0; ch < nbChannels; ++ch) { - const std::size_t ioIndex = (ch + batch*nbChannels) * featureMapSize; - std::fill(output + ioIndex, output + ioIndex + featureMapSize, shift[ch]); - const P var = std::sqrt(batchVar[ch] + static_cast<P>(std::get<0>(params))); - - for (std::size_t feature = 0; feature<featureMapSize; ++feature) { - output[ioIndex + feature] += scale[ch] * (input[ioIndex + feature]-batchMean[ch]) / var; - } - } - } - } else { - const std::size_t nbDataPerChannel = nbBatch * featureMapSize; - for (std::size_t ch = 0; ch < nbChannels; ++ch) { - I sum = I(0); - I sumSquare = I(0); - for (std::size_t batch = 0; batch < nbBatch; ++batch) { - const std::size_t ioIndex = (ch + batch*nbChannels) * featureMapSize; - std::fill(output + ioIndex, output + ioIndex + featureMapSize, shift[ch]); - - for (std::size_t feature = 0; feature<featureMapSize; ++feature) { - sum += input[ioIndex + feature]; - sumSquare += input[ioIndex + feature] * input[ioIndex + feature]; - } - } - const I inputMean = sum / static_cast<I>(nbDataPerChannel); - const I inputVar = sumSquare / static_cast<I>(nbDataPerChannel) - inputMean*inputMean; - - batchMean[ch] = batchMean[ch]*(1-std::get<1>(params)) + inputMean*std::get<1>(params); - batchVar[ch] = batchVar[ch]*(1-std::get<1>(params)) + inputVar*(static_cast<I>(nbDataPerChannel)/static_cast<I>(nbDataPerChannel-1))*std::get<1>(params); - - const P var = std::sqrt(inputVar + static_cast<P>(std::get<0>(params))); - for (std::size_t batch = 0; batch < nbBatch; ++batch) { - const std::size_t ioIndex = (ch + batch*nbChannels) * featureMapSize; - for (std::size_t feature = 0; feature<featureMapSize; ++feature) { - output[ioIndex + feature] += scale[ch] * (input[ioIndex + feature]-inputMean) / var; - } - } - } - } -} - - - - - -namespace { -static Registrar<BatchNormImpl2DForward_cpu> registrarBatchNormImpl2DForward_cpu_Float32( - {DataType::Float32, DataType::Float32, DataType::Float32}, - Aidge::BatchNormImpl2D_cpu_forward_kernel<float, float, float>); -} // namespace -} // namespace Aidge - -#endif /* __AIDGE_CPU_OPERATOR_BATCHNORMIMPL_FORWARD_KERNEL_H__ */ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. 
+ * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#ifndef AIDGE_CPU_OPERATOR_BATCHNORMIMPL_FORWARD_KERNEL_H_ +#define AIDGE_CPU_OPERATOR_BATCHNORMIMPL_FORWARD_KERNEL_H_ + +#include "aidge/utils/Registrar.hpp" + +#include "aidge/backend/cpu/operator/BatchNormImpl.hpp" +#include "aidge/utils/Types.h" +#include <array> +#include <cmath> +#include <algorithm> + +namespace Aidge { +/** + * @brief Forward kernel for 2D BatchNormolution on CPU backend. + * @tparam I Input data type. + * @tparam W Weight data type. + * @tparam B Bias data type. + * @tparam O Output data type. + * @param params tuple of Parameters from the Operator + * @param dims Array of input dimensions. + * @param input_ const input Tensor. + * @param scale_ const scale Tensor. + * @param shift_ const shift Tensor. + * @param batchMean_ const mean Tensor. + * @param batchVar_ const variance Tensor. + * @param output_ Output Tensor. + */ +template <class I, class P, class O> +void BatchNormImpl2D_cpu_forward_kernel(const BatchNorm_Op<2>::Parameters ¶ms, const std::array<DimSize_t, 4> &dims, + const void *input_, const void *scale_, const void *shift_, void *batchMean_, void *batchVar_, void *output_, const bool freeze) { + // FIXME: missing convolution parameters as arguments + const I *input = static_cast<const I *>(input_); + const P *scale = static_cast<const P *>(scale_); + const P *shift = static_cast<const P *>(shift_); + P *batchMean = static_cast<P *>(batchMean_); + P *batchVar = static_cast<P *>(batchVar_); + O *output = static_cast<O *>(output_); + + const DimSize_t nbBatch = dims[0]; + const DimSize_t nbChannels = dims[1]; + const DimSize_t featureMapSize = dims[2]*dims[3]; + + + if ((freeze == true) || (std::get<1>(params) == 0.0f)) { + for (std::size_t batch = 0; batch < nbBatch; ++batch) { + for (std::size_t ch = 0; ch < nbChannels; ++ch) { + const std::size_t ioIndex = (ch + batch*nbChannels) * featureMapSize; + std::fill(output + ioIndex, output + ioIndex + featureMapSize, shift[ch]); + const P var = std::sqrt(batchVar[ch] + static_cast<P>(std::get<0>(params))); + + for (std::size_t feature = 0; feature<featureMapSize; ++feature) { + output[ioIndex + feature] += scale[ch] * (input[ioIndex + feature]-batchMean[ch]) / var; + } + } + } + } else { + const std::size_t nbDataPerChannel = nbBatch * featureMapSize; + for (std::size_t ch = 0; ch < nbChannels; ++ch) { + I sum = I(0); + I sumSquare = I(0); + for (std::size_t batch = 0; batch < nbBatch; ++batch) { + const std::size_t ioIndex = (ch + batch*nbChannels) * featureMapSize; + std::fill(output + ioIndex, output + ioIndex + featureMapSize, shift[ch]); + + for (std::size_t feature = 0; feature<featureMapSize; ++feature) { + sum += input[ioIndex + feature]; + sumSquare += input[ioIndex + feature] * input[ioIndex + feature]; + } + } + const I inputMean = sum / static_cast<I>(nbDataPerChannel); + const I inputVar = sumSquare / static_cast<I>(nbDataPerChannel) - inputMean*inputMean; + + batchMean[ch] = batchMean[ch]*(1-std::get<1>(params)) + inputMean*std::get<1>(params); + batchVar[ch] = batchVar[ch]*(1-std::get<1>(params)) + inputVar*(static_cast<I>(nbDataPerChannel)/static_cast<I>(nbDataPerChannel-1))*std::get<1>(params); + + const P var = std::sqrt(inputVar + static_cast<P>(std::get<0>(params))); + for (std::size_t batch = 0; batch < nbBatch; ++batch) { + const std::size_t ioIndex = (ch + batch*nbChannels) * featureMapSize; + for (std::size_t feature = 0; feature<featureMapSize; 
++feature) { + output[ioIndex + feature] += scale[ch] * (input[ioIndex + feature]-inputMean) / var; + } + } + } + } +} + + + + + +namespace { +static Registrar<BatchNormImpl2DForward_cpu> registrarBatchNormImpl2DForward_cpu_Float32( + {DataType::Float32, DataType::Float32, DataType::Float32}, + Aidge::BatchNormImpl2D_cpu_forward_kernel<float, float, float>); +} // namespace +} // namespace Aidge + +#endif /* AIDGE_CPU_OPERATOR_BATCHNORMIMPL_FORWARD_KERNEL_H_ */ diff --git a/include/aidge/operator/ConvDepthWiseImpl.hpp b/include/aidge/backend/cpu/operator/ConvDepthWiseImpl.hpp similarity index 88% rename from include/aidge/operator/ConvDepthWiseImpl.hpp rename to include/aidge/backend/cpu/operator/ConvDepthWiseImpl.hpp index 64f5df8c4dc6994629b10b2021d6f35d745ed7b2..0d21c676d797b2fc4e95c4aea47674c8fca5eef4 100644 --- a/include/aidge/operator/ConvDepthWiseImpl.hpp +++ b/include/aidge/backend/cpu/operator/ConvDepthWiseImpl.hpp @@ -9,8 +9,8 @@ * ********************************************************************************/ -#ifndef __AIDGE_CPU_OPERATOR_CONVDEPTHWISEIMPL_H__ -#define __AIDGE_CPU_OPERATOR_CONVDEPTHWISEIMPL_H__ +#ifndef AIDGE_CPU_OPERATOR_CONVDEPTHWISEIMPL_H_ +#define AIDGE_CPU_OPERATOR_CONVDEPTHWISEIMPL_H_ #include <array> #include <memory> @@ -53,9 +53,10 @@ class ConvDepthWiseImpl2D_cpu : public OperatorImpl { public: NbElts_t getNbRequiredData(const IOIndex_t inputIdx) const override final; NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final; - NbElts_t getRequiredMemory(__attribute__((unused)) const IOIndex_t outputIdx, __attribute__((unused)) const std::vector<DimSize_t> &inputsSize) const override final; + NbElts_t getRequiredMemory(const IOIndex_t /*outputIdx*/, const std::vector<DimSize_t> &/*inputsSize*/) const override final; NbElts_t getNbConsumedData(const IOIndex_t inputIdx) const override final; NbElts_t getNbProducedData(const IOIndex_t outputIdx) const override final; + void updateConsummerProducer() override final; void forward(); @@ -68,4 +69,4 @@ static Registrar<ConvDepthWise_Op<2>> registrarConvDepthWiseImpl2D_cpu("cpu", Ai } // namespace } // namespace Aidge -#endif /* __AIDGE_CPU_OPERATOR_CONVDEPTHWISEIMPL_H__ */ \ No newline at end of file +#endif /* AIDGE_CPU_OPERATOR_CONVDEPTHWISEIMPL_H_ */ diff --git a/include/aidge/operator/ConvDepthWiseImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/ConvDepthWiseImpl_forward_kernels.hpp similarity index 90% rename from include/aidge/operator/ConvDepthWiseImpl_forward_kernels.hpp rename to include/aidge/backend/cpu/operator/ConvDepthWiseImpl_forward_kernels.hpp index 699a086457ee54f048182b9e318dbe1311b0c75c..ee2d82e00376c5a2cc5a075565e35eb8885c021e 100644 --- a/include/aidge/operator/ConvDepthWiseImpl_forward_kernels.hpp +++ b/include/aidge/backend/cpu/operator/ConvDepthWiseImpl_forward_kernels.hpp @@ -1,118 +1,118 @@ -/******************************************************************************** - * Copyright (c) 2023 CEA-List - * - * This program and the accompanying materials are made available under the - * terms of the Eclipse Public License 2.0 which is available at - * http://www.eclipse.org/legal/epl-2.0. 
- * - * SPDX-License-Identifier: EPL-2.0 - * - ********************************************************************************/ - -#ifndef __AIDGE_CPU_OPERATOR_CONVDEPTHWISEIMP_FORWARD_KERNEL_H__ -#define __AIDGE_CPU_OPERATOR_CONVDEPTHWISEIMPL_FORWARD_KERNEL_H__ - -#include "aidge/utils/Registrar.hpp" - -#include "aidge/operator/ConvDepthWiseImpl.hpp" -#include "aidge/utils/Types.h" -#include <cmath> -#include <array> -#include <algorithm> - -namespace Aidge { -/** - * @brief Forward kernel for 2D ConvDepthWiseolution on CPU backend. - * @tparam I Input data type. - * @tparam W Weight data type. - * @tparam B Bias data type. - * @tparam O Output data type. - * @param params tuple of Parameters from the Operator - * @param dims Array of input dimensions. - * @param input_ const input Tensor. - * @param weights_ const weight Tensor. - * @param biases_ const Biais Tensor. - * @param output_ Output Tensor. - */ -template <class I, class W, class B, class O> -void ConvDepthWiseImpl2D_cpu_forward_kernel(const ConvDepthWise_Op<2>::Parameters ¶ms, const std::array<DimSize_t, 4> &dims, - const void *input_, const void *weights_, const void *biases_, void *output_) { - // FIXME: missing convolution parameters as arguments - const I *input = static_cast<const I *>(input_); - const W *weights = static_cast<const W *>(weights_); - const B *biases = static_cast<const B *>(biases_); - O *output = static_cast<O *>(output_); - - - // output H size - const std::size_t oxSize = - static_cast<std::size_t>(std::floor(static_cast<float>(dims[2] + std::get<4>(params)[0] + std::get<4>(params)[2] - std::get<3>(params)[0] + std::get<0>(params)[0]) / - static_cast<float>(std::get<0>(params)[0]))); - // output W size - const std::size_t oySize = - static_cast<std::size_t>(std::floor(static_cast<float>(dims[3] + std::get<4>(params)[1] + std::get<4>(params)[3] - std::get<3>(params)[1] + std::get<0>(params)[1]) / - static_cast<float>(std::get<0>(params)[1]))); - - // TODO: kernel computation - // output (batch, outCh, Xout, Yout) - // input (batch, ch, Xin, Yin) - // weight (outCh, ch, kernelX, kernelY) - // does not take Dilation parameter into account - using signedsize = std::make_signed<std::size_t>::type; - for (std::size_t batch = 0; batch < dims[0]; ++batch) { - for (std::size_t ch = 0; ch < std::get<2>(params); ++ch) { - const std::size_t oIndex = (ch + batch*std::get<2>(params)) * oxSize * oySize; - B biasVal = (biases != nullptr) ? biases[ch] : B(0); - std::fill(output + oIndex, output+(oIndex+oxSize*oySize), biasVal); - const std::size_t iIndex = (ch + batch*dims[1]) * dims[2] * dims[3]; - const std::size_t wIndex = ch * std::get<3>(params)[0] * std::get<3>(params)[1]; - for (std::size_t ox = 0; ox < oxSize; ++ox) { - const signedsize difx = static_cast<signedsize>(std::get<4>(params)[0] - ox * std::get<0>(params)[0]); - const std::size_t sxMin = static_cast<std::size_t>(std::max(difx, signedsize(0))); - const std::size_t sxMax = (dims[2] + difx) < 0 ? 0 : ((dims[2] + difx) > std::get<3>(params)[0] ? std::get<3>(params)[0] : dims[2] + difx); - for (std::size_t oy = 0; oy < oySize; ++oy) { - const signedsize dify = static_cast<signedsize>(std::get<4>(params)[1] - oy * std::get<0>(params)[1]); - const std::size_t syMin = static_cast<std::size_t>(std::max(dify, signedsize(0))); - const std::size_t syMax = (dims[3] + dify) < 0 ? 0 : ((dims[3] + dify) > std::get<3>(params)[1] ? 
std::get<3>(params)[1] : dims[3] + dify); - const std::size_t oIndexFull = oIndex + ox*oySize + oy; - const signedsize ix = static_cast<signedsize>(ox * std::get<0>(params)[0]) - std::get<4>(params)[0]; - const signedsize iy = static_cast<signedsize>(oy * std::get<0>(params)[1]) - std::get<4>(params)[1]; - - if (sxMin == 0 && syMin == 0 && sxMax == 3 && syMax == 3) { - output[oIndexFull] += (weights[wIndex + 0*std::get<3>(params)[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+0)*dims[3] + static_cast<std::size_t>(iy+0)] + - weights[wIndex + 0*std::get<3>(params)[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+0)*dims[3] + static_cast<std::size_t>(iy+1)] + - weights[wIndex + 0*std::get<3>(params)[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+0)*dims[3] + static_cast<std::size_t>(iy+2)] + - weights[wIndex + 1*std::get<3>(params)[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+1)*dims[3] + static_cast<std::size_t>(iy+0)] + - weights[wIndex + 1*std::get<3>(params)[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+1)*dims[3] + static_cast<std::size_t>(iy+1)] + - weights[wIndex + 1*std::get<3>(params)[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+1)*dims[3] + static_cast<std::size_t>(iy+2)] + - weights[wIndex + 2*std::get<3>(params)[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+2)*dims[3] + static_cast<std::size_t>(iy+0)] + - weights[wIndex + 2*std::get<3>(params)[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+2)*dims[3] + static_cast<std::size_t>(iy+1)] + - weights[wIndex + 2*std::get<3>(params)[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+2)*dims[3] + static_cast<std::size_t>(iy+2)]); - } else { - for (std::size_t sx = sxMin; sx < sxMax; ++sx) { - for (std::size_t sy = syMin; sy < syMax; ++sy) { - output[oIndexFull] += weights[wIndex + sx*std::get<3>(params)[1] + sy] * - input[iIndex + static_cast<std::size_t>(ix+static_cast<signedsize>(sx))*dims[3] + static_cast<std::size_t>(iy+static_cast<signedsize>(sy))]; - } - } - } - } - } - } - } -} - -namespace { -static Registrar<ConvDepthWiseImpl2DForward_cpu> registrarConvDepthWiseImpl2DForward_cpu_Float32( - {DataType::Float32, DataType::Float32, DataType::Float32, DataType::Float32}, - Aidge::ConvDepthWiseImpl2D_cpu_forward_kernel<float, float, float, float>); -static Registrar<ConvDepthWiseImpl2DForward_cpu> registrarConvDepthWiseImpl2DForward_cpu_Int32( - {DataType::Int32, DataType::Int32, DataType::Int32, DataType::Int32}, - Aidge::ConvDepthWiseImpl2D_cpu_forward_kernel<int, int, int, int>); -static Registrar<ConvDepthWiseImpl2DForward_cpu> registrarConvDepthWiseImpl2DForward_cpu_Float64( - {DataType::Float64, DataType::Float64, DataType::Float64, DataType::Float64}, - Aidge::ConvDepthWiseImpl2D_cpu_forward_kernel<double, double, double, double>); -} // namespace -} // namespace Aidge - -#endif /* __AIDGE_CPU_OPERATOR_CONVDEPTHWISEIMPL_FORWARD_KERNEL_H__ */ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. 
+ * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#ifndef AIDGE_CPU_OPERATOR_CONVDEPTHWISEIMP_FORWARD_KERNEL_H_ +#define AIDGE_CPU_OPERATOR_CONVDEPTHWISEIMPL_FORWARD_KERNEL_H_ + +#include "aidge/utils/Registrar.hpp" + +#include "aidge/backend/cpu/operator/ConvDepthWiseImpl.hpp" +#include "aidge/utils/Types.h" +#include <cmath> +#include <array> +#include <algorithm> + +namespace Aidge { +/** + * @brief Forward kernel for 2D ConvDepthWiseolution on CPU backend. + * @tparam I Input data type. + * @tparam W Weight data type. + * @tparam B Bias data type. + * @tparam O Output data type. + * @param params tuple of Parameters from the Operator + * @param dims Array of input dimensions. + * @param input_ const input Tensor. + * @param weights_ const weight Tensor. + * @param biases_ const Biais Tensor. + * @param output_ Output Tensor. + */ +template <class I, class W, class B, class O> +void ConvDepthWiseImpl2D_cpu_forward_kernel(const ConvDepthWise_Op<2>::Parameters ¶ms, const std::array<DimSize_t, 4> &dims, + const void *input_, const void *weights_, const void *biases_, void *output_) { + // FIXME: missing convolution parameters as arguments + const I *input = static_cast<const I *>(input_); + const W *weights = static_cast<const W *>(weights_); + const B *biases = static_cast<const B *>(biases_); + O *output = static_cast<O *>(output_); + + + // output H size + const std::size_t oxSize = + static_cast<std::size_t>(std::floor(static_cast<float>(dims[2] + std::get<4>(params)[0] + std::get<4>(params)[2] - std::get<3>(params)[0] + std::get<0>(params)[0]) / + static_cast<float>(std::get<0>(params)[0]))); + // output W size + const std::size_t oySize = + static_cast<std::size_t>(std::floor(static_cast<float>(dims[3] + std::get<4>(params)[1] + std::get<4>(params)[3] - std::get<3>(params)[1] + std::get<0>(params)[1]) / + static_cast<float>(std::get<0>(params)[1]))); + + // TODO: kernel computation + // output (batch, outCh, Xout, Yout) + // input (batch, ch, Xin, Yin) + // weight (outCh, ch, kernelX, kernelY) + // does not take Dilation parameter into account + using signedsize = std::make_signed<std::size_t>::type; + for (std::size_t batch = 0; batch < dims[0]; ++batch) { + for (std::size_t ch = 0; ch < std::get<2>(params); ++ch) { + const std::size_t oIndex = (ch + batch*std::get<2>(params)) * oxSize * oySize; + B biasVal = (biases != nullptr) ? biases[ch] : B(0); + std::fill(output + oIndex, output+(oIndex+oxSize*oySize), biasVal); + const std::size_t iIndex = (ch + batch*dims[1]) * dims[2] * dims[3]; + const std::size_t wIndex = ch * std::get<3>(params)[0] * std::get<3>(params)[1]; + for (std::size_t ox = 0; ox < oxSize; ++ox) { + const signedsize difx = static_cast<signedsize>(std::get<4>(params)[0] - ox * std::get<0>(params)[0]); + const std::size_t sxMin = static_cast<std::size_t>(std::max(difx, signedsize(0))); + const std::size_t sxMax = (static_cast<signedsize>(dims[2]) + difx) < 0 ? 0 : ((dims[2] + difx) > std::get<3>(params)[0] ? std::get<3>(params)[0] : dims[2] + difx); + for (std::size_t oy = 0; oy < oySize; ++oy) { + const signedsize dify = static_cast<signedsize>(std::get<4>(params)[1] - oy * std::get<0>(params)[1]); + const std::size_t syMin = static_cast<std::size_t>(std::max(dify, signedsize(0))); + const std::size_t syMax = (static_cast<signedsize>(dims[3]) + dify) < 0 ? 0 : ((dims[3] + dify) > std::get<3>(params)[1] ? 
std::get<3>(params)[1] : dims[3] + dify); + const std::size_t oIndexFull = oIndex + ox*oySize + oy; + const signedsize ix = static_cast<signedsize>(ox * std::get<0>(params)[0]) - std::get<4>(params)[0]; + const signedsize iy = static_cast<signedsize>(oy * std::get<0>(params)[1]) - std::get<4>(params)[1]; + + if (sxMin == 0 && syMin == 0 && sxMax == 3 && syMax == 3) { + output[oIndexFull] += (weights[wIndex + 0*std::get<3>(params)[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+0)*dims[3] + static_cast<std::size_t>(iy+0)] + + weights[wIndex + 0*std::get<3>(params)[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+0)*dims[3] + static_cast<std::size_t>(iy+1)] + + weights[wIndex + 0*std::get<3>(params)[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+0)*dims[3] + static_cast<std::size_t>(iy+2)] + + weights[wIndex + 1*std::get<3>(params)[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+1)*dims[3] + static_cast<std::size_t>(iy+0)] + + weights[wIndex + 1*std::get<3>(params)[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+1)*dims[3] + static_cast<std::size_t>(iy+1)] + + weights[wIndex + 1*std::get<3>(params)[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+1)*dims[3] + static_cast<std::size_t>(iy+2)] + + weights[wIndex + 2*std::get<3>(params)[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+2)*dims[3] + static_cast<std::size_t>(iy+0)] + + weights[wIndex + 2*std::get<3>(params)[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+2)*dims[3] + static_cast<std::size_t>(iy+1)] + + weights[wIndex + 2*std::get<3>(params)[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+2)*dims[3] + static_cast<std::size_t>(iy+2)]); + } else { + for (std::size_t sx = sxMin; sx < sxMax; ++sx) { + for (std::size_t sy = syMin; sy < syMax; ++sy) { + output[oIndexFull] += weights[wIndex + sx*std::get<3>(params)[1] + sy] * + input[iIndex + static_cast<std::size_t>(ix+static_cast<signedsize>(sx))*dims[3] + static_cast<std::size_t>(iy+static_cast<signedsize>(sy))]; + } + } + } + } + } + } + } +} + +namespace { +static Registrar<ConvDepthWiseImpl2DForward_cpu> registrarConvDepthWiseImpl2DForward_cpu_Float32( + {DataType::Float32, DataType::Float32, DataType::Float32, DataType::Float32}, + Aidge::ConvDepthWiseImpl2D_cpu_forward_kernel<float, float, float, float>); +static Registrar<ConvDepthWiseImpl2DForward_cpu> registrarConvDepthWiseImpl2DForward_cpu_Int32( + {DataType::Int32, DataType::Int32, DataType::Int32, DataType::Int32}, + Aidge::ConvDepthWiseImpl2D_cpu_forward_kernel<int, int, int, int>); +static Registrar<ConvDepthWiseImpl2DForward_cpu> registrarConvDepthWiseImpl2DForward_cpu_Float64( + {DataType::Float64, DataType::Float64, DataType::Float64, DataType::Float64}, + Aidge::ConvDepthWiseImpl2D_cpu_forward_kernel<double, double, double, double>); +} // namespace +} // namespace Aidge + +#endif /* AIDGE_CPU_OPERATOR_CONVDEPTHWISEIMPL_FORWARD_KERNEL_H_ */ diff --git a/include/aidge/operator/ConvImpl.hpp b/include/aidge/backend/cpu/operator/ConvImpl.hpp similarity index 88% rename from include/aidge/operator/ConvImpl.hpp rename to include/aidge/backend/cpu/operator/ConvImpl.hpp index 7bdeb0597d628c802270cd8af5a55c1362704483..1f3dffe43b966bc37887f267cc56760a899476f9 100644 --- a/include/aidge/operator/ConvImpl.hpp +++ b/include/aidge/backend/cpu/operator/ConvImpl.hpp @@ -9,8 +9,8 @@ * ********************************************************************************/ -#ifndef __AIDGE_CPU_OPERATOR_CONVIMPL_H__ -#define __AIDGE_CPU_OPERATOR_CONVIMPL_H__ +#ifndef AIDGE_CPU_OPERATOR_CONVIMPL_H_ 
+#define AIDGE_CPU_OPERATOR_CONVIMPL_H_ #include <array> #include <memory> @@ -53,9 +53,10 @@ class ConvImpl2D_cpu : public OperatorImpl { public: NbElts_t getNbRequiredData(const IOIndex_t inputIdx) const override final; NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final; - NbElts_t getRequiredMemory(__attribute__((unused)) const IOIndex_t outputIdx, __attribute__((unused)) const std::vector<DimSize_t> &inputsSize) const override final; + NbElts_t getRequiredMemory(const IOIndex_t /*outputIdx*/, const std::vector<DimSize_t> &/*inputsSize*/) const override final; NbElts_t getNbConsumedData(const IOIndex_t inputIdx) const override final; NbElts_t getNbProducedData(const IOIndex_t outputIdx) const override final; + void updateConsummerProducer() override final; void forward(); @@ -68,4 +69,4 @@ static Registrar<Conv_Op<2>> registrarConvImpl2D_cpu("cpu", Aidge::ConvImpl2D_cp } // namespace } // namespace Aidge -#endif /* __AIDGE_CPU_OPERATOR_CONVIMPL_H__ */ \ No newline at end of file +#endif /* AIDGE_CPU_OPERATOR_CONVIMPL_H_ */ diff --git a/include/aidge/operator/ConvImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/ConvImpl_forward_kernels.hpp similarity index 92% rename from include/aidge/operator/ConvImpl_forward_kernels.hpp rename to include/aidge/backend/cpu/operator/ConvImpl_forward_kernels.hpp index 8c2aedca4855c1272838604757e3b2727f11edb0..bc2f10099f42cba91be8d089b66dc176fdeb7c10 100644 --- a/include/aidge/operator/ConvImpl_forward_kernels.hpp +++ b/include/aidge/backend/cpu/operator/ConvImpl_forward_kernels.hpp @@ -1,162 +1,162 @@ -/******************************************************************************** - * Copyright (c) 2023 CEA-List - * - * This program and the accompanying materials are made available under the - * terms of the Eclipse Public License 2.0 which is available at - * http://www.eclipse.org/legal/epl-2.0. - * - * SPDX-License-Identifier: EPL-2.0 - * - ********************************************************************************/ - -#ifndef __AIDGE_CPU_OPERATOR_CONVIMPL_FORWARD_KERNEL_H__ -#define __AIDGE_CPU_OPERATOR_CONVIMPL_FORWARD_KERNEL_H__ - -#include "aidge/utils/Registrar.hpp" - -#include "aidge/operator/ConvImpl.hpp" -#include "aidge/utils/Types.h" -#include <cmath> -#include <array> -#include <algorithm> - -namespace Aidge { -/** - * @brief Forward kernel for 2D Convolution on CPU backend. - * @tparam I Input data type. - * @tparam W Weight data type. - * @tparam B Bias data type. - * @tparam O Output data type. - * @param params tuple of Parameters from the Operator - * @param dims Array of input dimensions. - * @param input_ const input Tensor. - * @param weights_ const weight Tensor. - * @param biases_ const Biais Tensor. - * @param output_ Output Tensor. 
- */ -template <class I, class W, class B, class O> -void ConvImpl2D_cpu_forward_kernel(const Conv_Op<2>::Parameters ¶ms, const std::array<DimSize_t, 4> &dims, - const void *input_, const void *weights_, const void *biases_, void *output_) { - // FIXME: missing convolution parameters as arguments - const I *input = static_cast<const I *>(input_); - const W *weights = static_cast<const W *>(weights_); - const B *biases = static_cast<const B *>(biases_); - O *output = static_cast<O *>(output_); -/* - // output H size - const std::size_t oxSize = - static_cast<std::size_t>(static_cast<float>(dims[0] - std::get<4>(params)[0] + std::get<0>(params)[0]) / - static_cast<float>(std::get<0>(params)[0])); - // output W size - const std::size_t oySize = - static_cast<std::size_t>(static_cast<float>(dims[1] - std::get<4>(params)[1] + std::get<0>(params)[1]) / - static_cast<float>(std::get<0>(params)[1])); - - // TODO: kernel computation - // output (Xout, Yout, outCh, batch) - // input (Xin, Yin, inCh, batch) - // weight (kernelX, kernelY, inCh, outCh) - // does not take Dilation parameter into account - for (std::size_t ox = 0; ox < oxSize; ++ox) { - for (std::size_t oy = 0; oy < oySize; ++oy) { - const std::size_t ix = ox * std::get<0>(params)[0]; - const std::size_t iy = oy * std::get<0>(params)[1]; - - for (std::size_t outCh = 0; outCh < std::get<3>(params); ++outCh) { - const std::size_t oIndex = dims[3] * (outCh + std::get<3>(params) * (oy + oySize * ox)); - B biasVal = (biases != nullptr) ? biases[outCh] : B(0); - for (std::size_t batch = 0; batch < dims[3]; ++batch) { - output[oIndex + batch] = biasVal; - } - for (std::size_t inCh = 0; inCh < dims[2]; ++inCh) { - for (std::size_t sx = 0; sx < std::get<4>(params)[0]; ++sx) { - for (std::size_t sy = 0; sy < std::get<4>(params)[1]; ++sy) { - const std::size_t wIndex = - outCh + std::get<3>(params) * (inCh + dims[2] * (sy + std::get<4>(params)[1] * sx)); - std::size_t iIndex = dims[3] * (inCh + dims[2] * ((iy + sy) + dims[1] * (ix + sx))); - for (std::size_t batch = 0; batch < dims[3]; ++batch) { - output[oIndex + batch] += weights[wIndex] * input[iIndex + batch]; - } - } - } - } - } - } - } -*/ - - - // output H size - const std::size_t oxSize = - static_cast<std::size_t>(std::floor(static_cast<float>(dims[2] + std::get<5>(params)[0] + std::get<5>(params)[2] - std::get<4>(params)[0] + std::get<0>(params)[0]) / - static_cast<float>(std::get<0>(params)[0]))); - // output W size - const std::size_t oySize = - static_cast<std::size_t>(std::floor(static_cast<float>(dims[3] + std::get<5>(params)[1] + std::get<5>(params)[3] - std::get<4>(params)[1] + std::get<0>(params)[1]) / - static_cast<float>(std::get<0>(params)[1]))); - - // TODO: kernel computation - // output (batch, outCh, Xout, Yout) - // input (batch, inCh, Xin, Yin) - // weight (outCh, inCh, kernelX, kernelY) - // does not take Dilation parameter into account - using signedsize = std::make_signed<std::size_t>::type; - for (std::size_t batch = 0; batch < dims[0]; ++batch) { - for (std::size_t outCh = 0; outCh < std::get<3>(params); ++outCh) { - const std::size_t oIndex = (outCh + batch*std::get<3>(params)) * oxSize * oySize; - B biasVal = (biases != nullptr) ? 
biases[outCh] : B(0); - std::fill(output + oIndex, output+(oIndex+oxSize*oySize), biasVal); - for (std::size_t inCh = 0; inCh < dims[1]; ++inCh) { - const std::size_t iIndex = (inCh + batch*dims[1]) * dims[2] * dims[3]; - const std::size_t wIndex = (inCh + outCh*dims[1]) * std::get<4>(params)[0] * std::get<4>(params)[1]; - for (std::size_t ox = 0; ox < oxSize; ++ox) { - const signedsize difx = static_cast<signedsize>(std::get<5>(params)[0] - ox * std::get<0>(params)[0]); - const std::size_t sxMin = static_cast<std::size_t>(std::max(difx, signedsize(0))); - const std::size_t sxMax = (dims[2] + difx) < 0 ? 0 : ((dims[2] + difx) > std::get<4>(params)[0] ? std::get<4>(params)[0] : dims[2] + difx); - for (std::size_t oy = 0; oy < oySize; ++oy) { - const signedsize dify = static_cast<signedsize>(std::get<5>(params)[1] - oy * std::get<0>(params)[1]); - const std::size_t syMin = static_cast<std::size_t>(std::max(dify, signedsize(0))); - const std::size_t syMax = (dims[3] + dify) < 0 ? 0 : ((dims[3] + dify) > std::get<4>(params)[1] ? std::get<4>(params)[1] : dims[3] + dify); - const std::size_t oIndexFull = oIndex + ox*oySize + oy; - const signedsize ix = static_cast<signedsize>(ox * std::get<0>(params)[0]) - std::get<5>(params)[0]; - const signedsize iy = static_cast<signedsize>(oy * std::get<0>(params)[1]) - std::get<5>(params)[1]; - - if (sxMin == 0 && syMin == 0 && sxMax == 3 && syMax == 3) { - output[oIndexFull] += (weights[wIndex + 0*std::get<4>(params)[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+0)*dims[3] + static_cast<std::size_t>(iy+0)] + - weights[wIndex + 0*std::get<4>(params)[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+0)*dims[3] + static_cast<std::size_t>(iy+1)] + - weights[wIndex + 0*std::get<4>(params)[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+0)*dims[3] + static_cast<std::size_t>(iy+2)] + - weights[wIndex + 1*std::get<4>(params)[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+1)*dims[3] + static_cast<std::size_t>(iy+0)] + - weights[wIndex + 1*std::get<4>(params)[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+1)*dims[3] + static_cast<std::size_t>(iy+1)] + - weights[wIndex + 1*std::get<4>(params)[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+1)*dims[3] + static_cast<std::size_t>(iy+2)] + - weights[wIndex + 2*std::get<4>(params)[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+2)*dims[3] + static_cast<std::size_t>(iy+0)] + - weights[wIndex + 2*std::get<4>(params)[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+2)*dims[3] + static_cast<std::size_t>(iy+1)] + - weights[wIndex + 2*std::get<4>(params)[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+2)*dims[3] + static_cast<std::size_t>(iy+2)]); - } else { - for (std::size_t sx = sxMin; sx < sxMax; ++sx) { - for (std::size_t sy = syMin; sy < syMax; ++sy) { - output[oIndexFull] += weights[wIndex + sx*std::get<4>(params)[1] + sy] * - input[iIndex + static_cast<std::size_t>(ix+static_cast<signedsize>(sx))*dims[3] + static_cast<std::size_t>(iy+static_cast<signedsize>(sy))]; - } - } - } - } - } - } - } - } -} - -namespace { -static Registrar<ConvImpl2DForward_cpu> registrarConvImpl2DForward_cpu_Float32( - {DataType::Float32, DataType::Float32, DataType::Float32, DataType::Float32}, - Aidge::ConvImpl2D_cpu_forward_kernel<float, float, float, float>); -static Registrar<ConvImpl2DForward_cpu> registrarConvImpl2DForward_cpu_Int32( - {DataType::Int32, DataType::Int32, DataType::Int32, DataType::Int32}, - Aidge::ConvImpl2D_cpu_forward_kernel<int, int, int, int>); -static 
Registrar<ConvImpl2DForward_cpu> registrarConvImpl2DForward_cpu_Float64( - {DataType::Float64, DataType::Float64, DataType::Float64, DataType::Float64}, - Aidge::ConvImpl2D_cpu_forward_kernel<double, double, double, double>); -} // namespace -} // namespace Aidge - -#endif /* __AIDGE_CPU_OPERATOR_CONVIMPL_FORWARD_KERNEL_H__ */ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. + * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#ifndef AIDGE_CPU_OPERATOR_CONVIMPL_FORWARD_KERNEL_H_ +#define AIDGE_CPU_OPERATOR_CONVIMPL_FORWARD_KERNEL_H_ + +#include "aidge/utils/Registrar.hpp" + +#include "aidge/backend/cpu/operator/ConvImpl.hpp" +#include "aidge/utils/Types.h" +#include <cmath> +#include <array> +#include <algorithm> + +namespace Aidge { +/** + * @brief Forward kernel for 2D Convolution on CPU backend. + * @tparam I Input data type. + * @tparam W Weight data type. + * @tparam B Bias data type. + * @tparam O Output data type. + * @param params tuple of Parameters from the Operator + * @param dims Array of input dimensions. + * @param input_ const input Tensor. + * @param weights_ const weight Tensor. + * @param biases_ const Biais Tensor. + * @param output_ Output Tensor. + */ +template <class I, class W, class B, class O> +void ConvImpl2D_cpu_forward_kernel(const Conv_Op<2>::Parameters ¶ms, const std::array<DimSize_t, 4> &dims, + const void *input_, const void *weights_, const void *biases_, void *output_) { + // FIXME: missing convolution parameters as arguments + const I *input = static_cast<const I *>(input_); + const W *weights = static_cast<const W *>(weights_); + const B *biases = static_cast<const B *>(biases_); + O *output = static_cast<O *>(output_); +/* + // output H size + const std::size_t oxSize = + static_cast<std::size_t>(static_cast<float>(dims[0] - std::get<4>(params)[0] + std::get<0>(params)[0]) / + static_cast<float>(std::get<0>(params)[0])); + // output W size + const std::size_t oySize = + static_cast<std::size_t>(static_cast<float>(dims[1] - std::get<4>(params)[1] + std::get<0>(params)[1]) / + static_cast<float>(std::get<0>(params)[1])); + + // TODO: kernel computation + // output (Xout, Yout, outCh, batch) + // input (Xin, Yin, inCh, batch) + // weight (kernelX, kernelY, inCh, outCh) + // does not take Dilation parameter into account + for (std::size_t ox = 0; ox < oxSize; ++ox) { + for (std::size_t oy = 0; oy < oySize; ++oy) { + const std::size_t ix = ox * std::get<0>(params)[0]; + const std::size_t iy = oy * std::get<0>(params)[1]; + + for (std::size_t outCh = 0; outCh < std::get<3>(params); ++outCh) { + const std::size_t oIndex = dims[3] * (outCh + std::get<3>(params) * (oy + oySize * ox)); + B biasVal = (biases != nullptr) ? 
biases[outCh] : B(0); + for (std::size_t batch = 0; batch < dims[3]; ++batch) { + output[oIndex + batch] = biasVal; + } + for (std::size_t inCh = 0; inCh < dims[2]; ++inCh) { + for (std::size_t sx = 0; sx < std::get<4>(params)[0]; ++sx) { + for (std::size_t sy = 0; sy < std::get<4>(params)[1]; ++sy) { + const std::size_t wIndex = + outCh + std::get<3>(params) * (inCh + dims[2] * (sy + std::get<4>(params)[1] * sx)); + std::size_t iIndex = dims[3] * (inCh + dims[2] * ((iy + sy) + dims[1] * (ix + sx))); + for (std::size_t batch = 0; batch < dims[3]; ++batch) { + output[oIndex + batch] += weights[wIndex] * input[iIndex + batch]; + } + } + } + } + } + } + } +*/ + + + // output H size + const std::size_t oxSize = + static_cast<std::size_t>(std::floor(static_cast<float>(dims[2] + std::get<5>(params)[0] + std::get<5>(params)[2] - std::get<4>(params)[0] + std::get<0>(params)[0]) / + static_cast<float>(std::get<0>(params)[0]))); + // output W size + const std::size_t oySize = + static_cast<std::size_t>(std::floor(static_cast<float>(dims[3] + std::get<5>(params)[1] + std::get<5>(params)[3] - std::get<4>(params)[1] + std::get<0>(params)[1]) / + static_cast<float>(std::get<0>(params)[1]))); + + // TODO: kernel computation + // output (batch, outCh, Xout, Yout) + // input (batch, inCh, Xin, Yin) + // weight (outCh, inCh, kernelX, kernelY) + // does not take Dilation parameter into account + using signedsize = std::make_signed<std::size_t>::type; + for (std::size_t batch = 0; batch < dims[0]; ++batch) { + for (std::size_t outCh = 0; outCh < std::get<3>(params); ++outCh) { + const std::size_t oIndex = (outCh + batch*std::get<3>(params)) * oxSize * oySize; + B biasVal = (biases != nullptr) ? biases[outCh] : B(0); + std::fill(output + oIndex, output+(oIndex+oxSize*oySize), biasVal); + for (std::size_t inCh = 0; inCh < dims[1]; ++inCh) { + const std::size_t iIndex = (inCh + batch*dims[1]) * dims[2] * dims[3]; + const std::size_t wIndex = (inCh + outCh*dims[1]) * std::get<4>(params)[0] * std::get<4>(params)[1]; + for (std::size_t ox = 0; ox < oxSize; ++ox) { + const signedsize difx = static_cast<signedsize>(std::get<5>(params)[0] - ox * std::get<0>(params)[0]); + const std::size_t sxMin = static_cast<std::size_t>(std::max(difx, signedsize(0))); + const std::size_t sxMax = (static_cast<signedsize>(dims[2]) + difx) < 0 ? 0 : ((dims[2] + difx) > std::get<4>(params)[0] ? std::get<4>(params)[0] : dims[2] + difx); + for (std::size_t oy = 0; oy < oySize; ++oy) { + const signedsize dify = static_cast<signedsize>(std::get<5>(params)[1] - oy * std::get<0>(params)[1]); + const std::size_t syMin = static_cast<std::size_t>(std::max(dify, signedsize(0))); + const std::size_t syMax = (static_cast<signedsize>(dims[3]) + dify) < 0 ? 0 : ((dims[3] + dify) > std::get<4>(params)[1] ? 
std::get<4>(params)[1] : dims[3] + dify); + const std::size_t oIndexFull = oIndex + ox*oySize + oy; + const signedsize ix = static_cast<signedsize>(ox * std::get<0>(params)[0]) - std::get<5>(params)[0]; + const signedsize iy = static_cast<signedsize>(oy * std::get<0>(params)[1]) - std::get<5>(params)[1]; + + if (sxMin == 0 && syMin == 0 && sxMax == 3 && syMax == 3) { + output[oIndexFull] += (weights[wIndex + 0*std::get<4>(params)[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+0)*dims[3] + static_cast<std::size_t>(iy+0)] + + weights[wIndex + 0*std::get<4>(params)[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+0)*dims[3] + static_cast<std::size_t>(iy+1)] + + weights[wIndex + 0*std::get<4>(params)[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+0)*dims[3] + static_cast<std::size_t>(iy+2)] + + weights[wIndex + 1*std::get<4>(params)[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+1)*dims[3] + static_cast<std::size_t>(iy+0)] + + weights[wIndex + 1*std::get<4>(params)[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+1)*dims[3] + static_cast<std::size_t>(iy+1)] + + weights[wIndex + 1*std::get<4>(params)[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+1)*dims[3] + static_cast<std::size_t>(iy+2)] + + weights[wIndex + 2*std::get<4>(params)[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+2)*dims[3] + static_cast<std::size_t>(iy+0)] + + weights[wIndex + 2*std::get<4>(params)[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+2)*dims[3] + static_cast<std::size_t>(iy+1)] + + weights[wIndex + 2*std::get<4>(params)[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+2)*dims[3] + static_cast<std::size_t>(iy+2)]); + } else { + for (std::size_t sx = sxMin; sx < sxMax; ++sx) { + for (std::size_t sy = syMin; sy < syMax; ++sy) { + output[oIndexFull] += weights[wIndex + sx*std::get<4>(params)[1] + sy] * + input[iIndex + static_cast<std::size_t>(ix+static_cast<signedsize>(sx))*dims[3] + static_cast<std::size_t>(iy+static_cast<signedsize>(sy))]; + } + } + } + } + } + } + } + } +} + +namespace { +static Registrar<ConvImpl2DForward_cpu> registrarConvImpl2DForward_cpu_Float32( + {DataType::Float32, DataType::Float32, DataType::Float32, DataType::Float32}, + Aidge::ConvImpl2D_cpu_forward_kernel<float, float, float, float>); +static Registrar<ConvImpl2DForward_cpu> registrarConvImpl2DForward_cpu_Int32( + {DataType::Int32, DataType::Int32, DataType::Int32, DataType::Int32}, + Aidge::ConvImpl2D_cpu_forward_kernel<int, int, int, int>); +static Registrar<ConvImpl2DForward_cpu> registrarConvImpl2DForward_cpu_Float64( + {DataType::Float64, DataType::Float64, DataType::Float64, DataType::Float64}, + Aidge::ConvImpl2D_cpu_forward_kernel<double, double, double, double>); +} // namespace +} // namespace Aidge + +#endif /* AIDGE_CPU_OPERATOR_CONVIMPL_FORWARD_KERNEL_H_ */ diff --git a/include/aidge/operator/FCImpl.hpp b/include/aidge/backend/cpu/operator/FCImpl.hpp similarity index 88% rename from include/aidge/operator/FCImpl.hpp rename to include/aidge/backend/cpu/operator/FCImpl.hpp index 44f53a57f0cffe6717661c4d7f96647682b25571..c69cc0b08a58877108c78d6f12c29e9089c2f665 100644 --- a/include/aidge/operator/FCImpl.hpp +++ b/include/aidge/backend/cpu/operator/FCImpl.hpp @@ -9,8 +9,8 @@ * ********************************************************************************/ -#ifndef __AIDGE_CPU_OPERATOR_FCIMPL_H__ -#define __AIDGE_CPU_OPERATOR_FCIMPL_H__ +#ifndef AIDGE_CPU_OPERATOR_FCIMPL_H_ +#define AIDGE_CPU_OPERATOR_FCIMPL_H_ #include "aidge/backend/OperatorImpl.hpp" #include 
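For readers following the retained convolution kernel above: the output spatial sizes oxSize and oySize come from the usual strided, zero-padded convolution formula (dilation is deliberately ignored, as the TODO notes), and the sxMin/sxMax and syMin/syMax bounds clamp the kernel window to the part that actually overlaps the input, so border positions simply skip the padded taps; the fully unrolled branch is a fast path for a 3x3 window lying entirely inside the input. A minimal standalone sketch of the size computation, using hypothetical names (inSize, kernelSize, stride, padBegin, padEnd) in place of the tuple accesses:

#include <cstddef>

// Output length along one spatial axis of a strided, zero-padded convolution,
// dilation ignored, mirroring the floor division used by the kernel above.
std::size_t convOutSize(std::size_t inSize, std::size_t kernelSize,
                        std::size_t stride, std::size_t padBegin, std::size_t padEnd) {
    return (inSize + padBegin + padEnd - kernelSize + stride) / stride;
}

// Example: convOutSize(224, 3, 1, 1, 1) == 224 ("same" output with a 3x3 kernel),
//          convOutSize(224, 3, 2, 1, 1) == 112.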
"aidge/operator/FC.hpp" @@ -47,10 +47,10 @@ class FCImpl_cpu : public OperatorImpl { public: NbElts_t getNbRequiredData(const IOIndex_t inputIdx) const override final; NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final; - NbElts_t getRequiredMemory(__attribute__((unused)) const IOIndex_t outputIdx, __attribute__((unused)) const std::vector<DimSize_t> &inputsSize) const override final; + NbElts_t getRequiredMemory(const IOIndex_t /*outputIdx*/, const std::vector<DimSize_t> &/*inputsSize*/) const override final; NbElts_t getNbConsumedData(const IOIndex_t inputIdx) const override final; NbElts_t getNbProducedData(const IOIndex_t outputIdx) const override final; - + void updateConsummerProducer() override final; void forward(); void backward(); @@ -61,4 +61,4 @@ static Registrar<FC_Op> registrarFCImpl_cpu("cpu", Aidge::FCImpl_cpu::create); } } // namespace Aidge -#endif /* __AIDGE_CPU_OPERATOR_FCIMPL_H__ */ \ No newline at end of file +#endif /* AIDGE_CPU_OPERATOR_FCIMPL_H_ */ diff --git a/include/aidge/operator/FCImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/FCImpl_forward_kernels.hpp similarity index 94% rename from include/aidge/operator/FCImpl_forward_kernels.hpp rename to include/aidge/backend/cpu/operator/FCImpl_forward_kernels.hpp index a481e2d5f80ec9c722af7f00b688003c12a4e35a..d6acb7dfea3415a8d67384745e16ecdd8bf06324 100644 --- a/include/aidge/operator/FCImpl_forward_kernels.hpp +++ b/include/aidge/backend/cpu/operator/FCImpl_forward_kernels.hpp @@ -1,128 +1,128 @@ -/******************************************************************************** - * Copyright (c) 2023 CEA-List - * - * This program and the accompanying materials are made available under the - * terms of the Eclipse Public License 2.0 which is available at - * http://www.eclipse.org/legal/epl-2.0. - * - * SPDX-License-Identifier: EPL-2.0 - * - ********************************************************************************/ - -#ifndef __AIDGE_CPU_OPERATOR_FCIMPL_FORWARD_KERNEL_H__ -#define __AIDGE_CPU_OPERATOR_FCIMPL_FORWARD_KERNEL_H__ - -#include "aidge/utils/Registrar.hpp" -#include <algorithm> - -#include "aidge/operator/FCImpl.hpp" - -namespace Aidge { -// template <class I, class W, class B, class O> -// void FCImpl_cpu_forward_kernel(const FC_Op::Parameters& params, const std::array<DimSize_t, 4>& dims, -// const void* input_, const void* weights_, const void* biases_, void* output_) { -// // FIXME: missing FC parameters as arguments -// const I* input = static_cast<const I*>(input_); -// const W* weights = static_cast<const W*>(weights_); -// const B* biases = static_cast<const B*>(biases_); -// O* output = static_cast<O*>(output_); - -// for (std::size_t outIdx = 0; outIdx < std::get<0>(params); ++outIdx) { -// std::size_t oIndex = outIdx * dims[3]; -// const B bias = std::get<1>(params) ? 
B(0) : biases[outIdx]; -// for (std::size_t batch = 0; batch < dims[3]; ++batch) { -// output[oIndex + batch] = bias; -// } -// } - -// for (std::size_t ix = 0; ix < dims[0]; ++ix) { -// for (std::size_t iy = 0; iy < dims[1]; ++iy) { -// for (std::size_t inCh = 0; inCh < dims[2]; ++inCh) { -// const std::size_t iIndex = dims[3] * (inCh + dims[2] * (iy + dims[1] * ix)); -// for (std::size_t outCh = 0; outCh < std::get<0>(params); ++outCh) { -// const std::size_t oIndex = dims[3] * outCh; -// const std::size_t wIndex = (inCh + dims[2] * (iy + dims[1] * ix)) * std::get<0>(params) + -// outCh; // (iIndex*std::get<0>(params) + oIndex)/dims[3]; -// for (std::size_t batch = 0; batch < dims[3]; ++batch) { -// output[oIndex + batch] += weights[wIndex] * input[iIndex + batch]; -// } -// } -// } -// } -// } -// } - -// template <class I, class W, class B, class O> -// void FCImpl_cpu_forward_kernel(const FC_Op::Parameters& params, const std::array<DimSize_t, 2>& dims, -// const void* input_, const void* weights_, const void* biases_, void* output_) { -// // FIXME: missing FC parameters as arguments -// const I* input = static_cast<const I*>(input_); -// const W* weights = static_cast<const W*>(weights_); -// const B* biases = static_cast<const B*>(biases_); -// O* output = static_cast<O*>(output_); - -// // let's have I.dims() = [N, C, H, W] instead of [H, W, C, N] - -// for (std::size_t outIdx = 0; outIdx < std::get<0>(params); ++outIdx) { -// std::size_t oIndex = outIdx * dims[0]; -// const B bias = std::get<1>(params) ? B(0) : biases[outIdx]; -// for (std::size_t batch = 0; batch < dims[0]; ++batch) { -// output[oIndex + batch] = bias; -// } -// } - -// for (std::size_t batch = 0; batch < dims[0]; ++batch) { -// const std::size_t oIndex = dims[1] * batch; -// for (std::size_t i = 0; i < dims[1]; ++i) { -// for (std::size_t outCh = 0; outCh < std::get<0>(params); ++outCh) { -// std::size_t wIndex = i * std::get<0>(params) + outCh; // (iIndex*std::get<0>(params) + oIndex)/dims[3]; -// output[oIndex + outCh] += weights[wIndex] * input[i + batch]; -// } -// } -// } -// } - -template <class I, class W, class B, class O> -void FCImpl_cpu_forward_kernel(const FC_Op::Parameters& params, const DimSize_t batchSize, const DimSize_t oneInputSize, - const void* input_, const void* weights_, const void* biases_, void* output_) { - // FIXME: missing FC parameters as arguments - const I* input = static_cast<const I*>(input_); - const W* weights = static_cast<const W*>(weights_); - const B* biases = static_cast<const B*>(biases_); - O* output = static_cast<O*>(output_); - - if (std::get<1>(params)) { - std::fill(output, output+(batchSize*std::get<0>(params)), B(0)); - } - else { - for (std::size_t batch = 0; batch < batchSize; ++batch) { - std::copy(biases, biases+std::get<0>(params), output+(batch*std::get<0>(params))); - } - } - - for (std::size_t batch = 0; batch < batchSize; ++batch) { - for (std::size_t out = 0; out < std::get<0>(params); ++out) { - output[out + batch*std::get<0>(params)] = std::inner_product(input + batch*oneInputSize, - input + (batch + 1)*oneInputSize, - weights + out*oneInputSize, - output[out + batch*std::get<0>(params)]); - } - } -} - - -namespace { -static Registrar<FCImplForward_cpu> registrarFCImpl2DForward_cpu_Float32( - {DataType::Float32, DataType::Float32, DataType::Float32, DataType::Float32}, - Aidge::FCImpl_cpu_forward_kernel<float, float, float, float>); -static Registrar<FCImplForward_cpu> registrarFCImpl2DForward_cpu_Int32( - {DataType::Int32, DataType::Int32, 
DataType::Int32, DataType::Int32}, - Aidge::FCImpl_cpu_forward_kernel<int, int, int, int>); -static Registrar<FCImplForward_cpu> registrarFCImpl2DForward_cpu_Float64( - {DataType::Float64, DataType::Float64, DataType::Float64, DataType::Float64}, - Aidge::FCImpl_cpu_forward_kernel<double, double, double, double>); -} // namespace - -} // namespace Aidge - -#endif /* __AIDGE_CPU_OPERATOR_FCIMPL_FORWARD_KERNEL_H__ */ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. + * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#ifndef AIDGE_CPU_OPERATOR_FCIMPL_FORWARD_KERNEL_H_ +#define AIDGE_CPU_OPERATOR_FCIMPL_FORWARD_KERNEL_H_ + +#include "aidge/utils/Registrar.hpp" +#include <algorithm> + +#include "aidge/backend/cpu/operator/FCImpl.hpp" + +namespace Aidge { +// template <class I, class W, class B, class O> +// void FCImpl_cpu_forward_kernel(const FC_Op::Parameters& params, const std::array<DimSize_t, 4>& dims, +// const void* input_, const void* weights_, const void* biases_, void* output_) { +// // FIXME: missing FC parameters as arguments +// const I* input = static_cast<const I*>(input_); +// const W* weights = static_cast<const W*>(weights_); +// const B* biases = static_cast<const B*>(biases_); +// O* output = static_cast<O*>(output_); + +// for (std::size_t outIdx = 0; outIdx < std::get<0>(params); ++outIdx) { +// std::size_t oIndex = outIdx * dims[3]; +// const B bias = std::get<1>(params) ? B(0) : biases[outIdx]; +// for (std::size_t batch = 0; batch < dims[3]; ++batch) { +// output[oIndex + batch] = bias; +// } +// } + +// for (std::size_t ix = 0; ix < dims[0]; ++ix) { +// for (std::size_t iy = 0; iy < dims[1]; ++iy) { +// for (std::size_t inCh = 0; inCh < dims[2]; ++inCh) { +// const std::size_t iIndex = dims[3] * (inCh + dims[2] * (iy + dims[1] * ix)); +// for (std::size_t outCh = 0; outCh < std::get<0>(params); ++outCh) { +// const std::size_t oIndex = dims[3] * outCh; +// const std::size_t wIndex = (inCh + dims[2] * (iy + dims[1] * ix)) * std::get<0>(params) + +// outCh; // (iIndex*std::get<0>(params) + oIndex)/dims[3]; +// for (std::size_t batch = 0; batch < dims[3]; ++batch) { +// output[oIndex + batch] += weights[wIndex] * input[iIndex + batch]; +// } +// } +// } +// } +// } +// } + +// template <class I, class W, class B, class O> +// void FCImpl_cpu_forward_kernel(const FC_Op::Parameters& params, const std::array<DimSize_t, 2>& dims, +// const void* input_, const void* weights_, const void* biases_, void* output_) { +// // FIXME: missing FC parameters as arguments +// const I* input = static_cast<const I*>(input_); +// const W* weights = static_cast<const W*>(weights_); +// const B* biases = static_cast<const B*>(biases_); +// O* output = static_cast<O*>(output_); + +// // let's have I.dims() = [N, C, H, W] instead of [H, W, C, N] + +// for (std::size_t outIdx = 0; outIdx < std::get<0>(params); ++outIdx) { +// std::size_t oIndex = outIdx * dims[0]; +// const B bias = std::get<1>(params) ? 
B(0) : biases[outIdx]; +// for (std::size_t batch = 0; batch < dims[0]; ++batch) { +// output[oIndex + batch] = bias; +// } +// } + +// for (std::size_t batch = 0; batch < dims[0]; ++batch) { +// const std::size_t oIndex = dims[1] * batch; +// for (std::size_t i = 0; i < dims[1]; ++i) { +// for (std::size_t outCh = 0; outCh < std::get<0>(params); ++outCh) { +// std::size_t wIndex = i * std::get<0>(params) + outCh; // (iIndex*std::get<0>(params) + oIndex)/dims[3]; +// output[oIndex + outCh] += weights[wIndex] * input[i + batch]; +// } +// } +// } +// } + +template <class I, class W, class B, class O> +void FCImpl_cpu_forward_kernel(const FC_Op::Parameters& params, const DimSize_t batchSize, const DimSize_t oneInputSize, + const void* input_, const void* weights_, const void* biases_, void* output_) { + // FIXME: missing FC parameters as arguments + const I* input = static_cast<const I*>(input_); + const W* weights = static_cast<const W*>(weights_); + const B* biases = static_cast<const B*>(biases_); + O* output = static_cast<O*>(output_); + + if (std::get<1>(params)) { + std::fill(output, output+(batchSize*std::get<0>(params)), B(0)); + } + else { + for (std::size_t batch = 0; batch < batchSize; ++batch) { + std::copy(biases, biases+std::get<0>(params), output+(batch*std::get<0>(params))); + } + } + + for (std::size_t batch = 0; batch < batchSize; ++batch) { + for (std::size_t out = 0; out < std::get<0>(params); ++out) { + output[out + batch*std::get<0>(params)] = std::inner_product(input + batch*oneInputSize, + input + (batch + 1)*oneInputSize, + weights + out*oneInputSize, + output[out + batch*std::get<0>(params)]); + } + } +} + + +namespace { +static Registrar<FCImplForward_cpu> registrarFCImpl2DForward_cpu_Float32( + {DataType::Float32, DataType::Float32, DataType::Float32, DataType::Float32}, + Aidge::FCImpl_cpu_forward_kernel<float, float, float, float>); +static Registrar<FCImplForward_cpu> registrarFCImpl2DForward_cpu_Int32( + {DataType::Int32, DataType::Int32, DataType::Int32, DataType::Int32}, + Aidge::FCImpl_cpu_forward_kernel<int, int, int, int>); +static Registrar<FCImplForward_cpu> registrarFCImpl2DForward_cpu_Float64( + {DataType::Float64, DataType::Float64, DataType::Float64, DataType::Float64}, + Aidge::FCImpl_cpu_forward_kernel<double, double, double, double>); +} // namespace + +} // namespace Aidge + +#endif /* AIDGE_CPU_OPERATOR_FCIMPL_FORWARD_KERNEL_H_ */ diff --git a/include/aidge/operator/LeakyReLUImpl.hpp b/include/aidge/backend/cpu/operator/LeakyReLUImpl.hpp similarity index 86% rename from include/aidge/operator/LeakyReLUImpl.hpp rename to include/aidge/backend/cpu/operator/LeakyReLUImpl.hpp index dd5bc4d9452374049ab3753a0331befa9b76d2e7..abe167bea16de01f861beb9701f747d39f265d9d 100644 --- a/include/aidge/operator/LeakyReLUImpl.hpp +++ b/include/aidge/backend/cpu/operator/LeakyReLUImpl.hpp @@ -9,8 +9,8 @@ * ********************************************************************************/ -#ifndef __AIDGE_CPU_OPERATOR_LEAKYRELUIMPL_H__ -#define __AIDGE_CPU_OPERATOR_LEAKYRELUIMPL_H__ +#ifndef AIDGE_CPU_OPERATOR_LEAKYRELUIMPL_H_ +#define AIDGE_CPU_OPERATOR_LEAKYRELUIMPL_H_ #include "aidge/backend/OperatorImpl.hpp" #include "aidge/operator/LeakyReLU.hpp" @@ -46,10 +46,10 @@ class LeakyReLUImpl_cpu : public OperatorImpl { public: NbElts_t getNbRequiredData(const IOIndex_t inputIdx) const override final; NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final; - NbElts_t getRequiredMemory(__attribute__((unused)) const IOIndex_t outputIdx, 
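The fully connected kernel kept above seeds each output with its bias (or zero when the no-bias parameter is set) and then accumulates a dot product between one flattened input sample and one weight row via std::inner_product. A minimal sketch of the same computation with explicit, hypothetical names (batchSize, inSize, outSize) instead of the parameter tuple:

#include <cstddef>
#include <numeric>
#include <vector>

// out[b*outSize + o] = bias[o] + dot(in[b*inSize .. (b+1)*inSize), w[o*inSize .. ))
void denseForward(const std::vector<float>& in, const std::vector<float>& w,
                  const std::vector<float>& bias, std::vector<float>& out,
                  std::size_t batchSize, std::size_t inSize, std::size_t outSize) {
    for (std::size_t b = 0; b < batchSize; ++b) {
        for (std::size_t o = 0; o < outSize; ++o) {
            out[b * outSize + o] = std::inner_product(
                in.begin() + b * inSize, in.begin() + (b + 1) * inSize,
                w.begin() + o * inSize, bias.empty() ? 0.0f : bias[o]);
        }
    }
}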
__attribute__((unused)) const std::vector<DimSize_t>& inputsSize) const override final; + NbElts_t getRequiredMemory(const IOIndex_t /*outputIdx*/, const std::vector<DimSize_t>& /*inputsSize*/) const override final; NbElts_t getNbConsumedData(const IOIndex_t inputIdx) const override final; NbElts_t getNbProducedData(const IOIndex_t outputIdx) const override final; - + void updateConsummerProducer() override final; void forward(); void backward(); @@ -60,4 +60,4 @@ static Registrar<LeakyReLU_Op> registrarLeakyReLUImpl_cpu("cpu", Aidge::LeakyReL } } // namespace Aidge -#endif /* __AIDGE_CPU_OPERATOR_LEAKYRELUIMPL_H__ */ \ No newline at end of file +#endif /* AIDGE_CPU_OPERATOR_LEAKYRELUIMPL_H_ */ diff --git a/include/aidge/operator/LeakyReLUImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/LeakyReLUImpl_forward_kernels.hpp similarity index 85% rename from include/aidge/operator/LeakyReLUImpl_forward_kernels.hpp rename to include/aidge/backend/cpu/operator/LeakyReLUImpl_forward_kernels.hpp index e41a8f20ebd3c405f7adbc9ed4ded3080c9688ce..ff9a8ac6a8f968f244429b330401d794f16fac01 100644 --- a/include/aidge/operator/LeakyReLUImpl_forward_kernels.hpp +++ b/include/aidge/backend/cpu/operator/LeakyReLUImpl_forward_kernels.hpp @@ -1,45 +1,45 @@ -/******************************************************************************** - * Copyright (c) 2023 CEA-List - * - * This program and the accompanying materials are made available under the - * terms of the Eclipse Public License 2.0 which is available at - * http://www.eclipse.org/legal/epl-2.0. - * - * SPDX-License-Identifier: EPL-2.0 - * - ********************************************************************************/ - -#ifndef __AIDGE_CPU_OPERATOR_LEAKYRELUIMPL_FORWARD_KERNEL_H__ -#define __AIDGE_CPU_OPERATOR_LEAKYRELUIMPL_FORWARD_KERNEL_H__ - -#include "aidge/utils/Registrar.hpp" - -#include "aidge/operator/LeakyReLUImpl.hpp" - -namespace Aidge { -template <class I, class O> -void LeakyReLUImpl_cpu_forward_kernel(const LeakyReLU_Op::Parameters& params, - std::size_t inputLenght, - const void* input_, - void* output_) { - - const I* input = static_cast<const I*>(input_); - O* output = static_cast<O*>(output_); - I negativeSlope = static_cast<I>(std::get<0>(params)); - - for (std::size_t i = 0; i < inputLenght; ++i) { - output[i] = input[i] >= 0 ? input[i] : input[i] * negativeSlope; - } -} - -namespace { -static Registrar<LeakyReLUImplForward_cpu> registrarLeakyReLUImplForward_cpu_Float32( - {DataType::Float32, DataType::Float32}, Aidge::LeakyReLUImpl_cpu_forward_kernel<float, float>); -static Registrar<LeakyReLUImplForward_cpu> registrarLeakyReLUImplForward_cpu_Int32( - {DataType::Int32, DataType::Int32}, Aidge::LeakyReLUImpl_cpu_forward_kernel<int, int>); -static Registrar<LeakyReLUImplForward_cpu> registrarLeakyReLUImplForward_cpu_Float64( - {DataType::Float64, DataType::Float64}, Aidge::LeakyReLUImpl_cpu_forward_kernel<double, double>); -} // namespace -} // namespace Aidge - -#endif /* __AIDGE_CPU_OPERATOR_LEAKYRELUIMPL_FORWARD_KERNEL_H__ */ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. 
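The signature changes above (and the matching (void) casts added in the .cpp files further below) replace __attribute__((unused)), a GCC/Clang extension, with portable ways of silencing unused-parameter warnings. A small illustration with hypothetical function names:

#include <cassert>

// Portable: keep the parameter type, comment out the name.
int getNbElements(int /*outputIdx*/) { return 0; }

// Also portable when the name is still needed inside an assert(): cast it to void,
// so no warning appears in release builds where assert() compiles away.
int getNbElementsChecked(int outputIdx) {
    assert(outputIdx == 0 && "single-output operator");
    (void) outputIdx;
    return 0;
}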
+ * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#ifndef AIDGE_CPU_OPERATOR_LEAKYRELUIMPL_FORWARD_KERNEL_H_ +#define AIDGE_CPU_OPERATOR_LEAKYRELUIMPL_FORWARD_KERNEL_H_ + +#include "aidge/utils/Registrar.hpp" + +#include "aidge/backend/cpu/operator/LeakyReLUImpl.hpp" + +namespace Aidge { +template <class I, class O> +void LeakyReLUImpl_cpu_forward_kernel(const LeakyReLU_Op::Parameters& params, + std::size_t inputLenght, + const void* input_, + void* output_) { + + const I* input = static_cast<const I*>(input_); + O* output = static_cast<O*>(output_); + I negativeSlope = static_cast<I>(std::get<0>(params)); + + for (std::size_t i = 0; i < inputLenght; ++i) { + output[i] = input[i] >= 0 ? input[i] : input[i] * negativeSlope; + } +} + +namespace { +static Registrar<LeakyReLUImplForward_cpu> registrarLeakyReLUImplForward_cpu_Float32( + {DataType::Float32, DataType::Float32}, Aidge::LeakyReLUImpl_cpu_forward_kernel<float, float>); +static Registrar<LeakyReLUImplForward_cpu> registrarLeakyReLUImplForward_cpu_Int32( + {DataType::Int32, DataType::Int32}, Aidge::LeakyReLUImpl_cpu_forward_kernel<int, int>); +static Registrar<LeakyReLUImplForward_cpu> registrarLeakyReLUImplForward_cpu_Float64( + {DataType::Float64, DataType::Float64}, Aidge::LeakyReLUImpl_cpu_forward_kernel<double, double>); +} // namespace +} // namespace Aidge + +#endif /* AIDGE_CPU_OPERATOR_LEAKYRELUIMPL_FORWARD_KERNEL_H_ */ diff --git a/include/aidge/operator/ProducerImpl.hpp b/include/aidge/backend/cpu/operator/ProducerImpl.hpp similarity index 82% rename from include/aidge/operator/ProducerImpl.hpp rename to include/aidge/backend/cpu/operator/ProducerImpl.hpp index d1376df3572c986f3c2369c72141680ab6291b0b..032172dbf0995fc62ce631aa5eba1cabf2374ad3 100644 --- a/include/aidge/operator/ProducerImpl.hpp +++ b/include/aidge/backend/cpu/operator/ProducerImpl.hpp @@ -9,8 +9,8 @@ * ********************************************************************************/ -#ifndef __AIDGE_CPU_OPERATOR_PRODUCERIMPL_H__ -#define __AIDGE_CPU_OPERATOR_PRODUCERIMPL_H__ +#ifndef AIDGE_CPU_OPERATOR_PRODUCERIMPL_H_ +#define AIDGE_CPU_OPERATOR_PRODUCERIMPL_H_ #include <memory> @@ -34,9 +34,10 @@ class ProducerImpl_cpu : public OperatorImpl { public: NbElts_t getNbRequiredData(const IOIndex_t inputIdx) const override final; NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final; - NbElts_t getRequiredMemory(__attribute__((unused)) const IOIndex_t outputIdx, __attribute__((unused)) const std::vector<DimSize_t> &inputsSize) const override final; + NbElts_t getRequiredMemory(const IOIndex_t /*outputIdx*/, const std::vector<DimSize_t> &/*inputsSize*/) const override final; NbElts_t getNbConsumedData(const IOIndex_t inputIdx) const override final; NbElts_t getNbProducedData(const IOIndex_t outputIdx) const override final; + void updateConsummerProducer() override final; void forward(); @@ -48,4 +49,4 @@ static Registrar<Producer_Op> registrarProducer1DImpl_cpu("cpu", Aidge::Producer } // namespace } // namespace Aidge -#endif /* __AIDGE_CPU_OPERATOR_PRODUCERIMPL_H__ */ \ No newline at end of file +#endif /* AIDGE_CPU_OPERATOR_PRODUCERIMPL_H_ */ diff --git a/include/aidge/operator/ReLUImpl.hpp b/include/aidge/backend/cpu/operator/ReLUImpl.hpp similarity index 86% rename from include/aidge/operator/ReLUImpl.hpp rename to include/aidge/backend/cpu/operator/ReLUImpl.hpp index 905a76917a25f7db0e65748d28c67ef06f353170..537bdeeaf89b388a82e819330649c2ae3445c590 
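The LeakyReLU kernel retained above applies an elementwise rule, y = x when x >= 0 and y = negativeSlope * x otherwise; plain ReLU (further below) is the same rule with a slope of zero. A minimal sketch over a raw buffer, with hypothetical names:

#include <cstddef>

// Elementwise LeakyReLU: out[i] = in[i] if in[i] >= 0, otherwise slope * in[i].
template <class T>
void leakyRelu(const T* in, T* out, std::size_t n, T slope) {
    for (std::size_t i = 0; i < n; ++i)
        out[i] = in[i] >= T(0) ? in[i] : in[i] * slope;
}

// Example (in place): float x[4] = {-2.f, -0.5f, 0.f, 3.f}; leakyRelu(x, x, 4, 0.01f);
// leaves {-0.02f, -0.005f, 0.f, 3.f}.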
100644 --- a/include/aidge/operator/ReLUImpl.hpp +++ b/include/aidge/backend/cpu/operator/ReLUImpl.hpp @@ -9,8 +9,8 @@ * ********************************************************************************/ -#ifndef __AIDGE_CPU_OPERATOR_RELUIMPL_H__ -#define __AIDGE_CPU_OPERATOR_RELUIMPL_H__ +#ifndef AIDGE_CPU_OPERATOR_RELUIMPL_H_ +#define AIDGE_CPU_OPERATOR_RELUIMPL_H_ #include "aidge/backend/OperatorImpl.hpp" #include "aidge/operator/ReLU.hpp" @@ -46,10 +46,10 @@ class ReLUImpl_cpu : public OperatorImpl { public: NbElts_t getNbRequiredData(const IOIndex_t inputIdx) const override final; NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final; - NbElts_t getRequiredMemory(__attribute__((unused)) const IOIndex_t outputIdx, __attribute__((unused)) const std::vector<DimSize_t>& inputsSize) const override final; + NbElts_t getRequiredMemory(const IOIndex_t /*outputIdx*/, const std::vector<DimSize_t>& /*inputsSize*/) const override final; NbElts_t getNbConsumedData(const IOIndex_t inputIdx) const override final; NbElts_t getNbProducedData(const IOIndex_t outputIdx) const override final; - + void updateConsummerProducer() override final; void forward(); void backward(); @@ -60,4 +60,4 @@ static Registrar<ReLU_Op> registrarReLUImpl_cpu("cpu", Aidge::ReLUImpl_cpu::crea } } // namespace Aidge -#endif /* __AIDGE_CPU_OPERATOR_RELUIMPL_H__ */ \ No newline at end of file +#endif /* AIDGE_CPU_OPERATOR_RELUIMPL_H_ */ diff --git a/include/aidge/operator/ReLUImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/ReLUImpl_forward_kernels.hpp similarity index 84% rename from include/aidge/operator/ReLUImpl_forward_kernels.hpp rename to include/aidge/backend/cpu/operator/ReLUImpl_forward_kernels.hpp index 640455a43791c72fcb4832987e1a035239f746af..955099a6fe76352e6ea692b99a2a2d1561a30a6d 100644 --- a/include/aidge/operator/ReLUImpl_forward_kernels.hpp +++ b/include/aidge/backend/cpu/operator/ReLUImpl_forward_kernels.hpp @@ -1,43 +1,43 @@ -/******************************************************************************** - * Copyright (c) 2023 CEA-List - * - * This program and the accompanying materials are made available under the - * terms of the Eclipse Public License 2.0 which is available at - * http://www.eclipse.org/legal/epl-2.0. - * - * SPDX-License-Identifier: EPL-2.0 - * - ********************************************************************************/ - -#ifndef __AIDGE_CPU_OPERATOR_RELUIMPL_FORWARD_KERNEL_H__ -#define __AIDGE_CPU_OPERATOR_RELUIMPL_FORWARD_KERNEL_H__ - -#include "aidge/utils/Registrar.hpp" - -#include "aidge/operator/ReLUImpl.hpp" - -namespace Aidge { -template <class I, class O> -void ReLUImpl_cpu_forward_kernel(std::size_t inputLenght, - const void* input_, - void* output_) { - - const I* input = static_cast<const I*>(input_); - O* output = static_cast<O*>(output_); - - for (std::size_t i = 0; i < inputLenght; ++i) { - output[i] = input[i] > 0 ? 
input[i] : 0; - } -} - -namespace { -static Registrar<ReLUImplForward_cpu> registrarReLUImplForward_cpu_Float32( - {DataType::Float32, DataType::Float32}, Aidge::ReLUImpl_cpu_forward_kernel<float, float>); -static Registrar<ReLUImplForward_cpu> registrarReLUImplForward_cpu_Int32( - {DataType::Int32, DataType::Int32}, Aidge::ReLUImpl_cpu_forward_kernel<int, int>); -static Registrar<ReLUImplForward_cpu> registrarReLUImplForward_cpu_Float64( - {DataType::Float64, DataType::Float64}, Aidge::ReLUImpl_cpu_forward_kernel<double, double>); -} // namespace -} // namespace Aidge - -#endif /* __AIDGE_CPU_OPERATOR_RELUIMPL_FORWARD_KERNEL_H__ */ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. + * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#ifndef AIDGE_CPU_OPERATOR_RELUIMPL_FORWARD_KERNEL_H_ +#define AIDGE_CPU_OPERATOR_RELUIMPL_FORWARD_KERNEL_H_ + +#include "aidge/utils/Registrar.hpp" + +#include "aidge/backend/cpu/operator/ReLUImpl.hpp" + +namespace Aidge { +template <class I, class O> +void ReLUImpl_cpu_forward_kernel(std::size_t inputLenght, + const void* input_, + void* output_) { + + const I* input = static_cast<const I*>(input_); + O* output = static_cast<O*>(output_); + + for (std::size_t i = 0; i < inputLenght; ++i) { + output[i] = input[i] > 0 ? input[i] : 0; + } +} + +namespace { +static Registrar<ReLUImplForward_cpu> registrarReLUImplForward_cpu_Float32( + {DataType::Float32, DataType::Float32}, Aidge::ReLUImpl_cpu_forward_kernel<float, float>); +static Registrar<ReLUImplForward_cpu> registrarReLUImplForward_cpu_Int32( + {DataType::Int32, DataType::Int32}, Aidge::ReLUImpl_cpu_forward_kernel<int, int>); +static Registrar<ReLUImplForward_cpu> registrarReLUImplForward_cpu_Float64( + {DataType::Float64, DataType::Float64}, Aidge::ReLUImpl_cpu_forward_kernel<double, double>); +} // namespace +} // namespace Aidge + +#endif /* AIDGE_CPU_OPERATOR_RELUIMPL_FORWARD_KERNEL_H_ */ diff --git a/include/aidge/operator/SoftmaxImpl.hpp b/include/aidge/backend/cpu/operator/SoftmaxImpl.hpp similarity index 86% rename from include/aidge/operator/SoftmaxImpl.hpp rename to include/aidge/backend/cpu/operator/SoftmaxImpl.hpp index c4d718bbadf09bfefbd4509ad0b99ffc144b4e61..08567ab98e55233f1f578e82cb39ac5681f0a839 100644 --- a/include/aidge/operator/SoftmaxImpl.hpp +++ b/include/aidge/backend/cpu/operator/SoftmaxImpl.hpp @@ -9,8 +9,8 @@ * ********************************************************************************/ -#ifndef __AIDGE_CPU_OPERATOR_SOFTMAXIMPL_H__ -#define __AIDGE_CPU_OPERATOR_SOFTMAXIMPL_H__ +#ifndef AIDGE_CPU_OPERATOR_SOFTMAXIMPL_H_ +#define AIDGE_CPU_OPERATOR_SOFTMAXIMPL_H_ #include "aidge/backend/OperatorImpl.hpp" #include "aidge/operator/Softmax.hpp" @@ -46,10 +46,10 @@ class SoftmaxImpl_cpu : public OperatorImpl { public: NbElts_t getNbRequiredData(const IOIndex_t inputIdx) const override final; NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final; - NbElts_t getRequiredMemory(__attribute__((unused)) const IOIndex_t outputIdx, __attribute__((unused)) const std::vector<DimSize_t>& inputsSize) const override final; + NbElts_t getRequiredMemory(const IOIndex_t /*outputIdx*/, const std::vector<DimSize_t>& /*inputsSize*/) const override 
final; NbElts_t getNbConsumedData(const IOIndex_t inputIdx) const override final; NbElts_t getNbProducedData(const IOIndex_t outputIdx) const override final; - + void updateConsummerProducer() override final; void forward(); void backward(); @@ -60,4 +60,4 @@ static Registrar<Softmax_Op> registrarSoftmaxImpl_cpu("cpu", Aidge::SoftmaxImpl_ } } // namespace Aidge -#endif /* __AIDGE_CPU_OPERATOR_SOFTMAXIMPL_H__ */ \ No newline at end of file +#endif /* AIDGE_CPU_OPERATOR_SOFTMAXIMPL_H_ */ diff --git a/include/aidge/operator/SoftmaxImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/SoftmaxImpl_forward_kernels.hpp similarity index 88% rename from include/aidge/operator/SoftmaxImpl_forward_kernels.hpp rename to include/aidge/backend/cpu/operator/SoftmaxImpl_forward_kernels.hpp index d1634e28a9b57cf2f2d486237947779b41e121bd..297a3a321667dfc8c5a2bb0e3fc3bebce8825950 100644 --- a/include/aidge/operator/SoftmaxImpl_forward_kernels.hpp +++ b/include/aidge/backend/cpu/operator/SoftmaxImpl_forward_kernels.hpp @@ -1,64 +1,64 @@ -/******************************************************************************** - * Copyright (c) 2023 CEA-List - * - * This program and the accompanying materials are made available under the - * terms of the Eclipse Public License 2.0 which is available at - * http://www.eclipse.org/legal/epl-2.0. - * - * SPDX-License-Identifier: EPL-2.0 - * - ********************************************************************************/ - -#ifndef __AIDGE_CPU_OPERATOR_SOFTMAXIMPL_FORWARD_KERNEL_H__ -#define __AIDGE_CPU_OPERATOR_SOFTMAXIMPL_FORWARD_KERNEL_H__ - -#include "aidge/utils/Registrar.hpp" -#include <cstddef> -#include <cmath> -#include "aidge/data/Data.hpp" -#include "aidge/utils/Types.h" - -#include "aidge/operator/SoftmaxImpl.hpp" - -namespace Aidge { -template <class I, class O> -void SoftmaxImpl_cpu_forward_kernel(const DimSize_t batchSize, - const DimSize_t channelSize, - const DimSize_t featureSize, - const void* input_, - void* output_) { - - const I* input = static_cast<const I*>(input_); - O* output = static_cast<O*>(output_); - - for (std::size_t batch = 0; batch < batchSize; ++batch) { - for (std::size_t feature = 0; feature < featureSize; ++feature) { - std::size_t ioIndex = batch*channelSize*featureSize + feature; - - I sum(0.0); - for (std::size_t ch = 0; ch < channelSize; ++ch) { - output[ioIndex] = std::exp(input[ioIndex]); - sum += output[ioIndex]; - ioIndex+=featureSize; - } - - ioIndex = batch*channelSize*featureSize + feature; - for (std::size_t ch = 0; ch < channelSize; ++ch) { - output[ioIndex] /= sum; - ioIndex += featureSize; - } - } - } -} - -namespace { -static Registrar<SoftmaxImplForward_cpu> registrarSoftmaxImplForward_cpu_Float32( - {DataType::Float32, DataType::Float32}, Aidge::SoftmaxImpl_cpu_forward_kernel<float, float>); -static Registrar<SoftmaxImplForward_cpu> registrarSoftmaxImplForward_cpu_Int32( - {DataType::Int32, DataType::Int32}, Aidge::SoftmaxImpl_cpu_forward_kernel<int, int>); -static Registrar<SoftmaxImplForward_cpu> registrarSoftmaxImplForward_cpu_Float64( - {DataType::Float64, DataType::Float64}, Aidge::SoftmaxImpl_cpu_forward_kernel<double, double>); -} // namespace -} // namespace Aidge - -#endif /* __AIDGE_CPU_OPERATOR_SOFTMAXIMPL_FORWARD_KERNEL_H__ */ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * 
http://www.eclipse.org/legal/epl-2.0. + * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#ifndef AIDGE_CPU_OPERATOR_SOFTMAXIMPL_FORWARD_KERNEL_H_ +#define AIDGE_CPU_OPERATOR_SOFTMAXIMPL_FORWARD_KERNEL_H_ + +#include "aidge/utils/Registrar.hpp" +#include <cstddef> +#include <cmath> +#include "aidge/data/Data.hpp" +#include "aidge/utils/Types.h" + +#include "aidge/backend/cpu/operator/SoftmaxImpl.hpp" + +namespace Aidge { +template <class I, class O> +void SoftmaxImpl_cpu_forward_kernel(const DimSize_t batchSize, + const DimSize_t channelSize, + const DimSize_t featureSize, + const void* input_, + void* output_) { + + const I* input = static_cast<const I*>(input_); + O* output = static_cast<O*>(output_); + + for (std::size_t batch = 0; batch < batchSize; ++batch) { + for (std::size_t feature = 0; feature < featureSize; ++feature) { + std::size_t ioIndex = batch*channelSize*featureSize + feature; + + I sum(0.0); + for (std::size_t ch = 0; ch < channelSize; ++ch) { + output[ioIndex] = std::exp(input[ioIndex]); + sum += output[ioIndex]; + ioIndex+=featureSize; + } + + ioIndex = batch*channelSize*featureSize + feature; + for (std::size_t ch = 0; ch < channelSize; ++ch) { + output[ioIndex] /= sum; + ioIndex += featureSize; + } + } + } +} + +namespace { +static Registrar<SoftmaxImplForward_cpu> registrarSoftmaxImplForward_cpu_Float32( + {DataType::Float32, DataType::Float32}, Aidge::SoftmaxImpl_cpu_forward_kernel<float, float>); +static Registrar<SoftmaxImplForward_cpu> registrarSoftmaxImplForward_cpu_Int32( + {DataType::Int32, DataType::Int32}, Aidge::SoftmaxImpl_cpu_forward_kernel<int, int>); +static Registrar<SoftmaxImplForward_cpu> registrarSoftmaxImplForward_cpu_Float64( + {DataType::Float64, DataType::Float64}, Aidge::SoftmaxImpl_cpu_forward_kernel<double, double>); +} // namespace +} // namespace Aidge + +#endif /* AIDGE_CPU_OPERATOR_SOFTMAXIMPL_FORWARD_KERNEL_H_ */ diff --git a/python_binding/pybind_cpu.cpp b/python_binding/pybind_cpu.cpp index afe125154979849d61038d918a669679ac2a4b91..4a325bf51716ee6a920b3fcbde394b3e5b7c1d0f 100644 --- a/python_binding/pybind_cpu.cpp +++ b/python_binding/pybind_cpu.cpp @@ -1,6 +1,6 @@ #include <pybind11/pybind11.h> // Need to call this header to register every impl -#include "aidge/aidge_backend_cpu.hpp" +#include "aidge/backend/cpu.hpp" namespace py = pybind11; diff --git a/setup.ps1 b/setup.ps1 new file mode 100644 index 0000000000000000000000000000000000000000..748739834ff10802085d68c3360a87978cf7e8a7 --- /dev/null +++ b/setup.ps1 @@ -0,0 +1,38 @@ +# Helper setup tool to automatically build aidge_backend_cpu on Windows. + +# Requirements +################################################################################ +# aidge_core must be installed first in $env:AIDGE_INSTALL_PATH. + +# Enable or disable automatic installation of requirements +# Run .\setup.ps1 -install_reqs:$false to disable it +param ([bool]$install_reqs=$true) + +if (-not $env:AIDGE_INSTALL_PATH) +{ + Write-Error -Message "AIDGE_INSTALL_PATH environment variable must be set to aidge_core install path." -ErrorAction Stop +} + +# 1. Setup environment +################################################################################ +if ($install_reqs) +{ + # No additional dependencies +} + +# 2. 
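The Softmax kernel above normalizes over the channel axis of an NCHW buffer, independently for every (batch, spatial position) pair: exponentiate each channel value, sum the exponentials, then divide. It exponentiates the raw inputs; the sketch below, with hypothetical names, additionally subtracts the per-position maximum first, the usual guard against overflow in std::exp that does not change the result:

#include <algorithm>
#include <cmath>
#include <cstddef>

// Channel-wise softmax on an NCHW buffer: one probability distribution per
// (batch, spatial position). The max subtraction is for numerical stability only.
void softmaxNCHW(const float* in, float* out,
                 std::size_t batch, std::size_t channels, std::size_t feature) {
    for (std::size_t b = 0; b < batch; ++b) {
        for (std::size_t f = 0; f < feature; ++f) {
            const std::size_t base = b * channels * feature + f;
            float maxVal = in[base];
            for (std::size_t c = 1; c < channels; ++c)
                maxVal = std::max(maxVal, in[base + c * feature]);
            float sum = 0.f;
            for (std::size_t c = 0; c < channels; ++c) {
                out[base + c * feature] = std::exp(in[base + c * feature] - maxVal);
                sum += out[base + c * feature];
            }
            for (std::size_t c = 0; c < channels; ++c)
                out[base + c * feature] /= sum;
        }
    }
}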
Compile & install aidge_core +################################################################################ +$env:CMAKE_PREFIX_PATH=$env:AIDGE_INSTALL_PATH +mkdir -Force build_cpp +mkdir -Force $env:AIDGE_INSTALL_PATH +Set-Location build_cpp +cmake -DCMAKE_INSTALL_PREFIX:PATH=$env:AIDGE_INSTALL_PATH -DCMAKE_BUILD_TYPE=Debug .. +if(!$?) { $lastError = $LASTEXITCODE; Set-Location $PSScriptRoot; Exit $lastError } +cmake --build . -j2 +if(!$?) { $lastError = $LASTEXITCODE; Set-Location $PSScriptRoot; Exit $lastError } +cmake --install . --config Debug +if(!$?) { $lastError = $LASTEXITCODE; Set-Location $PSScriptRoot; Exit $lastError } +# Optional: run the unit tests +ctest --output-on-failure +if(!$?) { $lastError = $LASTEXITCODE; Set-Location $PSScriptRoot; Exit $lastError } +Set-Location $PSScriptRoot diff --git a/src/operator/AddImpl.cpp b/src/operator/AddImpl.cpp index f4e08ba540b814a81be9cbea74ebc7644f6f843a..d3da42185237a59146af17199e34a00dbebd6d96 100644 --- a/src/operator/AddImpl.cpp +++ b/src/operator/AddImpl.cpp @@ -16,11 +16,11 @@ #include <vector> #include "aidge/operator/Conv.hpp" - -#include "aidge/operator/AddImpl.hpp" -#include "aidge/operator/AddImpl_forward_kernels.hpp" #include "aidge/utils/Types.h" +#include "aidge/backend/cpu/operator/AddImpl.hpp" +#include "aidge/backend/cpu/operator/AddImpl_forward_kernels.hpp" + ////////////////////////////////// // AddImpl_cpu<1> ////////////////////////////////// @@ -48,7 +48,13 @@ Aidge::NbElts_t Aidge::AddImpl_cpu<1>::getNbConsumedData(Aidge::IOIndex_t /*inpu Aidge::NbElts_t Aidge::AddImpl_cpu<1>::getNbProducedData(Aidge::IOIndex_t /*outputIdx*/) const { return mNbProducedData[0]; } +void Aidge::AddImpl_cpu<1>::updateConsummerProducer(){ + // Update producer-consumer data + for (IOIndex_t inputIdx = 0; static_cast<NbElts_t>(inputIdx) < mNbConsumedData.size(); ++inputIdx) + mNbConsumedData[inputIdx]+= getNbRequiredData(inputIdx); // each input is consumed by the minimum amount for a forward pass + mNbProducedData[0]+= getRequiredMemory(0, {}); +} void Aidge::AddImpl_cpu<1>::forward() { // FIXME: uncomment the following code once memory handling will work assert(mOp.getInput(0) && "missing input #0"); @@ -63,11 +69,6 @@ void Aidge::AddImpl_cpu<1>::forward() { mOp.getInput(0)->getImpl()->rawPtr(), mOp.getOutput(0)->getImpl()->rawPtr()); - // Update producer-consumer data - for (IOIndex_t inputIdx = 0; static_cast<NbElts_t>(inputIdx) < mNbConsumedData.size(); ++inputIdx) - mNbConsumedData[inputIdx]+= getNbRequiredData(inputIdx); // each input is consumed by the minimum amount for a forward pass - - mNbProducedData[0]+= getRequiredMemory(0, {}); } void Aidge::AddImpl_cpu<1>::backward() { @@ -95,7 +96,7 @@ Aidge::NbElts_t Aidge::AddImpl_cpu<2>::getNbRequiredProtected(const Aidge::IOInd return 0; } -Aidge::NbElts_t Aidge::AddImpl_cpu<2>::getRequiredMemory(const Aidge::IOIndex_t outputIdx, __attribute__((unused)) const std::vector<Aidge::DimSize_t>& inputsSize) const { +Aidge::NbElts_t Aidge::AddImpl_cpu<2>::getRequiredMemory(const Aidge::IOIndex_t outputIdx, const std::vector<Aidge::DimSize_t>& /*inputsSize*/) const { // Requires the whole tensors, regardless of available data on inputs assert(outputIdx == 0 && "operator has only one output"); @@ -112,7 +113,13 @@ Aidge::NbElts_t Aidge::AddImpl_cpu<2>::getNbConsumedData(Aidge::IOIndex_t inputI Aidge::NbElts_t Aidge::AddImpl_cpu<2>::getNbProducedData(Aidge::IOIndex_t /*outputIdx*/) const { return mNbProducedData[0]; } +void Aidge::AddImpl_cpu<2>::updateConsummerProducer(){ + // Update 
producer-consumer data + for (IOIndex_t inputIdx = 0; static_cast<NbElts_t>(inputIdx) < mNbConsumedData.size(); ++inputIdx) + mNbConsumedData[inputIdx]+= getNbRequiredData(inputIdx); // each input is consumed by the minimum amount for a forward pass + mNbProducedData[0]+= getRequiredMemory(0, {}); +} void Aidge::AddImpl_cpu<2>::forward() { // FIXME: uncomment the following code once memory handling will work assert(mOp.getInput(0) && "missing input #0"); @@ -130,11 +137,6 @@ void Aidge::AddImpl_cpu<2>::forward() { mOp.mInputs[1]->getImpl()->rawPtr(), mOp.getOutput(0)->getImpl()->rawPtr()); - // Update producer-consumer data - for (std::size_t inputIdx = 0; inputIdx < mNbConsumedData.size(); ++inputIdx) - mNbConsumedData[inputIdx]+= getNbRequiredData(static_cast<IOIndex_t>(inputIdx)); // each input is consumed by the minimum amount for a forward pass - - mNbProducedData[0]+= getRequiredMemory(0, {}); } void Aidge::AddImpl_cpu<2>::backward() { @@ -180,7 +182,13 @@ Aidge::NbElts_t Aidge::AddImpl_cpu<3>::getNbProducedData(Aidge::IOIndex_t output assert(static_cast<std::size_t>(outputIdx) < mNbProducedData.size()); return mNbProducedData[static_cast<std::size_t>(outputIdx)]; } +void Aidge::AddImpl_cpu<3>::updateConsummerProducer(){ + // Update producer-consumer data + for (IOIndex_t inputIdx = 0; static_cast<NbElts_t>(inputIdx) < mNbConsumedData.size(); ++inputIdx) + mNbConsumedData[inputIdx]+= getNbRequiredData(inputIdx); // each input is consumed by the minimum amount for a forward pass + mNbProducedData[0]+= getRequiredMemory(0, {}); +} void Aidge::AddImpl_cpu<3>::forward() { // FIXME: uncomment the following code once memory handling will work assert(mOp.getInput(0) && "missing input #0"); @@ -201,13 +209,8 @@ void Aidge::AddImpl_cpu<3>::forward() { mOp.mInputs[2]->getImpl()->rawPtr(), mOp.getOutput(0)->getImpl()->rawPtr()); - // Update producer-consumer data - for (IOIndex_t inputIdx = 0; static_cast<NbElts_t>(inputIdx) < mNbConsumedData.size(); ++inputIdx) - mNbConsumedData[inputIdx]+= getNbRequiredData(inputIdx); // each input is consumed by the minimum amount for a forward pass - - mNbProducedData[0]+= getRequiredMemory(0, {}); } void Aidge::AddImpl_cpu<3>::backward() { printf("Not implemented yet.\n"); -} \ No newline at end of file +} diff --git a/src/operator/AvgPoolingImpl.cpp b/src/operator/AvgPoolingImpl.cpp index 2e1e901d35f2ac8620f1c4be53413ce58e9260f9..6c434a5c38853a1dee66db5be95b6b1bfdde8162 100644 --- a/src/operator/AvgPoolingImpl.cpp +++ b/src/operator/AvgPoolingImpl.cpp @@ -9,16 +9,16 @@ * ********************************************************************************/ -#include "aidge/operator/AvgPoolingImpl.hpp" - #include <cassert> #include <numeric> #include <thread> #include <vector> -#include "aidge/operator/AvgPoolingImpl_forward_kernels.hpp" -#include "aidge/operator/AvgPooling.hpp" #include "aidge/utils/Types.h" +#include "aidge/operator/AvgPooling.hpp" + +#include "aidge/backend/cpu/operator/AvgPoolingImpl.hpp" +#include "aidge/backend/cpu/operator/AvgPoolingImpl_forward_kernels.hpp" Aidge::NbElts_t Aidge::AvgPoolingImpl2D_cpu::getNbRequiredData(const Aidge::IOIndex_t inputIdx) const { assert(mOp.getInput(inputIdx) && "requires valid input"); @@ -39,6 +39,7 @@ Aidge::NbElts_t Aidge::AvgPoolingImpl2D_cpu::getRequiredMemory(const Aidge::IOIn const std::vector<Aidge::DimSize_t> & /*inputsSize*/) const { // Requires the whole tensors, regardless of available data on inputs assert(outputIdx == 0 && "operator has only one output"); + (void) outputIdx; const auto 
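The updateConsummerProducer() overrides introduced above all follow the same bookkeeping pattern: instead of advancing the consumed/produced counters at the end of forward(), each operator exposes a separate hook so the scheduler can account for data movement without running the computation. A schematic, hypothetical sketch of that pattern (not the actual OperatorImpl interface):

#include <cstddef>
#include <vector>

struct OpBookkeeping {
    std::vector<std::size_t> nbConsumedData;   // one counter per input
    std::size_t nbProducedData = 0;            // single-output case

    // In the real implementations these return whole tensor sizes.
    std::size_t nbRequiredData(std::size_t /*inputIdx*/) const { return 1; }
    std::size_t requiredMemory() const { return 1; }

    // Called once per scheduled forward pass.
    void updateConsumerProducer() {
        for (std::size_t i = 0; i < nbConsumedData.size(); ++i)
            nbConsumedData[i] += nbRequiredData(i);   // each input consumed by the minimum amount for one pass
        nbProducedData += requiredMemory();
    }
};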
&outputDims = std::static_pointer_cast<Tensor>(mOp.getOutput(0))->dims(); return std::accumulate(outputDims.begin(), outputDims.end(), NbElts_t(1), std::multiplies<NbElts_t>()); @@ -53,7 +54,13 @@ Aidge::NbElts_t Aidge::AvgPoolingImpl2D_cpu::getNbProducedData(Aidge::IOIndex_t assert((outputIdx == 0) && (static_cast<std::size_t>(outputIdx) < mNbProducedData.size())); return mNbProducedData[static_cast<std::size_t>(outputIdx)]; } - +void Aidge::AvgPoolingImpl2D_cpu::updateConsummerProducer(){ + // Update producer-consumer data + for (std::size_t inputIdx = 0; inputIdx < mNbConsumedData.size(); ++inputIdx) + mNbConsumedData[inputIdx] += getNbRequiredData(static_cast<IOIndex_t>(inputIdx)); // each input is consumed by the minimum + // amount for a forward pass + mNbProducedData[0] += getRequiredMemory(0, {}); +} void Aidge::AvgPoolingImpl2D_cpu::forward() { // FIXME: uncomment the following code once memory handling will work assert(mOp.getInput(0) && "missing input #0"); @@ -68,13 +75,6 @@ void Aidge::AvgPoolingImpl2D_cpu::forward() { mOp.getInput(0)->getImpl()->rawPtr(), mOp.getOutput(0)->getImpl()->rawPtr()); - - // Update producer-consumer data - for (std::size_t inputIdx = 0; inputIdx < mNbConsumedData.size(); ++inputIdx) - mNbConsumedData[inputIdx] += getNbRequiredData(static_cast<IOIndex_t>(inputIdx)); // each input is consumed by the minimum - // amount for a forward pass - - mNbProducedData[0] += getRequiredMemory(0, {}); } void Aidge::AvgPoolingImpl2D_cpu::backward() { printf("Not implemented yet.\n"); } diff --git a/src/operator/BatchNormImpl.cpp b/src/operator/BatchNormImpl.cpp index 5bb7d0a9d36e3f7918ce1a5aa6ae4e9dbb96e9a1..a0d4d032ded9ede1b2dba307aa967af330167d25 100644 --- a/src/operator/BatchNormImpl.cpp +++ b/src/operator/BatchNormImpl.cpp @@ -9,15 +9,15 @@ * ********************************************************************************/ -#include "aidge/operator/BatchNormImpl.hpp" - #include <cassert> #include <numeric> // std::accumulate #include <vector> -#include "aidge/operator/BatchNormImpl_forward_kernels.hpp" -#include "aidge/operator/BatchNorm.hpp" #include "aidge/utils/Types.h" +#include "aidge/operator/BatchNorm.hpp" + +#include "aidge/backend/cpu/operator/BatchNormImpl.hpp" +#include "aidge/backend/cpu/operator/BatchNormImpl_forward_kernels.hpp" Aidge::NbElts_t Aidge::BatchNormImpl2D_cpu::getNbRequiredData(const Aidge::IOIndex_t inputIdx) const { assert(mOp.getInput(inputIdx) && "requires valid input"); @@ -34,10 +34,11 @@ Aidge::NbElts_t Aidge::BatchNormImpl2D_cpu::getNbRequiredProtected(IOIndex_t /*i return 0; } -Aidge::NbElts_t Aidge::BatchNormImpl2D_cpu::getRequiredMemory(__attribute__((unused)) const Aidge::IOIndex_t outputIdx, - __attribute__((unused)) const std::vector<Aidge::DimSize_t> &inputsSize) const { +Aidge::NbElts_t Aidge::BatchNormImpl2D_cpu::getRequiredMemory(const Aidge::IOIndex_t outputIdx, + const std::vector<Aidge::DimSize_t> &/*inputsSize*/) const { // Requires the whole tensors, regardless of available data on inputs assert(outputIdx == 0 && "operator has only one output"); + (void) outputIdx; const auto &outputDims = std::static_pointer_cast<Tensor>(mOp.getOutput(0))->dims(); return std::accumulate(outputDims.begin(), outputDims.end(), NbElts_t(1), std::multiplies<NbElts_t>()); @@ -52,7 +53,14 @@ Aidge::NbElts_t Aidge::BatchNormImpl2D_cpu::getNbProducedData(Aidge::IOIndex_t o assert((outputIdx == 0) && (static_cast<std::size_t>(outputIdx) < mNbProducedData.size())); return mNbProducedData[static_cast<std::size_t>(outputIdx)]; } +void 
Aidge::BatchNormImpl2D_cpu::updateConsummerProducer(){ + // Update producer-consumer data + for (std::size_t inputIdx = 0; inputIdx < mNbConsumedData.size(); ++inputIdx) + mNbConsumedData[inputIdx] += getNbRequiredData(static_cast<IOIndex_t>(inputIdx)); // each input is consumed by the minimum + // amount for a forward pass + mNbProducedData[0] += getRequiredMemory(0, {}); +} void Aidge::BatchNormImpl2D_cpu::forward() { // FIXME: uncomment the following code once memory handling will work assert(mOp.getInput(0) && "missing input #0"); @@ -78,12 +86,7 @@ void Aidge::BatchNormImpl2D_cpu::forward() { mOp.getOutput(0)->getImpl()->rawPtr(), true); - // Update producer-consumer data - for (std::size_t inputIdx = 0; inputIdx < mNbConsumedData.size(); ++inputIdx) - mNbConsumedData[inputIdx] += getNbRequiredData(static_cast<IOIndex_t>(inputIdx)); // each input is consumed by the minimum - // amount for a forward pass - mNbProducedData[0] += getRequiredMemory(0, {}); } void Aidge::BatchNormImpl2D_cpu::backward() { printf("Not implemented yet.\n"); } diff --git a/src/operator/ConvDepthWiseImpl.cpp b/src/operator/ConvDepthWiseImpl.cpp index 178d602ffa73c00efef596dc2d31f51619b6600d..3e920cf68366b82bce8df29c8aea0c838e6a1364 100644 --- a/src/operator/ConvDepthWiseImpl.cpp +++ b/src/operator/ConvDepthWiseImpl.cpp @@ -9,17 +9,17 @@ * ********************************************************************************/ -#include "aidge/operator/ConvDepthWiseImpl.hpp" - #include <cassert> #include <chrono> // std::chrono::milliseconds #include <numeric> // std::accumulate #include <thread> // std::this_thread::sleep_for #include <vector> -#include "aidge/operator/ConvDepthWiseImpl_forward_kernels.hpp" -#include "aidge/operator/ConvDepthWise.hpp" #include "aidge/utils/Types.h" +#include "aidge/operator/ConvDepthWise.hpp" + +#include "aidge/backend/cpu/operator/ConvDepthWiseImpl.hpp" +#include "aidge/backend/cpu/operator/ConvDepthWiseImpl_forward_kernels.hpp" Aidge::NbElts_t Aidge::ConvDepthWiseImpl2D_cpu::getNbRequiredData(const Aidge::IOIndex_t inputIdx) const { assert(mOp.getInput(inputIdx) && "requires valid input"); @@ -36,10 +36,11 @@ Aidge::NbElts_t Aidge::ConvDepthWiseImpl2D_cpu::getNbRequiredProtected(IOIndex_t return 0; } -Aidge::NbElts_t Aidge::ConvDepthWiseImpl2D_cpu::getRequiredMemory(__attribute__((unused)) const Aidge::IOIndex_t outputIdx, - __attribute__((unused)) const std::vector<Aidge::DimSize_t> &inputsSize) const { +Aidge::NbElts_t Aidge::ConvDepthWiseImpl2D_cpu::getRequiredMemory(const Aidge::IOIndex_t outputIdx, + const std::vector<Aidge::DimSize_t> &/*inputsSize*/) const { // Requires the whole tensors, regardless of available data on inputs assert(outputIdx == 0 && "operator has only one output"); + (void) outputIdx; const auto &outputDims = std::static_pointer_cast<Tensor>(mOp.getOutput(0))->dims(); return std::accumulate(outputDims.begin(), outputDims.end(), NbElts_t(1), std::multiplies<NbElts_t>()); @@ -54,7 +55,14 @@ Aidge::NbElts_t Aidge::ConvDepthWiseImpl2D_cpu::getNbProducedData(Aidge::IOIndex assert((outputIdx == 0) && (static_cast<std::size_t>(outputIdx) < mNbProducedData.size())); return mNbProducedData[static_cast<std::size_t>(outputIdx)]; } +void Aidge::ConvDepthWiseImpl2D_cpu::updateConsummerProducer(){ + // Update producer-consumer data + for (std::size_t inputIdx = 0; inputIdx < mNbConsumedData.size(); ++inputIdx) + mNbConsumedData[inputIdx] += getNbRequiredData(static_cast<IOIndex_t>(inputIdx)); // each input is consumed by the minimum + // amount for a forward pass + 
mNbProducedData[0] += getRequiredMemory(0, {}); +} void Aidge::ConvDepthWiseImpl2D_cpu::forward() { // FIXME: uncomment the following code once memory handling will work assert(mOp.getInput(0) && "missing input #0"); @@ -72,14 +80,6 @@ void Aidge::ConvDepthWiseImpl2D_cpu::forward() { kernelFunc(mOp.getParams(), std::static_pointer_cast<Tensor>(mOp.getInput(0))->dims<4>(), mOp.getInput(0)->getImpl()->rawPtr(), mOp.getInput(1)->getImpl()->rawPtr(), mOp.getInput(2)->getImpl()->rawPtr(), mOp.getOutput(0)->getImpl()->rawPtr()); - - - // Update producer-consumer data - for (std::size_t inputIdx = 0; inputIdx < mNbConsumedData.size(); ++inputIdx) - mNbConsumedData[inputIdx] += getNbRequiredData(static_cast<IOIndex_t>(inputIdx)); // each input is consumed by the minimum - // amount for a forward pass - - mNbProducedData[0] += getRequiredMemory(0, {}); } void Aidge::ConvDepthWiseImpl2D_cpu::backward() { printf("Not implemented yet.\n"); } diff --git a/src/operator/ConvImpl.cpp b/src/operator/ConvImpl.cpp index 58c83da14aeca4ae9104dea525a4ad236243775f..b4ddf80929923a9c2c5998ac8614ebb0d3afe000 100644 --- a/src/operator/ConvImpl.cpp +++ b/src/operator/ConvImpl.cpp @@ -9,17 +9,17 @@ * ********************************************************************************/ -#include "aidge/operator/ConvImpl.hpp" - #include <cassert> #include <chrono> // std::chrono::milliseconds #include <numeric> // std::accumulate #include <thread> // std::this_thread::sleep_for #include <vector> -#include "aidge/operator/ConvImpl_forward_kernels.hpp" -#include "aidge/operator/Conv.hpp" #include "aidge/utils/Types.h" +#include "aidge/operator/Conv.hpp" + +#include "aidge/backend/cpu/operator/ConvImpl.hpp" +#include "aidge/backend/cpu/operator/ConvImpl_forward_kernels.hpp" Aidge::NbElts_t Aidge::ConvImpl2D_cpu::getNbRequiredData(const Aidge::IOIndex_t inputIdx) const { assert(mOp.getInput(inputIdx) && "requires valid input"); @@ -36,10 +36,11 @@ Aidge::NbElts_t Aidge::ConvImpl2D_cpu::getNbRequiredProtected(IOIndex_t /*inputI return 0; } -Aidge::NbElts_t Aidge::ConvImpl2D_cpu::getRequiredMemory(__attribute__((unused)) const Aidge::IOIndex_t outputIdx, - __attribute__((unused)) const std::vector<Aidge::DimSize_t> &inputsSize) const { +Aidge::NbElts_t Aidge::ConvImpl2D_cpu::getRequiredMemory(const Aidge::IOIndex_t outputIdx, + const std::vector<Aidge::DimSize_t> &/*inputsSize*/) const { // Requires the whole tensors, regardless of available data on inputs assert(outputIdx == 0 && "operator has only one output"); + (void) outputIdx; const auto &outputDims = std::static_pointer_cast<Tensor>(mOp.getOutput(0))->dims(); return std::accumulate(outputDims.begin(), outputDims.end(), NbElts_t(1), std::multiplies<NbElts_t>()); @@ -54,7 +55,14 @@ Aidge::NbElts_t Aidge::ConvImpl2D_cpu::getNbProducedData(Aidge::IOIndex_t output assert((outputIdx == 0) && (static_cast<std::size_t>(outputIdx) < mNbProducedData.size())); return mNbProducedData[static_cast<std::size_t>(outputIdx)]; } +void Aidge::ConvImpl2D_cpu::updateConsummerProducer(){ + // Update producer-consumer data + for (std::size_t inputIdx = 0; inputIdx < mNbConsumedData.size(); ++inputIdx) + mNbConsumedData[inputIdx] += getNbRequiredData(static_cast<IOIndex_t>(inputIdx)); // each input is consumed by the minimum + // amount for a forward pass + mNbProducedData[0] += getRequiredMemory(0, {}); +} void Aidge::ConvImpl2D_cpu::forward() { // FIXME: uncomment the following code once memory handling will work assert(mOp.getInput(0) && "missing input #0"); @@ -71,15 +79,7 @@ void 
                mOp.getInput(0)->getImpl()->rawPtr(),
                mOp.getInput(1)->getImpl()->rawPtr(),
                mOp.getInput(2)->getImpl()->rawPtr(),
                mOp.getOutput(0)->getImpl()->rawPtr());
-    // FIXME: Dummy wait for some earlier scheduler tests
-    std::this_thread::sleep_for(std::chrono::milliseconds(mOp.get<ConvParam::OutChannels>()));
-
-    // Update producer-consumer data
-    for (std::size_t inputIdx = 0; inputIdx < mNbConsumedData.size(); ++inputIdx)
-        mNbConsumedData[inputIdx] += getNbRequiredData(static_cast<IOIndex_t>(inputIdx)); // each input is consumed by the minimum
-                                                                                          // amount for a forward pass
-    mNbProducedData[0] += getRequiredMemory(0, {});
 }
 
 void Aidge::ConvImpl2D_cpu::backward() { printf("Not implemented yet.\n"); }
diff --git a/src/operator/FCImpl.cpp b/src/operator/FCImpl.cpp
index aa47296931302cff379f8e296a5ab527b0f2477b..086902be0ab1c2027a8c62c143bc27921e5e9e1b 100644
--- a/src/operator/FCImpl.cpp
+++ b/src/operator/FCImpl.cpp
@@ -16,10 +16,11 @@
 #include <vector>
 
 #include "aidge/operator/FC.hpp"
-#include "aidge/operator/FCImpl.hpp"
-#include "aidge/operator/FCImpl_forward_kernels.hpp"
 #include "aidge/utils/Types.h"
 
+#include "aidge/backend/cpu/operator/FCImpl.hpp"
+#include "aidge/backend/cpu/operator/FCImpl_forward_kernels.hpp"
+
 Aidge::NbElts_t Aidge::FCImpl_cpu::getNbRequiredData(const Aidge::IOIndex_t inputIdx) const {
     assert(mOp.getInput(inputIdx) && "requires valid input");
@@ -44,10 +45,11 @@ Aidge::NbElts_t
 }
 
 Aidge::NbElts_t Aidge::FCImpl_cpu::getRequiredMemory(
-    __attribute__((unused)) const IOIndex_t outputIdx, __attribute__((unused)) const std::vector<DimSize_t> &inputsSize) const
+    const IOIndex_t outputIdx, const std::vector<DimSize_t> &/*inputsSize*/) const
 {
     // Requires the whole tensors, regardless of available data on inputs
     assert(outputIdx == 0 && "operator has only one output");
+    (void) outputIdx;
 
     const auto &outputDims = std::static_pointer_cast<Tensor>(mOp.getOutput(0))->dims();
     return std::accumulate(
@@ -69,6 +71,16 @@ Aidge::NbElts_t Aidge::FCImpl_cpu::getNbProducedData(Aidge::IOIndex_t outputIdx)
     return mNbProducedData[static_cast<std::size_t>(outputIdx)];
 }
 
+void Aidge::FCImpl_cpu::updateConsummerProducer(){
+    // Update producer-consumer data
+    for (IOIndex_t inputIdx = 0; static_cast<std::size_t>(inputIdx) < mNbConsumedData.size(); ++inputIdx)
+        mNbConsumedData[inputIdx]
+            += getNbRequiredData(static_cast<std::size_t>(inputIdx)); // each input is consumed by the minimum
+                                                                      // amount for a forward pass
+
+    mNbProducedData[0] += getRequiredMemory(0, {});
+}
+
 void Aidge::FCImpl_cpu::forward()
 {
     // FIXME: uncomment the following code once memory handling will work
@@ -93,7 +105,7 @@ void Aidge::FCImpl_cpu::forward()
     //         mOp.mInputs[2]->getImpl()->rawPtr(),
     //         mOp.getOutput(0)->getImpl()->rawPtr());
     // }
-    // else 
+    // else
     kernelFunc(
         mOp.getParams(),
         mOp.getInput(0)->dims()[0],
@@ -102,19 +114,8 @@ void Aidge::FCImpl_cpu::forward()
         mOp.mInputs[1]->getImpl()->rawPtr(),
         mOp.mInputs[2]->getImpl()->rawPtr(),
         mOp.getOutput(0)->getImpl()->rawPtr());
-
-
-
-    // FIXME: Dummy wait for some earlier scheduler tests
-    std::this_thread::sleep_for(std::chrono::milliseconds(mOp.get<FCParam::OutChannels>()));
 
-    // Update producer-consumer data
-    for (IOIndex_t inputIdx = 0; static_cast<std::size_t>(inputIdx) < mNbConsumedData.size(); ++inputIdx)
-        mNbConsumedData[inputIdx]
-            += getNbRequiredData(static_cast<std::size_t>(inputIdx)); // each input is consumed by the minimum
-                                                                      // amount for a forward pass
-    mNbProducedData[0] += getRequiredMemory(0, {});
 }
 
 void Aidge::FCImpl_cpu::backward()
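Another change repeated in the hunks above (Conv, ConvDepthWise, FC) and below: __attribute__((unused)) is a GCC/Clang extension, so the signatures now silence unused-parameter warnings either by commenting out the parameter name, or by keeping the name for the assert and adding a (void) cast. The snippet below is a small stand-alone illustration of why those forms are portable; the function names are made up for the example and are not Aidge functions.

#include <cassert>
#include <cstddef>

// Non-portable original form (GCC/Clang only):
//   std::size_t f(__attribute__((unused)) std::size_t outputIdx);

// Portable form 1: omit the parameter name entirely when it is never used.
std::size_t requiredMemorySketch(std::size_t /*outputIdx*/, std::size_t nbElements) {
    return nbElements;
}

// Portable form 2: keep the name for an assert and silence the warning with a
// void cast, because the assert argument disappears when NDEBUG is defined.
std::size_t requiredMemoryCheckedSketch(std::size_t outputIdx, std::size_t nbElements) {
    assert(outputIdx == 0 && "operator has only one output");
    (void) outputIdx; // avoids -Wunused-parameter in release builds
    return nbElements;
}

int main() {
    return requiredMemorySketch(0, 4) == requiredMemoryCheckedSketch(0, 4) ? 0 : 1;
}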
diff --git a/src/operator/LeakyReLUImpl.cpp b/src/operator/LeakyReLUImpl.cpp
index 8c88e1f7a507cc426416d53dc895dce077ead415..f6a44d381081c7c7f1dcbbf02d91212168cc07aa 100644
--- a/src/operator/LeakyReLUImpl.cpp
+++ b/src/operator/LeakyReLUImpl.cpp
@@ -13,14 +13,13 @@
 #include <chrono>  // std::chrono::milliseconds
 #include <numeric> // std::accumulate
 #include <thread>  // std::this_thread::sleep_for
+#include <vector>
 
 #include "aidge/operator/LeakyReLU.hpp"
-
-#include "aidge/operator/LeakyReLUImpl.hpp"
-#include "aidge/operator/LeakyReLUImpl_forward_kernels.hpp"
 #include "aidge/utils/Types.h"
-#include <numeric>
-#include <vector>
+
+#include "aidge/backend/cpu/operator/LeakyReLUImpl.hpp"
+#include "aidge/backend/cpu/operator/LeakyReLUImpl_forward_kernels.hpp"
 
 // FIXME: replace whole Tensor with minimum needed data quantity
 Aidge::NbElts_t Aidge::LeakyReLUImpl_cpu::getNbRequiredData(Aidge::IOIndex_t /*inputIdx*/) const {
@@ -38,7 +37,7 @@ Aidge::NbElts_t Aidge::LeakyReLUImpl_cpu::getNbRequiredProtected(const Aidge::IO
     return 0;
 }
 
-Aidge::NbElts_t Aidge::LeakyReLUImpl_cpu::getRequiredMemory(__attribute__((unused)) const Aidge::IOIndex_t outputIdx, __attribute__((unused)) const std::vector<Aidge::DimSize_t> &inputsSize) const {
+Aidge::NbElts_t Aidge::LeakyReLUImpl_cpu::getRequiredMemory(const Aidge::IOIndex_t /*outputIdx*/, const std::vector<Aidge::DimSize_t> &/*inputsSize*/) const {
     const auto& outputDims = mOp.getOutput(0)->dims();
     return std::accumulate(outputDims.begin(), outputDims.end(),
                         static_cast<NbElts_t>(1), std::multiplies<NbElts_t>());
@@ -51,7 +50,11 @@ Aidge::NbElts_t Aidge::LeakyReLUImpl_cpu::getNbConsumedData(Aidge::IOIndex_t /*i
 Aidge::NbElts_t Aidge::LeakyReLUImpl_cpu::getNbProducedData(Aidge::IOIndex_t /*outputIdx*/) const {
     return mNbProducedData[0];
 }
+void Aidge::LeakyReLUImpl_cpu::updateConsummerProducer(){
+    mNbConsumedData[0]+= getNbRequiredData(0); // each input is consumed by the minimum amount for a forward pass
+    mNbProducedData[0]+= getRequiredMemory(0, {});
+}
 void Aidge::LeakyReLUImpl_cpu::forward() {
     // FIXME: uncomment the following code once memory handling will work
     assert(mOp.getInput(0) && "missing input #0");
@@ -66,11 +69,6 @@ void Aidge::LeakyReLUImpl_cpu::forward() {
         std::static_pointer_cast<Tensor>(mOp.getInput(0))->size(),
         mOp.getInput(0)->getImpl()->rawPtr(),
         mOp.getOutput(0)->getImpl()->rawPtr());
-
-    mNbConsumedData[0]+= getNbRequiredData(0); // each input is consumed by the minimum amount for a forward pass
-
-    mNbProducedData[0]+= getRequiredMemory(0, {});
 }
 
 void Aidge::LeakyReLUImpl_cpu::backward() {
diff --git a/src/operator/ProducerImpl.cpp b/src/operator/ProducerImpl.cpp
index 69e4ba281cf8ddf7fb273da6508a42f03b5074c8..664f3745414380fbaf5654ab035ba2ab957da87b 100644
--- a/src/operator/ProducerImpl.cpp
+++ b/src/operator/ProducerImpl.cpp
@@ -17,7 +17,7 @@
 #include "aidge/operator/Producer.hpp"
 #include "aidge/utils/Types.h"
 
-#include "aidge/operator/ProducerImpl.hpp"
+#include "aidge/backend/cpu/operator/ProducerImpl.hpp"
 
 
 std::size_t Aidge::ProducerImpl_cpu::getNbRequiredData(
@@ -42,10 +42,11 @@ std::size_t Aidge::ProducerImpl_cpu::getNbRequiredProtected(
 
 
 std::size_t Aidge::ProducerImpl_cpu::getRequiredMemory(
-    __attribute__((unused)) const IOIndex_t outputIdx, __attribute__((unused)) const std::vector<DimSize_t> &inputsSize) const
+    const IOIndex_t outputIdx, const std::vector<DimSize_t> &/*inputsSize*/) const
 {
     // Requires the whole tensors, regardless of available data on inputs
     assert(outputIdx == 0 && "operator has only one output");
+    (void) outputIdx;
 
     const auto &outputDims = std::static_pointer_cast<Tensor>(mOp.getOutput(0))->dims();
     return std::accumulate(
@@ -60,7 +61,8 @@ Aidge::DimSize_t Aidge::ProducerImpl_cpu::getNbProducedData(
 {
     return getRequiredMemory(0, {});
 }
-
+void Aidge::ProducerImpl_cpu::updateConsummerProducer(){
+}
 void Aidge::ProducerImpl_cpu::forward()
 {
@@ -70,4 +72,4 @@ void Aidge::ProducerImpl_cpu::forward()
 void Aidge::ProducerImpl_cpu::backward()
 {
     printf("Not implemented yet.\n");
-}
\ No newline at end of file
+}
diff --git a/src/operator/ReLUImpl.cpp b/src/operator/ReLUImpl.cpp
index 9cbf0efe07bc15791e3bfcc28d86e1463d908def..cea50bc1e72cfa8e60cdd0f1839c03bcd568e052 100644
--- a/src/operator/ReLUImpl.cpp
+++ b/src/operator/ReLUImpl.cpp
@@ -13,14 +13,13 @@
 #include <chrono>  // std::chrono::milliseconds
 #include <numeric> // std::accumulate
 #include <thread>  // std::this_thread::sleep_for
+#include <vector>
 
 #include "aidge/operator/ReLU.hpp"
-
-#include "aidge/operator/ReLUImpl.hpp"
-#include "aidge/operator/ReLUImpl_forward_kernels.hpp"
 #include "aidge/utils/Types.h"
-#include <numeric>
-#include <vector>
+
+#include "aidge/backend/cpu/operator/ReLUImpl.hpp"
+#include "aidge/backend/cpu/operator/ReLUImpl_forward_kernels.hpp"
 
 // FIXME: replace whole Tensor with minimum needed data quantity
 Aidge::NbElts_t Aidge::ReLUImpl_cpu::getNbRequiredData(Aidge::IOIndex_t /*inputIdx*/) const {
@@ -38,7 +37,7 @@ Aidge::NbElts_t Aidge::ReLUImpl_cpu::getNbRequiredProtected(const Aidge::IOIndex
     return 0;
 }
 
-Aidge::NbElts_t Aidge::ReLUImpl_cpu::getRequiredMemory(__attribute__((unused)) const Aidge::IOIndex_t outputIdx, __attribute__((unused)) const std::vector<Aidge::DimSize_t> &inputsSize) const {
+Aidge::NbElts_t Aidge::ReLUImpl_cpu::getRequiredMemory(const Aidge::IOIndex_t /*outputIdx*/, const std::vector<Aidge::DimSize_t> &/*inputsSize*/) const {
     const auto& outputDims = mOp.getOutput(0)->dims();
     return std::accumulate(outputDims.begin(), outputDims.end(),
                         static_cast<NbElts_t>(1), std::multiplies<NbElts_t>());
@@ -51,7 +50,11 @@ Aidge::NbElts_t Aidge::ReLUImpl_cpu::getNbConsumedData(Aidge::IOIndex_t /*inputI
 Aidge::NbElts_t Aidge::ReLUImpl_cpu::getNbProducedData(Aidge::IOIndex_t /*outputIdx*/) const {
     return mNbProducedData[0];
 }
+void Aidge::ReLUImpl_cpu::updateConsummerProducer(){
+    mNbConsumedData[0]+= getNbRequiredData(0); // each input is consumed by the minimum amount for a forward pass
+    mNbProducedData[0]+= getRequiredMemory(0, {});
+}
 void Aidge::ReLUImpl_cpu::forward() {
     // FIXME: uncomment the following code once memory handling will work
     assert(mOp.getInput(0) && "missing input #0");
@@ -66,10 +69,6 @@ void Aidge::ReLUImpl_cpu::forward() {
         mOp.getInput(0)->getImpl()->rawPtr(),
         mOp.getOutput(0)->getImpl()->rawPtr());
-
-    mNbConsumedData[0]+= getNbRequiredData(0); // each input is consumed by the minimum amount for a forward pass
-
-    mNbProducedData[0]+= getRequiredMemory(0, {});
 }
 
 void Aidge::ReLUImpl_cpu::backward() {
diff --git a/src/operator/SoftmaxImpl.cpp b/src/operator/SoftmaxImpl.cpp
index b0f978c9e928209d843aec6795f77e92ff57b263..03e8f9305617f6a7ced878470e3c94ba625f5b22 100644
--- a/src/operator/SoftmaxImpl.cpp
+++ b/src/operator/SoftmaxImpl.cpp
@@ -13,14 +13,13 @@
 #include <chrono>  // std::chrono::milliseconds
 #include <numeric> // std::accumulate
 #include <thread>  // std::this_thread::sleep_for
+#include <vector>
 
 #include "aidge/operator/Softmax.hpp"
-
-#include "aidge/operator/SoftmaxImpl.hpp"
-#include "aidge/operator/SoftmaxImpl_forward_kernels.hpp"
 #include "aidge/utils/Types.h"
-#include <numeric>
-#include <vector>
+
+#include "aidge/backend/cpu/operator/SoftmaxImpl.hpp"
+#include "aidge/backend/cpu/operator/SoftmaxImpl_forward_kernels.hpp"
 
 // FIXME: replace whole Tensor with minimum needed data quantity
 Aidge::NbElts_t Aidge::SoftmaxImpl_cpu::getNbRequiredData(Aidge::IOIndex_t /*inputIdx*/) const {
@@ -38,7 +37,7 @@ Aidge::NbElts_t Aidge::SoftmaxImpl_cpu::getNbRequiredProtected(const Aidge::IOIn
     return 0;
 }
 
-Aidge::NbElts_t Aidge::SoftmaxImpl_cpu::getRequiredMemory(__attribute__((unused)) const Aidge::IOIndex_t outputIdx, __attribute__((unused)) const std::vector<Aidge::DimSize_t> &inputsSize) const {
+Aidge::NbElts_t Aidge::SoftmaxImpl_cpu::getRequiredMemory(const Aidge::IOIndex_t /*outputIdx*/, const std::vector<Aidge::DimSize_t> &/*inputsSize*/) const {
     const auto& outputDims = mOp.getOutput(0)->dims();
     return std::accumulate(outputDims.begin(), outputDims.end(),
                         static_cast<NbElts_t>(1), std::multiplies<NbElts_t>());
@@ -51,7 +50,14 @@ Aidge::NbElts_t Aidge::SoftmaxImpl_cpu::getNbConsumedData(Aidge::IOIndex_t /*inp
 Aidge::NbElts_t Aidge::SoftmaxImpl_cpu::getNbProducedData(Aidge::IOIndex_t /*outputIdx*/) const {
     return mNbProducedData[0];
 }
+void Aidge::SoftmaxImpl_cpu::updateConsummerProducer(){
+    // Update producer-consumer data
+    for (std::size_t inputIdx = 0; inputIdx < mNbConsumedData.size(); ++inputIdx)
+        mNbConsumedData[inputIdx] += getNbRequiredData(static_cast<IOIndex_t>(inputIdx)); // each input is consumed by the minimum
+                                                                                          // amount for a forward pass
+    mNbProducedData[0] += getRequiredMemory(0, {});
+}
 void Aidge::SoftmaxImpl_cpu::forward() {
     // FIXME: uncomment the following code once memory handling will work
     assert(mOp.getInput(0) && "missing input #0");
diff --git a/unit_tests/CMakeLists.txt b/unit_tests/CMakeLists.txt
index 62f99c1c368736fac64e85818034f3db8ce88a2c..671cdd5ac1262ab61b35a70a234236aff4a3cc15 100644
--- a/unit_tests/CMakeLists.txt
+++ b/unit_tests/CMakeLists.txt
@@ -1,4 +1,3 @@
-
 Include(FetchContent)
 
 FetchContent_Declare(
@@ -10,7 +9,7 @@ FetchContent_Declare(
 FetchContent_MakeAvailable(Catch2)
 
 file(GLOB_RECURSE src_files "*.cpp")
-message(STATUS "TEST FILES : ${src_files}")
+
 add_executable(tests${module_name} ${src_files})
 
 target_link_libraries(tests${module_name} PUBLIC ${module_name})
diff --git a/unit_tests/Test_Scheduler.cpp b/unit_tests/Test_Scheduler.cpp
index 055f4efef8985bc5c0def2f5d397e3e5f3ce96d8..78ab8d5b149e8f702558658fef0442f225de3813 100644
--- a/unit_tests/Test_Scheduler.cpp
+++ b/unit_tests/Test_Scheduler.cpp
@@ -18,7 +18,8 @@
 #include "aidge/graph/GraphView.hpp"
 #include "aidge/graph/OpArgs.hpp"
 #include "aidge/scheduler/Scheduler.hpp"
-#include "aidge/aidge_backend_cpu.hpp"
+
+#include "aidge/backend/cpu.hpp"
 
 using namespace Aidge;
diff --git a/unit_tests/Test_TensorImpl.cpp b/unit_tests/Test_TensorImpl.cpp
index d5aa94d0ea2053e8e977de86cab4d3e87337a8fb..f55e25f55359cbcbcb9a53e916b345d1fb5a6b22 100644
--- a/unit_tests/Test_TensorImpl.cpp
+++ b/unit_tests/Test_TensorImpl.cpp
@@ -14,7 +14,7 @@
 #include <catch2/catch_test_macros.hpp>
 
 #include "aidge/data/Tensor.hpp"
-#include "aidge/data/TensorImpl.hpp"
+#include "aidge/backend/cpu/data/TensorImpl.hpp"
 
 using namespace Aidge;
 
@@ -57,4 +57,30 @@ TEST_CASE("Tensor creation") {
         }
     }
 }
+TEST_CASE("Tensor fill") {
+    SECTION("Instantiate batches independantly") {
+        // initialization with 0s
+        std::shared_ptr<Tensor> concatenatedTensor= std::make_shared<Tensor>(Array2D<int, 3, 5>{});
+        //concatenatedTensor->print();
+
+        std::shared_ptr<Tensor> myTensor1 = std::make_shared<Tensor>(Array1D<int, 5>{{1,2,3,4,5}});
+        std::shared_ptr<Tensor> myTensor2 = std::make_shared<Tensor>(Array1D<int, 5>{{6,7,8,9,10}});
+        std::shared_ptr<Tensor> myTensor3 = std::make_shared<Tensor>(Array1D<int, 5>{{11,12,13,14,15}});
+
+        // use copy function from implementation
+        concatenatedTensor->getImpl()->copy(myTensor1->getImpl()->rawPtr(), 5, 0);
+        concatenatedTensor->getImpl()->copy(myTensor2->getImpl()->rawPtr(), 5, 5);
+        concatenatedTensor->getImpl()->copy(myTensor3->getImpl()->rawPtr(), 5, 10);
+        // concatenatedTensor->print();
+
+        std::shared_ptr<Tensor> expectedTensor= std::make_shared<Tensor>(Array2D<int, 3, 5>{
+            {{1,2,3,4,5},
+            {6,7,8,9,10},
+            {11,12,13,14,15}}
+        });
+        // expectedTensor->print();
+
+        REQUIRE(*concatenatedTensor == *expectedTensor);
+    }
+}
diff --git a/unit_tests/operator/Test_AddImpl.cpp b/unit_tests/operator/Test_AddImpl.cpp
index 3443ac6ba936106eebc9a1a26299d31fc457f32f..e24d7ac6bd97586ebdeddce5ccb75807ddf530f0 100644
--- a/unit_tests/operator/Test_AddImpl.cpp
+++ b/unit_tests/operator/Test_AddImpl.cpp
@@ -12,10 +12,10 @@
 #include <catch2/catch_test_macros.hpp>
 
 #include "aidge/data/Tensor.hpp"
-#include "aidge/data/TensorImpl.hpp"
-#include "aidge/aidge_backend_cpu.hpp"
 #include "aidge/operator/Add.hpp"
 
+#include "aidge/backend/cpu.hpp"
+
 using namespace Aidge;
 
 TEST_CASE("[cpu/operator] Add(forward)") {
diff --git a/unit_tests/operator/Test_AvgPoolingImpl.cpp b/unit_tests/operator/Test_AvgPoolingImpl.cpp
index 178f1ba2f5e54d3df6dba2ec4d58f7bce718e7d1..10d4c09b32528e2cdcdbf2c56204e6911fca0187 100644
--- a/unit_tests/operator/Test_AvgPoolingImpl.cpp
+++ b/unit_tests/operator/Test_AvgPoolingImpl.cpp
@@ -14,10 +14,10 @@
 #include <cstdlib>
 
 #include "aidge/data/Tensor.hpp"
-#include "aidge/data/TensorImpl.hpp"
-#include "aidge/aidge_backend_cpu.hpp"
 #include "aidge/operator/AvgPooling.hpp"
 
+#include "aidge/backend/cpu.hpp"
+
 using namespace Aidge;
 
 TEST_CASE("[cpu/operator] AvgPooling(forward)") {
diff --git a/unit_tests/operator/Test_BatchNormImpl.cpp b/unit_tests/operator/Test_BatchNormImpl.cpp
index 9436ceb3dd49b1984cf2ba67cd91b08143d59fc8..e6107a028e0c3d62f69821ff2650b45f34da103f 100644
--- a/unit_tests/operator/Test_BatchNormImpl.cpp
+++ b/unit_tests/operator/Test_BatchNormImpl.cpp
@@ -13,10 +13,10 @@
 #include <memory>
 
 #include "aidge/data/Tensor.hpp"
-#include "aidge/data/TensorImpl.hpp"
-#include "aidge/aidge_backend_cpu.hpp"
 #include "aidge/operator/BatchNorm.hpp"
 
+#include "aidge/backend/cpu.hpp"
+
 using namespace Aidge;
 
 TEST_CASE("[cpu/operator] BatchNorm(forward)") {
diff --git a/unit_tests/operator/Test_ConvDepthWiseImpl.cpp b/unit_tests/operator/Test_ConvDepthWiseImpl.cpp
index 48a6cc88103d88cd53c16ef0bb81cdd32f2f8e73..0d0ed4b928d64cafc96907fedf3ee0d642a255d0 100644
--- a/unit_tests/operator/Test_ConvDepthWiseImpl.cpp
+++ b/unit_tests/operator/Test_ConvDepthWiseImpl.cpp
@@ -13,10 +13,10 @@
 #include <memory>
 
 #include "aidge/data/Tensor.hpp"
-#include "aidge/data/TensorImpl.hpp"
-#include "aidge/aidge_backend_cpu.hpp"
 #include "aidge/operator/ConvDepthWise.hpp"
 
+#include "aidge/backend/cpu.hpp"
+
 using namespace Aidge;
 
 TEST_CASE("[cpu/operator] ConvDepthWise(forward)") {
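The unit-test updates in this directory are all the same include swap: the per-class aidge/data/TensorImpl.hpp and the old aidge/aidge_backend_cpu.hpp headers are replaced by the relocated umbrella header aidge/backend/cpu.hpp. A minimal sketch of a test translation unit written against the new layout follows; the test tag, tensor values and assertion are illustrative only, while the include paths are the ones introduced in this diff.

#include <catch2/catch_test_macros.hpp>
#include <memory>

#include "aidge/data/Tensor.hpp"

// Umbrella header of the relocated CPU backend; it replaces both
// "aidge/aidge_backend_cpu.hpp" and "aidge/data/TensorImpl.hpp".
#include "aidge/backend/cpu.hpp"

using namespace Aidge;

TEST_CASE("[cpu/backend] include layout sketch") {
    // Hypothetical smoke check: values and dimensions are made up for the example.
    std::shared_ptr<Tensor> input = std::make_shared<Tensor>(Array1D<int, 4>{{-2, -1, 0, 1}});
    REQUIRE(input->size() == 4);
}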
diff --git a/unit_tests/operator/Test_ConvImpl.cpp b/unit_tests/operator/Test_ConvImpl.cpp
index 2c314af411e8578d83d319507ac179db46c19e79..23ff1aaebcfb79a4d4b1abc4f1a77f1c6de63b21 100644
--- a/unit_tests/operator/Test_ConvImpl.cpp
+++ b/unit_tests/operator/Test_ConvImpl.cpp
@@ -14,10 +14,10 @@
 #include <memory>
 
 #include "aidge/data/Tensor.hpp"
-#include "aidge/data/TensorImpl.hpp"
-#include "aidge/aidge_backend_cpu.hpp"
 #include "aidge/operator/Conv.hpp"
 
+#include "aidge/backend/cpu.hpp"
+
 using namespace Aidge;
 
 TEST_CASE("[cpu/operator] Conv(forward)") {
diff --git a/unit_tests/operator/Test_FCImpl.cpp b/unit_tests/operator/Test_FCImpl.cpp
index be672eccfb175b6996180148299ca34a28ec7db1..e3494e20205f1a295eb537100b59fb7bbc26116a 100644
--- a/unit_tests/operator/Test_FCImpl.cpp
+++ b/unit_tests/operator/Test_FCImpl.cpp
@@ -12,11 +12,11 @@
 #include <catch2/catch_test_macros.hpp>
 #include <memory>
 
-#include "aidge/aidge_backend_cpu.hpp"
-#include "aidge/data/TensorImpl.hpp"
 #include "aidge/data/Tensor.hpp"
 #include "aidge/operator/FC.hpp"
 
+#include "aidge/backend/cpu.hpp"
+
 using namespace Aidge;
 
 TEST_CASE("[cpu/oeprator] FC(forward)") {
diff --git a/unit_tests/operator/Test_LeakyReLUImpl.cpp b/unit_tests/operator/Test_LeakyReLUImpl.cpp
index b6686d8987ad26d13f1bab1b9e12be7f060d610d..7096962e196c2ace4abf2b0b14aca8dfa37d3441 100644
--- a/unit_tests/operator/Test_LeakyReLUImpl.cpp
+++ b/unit_tests/operator/Test_LeakyReLUImpl.cpp
@@ -12,10 +12,10 @@
 #include <catch2/catch_test_macros.hpp>
 
 #include "aidge/data/Tensor.hpp"
-#include "aidge/data/TensorImpl.hpp"
-#include "aidge/aidge_backend_cpu.hpp"
 #include "aidge/operator/LeakyReLU.hpp"
 
+#include "aidge/backend/cpu.hpp"
+
 using namespace Aidge;
 
 TEST_CASE("[cpu/operator] LeakyReLU(forward)") {
diff --git a/unit_tests/operator/Test_ReLUImpl.cpp b/unit_tests/operator/Test_ReLUImpl.cpp
index 8d3a2b91980a58eba1d6d48bc259413e98da649c..9752a4914b5cb3cd06f2654cf64e0c193c5dd65b 100644
--- a/unit_tests/operator/Test_ReLUImpl.cpp
+++ b/unit_tests/operator/Test_ReLUImpl.cpp
@@ -12,10 +12,10 @@
 #include <catch2/catch_test_macros.hpp>
 
 #include "aidge/data/Tensor.hpp"
-#include "aidge/data/TensorImpl.hpp"
-#include "aidge/aidge_backend_cpu.hpp"
 #include "aidge/operator/ReLU.hpp"
 
+#include "aidge/backend/cpu.hpp"
+
 #include <memory>
diff --git a/unit_tests/operator/Test_SoftmaxImpl.cpp b/unit_tests/operator/Test_SoftmaxImpl.cpp
index 104062124fbab91f6519e37b80d7bfe0b4ddd281..bad34102b589e3d73956f43593456d885373b3de 100644
--- a/unit_tests/operator/Test_SoftmaxImpl.cpp
+++ b/unit_tests/operator/Test_SoftmaxImpl.cpp
@@ -12,10 +12,10 @@
 #include <catch2/catch_test_macros.hpp>
 
 #include "aidge/data/Tensor.hpp"
-#include "aidge/data/TensorImpl.hpp"
-#include "aidge/aidge_backend_cpu.hpp"
 #include "aidge/operator/Softmax.hpp"
 
+#include "aidge/backend/cpu.hpp"
+
 #include <memory>
 
 using namespace Aidge;
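Finally, a compact recap of the implementation-level copy call exercised by the new "Tensor fill" case in unit_tests/Test_TensorImpl.cpp. This is a hypothetical usage sketch, not additional test code: the shape and values are made up, and the reading of the third argument as an element offset is inferred from the 3x5 example in that test (rows of 5 elements written at offsets 0, 5 and 10).

#include <memory>

#include "aidge/data/Tensor.hpp"
#include "aidge/backend/cpu/data/TensorImpl.hpp"

using namespace Aidge;

int main() {
    // Destination: a zero-initialized 2x3 tensor (shape chosen for the example).
    std::shared_ptr<Tensor> dst = std::make_shared<Tensor>(Array2D<int, 2, 3>{});

    // Sources: one row each.
    std::shared_ptr<Tensor> row0 = std::make_shared<Tensor>(Array1D<int, 3>{{1, 2, 3}});
    std::shared_ptr<Tensor> row1 = std::make_shared<Tensor>(Array1D<int, 3>{{4, 5, 6}});

    // copy(srcRawPtr, nbElements, dstOffset) -- the same call pattern the
    // "Tensor fill" test relies on: row i starts at offset i * nbCols.
    dst->getImpl()->copy(row0->getImpl()->rawPtr(), 3, 0);
    dst->getImpl()->copy(row1->getImpl()->rawPtr(), 3, 3);

    return 0;
}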