diff --git a/.gitignore b/.gitignore index f37378e300efeb5362882eb8d6eb59f028563a0e..0e14676b900cb1418593019be70cc4d20aba2883 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,6 @@ +# common +.cache + # C++ Build build*/ install*/ @@ -11,6 +14,8 @@ __pycache__ *.pyc *.egg-info dist*/ +wheelhouse/ +aidge_backend_cpu/_version.py # Mermaid *.mmd @@ -19,4 +24,4 @@ dist*/ xml*/ # ONNX -*.onnx \ No newline at end of file +*.onnx diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 420442101a3892683f52e28e3bc9c8022abbcab5..97fcaa704b72922d35ad70feb923633fa194c850 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -4,18 +4,27 @@ ############################################################################### stages: - # Analyse code - static_analysis - # Build Aidge - build - # Unit test stage - test - # Code coverage - coverage + - release + - deploy include: - - local: '/.gitlab/ci/_global.gitlab-ci.yml' - # - local: '/.gitlab/ci/static_analysis.gitlab-ci.yml' - - local: '/.gitlab/ci/build.gitlab-ci.yml' - - local: '/.gitlab/ci/test.gitlab-ci.yml' - # - local: '/.gitlab/ci/coverage.gitlab-ci.yml' + - project: 'eclipse/aidge/gitlab_shared_files' + ref: 'main' + file: + # choose which jobs to run by including the corresponding files. + - '.gitlab/ci/ubuntu_cpp.gitlab-ci.yml' + + - '.gitlab/ci/ubuntu_python.gitlab-ci.yml' + - '.gitlab/ci/release/cibuildwheel_ubuntu.gitlab-ci.yml' + + - '.gitlab/ci/windows_cpp.gitlab-ci.yml' + + - '.gitlab/ci/windows_python.gitlab-ci.yml' + - '.gitlab/ci/release/cibuildwheel_windows.gitlab-ci.yml' + + + diff --git a/.gitlab/ci/_global.gitlab-ci.yml b/.gitlab/ci/_global.gitlab-ci.yml deleted file mode 100644 index 331373fe0f27e7750183eb2e76fe83300cf316a8..0000000000000000000000000000000000000000 --- a/.gitlab/ci/_global.gitlab-ci.yml +++ /dev/null @@ -1,24 +0,0 @@ -################################################################################ -# Centralized definitions of common job parameter values. 
# -# Parameters with many optional configurations may be in separate files. # -# # -################################################################################ -variables: - GIT_SUBMODULE_STRATEGY: recursive - OMP_NUM_THREADS: 4 - GIT_SSL_NO_VERIFY: 1 - DEBIAN_FRONTEND: noninteractive - -# See https://docs.gitlab.com/ee/ci/yaml/workflow.html#switch-between-branch-pipelines-and-merge-request-pipelines -workflow: - rules: - - if: $CI_PIPELINE_SOURCE == "merge_request_event" - - if: $CI_COMMIT_BRANCH && $CI_OPEN_MERGE_REQUESTS - when: never - - if: $CI_COMMIT_BRANCH - -default: - image: nvidia/cuda:12.2.0-devel-ubuntu22.04 - before_script: - - apt update - - apt install -y cmake cppcheck python-is-python3 pip git gcovr unzip curl diff --git a/.gitlab/ci/build.gitlab-ci.yml b/.gitlab/ci/build.gitlab-ci.yml deleted file mode 100644 index 18963ced1084c56c1e4c04dceec735126bba962a..0000000000000000000000000000000000000000 --- a/.gitlab/ci/build.gitlab-ci.yml +++ /dev/null @@ -1,214 +0,0 @@ -include: - - remote: 'https://gitlab.eclipse.org/eclipse/aidge/gitlab_shared_files/-/raw/main/.gitlab/ci/shared_script.gitlab-ci.yml' - -build:ubuntu_cpp: - stage: build - needs: [] - tags: - - docker - script: - # Download dependencies - # aidge_core - - DEPENDENCY_NAME="aidge_core" - - DEPENDENCY_JOB="build:ubuntu_cpp" - - !reference [.download_dependency, script] - - # Build current module - - export CMAKE_PREFIX_PATH=../install_cpp - - mkdir -p build_cpp - - cd build_cpp - - cmake -DCMAKE_INSTALL_PREFIX:PATH=../install_cpp -DCMAKE_BUILD_TYPE=Debug -DWERROR=ON -DCOVERAGE=ON .. 
- - make -j4 all install - - artifacts: - expire_in: 1 week - paths: - - build_cpp/ - - install_cpp/ - -build:ubuntu_cpp_g++10: - stage: build - needs: [] - tags: - - docker - - script: - # Download dependencies - # aidge_core - - DEPENDENCY_NAME="aidge_core" - - DEPENDENCY_JOB="build:ubuntu_cpp" - - !reference [.download_dependency, script] - - # Build current module - - export CMAKE_PREFIX_PATH=../install_cpp - - apt install -y g++-10 - - mkdir -p build_cpp - - mkdir -p install_cpp - - cd build_cpp - - export CXX=/usr/bin/g++-10 - - cmake -DCMAKE_INSTALL_PREFIX:PATH=../install_cpp -DCMAKE_BUILD_TYPE=Debug -DWERROR=ON -DCOVERAGE=ON .. - - make -j4 all install - -build:ubuntu_cpp_g++12: - stage: build - needs: [] - tags: - - docker - - script: - # Download dependencies - # aidge_core - - DEPENDENCY_NAME="aidge_core" - - DEPENDENCY_JOB="build:ubuntu_cpp" - - !reference [.download_dependency, script] - - # Build current module - - export CMAKE_PREFIX_PATH=../install_cpp - - apt install -y g++-12 - - mkdir -p build_cpp - - mkdir -p install_cpp - - cd build_cpp - - export CXX=/usr/bin/g++-12 - - cmake -DCMAKE_INSTALL_PREFIX:PATH=../install_cpp -DCMAKE_BUILD_TYPE=Debug -DWERROR=ON -DCOVERAGE=ON .. - - make -j4 all install - -build:ubuntu_cpp_clang12: - stage: build - needs: [] - tags: - - docker - - script: - # Download dependencies - # aidge_core - - DEPENDENCY_NAME="aidge_core" - - DEPENDENCY_JOB="build:ubuntu_cpp" - - !reference [.download_dependency, script] - - # Build current module - - export CMAKE_PREFIX_PATH=../install_cpp - - apt install -y clang-12 - - mkdir -p build_cpp - - mkdir -p install_cpp - - cd build_cpp - - export CXX=/usr/bin/clang++-12 - - cmake -DCMAKE_INSTALL_PREFIX:PATH=../install_cpp -DCMAKE_BUILD_TYPE=Debug -DWERROR=ON -DCOVERAGE=ON .. 
- - make -j4 all install - -build:ubuntu_cpp_clang15: - stage: build - needs: [] - tags: - - docker - - script: - # Download dependencies - # aidge_core - - DEPENDENCY_NAME="aidge_core" - - DEPENDENCY_JOB="build:ubuntu_cpp" - - !reference [.download_dependency, script] - - # Build current module - - export CMAKE_PREFIX_PATH=../install_cpp - - apt install -y clang-15 - - mkdir -p build_cpp - - mkdir -p install_cpp - - cd build_cpp - - export CXX=/usr/bin/clang++-15 - - cmake -DCMAKE_INSTALL_PREFIX:PATH=../install_cpp -DCMAKE_BUILD_TYPE=Debug -DWERROR=ON -DCOVERAGE=ON .. - - make -j4 all install - -build:ubuntu_python: - stage: build - needs: [] - tags: - - docker - - script: - # Download dependencies - # aidge_core (Python) - - DEPENDENCY_NAME="aidge_core" - - DEPENDENCY_JOB="build:ubuntu_python" - - !reference [.download_dependency, script] - - - python3 -m pip install virtualenv - - virtualenv venv - - source venv/bin/activate - - python3 -m pip install -r requirements.txt - - python3 -m pip install . 
- - python3 -m pip install numpy unittest-xml-reporting - - python3 -m pip list - artifacts: - expire_in: 1 week - paths: - - venv/ - -build:windows_cpp: - stage: build - needs: [] - tags: - - windows - - image: buildtools - before_script: - # Install Chocolatey - - Set-ExecutionPolicy Bypass -Scope Process -Force; [System.Net.ServicePointManager]::SecurityProtocol = [System.Net.ServicePointManager]::SecurityProtocol -bor 3072; iex ((New-Object System.Net.WebClient).DownloadString('https://community.chocolatey.org/install.ps1')) - # Install dependencies - - choco install cmake.install --installargs '"ADD_CMAKE_TO_PATH=System"' -Y - - choco install git -Y - - choco install python -Y - # Update PATH - - $env:Path = [System.Environment]::GetEnvironmentVariable("Path","Machine") + ";" + [System.Environment]::GetEnvironmentVariable("Path","User") - script: - # Download dependencies - # aidge_core - - $DEPENDENCY_NAME="aidge_core" - - $DEPENDENCY_JOB="build:windows_cpp" - - !reference [.download_dependency_windows, script] - - Remove-Item .\build_cpp\ -Recurse -Force -ErrorAction Ignore - - - $env:CMAKE_PREFIX_PATH = '../install_cpp' - - mkdir -p build_cpp - - cd build_cpp - - cmake -DCMAKE_INSTALL_PREFIX:PATH=../install_cpp -DCMAKE_BUILD_TYPE=Debug .. - - cmake --build . -j2 - - cmake --install . 
--config Debug - - artifacts: - expire_in: 1 week - paths: - - build_cpp/ - - install_cpp/ - -build:windows_python: - stage: build - needs: [] - tags: - - windows - - image: buildtools - before_script: - # Install Chocolatey - - Set-ExecutionPolicy Bypass -Scope Process -Force; [System.Net.ServicePointManager]::SecurityProtocol = [System.Net.ServicePointManager]::SecurityProtocol -bor 3072; iex ((New-Object System.Net.WebClient).DownloadString('https://community.chocolatey.org/install.ps1')) - # Install dependencies - - choco install cmake.install --installargs '"ADD_CMAKE_TO_PATH=System"' -Y - - choco install git -Y - - choco install python -Y - # Update PATH - - $env:Path = [System.Environment]::GetEnvironmentVariable("Path","Machine") + ";" + [System.Environment]::GetEnvironmentVariable("Path","User") - script: - # Download dependencies - # aidge_core (Python) - - $DEPENDENCY_NAME="aidge_core" - - $DEPENDENCY_JOB="build:windows_python" - - !reference [.download_dependency_windows, script] - - - python -m pip install virtualenv - - virtualenv venv - - venv\Scripts\Activate.ps1 - - python -m pip install -r requirements.txt - - python -m pip install . 
- artifacts: - expire_in: 1 week - paths: - - venv/ diff --git a/.gitlab/ci/cibuildwheel_build_deps_before_build_wheel.ps1 b/.gitlab/ci/cibuildwheel_build_deps_before_build_wheel.ps1 new file mode 100755 index 0000000000000000000000000000000000000000..12e1b7566cf8ea534ea71f8416630dae9267e0cc --- /dev/null +++ b/.gitlab/ci/cibuildwheel_build_deps_before_build_wheel.ps1 @@ -0,0 +1,23 @@ +$ErrorActionPreference = "Stop" + +# Retrieve and clean the dependencies string from the environment variable +$AIDGE_DEPENDENCIES = $env:AIDGE_DEPENDENCIES -split ' ' +Write-Host "Aidge dependencies : $AIDGE_DEPENDENCIES" +if ( $($AIDGE_DEPENDENCIES.Length) -eq 0) { + Write-Host "- No dependencies provided for current repsitory" + New-Item -ItemType Directory -Force -Path ".\build" | Out-Null + Remove-Item -Path ".\build\*" -Recurse -Force + } else { + Write-Host "Retrieving given dependencies to build current package : $AIDGE_DEPENDENCIES" + foreach ($dep in $($AIDGE_DEPENDENCIES -split " ")) { + Write-Host "Retrieving : $dep" + $curr_loc=$(Get-Location) + Set-Location $dep + Get-Location + Get-ChildItem . + New-Item -Path ".\build" -ItemType Directory -Force | Out-Null + Get-ChildItem -Path ".\build" -File | Remove-Item -Force + python -m pip install . -v + Set-Location $curr_loc + } +} diff --git a/.gitlab/ci/cibuildwheel_build_deps_before_build_wheel.sh b/.gitlab/ci/cibuildwheel_build_deps_before_build_wheel.sh new file mode 100755 index 0000000000000000000000000000000000000000..4f74488ae41714a4ce03ba7514bf93842768c5ae --- /dev/null +++ b/.gitlab/ci/cibuildwheel_build_deps_before_build_wheel.sh @@ -0,0 +1,40 @@ +#!/bin/bash +set -e +if [[ "$1" == "" ]]; then + echo "build aidge deps in cibuildwheel container before building wheel." + echo "search path defines where the dependencies will be searched." + echo "Hint : In wheel containers, files are mounted on /host by default." 
+ echo "\nusage : ./cibuildwheel_build_deps_before_build_wheel.sh $search_path" +fi +set -x +if [[ $AIDGE_DEPENDENCIES == "" ]]; then # case for aidge_ core + mkdir -p build # creating build if its not already there to hold the build of cpp files + rm -rf build/* # build from scratch +else + for repo in $AIDGE_DEPENDENCIES ; do # case for other projects + search_path=$1 + REPO_PATH=$(find $search_path ! -writable -prune -o -type d \ + -name "$repo" \ + -not -path "*/install/*" \ + -not -path "*/.git/*" \ + -not -path "*/miniconda/*" \ + -not -path "*/conda/*" \ + -not -path "*/.local/*" \ + -not -path "*/lib/*" \ + -not -path "*/$repo/$repo/*" \ + -not -path "*/proc/*" \ + -print -quit) + if [[ -z "$REPO_PATH" ]]; then + echo "ERROR : dependency $repo not found in search_path \"$search_path\". ABORTING." + exit -1 + fi + + cd $REPO_PATH + mkdir -p build # creating build if its not already there to hold the build of cpp files + rm -rf build/* # build from scratch + pip install . -v + cd - + done +fi +set +x +set +e diff --git a/.gitlab/ci/coverage.gitlab-ci.yml b/.gitlab/ci/coverage.gitlab-ci.yml deleted file mode 100644 index 33547fc3f52771c456fba3d34a6e8d96eebafd8a..0000000000000000000000000000000000000000 --- a/.gitlab/ci/coverage.gitlab-ci.yml +++ /dev/null @@ -1,41 +0,0 @@ -coverage:ubuntu_cpp: - stage: coverage - needs: ["build:ubuntu_cpp"] - tags: - - docker - script: - - cd build_cpp - - ctest --output-on-failure - # HTML report for visualization - - gcovr --html-details --exclude-unreachable-branches -o coverage.html --root ${CI_PROJECT_DIR} --filter '\.\./include/' --filter '\.\./src/' - # Coberta XML report for Gitlab integration - - gcovr --xml-pretty --exclude-unreachable-branches --print-summary -o coverage.xml --root ${CI_PROJECT_DIR} --filter '\.\./include/' --filter '\.\./src/' - coverage: /^\s*lines:\s*\d+.\d+\%/ - artifacts: - name: ${CI_JOB_NAME}-${CI_COMMIT_REF_NAME}-${CI_COMMIT_SHA} - expire_in: 2 days - reports: - coverage_report: - 
coverage_format: cobertura - path: build_cpp/coverage.xml - -coverage:ubuntu_python: - stage: coverage - needs: ["build:ubuntu_python"] - tags: - - docker - script: - - source venv/bin/activate - - python3 -m pip install numpy coverage - - cd ${CI_PROJECT_NAME} - # Retrieve the installation path of the module, since it is installed with pip. - - export MODULE_LOCATION=`python -c "import ${CI_PROJECT_NAME} as _; print(_.__path__[0])"` - - python3 -m coverage run --source=$MODULE_LOCATION -m unittest discover -s unit_tests/ -v -b - - python3 -m coverage report - - python3 -m coverage xml - coverage: '/(?i)total.*? (100(?:\.0+)?\%|[1-9]?\d(?:\.\d+)?\%)$/' - artifacts: - reports: - coverage_report: - coverage_format: cobertura - path: ${CI_PROJECT_NAME}/coverage.xml diff --git a/.gitlab/ci/static_analysis.gitlab-ci.yml b/.gitlab/ci/static_analysis.gitlab-ci.yml deleted file mode 100644 index 0ea9b711885442e7f260ae86e313464b592127a0..0000000000000000000000000000000000000000 --- a/.gitlab/ci/static_analysis.gitlab-ci.yml +++ /dev/null @@ -1,37 +0,0 @@ -static_analysis:cpp: - stage: static_analysis - tags: - - static_analysis - allow_failure: true - script: - - mkdir -p $CI_COMMIT_REF_NAME - - cppcheck -j 4 --enable=all --inconclusive --force --xml --xml-version=2 . 
2> cppcheck-result.xml - - python -m pip install Pygments - - cppcheck-htmlreport --file=cppcheck-result.xml --report-dir=$CI_COMMIT_REF_NAME --source-dir=src - - python3 -m pip install -U cppcheck_codequality - - cppcheck-codequality --input-file=cppcheck-result.xml --output-file=cppcheck.json - - mkdir -p public/cpp - - mv $CI_COMMIT_REF_NAME public/cpp/ - artifacts: - paths: - - public - reports: - codequality: cppcheck.json - -static_analysis:python: - stage: static_analysis - tags: - - static_analysis - allow_failure: true - script: - - pip install pylint - - pip install pylint-gitlab - - pylint --rcfile=.pylintrc --exit-zero --output-format=pylint_gitlab.GitlabCodeClimateReporter ${CI_PROJECT_NAME}/ > codeclimate.json - - pylint --rcfile=.pylintrc --exit-zero --output-format=pylint_gitlab.GitlabPagesHtmlReporter ${CI_PROJECT_NAME}/ > pylint.html - - mkdir -p public/python/$CI_COMMIT_REF_NAME - - mv pylint.html public/python/$CI_COMMIT_REF_NAME/ - artifacts: - paths: - - public - reports: - codequality: codeclimate.json \ No newline at end of file diff --git a/.gitlab/ci/test.gitlab-ci.yml b/.gitlab/ci/test.gitlab-ci.yml deleted file mode 100644 index 3cada635eb25b3eb87e8318eb6e26723f7a27dd6..0000000000000000000000000000000000000000 --- a/.gitlab/ci/test.gitlab-ci.yml +++ /dev/null @@ -1,48 +0,0 @@ -test:ubuntu_cpp: - stage: test - needs: ["build:ubuntu_cpp"] - tags: - - docker - script: - - cd build_cpp - - ctest --output-junit ctest-results.xml --output-on-failure - artifacts: - reports: - junit: build_cpp/ctest-results.xml - -test:ubuntu_python: - stage: test - needs: ["build:ubuntu_python"] - tags: - - docker - script: - - source venv/bin/activate - - cd ${CI_PROJECT_NAME} - - # Run on discovery all tests located in core/unit_tests/python and discard the stdout - # only to show the errors/warnings and the results of the tests - - python3 -m xmlrunner discover -s unit_tests/ -v -b --output-file xmlrunner-results.xml - artifacts: - reports: - junit: 
${CI_PROJECT_NAME}/xmlrunner-results.xml - -test:windows_cpp: - stage: test - needs: ["build:windows_cpp"] - tags: - - windows - image: buildtools - before_script: - # Install Chocolatey - - Set-ExecutionPolicy Bypass -Scope Process -Force; [System.Net.ServicePointManager]::SecurityProtocol = [System.Net.ServicePointManager]::SecurityProtocol -bor 3072; iex ((New-Object System.Net.WebClient).DownloadString('https://community.chocolatey.org/install.ps1')) - # Install dependencies - - choco install cmake.install --installargs '"ADD_CMAKE_TO_PATH=System"' -Y - - choco install python -Y - # Update PATH - - $env:Path = [System.Environment]::GetEnvironmentVariable("Path","Machine") + ";" + [System.Environment]::GetEnvironmentVariable("Path","User") - script: - - cd build_cpp - - ctest --output-junit ctest-results.xml --output-on-failure - artifacts: - reports: - junit: build_cpp/ctest-results.xml diff --git a/.pylintrc b/.pylintrc deleted file mode 100644 index 20ad01971c6e02397253a115490a3afc458b546d..0000000000000000000000000000000000000000 --- a/.pylintrc +++ /dev/null @@ -1,644 +0,0 @@ -[MASTER] - -# A comma-separated list of package or module names from where C extensions may -# be loaded. Extensions are loading into the active Python interpreter and may -# run arbitrary code. -extension-pkg-allow-list= aidge_core, aidge_backend_cpu, torch, tensorflow - -# A comma-separated list of package or module names from where C extensions may -# be loaded. Extensions are loading into the active Python interpreter and may -# run arbitrary code. (This is an alternative name to extension-pkg-allow-list -# for backward compatibility.) -extension-pkg-whitelist= - -# Return non-zero exit code if any of these messages/categories are detected, -# even if score is above --fail-under value. Syntax same as enable. Messages -# specified are enabled, while categories only check already-enabled messages. 
-fail-on= - -# Specify a score threshold to be exceeded before program exits with error. -fail-under=0.0 - -# Files or directories to be skipped. They should be base names, not paths. -ignore=CVS - -# Add files or directories matching the regex patterns to the ignore-list. The -# regex matches against paths. -ignore-paths= - -# Files or directories matching the regex patterns are skipped. The regex -# matches against base names, not paths. -ignore-patterns= - -# Python code to execute, usually for sys.path manipulation such as -# pygtk.require(). -#init-hook= - -# Use multiple processes to speed up Pylint. Specifying 0 will auto-detect the -# number of processors available to use. -jobs=1 - -# Control the amount of potential inferred values when inferring a single -# object. This can help the performance when dealing with large functions or -# complex, nested conditions. -limit-inference-results=100 - -# List of plugins (as comma separated values of python module names) to load, -# usually to register additional checkers. -load-plugins= - -# Pickle collected data for later comparisons. -persistent=yes - -# When enabled, pylint would attempt to guess common misconfiguration and emit -# user-friendly hints instead of false-positive error messages. -suggestion-mode=yes - -# Allow loading of arbitrary C extensions. Extensions are imported into the -# active Python interpreter and may run arbitrary code. -unsafe-load-any-extension=no - - -[MESSAGES CONTROL] - -# Only show warnings with the listed confidence levels. Leave empty to show -# all. Valid levels: HIGH, INFERENCE, INFERENCE_FAILURE, UNDEFINED. -confidence= - -# Disable the message, report, category or checker with the given id(s). You -# can either give multiple identifiers separated by comma (,) or put this -# option multiple times (only on the command line, not in the configuration -# file where it should appear only once). 
You can also use "--disable=all" to -# disable everything first and then reenable specific checks. For example, if -# you want to run only the similarities checker, you can use "--disable=all -# --enable=similarities". If you want to run only the classes checker, but have -# no Warning level messages displayed, use "--disable=all --enable=classes -# --disable=W". -disable=print-statement, - parameter-unpacking, - unpacking-in-except, - old-raise-syntax, - backtick, - long-suffix, - old-ne-operator, - old-octal-literal, - import-star-module-level, - non-ascii-bytes-literal, - raw-checker-failed, - bad-inline-option, - locally-disabled, - file-ignored, - suppressed-message, - useless-suppression, - deprecated-pragma, - use-symbolic-message-instead, - apply-builtin, - basestring-builtin, - buffer-builtin, - cmp-builtin, - coerce-builtin, - execfile-builtin, - file-builtin, - long-builtin, - raw_input-builtin, - reduce-builtin, - standarderror-builtin, - unicode-builtin, - xrange-builtin, - coerce-method, - delslice-method, - getslice-method, - setslice-method, - no-absolute-import, - old-division, - dict-iter-method, - dict-view-method, - next-method-called, - metaclass-assignment, - indexing-exception, - raising-string, - reload-builtin, - oct-method, - hex-method, - nonzero-method, - cmp-method, - input-builtin, - round-builtin, - intern-builtin, - unichr-builtin, - map-builtin-not-iterating, - zip-builtin-not-iterating, - range-builtin-not-iterating, - filter-builtin-not-iterating, - using-cmp-argument, - eq-without-hash, - div-method, - idiv-method, - rdiv-method, - exception-message-attribute, - invalid-str-codec, - sys-max-int, - bad-python3-import, - deprecated-string-function, - deprecated-str-translate-call, - deprecated-itertools-function, - deprecated-types-field, - next-method-defined, - dict-items-not-iterating, - dict-keys-not-iterating, - dict-values-not-iterating, - deprecated-operator-function, - deprecated-urllib-function, - xreadlines-attribute, - 
deprecated-sys-function, - exception-escape, - comprehension-escape, - c-extension-no-member, - too-many-locals, - missing-class-docstring, - missing-function-docstring, - too-many-ancestor, - too-many-arguments, - protected-access, - too-many-branches, - too-many-ancestors, - wrong-import-order, - wrong-import-position, - -# Enable the message, report, category or checker with the given id(s). You can -# either give multiple identifier separated by comma (,) or put this option -# multiple time (only on the command line, not in the configuration file where -# it should appear only once). See also the "--disable" option for examples. -enable=c-extension-no-member - - -[REPORTS] - -# Python expression which should return a score less than or equal to 10. You -# have access to the variables 'error', 'warning', 'refactor', and 'convention' -# which contain the number of messages in each category, as well as 'statement' -# which is the total number of statements analyzed. This score is used by the -# global evaluation report (RP0004). -evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10) - -# Template used to display messages. This is a python new-style format string -# used to format the message information. See doc for all details. -#msg-template= - -# Set the output format. Available formats are text, parseable, colorized, json -# and msvs (visual studio). You can also give a reporter class, e.g. -# mypackage.mymodule.MyReporterClass. -output-format=text - -# Tells whether to display a full report or only the messages. -reports=no - -# Activate the evaluation score. -score=yes - - -[REFACTORING] - -# Maximum number of nested blocks for function / method body -max-nested-blocks=5 - -# Complete name of functions that never returns. When checking for -# inconsistent-return-statements if a never returning function is called then -# it will be considered as an explicit return statement and no message will be -# printed. 
-never-returning-functions=sys.exit,argparse.parse_error - - -[BASIC] - -# Naming style matching correct argument names. -argument-naming-style=snake_case - -# Regular expression matching correct argument names. Overrides argument- -# naming-style. -#argument-rgx= - -# Naming style matching correct attribute names. -attr-naming-style=snake_case - -# Regular expression matching correct attribute names. Overrides attr-naming- -# style. -#attr-rgx= - -# Bad variable names which should always be refused, separated by a comma. -bad-names=foo, - bar, - baz, - toto, - tutu, - tata - -# Bad variable names regexes, separated by a comma. If names match any regex, -# they will always be refused -bad-names-rgxs= - -# Naming style matching correct class attribute names. -class-attribute-naming-style=any - -# Regular expression matching correct class attribute names. Overrides class- -# attribute-naming-style. -#class-attribute-rgx= - -# Naming style matching correct class constant names. -class-const-naming-style=UPPER_CASE - -# Regular expression matching correct class constant names. Overrides class- -# const-naming-style. -#class-const-rgx= - -# Naming style matching correct class names. -class-naming-style=PascalCase - -# Regular expression matching correct class names. Overrides class-naming- -# style. -#class-rgx= - -# Naming style matching correct constant names. -const-naming-style=UPPER_CASE - -# Regular expression matching correct constant names. Overrides const-naming- -# style. -#const-rgx= - -# Minimum line length for functions/classes that require docstrings, shorter -# ones are exempt. -docstring-min-length=-1 - -# Naming style matching correct function names. -function-naming-style=snake_case - -# Regular expression matching correct function names. Overrides function- -# naming-style. -#function-rgx= - -# Good variable names which should always be accepted, separated by a comma. 
-good-names=i, - j, - k, - ex, - Run, - _, - -# Good variable names regexes, separated by a comma. If names match any regex, -# they will always be accepted -good-names-rgxs= - -# Include a hint for the correct naming format with invalid-name. -include-naming-hint=no - -# Naming style matching correct inline iteration names. -inlinevar-naming-style=any - -# Regular expression matching correct inline iteration names. Overrides -# inlinevar-naming-style. -#inlinevar-rgx= - -# Naming style matching correct method names. -method-naming-style=snake_case - -# Regular expression matching correct method names. Overrides method-naming- -# style. -#method-rgx= - -# Naming style matching correct module names. -module-naming-style=snake_case - -# Regular expression matching correct module names. Overrides module-naming- -# style. -#module-rgx= - -# Colon-delimited sets of names that determine each other's naming style when -# the name regexes allow several styles. -name-group= - -# Regular expression which should only match function or class names that do -# not require a docstring. -no-docstring-rgx=^_ - -# List of decorators that produce properties, such as abc.abstractproperty. Add -# to this list to register other decorators that produce valid properties. -# These decorators are taken in consideration only for invalid-name. -property-classes=abc.abstractproperty - -# Naming style matching correct variable names. -variable-naming-style=snake_case - -# Regular expression matching correct variable names. Overrides variable- -# naming-style. -#variable-rgx= - - -[FORMAT] - -# Expected format of line ending, e.g. empty (any line ending), LF or CRLF. -expected-line-ending-format= - -# Regexp for a line that is allowed to be longer than the limit. -ignore-long-lines=^\s*(# )?<?https?://\S+>?$ - -# Number of spaces of indent required inside a hanging or continued line. -indent-after-paren=4 - -# String used as indentation unit. This is usually " " (4 spaces) or "\t" (1 -# tab). 
-indent-string=' ' - -# Maximum number of characters on a single line. -max-line-length=200 - -# Maximum number of lines in a module. -max-module-lines=1000 - -# Allow the body of a class to be on the same line as the declaration if body -# contains single statement. -single-line-class-stmt=no - -# Allow the body of an if to be on the same line as the test if there is no -# else. -single-line-if-stmt=no - - -[LOGGING] - -# The type of string formatting that logging methods do. `old` means using % -# formatting, `new` is for `{}` formatting. -logging-format-style=old - -# Logging modules to check that the string format arguments are in logging -# function parameter format. -logging-modules=logging - - -[MISCELLANEOUS] - -# List of note tags to take in consideration, separated by a comma. -notes=FIXME, - XXX, - TODO - -# Regular expression of note tags to take in consideration. -#notes-rgx= - - -[SIMILARITIES] - -# Comments are removed from the similarity computation -ignore-comments=yes - -# Docstrings are removed from the similarity computation -ignore-docstrings=yes - -# Imports are removed from the similarity computation -ignore-imports=no - -# Signatures are removed from the similarity computation -ignore-signatures=no - -# Minimum lines number of a similarity. -min-similarity-lines=4 - - -[SPELLING] - -# Limits count of emitted suggestions for spelling mistakes. -max-spelling-suggestions=4 - -# Spelling dictionary name. Available dictionaries: none. To make it work, -# install the 'python-enchant' package. -spelling-dict= - -# List of comma separated words that should be considered directives if they -# appear and the beginning of a comment and should not be checked. -spelling-ignore-comment-directives=fmt: on,fmt: off,noqa:,noqa,nosec,isort:skip,mypy: - -# List of comma separated words that should not be checked. -spelling-ignore-words= - -# A path to a file that contains the private dictionary; one word per line. 
-spelling-private-dict-file= - -# Tells whether to store unknown words to the private dictionary (see the -# --spelling-private-dict-file option) instead of raising a message. -spelling-store-unknown-words=no - - -[STRING] - -# This flag controls whether inconsistent-quotes generates a warning when the -# character used as a quote delimiter is used inconsistently within a module. -check-quote-consistency=no - -# This flag controls whether the implicit-str-concat should generate a warning -# on implicit string concatenation in sequences defined over several lines. -check-str-concat-over-line-jumps=no - - -[TYPECHECK] - -# List of decorators that produce context managers, such as -# contextlib.contextmanager. Add to this list to register other decorators that -# produce valid context managers. -contextmanager-decorators=contextlib.contextmanager - -# List of members which are set dynamically and missed by pylint inference -# system, and so shouldn't trigger E1101 when accessed. Python regular -# expressions are accepted. -generated-members= - -# Tells whether missing members accessed in mixin class should be ignored. A -# mixin class is detected if its name ends with "mixin" (case insensitive). -ignore-mixin-members=yes - -# Tells whether to warn about missing members when the owner of the attribute -# is inferred to be None. -ignore-none=yes - -# This flag controls whether pylint should warn about no-member and similar -# checks whenever an opaque object is returned when inferring. The inference -# can return multiple potential results while evaluating a Python object, but -# some branches might not be evaluated, which results in partial inference. In -# that case, it might be useful to still emit no-member and other checks for -# the rest of the inferred objects. -ignore-on-opaque-inference=yes - -# List of class names for which member attributes should not be checked (useful -# for classes with dynamically set attributes). 
This supports the use of -# qualified names. -ignored-classes=optparse.Values, - thread._local, - _thread._local, - aidge.global_variables, - aidge.cells.abstract_cell.Trainable, - torch, - tensorflow, - -# List of module names for which member attributes should not be checked -# (useful for modules/projects where namespaces are manipulated during runtime -# and thus existing member attributes cannot be deduced by static analysis). It -# supports qualified module names, as well as Unix pattern matching. -ignored-modules= aidge_core, aidge_backend_cpu - -# Show a hint with possible names when a member name was not found. The aspect -# of finding the hint is based on edit distance. -missing-member-hint=yes - -# The minimum edit distance a name should have in order to be considered a -# similar match for a missing member name. -missing-member-hint-distance=1 - -# The total number of similar names that should be taken in consideration when -# showing a hint for a missing member. -missing-member-max-choices=1 - -# List of decorators that change the signature of a decorated function. -signature-mutators= - - -[VARIABLES] - -# List of additional names supposed to be defined in builtins. Remember that -# you should avoid defining new builtins when possible. -additional-builtins= - -# Tells whether unused global variables should be treated as a violation. -allow-global-unused-variables=yes - -# List of names allowed to shadow builtins -allowed-redefined-builtins= - -# List of strings which can identify a callback function by name. A callback -# name must start or end with one of those strings. -callbacks=cb_, - _cb - -# A regular expression matching the name of dummy variables (i.e. expected to -# not be used). -dummy-variables-rgx=_+$|(_[a-zA-Z0-9_]*[a-zA-Z0-9]+?$)|dummy|^ignored_|^unused_ - -# Argument names that match this expression will be ignored. Default to name -# with leading underscore. 
-ignored-argument-names=_.*|^ignored_|^unused_ - -# Tells whether we should check for unused import in __init__ files. -init-import=no - -# List of qualified module names which can have objects that can redefine -# builtins. -redefining-builtins-modules=six.moves,past.builtins,future.builtins,builtins,io - - -[CLASSES] - -# Warn about protected attribute access inside special methods -check-protected-access-in-special-methods=no - -# List of method names used to declare (i.e. assign) instance attributes. -defining-attr-methods=__init__, - __new__, - setUp, - __post_init__ - -# List of member names, which should be excluded from the protected access -# warning. -exclude-protected=_asdict, - _fields, - _replace, - _source, - _make - -# List of valid names for the first argument in a class method. -valid-classmethod-first-arg=cls - -# List of valid names for the first argument in a metaclass class method. -valid-metaclass-classmethod-first-arg=cls - - -[DESIGN] - -# List of qualified class names to ignore when countint class parents (see -# R0901) -ignored-parents= - -# Maximum number of arguments for function / method. -max-args=5 - -# Maximum number of attributes for a class (see R0902). -max-attributes=7 - -# Maximum number of boolean expressions in an if statement (see R0916). -max-bool-expr=5 - -# Maximum number of branch for function / method body. -max-branches=12 - -# Maximum number of locals for function / method body. -max-locals=15 - -# Maximum number of parents for a class (see R0901). -max-parents=7 - -# Maximum number of public methods for a class (see R0904). -max-public-methods=20 - -# Maximum number of return / yield for function / method body. -max-returns=6 - -# Maximum number of statements in function / method body. -max-statements=50 - -# Minimum number of public methods for a class (see R0903). -min-public-methods=2 - - -[IMPORTS] - -# List of modules that can be imported at any level, not just the top level -# one. 
-allow-any-import-level= - -# Allow wildcard imports from modules that define __all__. -allow-wildcard-with-all=no - -# Analyse import fallback blocks. This can be used to support both Python 2 and -# 3 compatible code, which means that the block might have code that exists -# only in one or another interpreter, leading to false positives when analysed. -analyse-fallback-blocks=no - -# Deprecated modules which should not be used, separated by a comma. -deprecated-modules= - -# Output a graph (.gv or any supported image format) of external dependencies -# to the given file (report RP0402 must not be disabled). -ext-import-graph= - -# Output a graph (.gv or any supported image format) of all (i.e. internal and -# external) dependencies to the given file (report RP0402 must not be -# disabled). -import-graph= - -# Output a graph (.gv or any supported image format) of internal dependencies -# to the given file (report RP0402 must not be disabled). -int-import-graph= - -# Force import order to recognize a module as part of the standard -# compatibility libraries. -known-standard-library= - -# Force import order to recognize a module as part of a third party library. -known-third-party=enchant - -# Couples of modules and preferred modules, separated by a comma. -preferred-modules= - - -[EXCEPTIONS] - -# Exceptions that will emit a warning when being caught. Defaults to -# "BaseException, Exception". 
-overgeneral-exceptions=BaseException, - Exception \ No newline at end of file diff --git a/CMakeLists.txt b/CMakeLists.txt index a9603c550f89f106fcc9da818a7bd67492ec863f..3574e25cec5977bc2249c7d756041c09650f9b11 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,33 +1,34 @@ -cmake_minimum_required(VERSION 3.15) - +cmake_minimum_required(VERSION 3.18) +set(CXX_STANDARD 14) file(STRINGS "${CMAKE_SOURCE_DIR}/version.txt" version) -add_definitions(-DPROJECT_VERSION="${version}") -file(STRINGS "${CMAKE_SOURCE_DIR}/project_name.txt" project) -message(STATUS "Project name: ${project}") +project(aidge_backend_cpu + VERSION ${version} + DESCRIPTION "CPU implementations of the operators of aidge framework." + LANGUAGES CXX) + +message(STATUS "Project name: ${CMAKE_PROJECT_NAME}") message(STATUS "Project version: ${version}") +add_definitions(-DPROJECT_VERSION="${version}") execute_process( COMMAND git rev-parse --short HEAD WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} OUTPUT_VARIABLE GIT_COMMIT_HASH OUTPUT_STRIP_TRAILING_WHITESPACE + ERROR_QUIET ) message(STATUS "Latest git commit: ${GIT_COMMIT_HASH}") - -# Define a preprocessor macro with the Git commit version add_definitions(-DGIT_COMMIT_HASH="${GIT_COMMIT_HASH}") -# Note : project name is {project} and python module name is also {project} -set(module_name _${project}) # target name - -project(${project}) -set(CXX_STANDARD 14) +# Note : project name is ${CMAKE_PROJECT_NAME} and python module name is also ${CMAKE_PROJECT_NAME} +set(module_name _${CMAKE_PROJECT_NAME}) # target name +set(pybind_module_name ${CMAKE_PROJECT_NAME}) # name of submodule for python bindings ############################################## # Define options -option(PYBIND "python binding" ON) +option(PYBIND "python binding" OFF) option(WERROR "Warning as error" OFF) option(TEST "Enable tests" ON) option(COVERAGE "Enable coverage" OFF) @@ -36,14 +37,20 @@ option(ENABLE_ASAN "Enable ASan (AddressSanitizer) for runtime analysis of memor 
############################################## # Import utils CMakeLists set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake") -include(PybindModuleCreation) if(CMAKE_COMPILER_IS_GNUCXX AND COVERAGE) Include(CodeCoverage) endif() ############################################## -# Find system dependencies +# FIND Dependencies +if(NOT $ENV{AIDGE_INSTALL} STREQUAL "") + set(CMAKE_INSTALL_PREFIX $ENV{AIDGE_INSTALL}) + list(APPEND CMAKE_PREFIX_PATH $ENV{AIDGE_INSTALL}) + message(WARNING "Env var AIDGE_INSTALL detected : $ENV{AIDGE_INSTALL}. Set CMAKE_INSTALL_PREFIX to AIDGE_INSTALL & added to CMAKE_PREFIX_PATH" + "\n\tCMAKE_INSTALL_PREFIX = ${CMAKE_INSTALL_PREFIX}" + "\n\tCMAKE_PREFIX_PATH = ${CMAKE_PREFIX_PATH}") +endif() find_package(aidge_core REQUIRED) ############################################## @@ -52,14 +59,25 @@ file(GLOB_RECURSE src_files "src/*.cpp") file(GLOB_RECURSE inc_files "include/*.hpp") add_library(${module_name} ${src_files} ${inc_files}) + target_link_libraries(${module_name} PUBLIC - _aidge_core # _ is added because we link the target not the project + _aidge_core # _ is added because we link the exported target and not the project ) #Set target properties set_property(TARGET ${module_name} PROPERTY POSITION_INDEPENDENT_CODE ON) +# PYTHON BINDING +if (PYBIND) + # Python binding lib is by default installed in <prefix>/python_packages/<package>/ + # When installed from python, setup.py should set it to the python package dir + set(PYBIND_INSTALL_PREFIX python_packages/${pybind_module_name} CACHE PATH "Python package install prefix") + + include(PybindModuleCreation) + generate_python_binding(${pybind_module_name} ${module_name}) +endif() + if( ${ENABLE_ASAN} ) message("Building ${module_name} with ASAN.") set(SANITIZE_FLAGS -fsanitize=address -fno-omit-frame-pointer) @@ -81,20 +99,6 @@ target_include_directories(${module_name} ${CMAKE_CURRENT_SOURCE_DIR}/src ) -# PYTHON BINDING -if (PYBIND) - generate_python_binding(${project} 
${module_name}) - - # Handles Python + pybind11 headers dependencies - target_link_libraries(${module_name} - PUBLIC - pybind11::pybind11 - PRIVATE - Python::Python - ) -endif() - -target_link_libraries(${module_name} PUBLIC fmt::fmt) target_compile_features(${module_name} PRIVATE cxx_std_14) target_compile_options(${module_name} PRIVATE @@ -110,22 +114,25 @@ endif() ############################################## # Installation instructions - include(GNUInstallDirs) -set(INSTALL_CONFIGDIR ${CMAKE_INSTALL_LIBDIR}/cmake/${project}) +set(INSTALL_CONFIGDIR ${CMAKE_INSTALL_LIBDIR}/cmake/${CMAKE_PROJECT_NAME}) -install(TARGETS ${module_name} EXPORT ${project}-targets +install(TARGETS ${module_name} EXPORT ${CMAKE_PROJECT_NAME}-targets LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} ) - install(DIRECTORY include/ DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) -#Export the targets to a script +if (PYBIND) + install(TARGETS ${pybind_module_name} + DESTINATION ${PYBIND_INSTALL_PREFIX} + ) +endif() -install(EXPORT ${project}-targets - FILE "${project}-targets.cmake" +#Export the targets to a script +install(EXPORT ${CMAKE_PROJECT_NAME}-targets + FILE "${CMAKE_PROJECT_NAME}-targets.cmake" DESTINATION ${INSTALL_CONFIGDIR} COMPONENT ${module_name} ) @@ -133,32 +140,37 @@ install(EXPORT ${project}-targets #Create a ConfigVersion.cmake file include(CMakePackageConfigHelpers) write_basic_package_version_file( - "${CMAKE_CURRENT_BINARY_DIR}/${project}-config-version.cmake" + "${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_PROJECT_NAME}-config-version.cmake" VERSION ${version} COMPATIBILITY AnyNewerVersion ) -configure_package_config_file("${project}-config.cmake.in" - "${CMAKE_CURRENT_BINARY_DIR}/${project}-config.cmake" +configure_package_config_file("${CMAKE_PROJECT_NAME}-config.cmake.in" + "${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_PROJECT_NAME}-config.cmake" INSTALL_DESTINATION ${INSTALL_CONFIGDIR} ) #Install the 
config, configversion and custom find modules install(FILES - "${CMAKE_CURRENT_BINARY_DIR}/${project}-config.cmake" - "${CMAKE_CURRENT_BINARY_DIR}/${project}-config-version.cmake" + "${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_PROJECT_NAME}-config.cmake" + "${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_PROJECT_NAME}-config-version.cmake" DESTINATION ${INSTALL_CONFIGDIR} ) ############################################## ## Exporting from the build tree -export(EXPORT ${project}-targets - FILE "${CMAKE_CURRENT_BINARY_DIR}/${project}-targets.cmake") +message(STATUS "Exporting created targets to use them in another build") +export(EXPORT ${CMAKE_PROJECT_NAME}-targets + FILE "${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_PROJECT_NAME}-targets.cmake") ############################################## ## Add test if(TEST) - enable_testing() - add_subdirectory(unit_tests) + if (AIDGE_REQUIRES_PYTHON AND NOT AIDGE_PYTHON_HAS_EMBED) + message(WARNING "Skipping compilation of tests: missing Python embedded interpreter") + else() + enable_testing() + add_subdirectory(unit_tests) + endif() endif() diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000000000000000000000000000000000000..61f22a8c8ed0c92dab03c0533d7617d96d42c03d --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,8 @@ +include README.md LICENSE +recursive-include aidge_backend_cpu *.py +recursive-exclude aidge_backend_cpu/unit_tests *.py + +recursive-include include *.hpp +recursive-include src *.cpp +recursive-include python_binding *.cpp +include CMakeLists.txt diff --git a/README.md b/README.md index e67b9d8bade1a862beee8f7dbe59ceac1469efe3..96283603759f03415b7dc1b99f3905550427f633 100644 --- a/README.md +++ b/README.md @@ -5,47 +5,53 @@ You can find in this folder the library that implements the CPU operators. <br> Those operators can be used on any machine with an Linux OS. 
-So far be sure to have the correct requirements to use this library -- GCC -- Make -- CMake -- aidge_core -- Python (optional, if you have no intend to use this library in python with pybind) +[TOC] -## Pip installation +## Installation -You will need to install first the ``aidge_core`` library before installing ``aidge_backend_cpu``. +### Dependencies +- `GCC` +- `Make`/`Ninja` +- `CMake` +- `Python` (optional, if you have no intend to use this library in python with pybind) -If you have set a custom install path for the ``aidge_core`` library, make sure to use the same one here. +#### Aidge dependencies + - `aidge_core` -Then run in your python environnement : +### Pip installation ``` bash pip install . -v ``` +> **TIPS :** Use environment variables to change compilation options : +> - `AIDGE_INSTALL` : to set the installation folder. Defaults to `<python_prefix>/lib/libAidge`. :warning: This path must be identical to aidge_core install path. +> - `AIDGE_PYTHON_BUILD_TYPE` : to set the compilation mode to **Debug** or **Release** or "" (for default flags). Defaults to **Release**. +> - `AIDGE_BUILD_GEN` : to set the build backend (for development mode) or "" for the cmake default. Default to "". -## Standard C++ Compilation +## Pip installation for development -You will need to compile first the Core library before compiling the CPU one. -The makefile is designed to do it for you. +To setup using pip in development (or editable mode), use the `--no-build-isolation -e` options to pip. 
-To only compile the CPU library, run -``` -make cpu_only +For instance run the following command in your python environnement for a typical setup : +``` bash +export AIDGE_PYTHON_BUILD_TYPE= # default flags (no debug info but fastest build time) +export AIDGE_PYTHON_BUILD_TYPE=Debug # or if one really need to debug the C++ code +pip install -U pip setuptools setuptools_scm[toml] cmake # Pre-install build requirements (refer to the pyproject.toml [build-system] section) +pip install -v --no-build-isolation -e . ``` -To compile the CPU library + the associated unitary tests, run -``` -make cpu_tests -``` +Refer to `aidge_core/README.md` for more details on development build options. -To compile the CPU library with the python binding, run -``` -make cpu_with_pybind -``` -Important: this command can also be run with `make`. +### Standard C++ Compilation +You will need to compile and install the [Core Library](https://gitlab.eclipse.org/eclipse/aidge/aidge_core) before compiling the CPU one. 
-To compile the CPU library with the python binding + the associated unitary tests, run -``` -make cpu_with_pybind_tests +Once this has been done, you'll need run CMake with the +`CMAKE_INSTALL_PREFIX:PATH` flag, in order to indicate to CMake where +`aidge_core` has been installed : +```sh +cmake -DCMAKE_INSTALL_PREFIX:PATH=$(path_to_install_folder) $(CMAKE PARAMETERS) $(projet_root) + +make all ``` + +More detailed information is available in the [Aidge User Guide](https://eclipse.dev/aidge/source/GetStarted/install.html) diff --git a/aidge_backend_cpu-config.cmake.in b/aidge_backend_cpu-config.cmake.in index f3604be11c27d86caf1ad8a48b333b9bd8f30625..d8e1372bc8a7b79bd09c79b654af4291c995ac58 100644 --- a/aidge_backend_cpu-config.cmake.in +++ b/aidge_backend_cpu-config.cmake.in @@ -1,3 +1,10 @@ +@PACKAGE_INIT@ + +include(CMakeFindDependencyMacro) +find_dependency(aidge_core) + +include(CMakeFindDependencyMacro) + include(${CMAKE_CURRENT_LIST_DIR}/aidge_backend_cpu-config-version.cmake) include(${CMAKE_CURRENT_LIST_DIR}/aidge_backend_cpu-targets.cmake) diff --git a/aidge_backend_cpu/__init__.py b/aidge_backend_cpu/__init__.py index 8f18440daf33134718273b4b3fdd4d99039b6ddf..a7fe1ea3abdea25b18af6e7e0a1958f01f928433 100644 --- a/aidge_backend_cpu/__init__.py +++ b/aidge_backend_cpu/__init__.py @@ -1 +1,3 @@ -from aidge_backend_cpu.aidge_backend_cpu import * # import so generated by PyBind \ No newline at end of file +import aidge_core +from aidge_backend_cpu.aidge_backend_cpu import * # import so generated by PyBind +from ._version import * diff --git a/cmake/PybindModuleCreation.cmake b/cmake/PybindModuleCreation.cmake index 87e70fc38c9e4ec4ddb44cbe5d7fb2a31c2e94d6..a520039f6505a7178acefaca076fa3f659e41bcb 100644 --- a/cmake/PybindModuleCreation.cmake +++ b/cmake/PybindModuleCreation.cmake @@ -1,21 +1,24 @@ -function(generate_python_binding name target_to_bind) - add_definitions(-DPYBIND) +function(generate_python_binding pybind_module_name target_to_bind) + + 
find_package(Python COMPONENTS Interpreter Development.Module) + Include(FetchContent) + set(PYBIND_VERSION v2.10.4) + message(STATUS "Retrieving pybind ${PYBIND_VERSION} from git") + FetchContent_Declare( - PyBind11 - GIT_REPOSITORY https://github.com/pybind/pybind11.git - GIT_TAG v2.10.4 # or a later release + PyBind11 + GIT_REPOSITORY https://github.com/pybind/pybind11.git + GIT_TAG ${PYBIND_VERSION} # or a later release ) - # Use the New FindPython mode, recommanded. Requires CMake 3.15+ - find_package(Python COMPONENTS Interpreter Development) FetchContent_MakeAvailable(PyBind11) - message(STATUS "Creating binding for module ${name}") + message(STATUS "Creating binding for module ${pybind_module_name}") file(GLOB_RECURSE pybind_src_files "python_binding/*.cpp") - pybind11_add_module(${name} MODULE ${pybind_src_files} "NO_EXTRAS") # NO EXTRA recquired for pip install - target_include_directories(${name} PUBLIC "python_binding") - target_link_libraries(${name} PUBLIC ${target_to_bind}) + pybind11_add_module(${pybind_module_name} MODULE ${pybind_src_files} "NO_EXTRAS") # NO EXTRA recquired for pip install + target_include_directories(${pybind_module_name} PRIVATE "python_binding") + target_link_libraries(${pybind_module_name} PRIVATE ${target_to_bind}) endfunction() diff --git a/include/aidge/backend/cpu.hpp b/include/aidge/backend/cpu.hpp index 11f9c264098d5a238d0d1f8e6bc4fac0cc099549..b45aa1cb4151d8d6c5268d4a94da97bb25a89a40 100644 --- a/include/aidge/backend/cpu.hpp +++ b/include/aidge/backend/cpu.hpp @@ -12,22 +12,30 @@ #ifndef AIDGE_CPU_IMPORTS_H_ #define AIDGE_CPU_IMPORTS_H_ +#include "aidge/backend/cpu/operator/AbsImpl.hpp" #include "aidge/backend/cpu/operator/AddImpl.hpp" +#include "aidge/backend/cpu/operator/AndImpl.hpp" +#include "aidge/backend/cpu/operator/ArgMaxImpl.hpp" #include "aidge/backend/cpu/operator/AvgPoolingImpl.hpp" #include "aidge/backend/cpu/operator/MaxPoolingImpl.hpp" #include "aidge/backend/cpu/operator/BatchNormImpl.hpp" +#include 
"aidge/backend/cpu/operator/BitShiftImpl.hpp" #include "aidge/backend/cpu/operator/ConvDepthWiseImpl.hpp" #include "aidge/backend/cpu/operator/ConvImpl.hpp" +#include "aidge/backend/cpu/operator/ConstantOfShapeImpl.hpp" #include "aidge/backend/cpu/operator/DivImpl.hpp" #include "aidge/backend/cpu/operator/ErfImpl.hpp" #include "aidge/backend/cpu/operator/FCImpl.hpp" +#include "aidge/backend/cpu/operator/FoldImpl.hpp" #include "aidge/backend/cpu/operator/GlobalAveragePoolingImpl.hpp" #include "aidge/backend/cpu/operator/LeakyReLUImpl.hpp" +#include "aidge/backend/cpu/operator/LnImpl.hpp" #include "aidge/backend/cpu/operator/MatMulImpl.hpp" #include "aidge/backend/cpu/operator/MulImpl.hpp" #include "aidge/backend/cpu/operator/PadImpl.hpp" #include "aidge/backend/cpu/operator/PowImpl.hpp" #include "aidge/backend/cpu/operator/ReduceMeanImpl.hpp" +#include "aidge/backend/cpu/operator/ReduceSumImpl.hpp" #include "aidge/backend/cpu/operator/ReLUImpl.hpp" #include "aidge/backend/cpu/operator/ScalingImpl.hpp" #include "aidge/backend/cpu/operator/SigmoidImpl.hpp" diff --git a/include/aidge/backend/cpu/operator/AbsImpl.hpp b/include/aidge/backend/cpu/operator/AbsImpl.hpp new file mode 100644 index 0000000000000000000000000000000000000000..8233d47c4d1e2dc7bf724600ec083bcaa0d667e9 --- /dev/null +++ b/include/aidge/backend/cpu/operator/AbsImpl.hpp @@ -0,0 +1,31 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. 
+ * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#ifndef AIDGE_CPU_OPERATOR_ABSIMPL_H_ +#define AIDGE_CPU_OPERATOR_ABSIMPL_H_ + +#include "aidge/backend/cpu/operator/OperatorImpl.hpp" +#include "aidge/operator/Abs.hpp" +#include "aidge/utils/Registrar.hpp" +#include "aidge/utils/Types.h" +#include <memory> +#include <vector> + +namespace Aidge { +// Operator implementation entry point for the backend +using AbsImpl_cpu = OperatorImpl_cpu<Abs_Op, + void(const std::size_t, const void*, void*)>; + +// Implementation entry point registration to Operator +REGISTRAR(Abs_Op, "cpu", Aidge::AbsImpl_cpu::create); +} // namespace Aidge + +#endif /* AIDGE_CPU_OPERATOR_ABSIMPL_H_ */ diff --git a/include/aidge/backend/cpu/operator/TanhImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/AbsImpl_kernels.hpp similarity index 50% rename from include/aidge/backend/cpu/operator/TanhImpl_forward_kernels.hpp rename to include/aidge/backend/cpu/operator/AbsImpl_kernels.hpp index 9e57b6dfcb0da322f5b21944fb10ec7a10cd0ab8..16e5f9dee26a6f8b760e14a1ad66a40d8f0f7e93 100644 --- a/include/aidge/backend/cpu/operator/TanhImpl_forward_kernels.hpp +++ b/include/aidge/backend/cpu/operator/AbsImpl_kernels.hpp @@ -9,34 +9,39 @@ * ********************************************************************************/ -#ifndef AIDGE_CPU_OPERATOR_TANHIMPL_FORWARD_KERNEL_H_ -#define AIDGE_CPU_OPERATOR_TANHIMPL_FORWARD_KERNEL_H_ +#ifndef AIDGE_CPU_OPERATOR_ABSIMPL_KERNELS_H_ +#define AIDGE_CPU_OPERATOR_ABSIMPL_KERNELS_H_ + +#include <cmath> #include "aidge/utils/Registrar.hpp" -#include "aidge/backend/cpu/operator/TanhImpl.hpp" +#include "aidge/backend/cpu/operator/AbsImpl.hpp" namespace Aidge { template <class I, class O> -void TanhImpl_cpu_forward_kernel(std::size_t inputLenght, +void AbsImpl_cpu_forward_kernel(std::size_t inputLenght, const void* input_, void* output_) { const I* input = static_cast<const I*>(input_); 
O* output = static_cast<O*>(output_); -//#pragma omp parallel for if (inputLenght > 1024) for (std::size_t i = 0; i < inputLenght; ++i) { - output[i] = std::tanh(input[i]); + output[i] = std::abs(input[i]); } } -namespace { -static Registrar<TanhImplForward_cpu> registrarTanhImplForward_cpu_Float32( - {DataType::Float32, DataType::Float32}, Aidge::TanhImpl_cpu_forward_kernel<float, float>); -static Registrar<TanhImplForward_cpu> registrarTanhImplForward_cpu_Float64( - {DataType::Float64, DataType::Float64}, Aidge::TanhImpl_cpu_forward_kernel<double, double>); -} // namespace +// Kernels registration to implementation entry point +REGISTRAR(AbsImpl_cpu, + {DataType::Float32}, + {ProdConso::inPlaceModel, Aidge::AbsImpl_cpu_forward_kernel<float, float>, nullptr}); +REGISTRAR(AbsImpl_cpu, + {DataType::Float64}, + {ProdConso::inPlaceModel, Aidge::AbsImpl_cpu_forward_kernel<double, double>, nullptr}); +REGISTRAR(AbsImpl_cpu, + {DataType::Int32}, + {ProdConso::inPlaceModel, Aidge::AbsImpl_cpu_forward_kernel<std::int32_t, std::int32_t>, nullptr}); } // namespace Aidge -#endif /* AIDGE_CPU_OPERATOR_TANHIMPL_FORWARD_KERNEL_H_ */ +#endif /* AIDGE_CPU_OPERATOR_ABSIMPL_KERNELS_H_ */ diff --git a/include/aidge/backend/cpu/operator/AddImpl.hpp b/include/aidge/backend/cpu/operator/AddImpl.hpp index 7a1497a2f4a2ae0e6005897ae504502505bbe60a..5e795922a67be178dde588e8e5e346ec268efe86 100644 --- a/include/aidge/backend/cpu/operator/AddImpl.hpp +++ b/include/aidge/backend/cpu/operator/AddImpl.hpp @@ -17,36 +17,18 @@ #include <string> #include <vector> -#include "aidge/backend/OperatorImpl.hpp" +#include "aidge/backend/cpu/operator/OperatorImpl.hpp" #include "aidge/operator/Add.hpp" #include "aidge/utils/Registrar.hpp" #include "aidge/utils/Types.h" namespace Aidge { +// Operator implementation entry point for the backend +using AddImpl_cpu = OperatorImpl_cpu<Add_Op, + void(const std::vector<const void*>, const std::vector<std::vector<std::size_t>>&, const std::size_t, const 
std::vector<std::size_t>&, void*)>; -// compute kernel registry for forward and backward -class AddImplForward_cpu - : public Registrable<AddImplForward_cpu, std::tuple<DataType, DataType>, void(const std::vector<const void*>, const std::vector<std::vector<std::size_t>>&, const std::size_t, const std::vector<std::size_t>&, void*)> {}; - -class AddImplBackward_cpu - : public Registrable<AddImplBackward_cpu, std::tuple<DataType, DataType>, void(const std::vector<const void*>, const std::vector<std::vector<std::size_t>>&, const std::size_t, const std::vector<std::size_t>&, void*)> {}; - - -class AddImpl_cpu : public OperatorImpl { -public: - AddImpl_cpu(const Add_Op& op) : OperatorImpl(op, "cpu") {} - - static std::unique_ptr<AddImpl_cpu> create(const Add_Op& op) { - return std::make_unique<AddImpl_cpu>(op); - } - - Elts_t getNbRequiredProtected(const IOIndex_t /*inputIdx*/) const override final; - void forward() override; -}; - -namespace { -static Registrar<Add_Op> registrarAddImpl_cpu("cpu", Aidge::AddImpl_cpu::create); -} // namespace +// Implementation entry point registration to Operator +REGISTRAR(Add_Op, "cpu", Aidge::AddImpl_cpu::create); } // namespace Aidge #endif /* AIDGE_CPU_OPERATOR_ADDIMPL_H_ */ diff --git a/include/aidge/backend/cpu/operator/AddImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/AddImpl_kernels.hpp similarity index 60% rename from include/aidge/backend/cpu/operator/AddImpl_forward_kernels.hpp rename to include/aidge/backend/cpu/operator/AddImpl_kernels.hpp index 94b22dcc7fc8251f8ca907ab0b060b0275309c9d..4a4ba2a8999c4dc33fc743b5a3a7dad023f9e0dd 100644 --- a/include/aidge/backend/cpu/operator/AddImpl_forward_kernels.hpp +++ b/include/aidge/backend/cpu/operator/AddImpl_kernels.hpp @@ -9,8 +9,8 @@ * ********************************************************************************/ -#ifndef AIDGE_CPU_OPERATOR_ADDIMPL_FORWARD_KERNEL_H_ -#define AIDGE_CPU_OPERATOR_ADDIMPL_FORWARD_KERNEL_H_ +#ifndef 
AIDGE_CPU_OPERATOR_ADDIMPL_KERNELS_H_ +#define AIDGE_CPU_OPERATOR_ADDIMPL_KERNELS_H_ #include "aidge/utils/Registrar.hpp" @@ -41,16 +41,19 @@ void AddImpl_cpu_forward_kernel(const std::vector<const void*> inputs_, const st } } -namespace { -static Registrar<AddImplForward_cpu> registrarAddImplForward_cpu_Float32( - {DataType::Float32, DataType::Float32}, Aidge::AddImpl_cpu_forward_kernel<float, float>); -static Registrar<AddImplForward_cpu> registrarAddImplForward_cpu_Float64( - {DataType::Float64, DataType::Float64}, Aidge::AddImpl_cpu_forward_kernel<double, double>); -static Registrar<AddImplForward_cpu> registrarAddImplForward_cpu_Int32( - {DataType::Int32, DataType::Int32}, Aidge::AddImpl_cpu_forward_kernel<std::int32_t, std::int32_t>); -static Registrar<AddImplForward_cpu> registrarAddImplForward_cpu_Int64( - {DataType::Int64, DataType::Int64}, Aidge::AddImpl_cpu_forward_kernel<std::int64_t, std::int64_t>); -} // namespace +// Kernels registration to implementation entry point +REGISTRAR(AddImpl_cpu, + {ImplSpec::IOSpec{DataType::Any}, ImplSpec::IOSpec{DataType::Float32}}, + {ProdConso::inPlaceModel, Aidge::AddImpl_cpu_forward_kernel<float, float>, nullptr}); +REGISTRAR(AddImpl_cpu, + {ImplSpec::IOSpec{DataType::Any}, ImplSpec::IOSpec{DataType::Float64}}, + {ProdConso::inPlaceModel, Aidge::AddImpl_cpu_forward_kernel<double, double>, nullptr}); +REGISTRAR(AddImpl_cpu, + {ImplSpec::IOSpec{DataType::Any}, ImplSpec::IOSpec{DataType::Int32}}, + {ProdConso::inPlaceModel, Aidge::AddImpl_cpu_forward_kernel<std::int32_t, std::int32_t>, nullptr}); +REGISTRAR(AddImpl_cpu, + {ImplSpec::IOSpec{DataType::Any}, ImplSpec::IOSpec{DataType::Int64}}, + {ProdConso::inPlaceModel, Aidge::AddImpl_cpu_forward_kernel<std::int64_t, std::int64_t>, nullptr}); } // namespace Aidge -#endif /* AIDGE_CPU_OPERATOR_ADDIMPL_CPU_FORWARD_KERNEL_H_ */ \ No newline at end of file +#endif /* AIDGE_CPU_OPERATOR_ADDIMPL_CPU_KERNELS_H_ */ \ No newline at end of file diff --git 
a/include/aidge/backend/cpu/operator/AndImpl.hpp b/include/aidge/backend/cpu/operator/AndImpl.hpp new file mode 100644 index 0000000000000000000000000000000000000000..316a2fb922596642088d133a7fec49c988739bb7 --- /dev/null +++ b/include/aidge/backend/cpu/operator/AndImpl.hpp @@ -0,0 +1,32 @@ +/******************************************************************************** + * Copyright (c) 2024 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. + * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#ifndef AIDGE_CPU_OPERATOR_ANDIMPL_H_ +#define AIDGE_CPU_OPERATOR_ANDIMPL_H_ + +#include "aidge/backend/cpu/operator/OperatorImpl.hpp" +#include "aidge/operator/And.hpp" +#include "aidge/utils/Registrar.hpp" +#include "aidge/utils/Types.h" +#include "aidge/backend/cpu/data/GetCPUPtr.h" +#include <memory> +#include <vector> + +namespace Aidge { +// Operator implementation entry point for the backend +using AndImpl_cpu = OperatorImpl_cpu<And_Op, + void(const std::vector<std::size_t>&, const std::vector<std::size_t>&, const std::vector<std::size_t>&, const void*, const void*,void*)>; + +// Implementation entry point registration to Operator +REGISTRAR(And_Op, "cpu", Aidge::AndImpl_cpu::create); +} // namespace Aidge + +#endif /* AIDGE_CPU_OPERATOR_ANDIMPL_H_ */ diff --git a/include/aidge/backend/cpu/operator/PowImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/AndImpl_kernels.hpp similarity index 53% rename from include/aidge/backend/cpu/operator/PowImpl_forward_kernels.hpp rename to include/aidge/backend/cpu/operator/AndImpl_kernels.hpp index 1146cfa77464f8bd1c33a0ec0113415dcf599b53..197e829f3527ce2f36c3ef5ee812a26477633703 100644 --- a/include/aidge/backend/cpu/operator/PowImpl_forward_kernels.hpp +++ 
b/include/aidge/backend/cpu/operator/AndImpl_kernels.hpp @@ -1,63 +1,63 @@ -/******************************************************************************** - * Copyright (c) 2023 CEA-List - * - * This program and the accompanying materials are made available under the - * terms of the Eclipse Public License 2.0 which is available at - * http://www.eclipse.org/legal/epl-2.0. - * - * SPDX-License-Identifier: EPL-2.0 - * - ********************************************************************************/ - -#ifndef AIDGE_CPU_OPERATOR_POWIMPL_FORWARD_KERNEL_H_ -#define AIDGE_CPU_OPERATOR_POWIMPL_FORWARD_KERNEL_H_ - -#include "aidge/utils/Registrar.hpp" -#include <cmath> - -#include "aidge/backend/cpu/data/Broadcasting.hpp" -#include "aidge/backend/cpu/operator/PowImpl.hpp" - -namespace Aidge { -template <class I1, class I2, class O> -void PowImpl_cpu_forward_kernel(const std::vector<std::size_t>& input1Dims, - const std::vector<std::size_t>& input2Dims, - const std::vector<std::size_t>& outputDims, - const void* input1_, - const void* input2_, - void* output_) { - - const I1* input_1 = static_cast<const I1*>(input1_); - const I2* input_2 = static_cast<const I2*>(input2_); - O* output = static_cast<O*>(output_); - - size_t totalElements = 1; - for (size_t dimSize : outputDims) { - totalElements *= dimSize; - } - - for (std::size_t oIndex = 0; oIndex < totalElements; ++oIndex) - { - std::vector<size_t> indexes = getMultiDimIndices(outputDims, oIndex); - - std::size_t idx1 = getFlattenedIndex(input1Dims, indexes); - std::size_t idx2 = getFlattenedIndex(input2Dims, indexes); - - output[oIndex] = std::pow(input_1[idx1], input_2[idx2]); - } -} - -namespace { -static Registrar<PowImplForward_cpu> registrarPowImplForward_cpu_Float32( - {DataType::Float32, DataType::Float32, DataType::Float32}, - Aidge::PowImpl_cpu_forward_kernel<float, float, float>); -static Registrar<PowImplForward_cpu> registrarPowImplForward_cpu_Int32( - {DataType::Int32, DataType::Int32, 
DataType::Int32}, - Aidge::PowImpl_cpu_forward_kernel<int, int, int>); -static Registrar<PowImplForward_cpu> registrarPowImplForward_cpu_Float64( - {DataType::Float64, DataType::Float64, DataType::Float64}, - Aidge::PowImpl_cpu_forward_kernel<double, double, double>); -} // namespace -} // namespace Aidge - -#endif /* AIDGE_CPU_OPERATOR_POWIMPL_FORWARD_KERNEL_H_ */ +/******************************************************************************** + * Copyright (c) 2024 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. + * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#ifndef AIDGE_CPU_OPERATOR_ANDIMPL_KERNELS_H_ +#define AIDGE_CPU_OPERATOR_ANDIMPL_KERNELS_H_ + +#include "aidge/backend/cpu/data/Broadcasting.hpp" +#include "aidge/backend/cpu/operator/AndImpl.hpp" +#include "aidge/utils/Registrar.hpp" + +namespace Aidge { +template <class I1, class I2, class O> +void AndImpl_cpu_forward_kernel(const std::vector<std::size_t>& input1Dims, + const std::vector<std::size_t>& input2Dims, + const std::vector<std::size_t>& outputDims, + const void* input1_, + const void* input2_, + void* output_) { + + const I1* input_1 = static_cast<const I1*>(input1_); + const I2* input_2 = static_cast<const I2*>(input2_); + O* output = static_cast<O*>(output_); + + size_t totalElements = 1; + for (size_t dimSize : outputDims) { + totalElements *= dimSize; + } + + for (std::size_t oIndex = 0; oIndex < totalElements; ++oIndex) + { + std::vector<size_t> indexes = getMultiDimIndices(outputDims, oIndex); + + std::size_t idx1 = getFlattenedIndex(input1Dims, indexes); + std::size_t idx2 = getFlattenedIndex(input2Dims, indexes); + + output[oIndex] = static_cast<O>(input_1[idx1] == input_2[idx2]); + } +} + +// Kernels registration to implementation entry point 
+REGISTRAR(AndImpl_cpu, + {DataType::Float32}, + {ProdConso::inPlaceModel, Aidge::AndImpl_cpu_forward_kernel<float, float, float>, nullptr}); +REGISTRAR(AndImpl_cpu, + {DataType::Float64}, + {ProdConso::inPlaceModel, Aidge::AndImpl_cpu_forward_kernel<double, double, double>, nullptr}); +REGISTRAR(AndImpl_cpu, + {DataType::Int32}, + {ProdConso::inPlaceModel, Aidge::AndImpl_cpu_forward_kernel<std::int32_t, std::int32_t, std::int32_t>, nullptr}); +REGISTRAR(AndImpl_cpu, + {DataType::Int64}, + {ProdConso::inPlaceModel, Aidge::AndImpl_cpu_forward_kernel<std::int64_t, std::int64_t, std::int64_t>, nullptr}); +} // namespace Aidge + +#endif /* AIDGE_CPU_OPERATOR_ANDIMPL_KERNELS_H_ */ diff --git a/include/aidge/backend/cpu/operator/ArgMaxImpl.hpp b/include/aidge/backend/cpu/operator/ArgMaxImpl.hpp new file mode 100644 index 0000000000000000000000000000000000000000..b1a2d5168013e4f9595f4275b98143cfc3509629 --- /dev/null +++ b/include/aidge/backend/cpu/operator/ArgMaxImpl.hpp @@ -0,0 +1,38 @@ +/******************************************************************************** + * Copyright (c) 2024 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. 
+ * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#ifndef AIDGE_CPU_OPERATOR_ARGMAXIMPL_H_ +#define AIDGE_CPU_OPERATOR_ARGMAXIMPL_H_ + +#include <array> +#include <memory> +#include <tuple> +#include <vector> + +#include "aidge/backend/cpu/operator/OperatorImpl.hpp" +#include "aidge/operator/ArgMax.hpp" +#include "aidge/utils/Registrar.hpp" +#include "aidge/utils/Types.h" + +namespace Aidge { +// Operator implementation entry point for the backend +using ArgMaxImpl_cpu = OperatorImpl_cpu<ArgMax_Op, + void(std::int32_t, + DimSize_t, + const std::vector<DimSize_t>&, + const void *, + void *)>; + +// Implementation entry point registration to Operator +REGISTRAR(ArgMax_Op, "cpu", Aidge::ArgMaxImpl_cpu::create); +} // namespace Aidge + +#endif /* AIDGE_CPU_OPERATOR_ARGMAXIMPL_H_ */ diff --git a/include/aidge/backend/cpu/operator/ArgMaxImpl_kernels.hpp b/include/aidge/backend/cpu/operator/ArgMaxImpl_kernels.hpp new file mode 100644 index 0000000000000000000000000000000000000000..1bedec701766fc59fac233a1c400df1042369c5a --- /dev/null +++ b/include/aidge/backend/cpu/operator/ArgMaxImpl_kernels.hpp @@ -0,0 +1,87 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. 
+ * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#ifndef AIDGE_CPU_OPERATOR_ARGMAXIMPL_KERNELS_H_ +#define AIDGE_CPU_OPERATOR_ARGMAXIMPL_KERNELS_H_ + +#include <algorithm> // std::for_each +#include <cstddef> // std::size_t +#include <cstdint> // std::int32_t +#include <functional> //std::multiplies +#include <numeric> //std::accumulate +#include <vector> +#include <limits> + +#include "aidge/backend/cpu/operator/ArgMaxImpl.hpp" +#include "aidge/data/Data.hpp" +#include "aidge/operator/ArgMax.hpp" +#include "aidge/utils/Registrar.hpp" + +namespace Aidge { +template <class I, class O> +void ArgMaxImpl_cpu_forward_kernel(std::int32_t axis_, + DimSize_t select_last_index, + const std::vector<DimSize_t>& inputDims, + const void* input_, + void* output_) { + + const I* input = static_cast<const I*>(input_); + O* output = static_cast<O*>(output_); + + const std::size_t axis = static_cast<std::size_t>(axis_); + + std::size_t stride_post = 1; + for (std::size_t i = axis + 1; i < inputDims.size(); ++i) { + stride_post *= inputDims[i]; + } + std::size_t stride_pre = 1; + for (std::size_t i = 0; i < axis; ++i) { + stride_pre *= inputDims[i]; + } + const std::size_t dim_i = inputDims[axis]; + for (std::size_t pre = 0; pre < stride_pre; ++pre) { + for (std::size_t post = 0; post < stride_post; ++post) { + const std::size_t idx_i = pre * dim_i * stride_post + post; + const std::size_t idx_o = pre * stride_post + post; + I max = std::numeric_limits<I>::min(); + for (std::size_t i = 0; i < dim_i; ++i) { + I curr_value = input[idx_i + i*stride_post]; + if (select_last_index) { + if (curr_value>=max) { + output[idx_o] = i; + max = curr_value; + } + } + else { + if (curr_value > max) { + output[idx_o] = i; + max = curr_value; + } + } + } + } + } + +} + +// Kernels registration to implementation entry point +REGISTRAR(ArgMaxImpl_cpu, + {DataType::Float32}, + {ProdConso::defaultModel, 
Aidge::ArgMaxImpl_cpu_forward_kernel<float, float>, nullptr}); +REGISTRAR(ArgMaxImpl_cpu, + {DataType::Float64}, + {ProdConso::defaultModel, Aidge::ArgMaxImpl_cpu_forward_kernel<double, double>, nullptr}); +REGISTRAR(ArgMaxImpl_cpu, + {DataType::Int32}, + {ProdConso::defaultModel, Aidge::ArgMaxImpl_cpu_forward_kernel<std::int32_t, std::int32_t>, nullptr}); +} // namespace Aidge + +#endif /* AIDGE_CPU_OPERATOR_ARGMAXIMPL_KERNELS_H_ */ diff --git a/include/aidge/backend/cpu/operator/AvgPoolingImpl.hpp b/include/aidge/backend/cpu/operator/AvgPoolingImpl.hpp index 12a5dc334619c16e6ad3a77f0cd76f4db7a87b77..adea96ca43a1ad9d2a49777426913ca4676e4f32 100644 --- a/include/aidge/backend/cpu/operator/AvgPoolingImpl.hpp +++ b/include/aidge/backend/cpu/operator/AvgPoolingImpl.hpp @@ -17,49 +17,24 @@ #include <tuple> #include <vector> -#include "aidge/backend/OperatorImpl.hpp" +#include "aidge/backend/cpu/operator/OperatorImpl.hpp" #include "aidge/operator/AvgPooling.hpp" #include "aidge/utils/Registrar.hpp" #include "aidge/utils/Types.h" #include "aidge/backend/cpu/data/GetCPUPtr.h" namespace Aidge { -// class AvgPooling_Op; - -// compute kernel registry for forward and backward -class AvgPoolingImpl2DForward_cpu - : public Registrable<AvgPoolingImpl2DForward_cpu, - std::tuple<DataType, DataType>, - void(const std::array<DimSize_t, 2>&, - const std::array<DimSize_t, 2>&, - const std::array<DimSize_t, 4>&, - const void *, - void *)> {}; -class AvgPoolingImpl2DBackward_cpu - : public Registrable<AvgPoolingImpl2DBackward_cpu, - std::tuple<DataType, DataType>, - void(const std::array<DimSize_t, 2>&, - const std::array<DimSize_t, 2>&, - const std::array<DimSize_t, 4>&, - const void *, - void *)> {}; - -class AvgPoolingImpl2D_cpu : public OperatorImpl { -public: - AvgPoolingImpl2D_cpu(const AvgPooling_Op<2> &op) : OperatorImpl(op, "cpu") {} - - static std::unique_ptr<AvgPoolingImpl2D_cpu> create(const AvgPooling_Op<2> &op) { - return std::make_unique<AvgPoolingImpl2D_cpu>(op); - } - - 
Elts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final; - void forward() override; -}; - -namespace { -// add cpu backend to AvgPooling_Op<2> implementation registry -static Registrar<AvgPooling_Op<2>> registrarAvgPoolingImpl2D_cpu("cpu", Aidge::AvgPoolingImpl2D_cpu::create); -} // namespace +// Operator implementation entry point for the backend +using AvgPooling2D_Op = AvgPooling_Op<2>; +using AvgPoolingImpl2D_cpu = OperatorImpl_cpu<AvgPooling_Op<2>, + void(const std::array<DimSize_t, 2>&, + const std::array<DimSize_t, 2>&, + const std::array<DimSize_t, 4>&, + const void *, + void *)>; + +// Implementation entry point registration to Operator +REGISTRAR(AvgPooling2D_Op, "cpu", Aidge::AvgPoolingImpl2D_cpu::create); } // namespace Aidge #endif /* AIDGE_CPU_OPERATOR_AVGPOOLINGIMPL_H_ */ diff --git a/include/aidge/backend/cpu/operator/AvgPoolingImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/AvgPoolingImpl_kernels.hpp similarity index 85% rename from include/aidge/backend/cpu/operator/AvgPoolingImpl_forward_kernels.hpp rename to include/aidge/backend/cpu/operator/AvgPoolingImpl_kernels.hpp index c7d9f86235c3bf1d7d01cf429cab29d156592fb5..f6da9dcb026101b93de862499d42ae8734532d52 100644 --- a/include/aidge/backend/cpu/operator/AvgPoolingImpl_forward_kernels.hpp +++ b/include/aidge/backend/cpu/operator/AvgPoolingImpl_kernels.hpp @@ -9,8 +9,8 @@ * ********************************************************************************/ -#ifndef AIDGE_CPU_OPERATOR_AVGPOOLINGIMPL_FORWARD_KERNEL_H_ -#define AIDGE_CPU_OPERATOR_AVGPOOLINGIMPL_FORWARD_KERNEL_H_ +#ifndef AIDGE_CPU_OPERATOR_AVGPOOLINGIMPL_KERNELS_H_ +#define AIDGE_CPU_OPERATOR_AVGPOOLINGIMPL_KERNELS_H_ #include <array> #include <tuple> @@ -101,17 +101,16 @@ void AvgPoolingImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& strideD } } -namespace { -static Registrar<AvgPoolingImpl2DForward_cpu> registrarAvgPoolingImpl2DForward_cpu_Float32( - std::tuple<DataType, 
DataType>({DataType::Float32, DataType::Float32}), - Aidge::AvgPoolingImpl2D_cpu_forward_kernel<float, float>); -static Registrar<AvgPoolingImpl2DForward_cpu> registrarAvgPoolingImpl2DForward_cpu_Int32( - {DataType::Int32, DataType::Int32}, - Aidge::AvgPoolingImpl2D_cpu_forward_kernel<int, int>); -static Registrar<AvgPoolingImpl2DForward_cpu> registrarAvgPoolingImpl2DForward_cpu_Float64( - {DataType::Float64, DataType::Float64}, - Aidge::AvgPoolingImpl2D_cpu_forward_kernel<double, double>); -} // namespace +// Kernels registration to implementation entry point +REGISTRAR(AvgPoolingImpl2D_cpu, + {{DataType::Float32, DataFormat::NCHW}, {DataType::Float32, DataFormat::NCHW}}, + {ProdConso::inPlaceModel, Aidge::AvgPoolingImpl2D_cpu_forward_kernel<float, float>, nullptr}); +REGISTRAR(AvgPoolingImpl2D_cpu, + {{DataType::Int32, DataFormat::NCHW}, {DataType::Int32, DataFormat::NCHW}}, + {ProdConso::inPlaceModel, Aidge::AvgPoolingImpl2D_cpu_forward_kernel<std::int32_t, std::int32_t>, nullptr}); +REGISTRAR(AvgPoolingImpl2D_cpu, + {{DataType::Float64, DataFormat::NCHW}, {DataType::Float64, DataFormat::NCHW}}, + {ProdConso::inPlaceModel, Aidge::AvgPoolingImpl2D_cpu_forward_kernel<double, double>, nullptr}); } // namespace Aidge -#endif /* AIDGE_CPU_OPERATOR_AVGPOOLINGIMPL_FORWARD_KERNEL_H_ */ +#endif /* AIDGE_CPU_OPERATOR_AVGPOOLINGIMPL_KERNELS_H_ */ diff --git a/include/aidge/backend/cpu/operator/BatchNormImpl.hpp b/include/aidge/backend/cpu/operator/BatchNormImpl.hpp index 93bdab2d3f37e3bd8dc1e68ab68a05de8c8015ed..36a100b21edc6cd63a0176c89f2f1e57c10001c7 100644 --- a/include/aidge/backend/cpu/operator/BatchNormImpl.hpp +++ b/include/aidge/backend/cpu/operator/BatchNormImpl.hpp @@ -17,58 +17,29 @@ #include <tuple> #include <vector> -#include "aidge/backend/OperatorImpl.hpp" +#include "aidge/backend/cpu/operator/OperatorImpl.hpp" #include "aidge/operator/BatchNorm.hpp" #include "aidge/utils/Registrar.hpp" #include "aidge/utils/Types.h" #include 
"aidge/backend/cpu/data/GetCPUPtr.h" namespace Aidge { -// class BatchNorm_Op; - -// compute kernel registry for forward and backward -class BatchNormImpl2DForward_cpu - : public Registrable<BatchNormImpl2DForward_cpu, - std::tuple<DataType, DataType, DataType>, - void(float, - float, - const std::array<DimSize_t, 4> &, - const void *, - const void *, - const void *, - void *, - void *, - void *, - const bool)> {}; -class BatchNormImpl2DBackward_cpu - : public Registrable<BatchNormImpl2DBackward_cpu, - std::tuple<DataType, DataType, DataType>, - void(float, - float, - const std::array<DimSize_t, 4> &, - const void *, - const void *, - const void *, - void *, - void *, - void *)> {}; - -class BatchNormImpl2D_cpu : public OperatorImpl { -public: - BatchNormImpl2D_cpu(const BatchNorm_Op<2> &op) : OperatorImpl(op, "cpu") {} - - static std::unique_ptr<BatchNormImpl2D_cpu> create(const BatchNorm_Op<2> &op) { - return std::make_unique<BatchNormImpl2D_cpu>(op); - } - - Elts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final; - void forward() override; -}; - -namespace { -// add cpu backend to BatchNorm_Op<2> implementation registry -static Registrar<BatchNorm_Op<2>> registrarBatchNormImpl2D_cpu("cpu", Aidge::BatchNormImpl2D_cpu::create); -} // namespace +// Operator implementation entry point for the backend +using BatchNorm2D_Op = BatchNorm_Op<2>; +using BatchNormImpl2D_cpu = OperatorImpl_cpu<BatchNorm_Op<2>, + void(float, + float, + const std::array<DimSize_t, 4> &, + const void *, + const void *, + const void *, + void *, + void *, + void *, + const bool)>; + +// Implementation entry point registration to Operator +REGISTRAR(BatchNorm2D_Op, "cpu", Aidge::BatchNormImpl2D_cpu::create); } // namespace Aidge #endif /* AIDGE_CPU_OPERATOR_BATCHNORMIMPL_H_ */ diff --git a/include/aidge/backend/cpu/operator/BatchNormImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/BatchNormImpl_kernels.hpp similarity index 90% rename from 
include/aidge/backend/cpu/operator/BatchNormImpl_forward_kernels.hpp rename to include/aidge/backend/cpu/operator/BatchNormImpl_kernels.hpp index 19f232a783bccf0a800d41f2bc566ccf6e04f05e..ec71e3b8e37e344c551fd643dc7b3957bdddcb67 100644 --- a/include/aidge/backend/cpu/operator/BatchNormImpl_forward_kernels.hpp +++ b/include/aidge/backend/cpu/operator/BatchNormImpl_kernels.hpp @@ -9,8 +9,8 @@ * ********************************************************************************/ -#ifndef AIDGE_CPU_OPERATOR_BATCHNORMIMPL_FORWARD_KERNEL_H_ -#define AIDGE_CPU_OPERATOR_BATCHNORMIMPL_FORWARD_KERNEL_H_ +#ifndef AIDGE_CPU_OPERATOR_BATCHNORMIMPL_KERNELS_H_ +#define AIDGE_CPU_OPERATOR_BATCHNORMIMPL_KERNELS_H_ #include "aidge/utils/Registrar.hpp" @@ -96,15 +96,10 @@ void BatchNormImpl2D_cpu_forward_kernel(float epsilon, float momentum, const std } } - - - - -namespace { -static Registrar<BatchNormImpl2DForward_cpu> registrarBatchNormImpl2DForward_cpu_Float32( - {DataType::Float32, DataType::Float32, DataType::Float32}, - Aidge::BatchNormImpl2D_cpu_forward_kernel<float, float, float>); -} // namespace +// Kernels registration to implementation entry point +REGISTRAR(BatchNormImpl2D_cpu, + {{DataType::Float32, DataFormat::NCHW}, {DataType::Float32, DataFormat::NCHW}}, + {ProdConso::inPlaceModel, Aidge::BatchNormImpl2D_cpu_forward_kernel<float, float, float>, nullptr}); } // namespace Aidge -#endif /* AIDGE_CPU_OPERATOR_BATCHNORMIMPL_FORWARD_KERNEL_H_ */ +#endif /* AIDGE_CPU_OPERATOR_BATCHNORMIMPL_KERNELS_H_ */ diff --git a/include/aidge/backend/cpu/operator/BitShiftImpl.hpp b/include/aidge/backend/cpu/operator/BitShiftImpl.hpp new file mode 100644 index 0000000000000000000000000000000000000000..6da67bb7dd4469b6ca609c5aea1ae70dfca3f939 --- /dev/null +++ b/include/aidge/backend/cpu/operator/BitShiftImpl.hpp @@ -0,0 +1,38 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying 
materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. + * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#ifndef AIDGE_CPU_OPERATOR_BITSHIFTIMPL_H_ +#define AIDGE_CPU_OPERATOR_BITSHIFTIMPL_H_ + +#include "aidge/backend/cpu/operator/OperatorImpl.hpp" +#include "aidge/operator/BitShift.hpp" +#include "aidge/utils/Registrar.hpp" +#include "aidge/utils/Types.h" +#include "aidge/backend/cpu/data/GetCPUPtr.h" +#include <memory> +#include <vector> + +namespace Aidge { +// Operator implementation entry point for the backend +using BitShiftImpl_cpu = OperatorImpl_cpu<BitShift_Op, + void(const BitShift_Op::BitShiftDirection, + const std::vector<std::size_t>&, + const std::vector<std::size_t>&, + const std::vector<std::size_t>&, + const void*, + const void*, + void*)>; + + // Implementation entry point registration to Operator + REGISTRAR(BitShift_Op,"cpu",Aidge::BitShiftImpl_cpu::create); +} // namespace Aidge + +#endif /* AIDGE_CPU_OPERATOR_BITSHIFTIMPL_H_ */ diff --git a/include/aidge/backend/cpu/operator/BitShiftImpl_kernels.hpp b/include/aidge/backend/cpu/operator/BitShiftImpl_kernels.hpp new file mode 100644 index 0000000000000000000000000000000000000000..f815e946ea2e4abaff48a6e5155368d564e88e8c --- /dev/null +++ b/include/aidge/backend/cpu/operator/BitShiftImpl_kernels.hpp @@ -0,0 +1,70 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. 
+ * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#ifndef AIDGE_CPU_OPERATOR_BITSHIFTIMPL_KERNELS_H_ +#define AIDGE_CPU_OPERATOR_BITSHIFTIMPL_KERNELS_H_ + +#include "aidge/utils/Registrar.hpp" + +#include <cstdint> // std::int32_t, std::int64_t +#include "aidge/operator/BitShift.hpp" + +#include "aidge/backend/cpu/data/Broadcasting.hpp" +#include "aidge/backend/cpu/operator/BitShiftImpl.hpp" + + + +namespace Aidge { +template <class I1, class I2, class O> +void BitShiftImpl_cpu_forward_kernel( + const BitShift_Op::BitShiftDirection direction, + const std::vector<std::size_t>& input1Dims, + const std::vector<std::size_t>& input2Dims, + const std::vector<std::size_t>& outputDims, + const void* input1_, + const void* input2_, + void* output_ + ) { + + const I1* input_1 = static_cast<const I1*>(input1_); + const I2* input_2 = static_cast<const I2*>(input2_); + O* output = static_cast<O*>(output_); + + const size_t totalElements = std::accumulate(outputDims.begin(), outputDims.end(), std::size_t(1), std::multiplies<std::size_t>()); + + for (std::size_t oIndex = 0; oIndex < totalElements; ++oIndex) + { + std::vector<size_t> indexes = getMultiDimIndices(outputDims, oIndex); + std::size_t idx1 = getFlattenedIndex(input1Dims, indexes); + std::size_t idx2 = getFlattenedIndex(input2Dims, indexes); + if(direction == BitShift_Op::BitShiftDirection::right) + + { + output[oIndex]= input_1[idx1] >> input_2[idx2]; + } + else + { + output[oIndex] = input_1[idx1] << input_2[idx2]; + } + } +} + +REGISTRAR(BitShiftImpl_cpu, +{DataType::Int32}, +{ProdConso::inPlaceModel,Aidge::BitShiftImpl_cpu_forward_kernel<std::int32_t, std::int32_t, std::int32_t>,nullptr}); +REGISTRAR(BitShiftImpl_cpu, +{DataType::Int64}, +{ProdConso::inPlaceModel,Aidge::BitShiftImpl_cpu_forward_kernel<std::int64_t, std::int64_t, std::int64_t>,nullptr}); + + +} // namespace Aidge + +#endif /* AIDGE_CPU_OPERATOR_BitShiftIMPL_KERNELS_H_ 
*/ \ No newline at end of file diff --git a/include/aidge/backend/cpu/operator/ConstantOfShapeImpl.hpp b/include/aidge/backend/cpu/operator/ConstantOfShapeImpl.hpp new file mode 100644 index 0000000000000000000000000000000000000000..83e7e030f526e0db3cff4741eabe39e287130562 --- /dev/null +++ b/include/aidge/backend/cpu/operator/ConstantOfShapeImpl.hpp @@ -0,0 +1,34 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. + * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#ifndef AIDGE_CPU_OPERATOR_CONSTANTOFSHAPEIMPL_H_ +#define AIDGE_CPU_OPERATOR_CONSTANTOFSHAPEIMPL_H_ + +#include <cstddef> +#include <memory> +#include <vector> + +#include "aidge/backend/cpu/operator/OperatorImpl.hpp" +#include "aidge/operator/ConstantOfShape.hpp" +#include "aidge/utils/Registrar.hpp" +#include "aidge/utils/Types.h" + +namespace Aidge { +// Operator implementation entry point for the backend +using ConstantOfShapeImpl_cpu = OperatorImpl_cpu<ConstantOfShape_Op, + void(const std::vector<DimSize_t>, const Tensor&, void *)>; + +// Implementation entry point registration to Operator +REGISTRAR(ConstantOfShape_Op, "cpu", Aidge::ConstantOfShapeImpl_cpu::create); +} // namespace Aidge + +#endif /* _AIDGE_CPU_OPERATOR_CONSTANTOFSHAPEIMPL_H_ */ + diff --git a/include/aidge/backend/cpu/operator/ConstantOfShapeImpl_kernels.hpp b/include/aidge/backend/cpu/operator/ConstantOfShapeImpl_kernels.hpp new file mode 100644 index 0000000000000000000000000000000000000000..18ab9c0a77c4545c955fc4fe1f1fc1cbcb763bf7 --- /dev/null +++ b/include/aidge/backend/cpu/operator/ConstantOfShapeImpl_kernels.hpp @@ -0,0 +1,71 @@ 
+/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. + * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#ifndef AIDGE_CPU_OPERATOR_CONSTANTOFSHAPEIMPL_KERNELS_H_ +#define AIDGE_CPU_OPERATOR_CONSTANTOFSHAPEIMPL_KERNELS_H_ + +#include <aidge/data/Tensor.hpp> +#include <aidge/data/half.hpp> +#include <algorithm> +#include <cstddef> +#include <cstdint> +#include <functional> // std::multiplies +#include <numeric> // std::accumulate +#include <vector> + +#include "aidge/backend/cpu/operator/ConstantOfShapeImpl.hpp" +#include "aidge/data/Data.hpp" +#include "aidge/utils/ErrorHandling.hpp" +#include "aidge/utils/Registrar.hpp" +#include "aidge/utils/Types.h" + +namespace Aidge { +template <class O> +void ConstantOfShapeimpl_cpu_forward_kernel( + const std::vector<DimSize_t> output_dims, const Tensor &value, + void *output_) { + + O *output = static_cast<O *>(output_); + O val; + std::copy(static_cast<O *>(value.getImpl()->hostPtr()), + static_cast<O *>(value.getImpl()->hostPtr()) + + static_cast<NbElts_t>(1), + &val); + const size_t output_size = std::accumulate( + output_dims.begin(), output_dims.end(), 1, std::multiplies<DimSize_t>()); + for (size_t i = 0; i < output_size; ++i) { + output[i] = val; + } +} + +// Kernels registration to implementation entry point +REGISTRAR(ConstantOfShapeImpl_cpu, + {ImplSpec::IOSpec{DataType::Int64}, ImplSpec::IOSpec{DataType::Float16}}, + {ProdConso::defaultModel, Aidge::ConstantOfShapeimpl_cpu_forward_kernel<half_float::half>, nullptr}); +REGISTRAR(ConstantOfShapeImpl_cpu, + {ImplSpec::IOSpec{DataType::Int64}, ImplSpec::IOSpec{DataType::Float32}}, + {ProdConso::defaultModel, 
Aidge::ConstantOfShapeimpl_cpu_forward_kernel<float>, nullptr}); +REGISTRAR(ConstantOfShapeImpl_cpu, + {ImplSpec::IOSpec{DataType::Int64}, ImplSpec::IOSpec{DataType::Float64}}, + {ProdConso::defaultModel, Aidge::ConstantOfShapeimpl_cpu_forward_kernel<double>, nullptr}); +REGISTRAR(ConstantOfShapeImpl_cpu, + {ImplSpec::IOSpec{DataType::Int64}, ImplSpec::IOSpec{DataType::Int16}}, + {ProdConso::defaultModel, Aidge::ConstantOfShapeimpl_cpu_forward_kernel<std::int16_t>, nullptr}); +REGISTRAR(ConstantOfShapeImpl_cpu, + {ImplSpec::IOSpec{DataType::Int64}, ImplSpec::IOSpec{DataType::Int32}}, + {ProdConso::defaultModel, Aidge::ConstantOfShapeimpl_cpu_forward_kernel<std::int32_t>, nullptr}); +REGISTRAR(ConstantOfShapeImpl_cpu, + {ImplSpec::IOSpec{DataType::Int64}, ImplSpec::IOSpec{DataType::Int64}}, + {ProdConso::defaultModel, Aidge::ConstantOfShapeimpl_cpu_forward_kernel<std::int64_t>, nullptr}); +} // namespace Aidge + +#endif /* AIDGE_CPU_OPERATOR_CONSTANTOFSHAPEIMPL_KERNELS_H_ */ + diff --git a/include/aidge/backend/cpu/operator/ConvDepthWiseImpl.hpp b/include/aidge/backend/cpu/operator/ConvDepthWiseImpl.hpp index ec886a310dd2edc616ced6ee447665eab3ce301a..5b985accfb7b9778993b557524de7b60060ad437 100644 --- a/include/aidge/backend/cpu/operator/ConvDepthWiseImpl.hpp +++ b/include/aidge/backend/cpu/operator/ConvDepthWiseImpl.hpp @@ -17,85 +17,39 @@ #include <tuple> #include <vector> -#include "aidge/backend/OperatorImpl.hpp" +#include "aidge/backend/cpu/operator/OperatorImpl.hpp" #include "aidge/operator/ConvDepthWise.hpp" #include "aidge/utils/Registrar.hpp" #include "aidge/utils/Types.h" #include "aidge/backend/cpu/data/GetCPUPtr.h" namespace Aidge { -// class ConvDepthWise_Op; -// compute kernel registry for forward and backward -class ConvDepthWiseImpl1DForward_cpu - : public Registrable<ConvDepthWiseImpl1DForward_cpu, - std::tuple<DataType, DataType, DataType, DataType>, - void(const std::array<DimSize_t, 1>&, - const std::array<DimSize_t, 1>&, - const 
std::array<DimSize_t, 1>&, - const std::array<DimSize_t, 3>&, - const void *, - const void *, - const void *, - void *)> {}; - -class ConvDepthWiseImpl1D_cpu : public OperatorImpl { -public: - ConvDepthWiseImpl1D_cpu(const ConvDepthWise_Op<1> &op) : OperatorImpl(op, "cpu") {} - - static std::unique_ptr<ConvDepthWiseImpl1D_cpu> create(const ConvDepthWise_Op<1> &op) { - return std::make_unique<ConvDepthWiseImpl1D_cpu>(op); - } - - Elts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final; - void forward() override; -}; - -namespace { -// add cpu backend to ConvDepthWise_Op<1> implementation registry -static Registrar<ConvDepthWise_Op<1>> registrarConvDepthWiseImpl1D_cpu("cpu", Aidge::ConvDepthWiseImpl1D_cpu::create); -} // namespace - -// compute kernel registry for forward and backward -class ConvDepthWiseImpl2DForward_cpu - : public Registrable<ConvDepthWiseImpl2DForward_cpu, - std::tuple<DataType, DataType, DataType, DataType>, - void(const std::array<DimSize_t, 2>&, - const std::array<DimSize_t, 2>&, - const std::array<DimSize_t, 2>&, - const std::array<DimSize_t, 4> &, - const void *, - const void *, - const void *, - void *)> {}; -class ConvDepthWiseImpl2DBackward_cpu - : public Registrable<ConvDepthWiseImpl2DBackward_cpu, - std::tuple<DataType, DataType, DataType, DataType>, - void(const std::array<DimSize_t, 2>&, - const std::array<DimSize_t, 2>&, - const std::array<DimSize_t, 2>&, - bool, - const std::array<DimSize_t, 4> &, - const void *, - const void *, - const void *, - void *)> {}; - -class ConvDepthWiseImpl2D_cpu : public OperatorImpl { -public: - ConvDepthWiseImpl2D_cpu(const ConvDepthWise_Op<2> &op) : OperatorImpl(op, "cpu") {} - - static std::unique_ptr<ConvDepthWiseImpl2D_cpu> create(const ConvDepthWise_Op<2> &op) { - return std::make_unique<ConvDepthWiseImpl2D_cpu>(op); - } - - Elts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final; - void forward() override; -}; - -namespace { -// add cpu backend to 
ConvDepthWise_Op<2> implementation registry -static Registrar<ConvDepthWise_Op<2>> registrarConvDepthWiseImpl2D_cpu("cpu", Aidge::ConvDepthWiseImpl2D_cpu::create); -} // namespace +// Operator implementation entry point for the backend +using ConvDepthWise1D_Op = ConvDepthWise_Op<1>; +using ConvDepthWiseImpl1D_cpu = OperatorImpl_cpu<ConvDepthWise_Op<1>, + void(const std::array<DimSize_t, 1>&, + const std::array<DimSize_t, 1>&, + const std::array<DimSize_t, 1>&, + const std::array<DimSize_t, 3>&, + const void *, + const void *, + const void *, + void *)>; + +using ConvDepthWise2D_Op = ConvDepthWise_Op<2>; +using ConvDepthWiseImpl2D_cpu = OperatorImpl_cpu<ConvDepthWise_Op<2>, + void(const std::array<DimSize_t, 2>&, + const std::array<DimSize_t, 2>&, + const std::array<DimSize_t, 2>&, + const std::array<DimSize_t, 4> &, + const void *, + const void *, + const void *, + void *)>; + +// Implementation entry point registration to Operator +REGISTRAR(ConvDepthWise1D_Op, "cpu", Aidge::ConvDepthWiseImpl1D_cpu::create); +REGISTRAR(ConvDepthWise2D_Op, "cpu", Aidge::ConvDepthWiseImpl2D_cpu::create); } // namespace Aidge #endif /* AIDGE_CPU_OPERATOR_CONVDEPTHWISEIMPL_H_ */ diff --git a/include/aidge/backend/cpu/operator/ConvDepthWiseImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/ConvDepthWiseImpl_kernels.hpp similarity index 83% rename from include/aidge/backend/cpu/operator/ConvDepthWiseImpl_forward_kernels.hpp rename to include/aidge/backend/cpu/operator/ConvDepthWiseImpl_kernels.hpp index a02aa672b92f089790ef1903af8b804f816f3baa..ff9bb148fa68d75e2d4b00804e13f063e3ca2cc0 100644 --- a/include/aidge/backend/cpu/operator/ConvDepthWiseImpl_forward_kernels.hpp +++ b/include/aidge/backend/cpu/operator/ConvDepthWiseImpl_kernels.hpp @@ -9,8 +9,8 @@ * ********************************************************************************/ -#ifndef AIDGE_CPU_OPERATOR_CONVDEPTHWISEIMPL_FORWARD_KERNEL_H_ -#define AIDGE_CPU_OPERATOR_CONVDEPTHWISEIMPL_FORWARD_KERNEL_H_ +#ifndef 
AIDGE_CPU_OPERATOR_CONVDEPTHWISEIMPL_KERNELS_H_ +#define AIDGE_CPU_OPERATOR_CONVDEPTHWISEIMPL_KERNELS_H_ #include <algorithm> #include <array> @@ -86,17 +86,16 @@ void ConvDepthWiseImpl1D_cpu_forward_kernel(const std::array<DimSize_t, 1>& stri } } -namespace { -static Registrar<ConvDepthWiseImpl1DForward_cpu> registrarConvDepthWiseImpl1DForward_cpu_Float32( - {DataType::Float32, DataType::Float32, DataType::Float32, DataType::Float32}, - Aidge::ConvDepthWiseImpl1D_cpu_forward_kernel<float, float, float, float>); -static Registrar<ConvDepthWiseImpl1DForward_cpu> registrarConvDepthWiseImpl1DForward_cpu_Int32( - {DataType::Int32, DataType::Int32, DataType::Int32, DataType::Int32}, - Aidge::ConvDepthWiseImpl1D_cpu_forward_kernel<std::int32_t, std::int32_t, std::int32_t, std::int32_t>); -static Registrar<ConvDepthWiseImpl1DForward_cpu> registrarConvDepthWiseImpl1DForward_cpu_Float64( - {DataType::Float64, DataType::Float64, DataType::Float64, DataType::Float64}, - Aidge::ConvDepthWiseImpl1D_cpu_forward_kernel<double, double, double, double>); -} // namespace +// Kernels registration to implementation entry point +REGISTRAR(ConvDepthWiseImpl1D_cpu, + {{DataType::Any, DataFormat::NCHW}, {DataType::Float32, DataFormat::NCHW}}, + {ProdConso::inPlaceModel, Aidge::ConvDepthWiseImpl1D_cpu_forward_kernel<float, float, float, float>, nullptr}); +REGISTRAR(ConvDepthWiseImpl1D_cpu, + {{DataType::Any, DataFormat::NCHW}, {DataType::Int32, DataFormat::NCHW}}, + {ProdConso::inPlaceModel, Aidge::ConvDepthWiseImpl1D_cpu_forward_kernel<std::int32_t, std::int32_t, std::int32_t, std::int32_t>, nullptr}); +REGISTRAR(ConvDepthWiseImpl1D_cpu, + {{DataType::Any, DataFormat::NCHW}, {DataType::Float64, DataFormat::NCHW}}, + {ProdConso::inPlaceModel, Aidge::ConvDepthWiseImpl1D_cpu_forward_kernel<double, double, double, double>, nullptr}); /** @@ -187,17 +186,16 @@ void ConvDepthWiseImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& stri } } -namespace { -static 
Registrar<ConvDepthWiseImpl2DForward_cpu> registrarConvDepthWiseImpl2DForward_cpu_Float32( - {DataType::Float32, DataType::Float32, DataType::Float32, DataType::Float32}, - Aidge::ConvDepthWiseImpl2D_cpu_forward_kernel<float, float, float, float>); -static Registrar<ConvDepthWiseImpl2DForward_cpu> registrarConvDepthWiseImpl2DForward_cpu_Int32( - {DataType::Int32, DataType::Int32, DataType::Int32, DataType::Int32}, - Aidge::ConvDepthWiseImpl2D_cpu_forward_kernel<std::int32_t, std::int32_t, std::int32_t, std::int32_t>); -static Registrar<ConvDepthWiseImpl2DForward_cpu> registrarConvDepthWiseImpl2DForward_cpu_Float64( - {DataType::Float64, DataType::Float64, DataType::Float64, DataType::Float64}, - Aidge::ConvDepthWiseImpl2D_cpu_forward_kernel<double, double, double, double>); -} // namespace +// Kernels registration to implementation entry point +REGISTRAR(ConvDepthWiseImpl2D_cpu, + {{DataType::Any, DataFormat::NCHW}, {DataType::Float32, DataFormat::NCHW}}, + {ProdConso::inPlaceModel, Aidge::ConvDepthWiseImpl2D_cpu_forward_kernel<float, float, float, float>, nullptr}); +REGISTRAR(ConvDepthWiseImpl2D_cpu, + {{DataType::Any, DataFormat::NCHW}, {DataType::Int32, DataFormat::NCHW}}, + {ProdConso::inPlaceModel, Aidge::ConvDepthWiseImpl2D_cpu_forward_kernel<std::int32_t, std::int32_t, std::int32_t, std::int32_t>, nullptr}); +REGISTRAR(ConvDepthWiseImpl2D_cpu, + {{DataType::Any, DataFormat::NCHW}, {DataType::Float64, DataFormat::NCHW}}, + {ProdConso::inPlaceModel, Aidge::ConvDepthWiseImpl2D_cpu_forward_kernel<double, double, double, double>, nullptr}); } // namespace Aidge -#endif /* AIDGE_CPU_OPERATOR_CONVDEPTHWISEIMPL_FORWARD_KERNEL_H_ */ +#endif /* AIDGE_CPU_OPERATOR_CONVDEPTHWISEIMPL_KERNELS_H_ */ diff --git a/include/aidge/backend/cpu/operator/ConvImpl.hpp b/include/aidge/backend/cpu/operator/ConvImpl.hpp index d7be46c251a82d1b631f4ad50e7175fa2f896d03..c06d0912f419909013f930867ce3c3238c1a5555 100644 --- a/include/aidge/backend/cpu/operator/ConvImpl.hpp +++ 
b/include/aidge/backend/cpu/operator/ConvImpl.hpp @@ -17,91 +17,41 @@ #include <tuple> #include <vector> -#include "aidge/backend/OperatorImpl.hpp" +#include "aidge/backend/cpu/operator/OperatorImpl.hpp" #include "aidge/operator/Conv.hpp" #include "aidge/utils/Registrar.hpp" #include "aidge/utils/Types.h" #include "aidge/backend/cpu/data/GetCPUPtr.h" namespace Aidge { -// class Conv_Op; - -// compute kernel registry for forward and backward -// Conv 1D -class ConvImpl1DForward_cpu - : public Registrable<ConvImpl1DForward_cpu, - std::tuple<DataType, DataType, DataType, DataType>, - void(const std::array<DimSize_t, 1>&, - const std::array<DimSize_t, 1>&, - const std::array<DimSize_t, 1>&, - const std::array<DimSize_t, 3> &, - DimSize_t, - const void *, - const void *, - const void *, - void *)> {}; - -class ConvImpl1D_cpu : public OperatorImpl { - public: - ConvImpl1D_cpu(const Conv_Op<1>& op) : OperatorImpl(op, "cpu") {} - - static std::unique_ptr<ConvImpl1D_cpu> create(const Conv_Op<1> &op) { - return std::make_unique<ConvImpl1D_cpu>(op); - } - - public: - Elts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final; - void forward() override; -}; - -namespace { -// add cpu backend to Conv_Op<1> implementation registry -static Registrar<Conv_Op<1>> registrarConvImpl1D_cpu("cpu", Aidge::ConvImpl1D_cpu::create); -} // namespace - -// Conv 2D -class ConvImpl2DForward_cpu - : public Registrable<ConvImpl2DForward_cpu, - std::tuple<DataType, DataType, DataType, DataType>, - void(const std::array<DimSize_t, 2>&, - const std::array<DimSize_t, 2>&, - const std::array<DimSize_t, 2>&, - const std::array<DimSize_t, 4> &, - DimSize_t, - const void *, - const void *, - const void *, - void *)> {}; -class ConvImpl2DBackward_cpu - : public Registrable<ConvImpl2DBackward_cpu, - std::tuple<DataType, DataType, DataType, DataType>, - void(const std::array<DimSize_t, 2>&, - const std::array<DimSize_t, 2>&, - const std::array<DimSize_t, 2>&, - bool, - const 
std::array<DimSize_t, 4> &, - const void *, - const void *, - const void *, - void *)> {}; - -class ConvImpl2D_cpu : public OperatorImpl { - public: - ConvImpl2D_cpu(const Conv_Op<2>& op) : OperatorImpl(op, "cpu") {} - - static std::unique_ptr<ConvImpl2D_cpu> create(const Conv_Op<2> &op) { - return std::make_unique<ConvImpl2D_cpu>(op); - } - - public: - Elts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final; - void forward() override; -}; - -namespace { -// add cpu backend to Conv_Op<2> implementation registry -static Registrar<Conv_Op<2>> registrarConvImpl2D_cpu("cpu", Aidge::ConvImpl2D_cpu::create); -} // namespace +// Operator implementation entry point for the backend +using Conv1D_Op = Conv_Op<1>; +using ConvImpl1D_cpu = OperatorImpl_cpu<Conv_Op<1>, + void(const std::array<DimSize_t, 1>&, + const std::array<DimSize_t, 1>&, + const std::array<DimSize_t, 1>&, + const std::array<DimSize_t, 3> &, + DimSize_t, + const void *, + const void *, + const void *, + void *)>; + +using Conv2D_Op = Conv_Op<2>; +using ConvImpl2D_cpu = OperatorImpl_cpu<Conv_Op<2>, + void(const std::array<DimSize_t, 2>&, + const std::array<DimSize_t, 2>&, + const std::array<DimSize_t, 2>&, + const std::array<DimSize_t, 4> &, + DimSize_t, + const void *, + const void *, + const void *, + void *)>; + +// Implementation entry point registration to Operator +REGISTRAR(Conv1D_Op, "cpu", Aidge::ConvImpl1D_cpu::create); +REGISTRAR(Conv2D_Op, "cpu", Aidge::ConvImpl2D_cpu::create); } // namespace Aidge #endif /* AIDGE_CPU_OPERATOR_CONVIMPL_H_ */ diff --git a/include/aidge/backend/cpu/operator/ConvImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/ConvImpl_kernels.hpp similarity index 70% rename from include/aidge/backend/cpu/operator/ConvImpl_forward_kernels.hpp rename to include/aidge/backend/cpu/operator/ConvImpl_kernels.hpp index 88a71c47244788f2da5e576c8ad5170a92561909..cc3bd57cb17f2a0feb6a79af2c291e6f960467d8 100644 --- 
a/include/aidge/backend/cpu/operator/ConvImpl_forward_kernels.hpp +++ b/include/aidge/backend/cpu/operator/ConvImpl_kernels.hpp @@ -9,18 +9,20 @@ * ********************************************************************************/ -#ifndef AIDGE_CPU_OPERATOR_CONVIMPL_FORWARD_KERNEL_H_ -#define AIDGE_CPU_OPERATOR_CONVIMPL_FORWARD_KERNEL_H_ +#ifndef AIDGE_CPU_OPERATOR_CONVIMPL_KERNELS_H_ +#define AIDGE_CPU_OPERATOR_CONVIMPL_KERNELS_H_ -#include <algorithm> #include <array> -#include <cmath> +#include <memory> +#include <tuple> +#include <vector> -#include "aidge/backend/cpu/data/GetCPUPtr.h" +#include "aidge/backend/cpu/operator/OperatorImpl.hpp" #include "aidge/backend/cpu/operator/ConvImpl.hpp" -#include "aidge/data/half.hpp" +#include "aidge/operator/Conv.hpp" #include "aidge/utils/Registrar.hpp" #include "aidge/utils/Types.h" +#include "aidge/backend/cpu/data/GetCPUPtr.h" namespace Aidge { /** @@ -90,20 +92,19 @@ void ConvImpl1D_cpu_forward_kernel(const std::array<DimSize_t, 1>& strideDims, } } -namespace { -static Registrar<ConvImpl1DForward_cpu> registrarConvImpl1DForward_cpu_Float32( - {DataType::Float32, DataType::Float32, DataType::Float32, DataType::Float32}, - Aidge::ConvImpl1D_cpu_forward_kernel<float, float, float, float>); -static Registrar<ConvImpl1DForward_cpu> registrarConvImpl1DForward_cpu_Float16( - {DataType::Float16, DataType::Float16, DataType::Float16, DataType::Float16}, - Aidge::ConvImpl1D_cpu_forward_kernel<half_float::half, half_float::half, half_float::half, half_float::half>); -static Registrar<ConvImpl1DForward_cpu> registrarConvImpl1DForward_cpu_Int32( - {DataType::Int32, DataType::Int32, DataType::Int32, DataType::Int32}, - Aidge::ConvImpl1D_cpu_forward_kernel<int, int, int, int>); -static Registrar<ConvImpl1DForward_cpu> registrarConvImpl1DForward_cpu_Float64( - {DataType::Float64, DataType::Float64, DataType::Float64, DataType::Float64}, - Aidge::ConvImpl1D_cpu_forward_kernel<double, double, double, double>); -} // namespace +// 
Kernels registration to implementation entry point +REGISTRAR(ConvImpl1D_cpu, + {{DataType::Any, DataFormat::NCHW}, {DataType::Float32, DataFormat::NCHW}}, + {ProdConso::inPlaceModel, Aidge::ConvImpl1D_cpu_forward_kernel<float, float, float, float>, nullptr}); +REGISTRAR(ConvImpl1D_cpu, + {{DataType::Any, DataFormat::NCHW}, {DataType::Float16, DataFormat::NCHW}}, + {ProdConso::inPlaceModel, Aidge::ConvImpl1D_cpu_forward_kernel<half_float::half, half_float::half, half_float::half, half_float::half>, nullptr}); +REGISTRAR(ConvImpl1D_cpu, + {{DataType::Any, DataFormat::NCHW}, {DataType::Int32, DataFormat::NCHW}}, + {ProdConso::inPlaceModel, Aidge::ConvImpl1D_cpu_forward_kernel<int32_t, int32_t, int32_t, int32_t>, nullptr}); +REGISTRAR(ConvImpl1D_cpu, + {{DataType::Any, DataFormat::NCHW}, {DataType::Float64, DataFormat::NCHW}}, + {ProdConso::inPlaceModel, Aidge::ConvImpl1D_cpu_forward_kernel<double, double, double, double>, nullptr}); /** @@ -135,49 +136,6 @@ void ConvImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& strideDims, const W *weights = static_cast<const W *>(weights_); const B *biases = static_cast<const B *>(biases_); O *output = static_cast<O *>(output_); -/* - // output H size - const std::size_t oxSize = - static_cast<std::size_t>(static_cast<float>(inputDims[0] - kernelDims[0] + strideDims[0]) / - static_cast<float>(strideDims[0])); - // output W size - const std::size_t oySize = - static_cast<std::size_t>(static_cast<float>(inputDims[1] - kernelDims[1] + strideDims[1]) / - static_cast<float>(strideDims[1])); - - // TODO: kernel computation - // output (Xout, Yout, outCh, batch) - // input (Xin, Yin, inCh, batch) - // weight (kernelX, kernelY, inCh, outCh) - // does not take Dilation attribute into account - for (std::size_t ox = 0; ox < oxSize; ++ox) { - for (std::size_t oy = 0; oy < oySize; ++oy) { - const std::size_t ix = ox * strideDims[0]; - const std::size_t iy = oy * strideDims[1]; - - for (std::size_t outCh = 0; outCh < outChannels; 
++outCh) { - const std::size_t oIndex = inputDims[3] * (outCh + outChannels * (oy + oySize * ox)); - B biasVal = (biases != nullptr) ? biases[outCh] : B(0); - for (std::size_t batch = 0; batch < inputDims[3]; ++batch) { - output[oIndex + batch] = biasVal; - } - for (std::size_t inCh = 0; inCh < inputDims[2]; ++inCh) { - for (std::size_t sx = 0; sx < kernelDims[0]; ++sx) { - for (std::size_t sy = 0; sy < kernelDims[1]; ++sy) { - const std::size_t wIndex = - outCh + outChannels * (inCh + inputDims[2] * (sy + kernelDims[1] * sx)); - std::size_t iIndex = inputDims[3] * (inCh + inputDims[2] * ((iy + sy) + inputDims[1] * (ix + sx))); - for (std::size_t batch = 0; batch < inputDims[3]; ++batch) { - output[oIndex + batch] += weights[wIndex] * input[iIndex + batch]; - } - } - } - } - } - } - } -*/ - // output H size const std::size_t oxSize = @@ -240,20 +198,19 @@ void ConvImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& strideDims, } } -namespace { -static Registrar<ConvImpl2DForward_cpu> registrarConvImpl2DForward_cpu_Float32( - {DataType::Float32, DataType::Float32, DataType::Float32, DataType::Float32}, - Aidge::ConvImpl2D_cpu_forward_kernel<float, float, float, float>); -static Registrar<ConvImpl2DForward_cpu> registrarConvImpl2DForward_cpu_Float16( - {DataType::Float16, DataType::Float16, DataType::Float16, DataType::Float16}, - Aidge::ConvImpl2D_cpu_forward_kernel<half_float::half, half_float::half, half_float::half, half_float::half>); -static Registrar<ConvImpl2DForward_cpu> registrarConvImpl2DForward_cpu_Int32( - {DataType::Int32, DataType::Int32, DataType::Int32, DataType::Int32}, - Aidge::ConvImpl2D_cpu_forward_kernel<int, int, int, int>); -static Registrar<ConvImpl2DForward_cpu> registrarConvImpl2DForward_cpu_Float64( - {DataType::Float64, DataType::Float64, DataType::Float64, DataType::Float64}, - Aidge::ConvImpl2D_cpu_forward_kernel<double, double, double, double>); -} // namespace +// Kernels registration to implementation entry point 
+REGISTRAR(ConvImpl2D_cpu, + {{DataType::Any, DataFormat::NCHW}, {DataType::Float32, DataFormat::NCHW}}, + {ProdConso::inPlaceModel, Aidge::ConvImpl2D_cpu_forward_kernel<float, float, float, float>, nullptr}); +REGISTRAR(ConvImpl2D_cpu, + {{DataType::Any, DataFormat::NCHW}, {DataType::Float16, DataFormat::NCHW}}, + {ProdConso::inPlaceModel, Aidge::ConvImpl2D_cpu_forward_kernel<half_float::half, half_float::half, half_float::half, half_float::half>, nullptr}); +REGISTRAR(ConvImpl2D_cpu, + {{DataType::Any, DataFormat::NCHW}, {DataType::Int32, DataFormat::NCHW}}, + {ProdConso::inPlaceModel, Aidge::ConvImpl2D_cpu_forward_kernel<int32_t, int32_t, int32_t, int32_t>, nullptr}); +REGISTRAR(ConvImpl2D_cpu, + {{DataType::Any, DataFormat::NCHW}, {DataType::Float64, DataFormat::NCHW}}, + {ProdConso::inPlaceModel, Aidge::ConvImpl2D_cpu_forward_kernel<double, double, double, double>, nullptr}); } // namespace Aidge -#endif /* AIDGE_CPU_OPERATOR_CONVIMPL_FORWARD_KERNEL_H_ */ +#endif /* AIDGE_CPU_OPERATOR_CONVIMPL_KERNELS_H_ */ diff --git a/include/aidge/backend/cpu/operator/DivImpl.hpp b/include/aidge/backend/cpu/operator/DivImpl.hpp index 3a19d7303464e3543bd1ce83e334c4a6bdb713a2..40c1b678a78713d6c3b27629ae898c715797b9b2 100644 --- a/include/aidge/backend/cpu/operator/DivImpl.hpp +++ b/include/aidge/backend/cpu/operator/DivImpl.hpp @@ -16,38 +16,18 @@ #include <tuple> #include <vector> -#include "aidge/backend/OperatorImpl.hpp" +#include "aidge/backend/cpu/operator/OperatorImpl.hpp" #include "aidge/operator/Div.hpp" #include "aidge/utils/Registrar.hpp" #include "aidge/utils/Types.h" namespace Aidge { +// Operator implementation entry point for the backend +using DivImpl_cpu = OperatorImpl_cpu<Div_Op, + void(const std::size_t, const std::size_t, const std::size_t, const void*, const void*,void*)>; -// compute kernel registry for forward and backward -class DivImplForward_cpu - // : public Registrable<DivImplForward_cpu, std::tuple<DataType, DataType, DataType>, void(const 
std::vector<std::size_t>&, const std::vector<std::size_t>&, const std::vector<std::size_t>&, const void*, const void*,void*)> { - : public Registrable<DivImplForward_cpu, std::tuple<DataType, DataType, DataType>, void(const std::size_t, const std::size_t, const std::size_t, const void*, const void*,void*)> { -}; -class DivImplBackward_cpu - : public Registrable<DivImplBackward_cpu, std::tuple<DataType, DataType, DataType>, void(const std::vector<std::size_t>&, const std::vector<std::size_t>&, const std::vector<std::size_t>&, const void*, const void*, void*)> { -}; - -class DivImpl_cpu : public OperatorImpl { -public: - DivImpl_cpu(const Div_Op& op) : OperatorImpl(op, "cpu") {} - - static std::unique_ptr<DivImpl_cpu> create(const Div_Op& op) { - return std::make_unique<DivImpl_cpu>(op); - } - - Elts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final; - - void forward() override final; -}; - -namespace { -static Registrar<Div_Op> registrarDivImpl_cpu("cpu", Aidge::DivImpl_cpu::create); -} +// Implementation entry point registration to Operator +REGISTRAR(Div_Op, "cpu", Aidge::DivImpl_cpu::create); } // namespace Aidge #endif /* AIDGE_CPU_OPERATOR_DIVIMPL_H_ */ diff --git a/include/aidge/backend/cpu/operator/DivImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/DivImpl_kernels.hpp similarity index 77% rename from include/aidge/backend/cpu/operator/DivImpl_forward_kernels.hpp rename to include/aidge/backend/cpu/operator/DivImpl_kernels.hpp index 74db1128c111ae62bedb6fa61682abca62429cdb..ed6e55a79acbe23a689a67c22477f64f785a3aef 100644 --- a/include/aidge/backend/cpu/operator/DivImpl_forward_kernels.hpp +++ b/include/aidge/backend/cpu/operator/DivImpl_kernels.hpp @@ -9,8 +9,8 @@ * ********************************************************************************/ -#ifndef AIDGE_CPU_OPERATOR_DIVIMPL_FORWARD_KERNEL_H_ -#define AIDGE_CPU_OPERATOR_DIVIMPL_FORWARD_KERNEL_H_ +#ifndef AIDGE_CPU_OPERATOR_DIVIMPL_KERNELS_H_ +#define 
AIDGE_CPU_OPERATOR_DIVIMPL_KERNELS_H_ #include <numeric> // std::accumulate #include <cstddef> // std::size_t @@ -69,19 +69,16 @@ constexpr void DivImpl_cpu_forward_kernel(const std::size_t input1size_, } } - - -namespace { -static Registrar<DivImplForward_cpu> registrarDivImplForward_cpu_Float32( - {DataType::Float32, DataType::Float32, DataType::Float32}, - Aidge::DivImpl_cpu_forward_kernel<float, float, float>); -static Registrar<DivImplForward_cpu> registrarDivImplForward_cpu_Int32( - {DataType::Int32, DataType::Int32, DataType::Int32}, - Aidge::DivImpl_cpu_forward_kernel<std::int32_t, std::int32_t, std::int32_t>); -static Registrar<DivImplForward_cpu> registrarDivImplForward_cpu_Float64( - {DataType::Float64, DataType::Float64, DataType::Float64}, - Aidge::DivImpl_cpu_forward_kernel<double, double, double>); -} // namespace +// Kernels registration to implementation entry point +REGISTRAR(DivImpl_cpu, + {DataType::Float32}, + {ProdConso::inPlaceModel, Aidge::DivImpl_cpu_forward_kernel<float, float, float>, nullptr}); +REGISTRAR(DivImpl_cpu, + {DataType::Float64}, + {ProdConso::inPlaceModel, Aidge::DivImpl_cpu_forward_kernel<double, double, double>, nullptr}); +REGISTRAR(DivImpl_cpu, + {DataType::Int32}, + {ProdConso::inPlaceModel, Aidge::DivImpl_cpu_forward_kernel<std::int32_t, std::int32_t, std::int32_t>, nullptr}); } // namespace Aidge -#endif /* AIDGE_CPU_OPERATOR_DIVIMPL_FORWARD_KERNEL_H_ */ +#endif /* AIDGE_CPU_OPERATOR_DIVIMPL_KERNELS_H_ */ diff --git a/include/aidge/backend/cpu/operator/ErfImpl.hpp b/include/aidge/backend/cpu/operator/ErfImpl.hpp index 6864803a542e4beed0259be9c4722d4215bec449..3d2835600367e81499cbe6af81a8475a0cd1b61e 100644 --- a/include/aidge/backend/cpu/operator/ErfImpl.hpp +++ b/include/aidge/backend/cpu/operator/ErfImpl.hpp @@ -12,7 +12,7 @@ #ifndef AIDGE_CPU_OPERATOR_ERFIMPL_H_ #define AIDGE_CPU_OPERATOR_ERFIMPL_H_ -#include "aidge/backend/OperatorImpl.hpp" +#include "aidge/backend/cpu/operator/OperatorImpl.hpp" #include 
"aidge/operator/Erf.hpp" #include "aidge/utils/Registrar.hpp" #include "aidge/utils/Types.h" @@ -20,31 +20,12 @@ #include <vector> namespace Aidge { -// class Erf_Op; +// Operator implementation entry point for the backend +using ErfImpl_cpu = OperatorImpl_cpu<Erf_Op, + void(const std::size_t, const void*, void*)>; -// compute kernel registry for forward and backward -class ErfImplForward_cpu - : public Registrable<ErfImplForward_cpu, std::tuple<DataType, DataType>, void(const std::size_t, const void*, void*)> { -}; -class ErfImplBackward_cpu - : public Registrable<ErfImplBackward_cpu, std::tuple<DataType, DataType>, void(const std::size_t, const void*, void*)> { -}; - -class ErfImpl_cpu : public OperatorImpl { -public: - ErfImpl_cpu(const Erf_Op& op) : OperatorImpl(op, "cpu") {} - - static std::unique_ptr<ErfImpl_cpu> create(const Erf_Op& op) { - return std::make_unique<ErfImpl_cpu>(op); - } - - Elts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final; - void forward() override; -}; - -namespace { -static Registrar<Erf_Op> registrarErfImpl_cpu("cpu", Aidge::ErfImpl_cpu::create); -} +// Implementation entry point registration to Operator +REGISTRAR(Erf_Op, "cpu", Aidge::ErfImpl_cpu::create); } // namespace Aidge #endif /* AIDGE_CPU_OPERATOR_ERFIMPL_H_ */ diff --git a/include/aidge/backend/cpu/operator/ErfImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/ErfImpl_kernels.hpp similarity index 57% rename from include/aidge/backend/cpu/operator/ErfImpl_forward_kernels.hpp rename to include/aidge/backend/cpu/operator/ErfImpl_kernels.hpp index bb92401b6e72b1528d0342474bf394a7c29a4042..02041f55ce9a1b2476db575b40340b1bb6517ce1 100644 --- a/include/aidge/backend/cpu/operator/ErfImpl_forward_kernels.hpp +++ b/include/aidge/backend/cpu/operator/ErfImpl_kernels.hpp @@ -9,8 +9,8 @@ * ********************************************************************************/ -#ifndef AIDGE_CPU_OPERATOR_ERFIMPL_FORWARD_KERNEL_H_ -#define 
AIDGE_CPU_OPERATOR_ERFIMPL_FORWARD_KERNEL_H_ +#ifndef AIDGE_CPU_OPERATOR_ERFIMPL_KERNELS_H_ +#define AIDGE_CPU_OPERATOR_ERFIMPL_KERNELS_H_ #include <cmath> @@ -32,14 +32,16 @@ void ErfImpl_cpu_forward_kernel(std::size_t inputLenght, } } -namespace { -static Registrar<ErfImplForward_cpu> registrarErfImplForward_cpu_Float32( - {DataType::Float32, DataType::Float32}, Aidge::ErfImpl_cpu_forward_kernel<float, float>); -static Registrar<ErfImplForward_cpu> registrarErfImplForward_cpu_Int32( - {DataType::Int32, DataType::Int32}, Aidge::ErfImpl_cpu_forward_kernel<int, int>); -static Registrar<ErfImplForward_cpu> registrarErfImplForward_cpu_Float64( - {DataType::Float64, DataType::Float64}, Aidge::ErfImpl_cpu_forward_kernel<double, double>); -} // namespace +// Kernels registration to implementation entry point +REGISTRAR(ErfImpl_cpu, + {DataType::Float32}, + {ProdConso::inPlaceModel, Aidge::ErfImpl_cpu_forward_kernel<float, float>, nullptr}); +REGISTRAR(ErfImpl_cpu, + {DataType::Float64}, + {ProdConso::inPlaceModel, Aidge::ErfImpl_cpu_forward_kernel<double, double>, nullptr}); +REGISTRAR(ErfImpl_cpu, + {DataType::Int32}, + {ProdConso::inPlaceModel, Aidge::ErfImpl_cpu_forward_kernel<std::int32_t, std::int32_t>, nullptr}); } // namespace Aidge -#endif /* AIDGE_CPU_OPERATOR_ERFIMPL_FORWARD_KERNEL_H_ */ +#endif /* AIDGE_CPU_OPERATOR_ERFIMPL_KERNELS_H_ */ diff --git a/include/aidge/backend/cpu/operator/FCImpl.hpp b/include/aidge/backend/cpu/operator/FCImpl.hpp index f21cd0ff330f61b942eb55f036c7b23458a5959a..e82352d9cba60440efef87faf97dfd4ed66565b6 100644 --- a/include/aidge/backend/cpu/operator/FCImpl.hpp +++ b/include/aidge/backend/cpu/operator/FCImpl.hpp @@ -16,57 +16,33 @@ #include <memory> #include <vector> -#include "aidge/backend/OperatorImpl.hpp" +#include "aidge/backend/cpu/operator/OperatorImpl.hpp" #include "aidge/operator/FC.hpp" #include "aidge/utils/Registrar.hpp" #include "aidge/utils/Types.h" namespace Aidge { -// class FC_Op; - -// compute kernel registry for 
forward and backward -class FCImplForward_cpu : public Registrable<FCImplForward_cpu, - std::tuple<DataType, - DataType, - DataType, - DataType>, - void(const DimSize_t, - const DimSize_t, - const DimSize_t, - const void *, - const void *, - const void *, - void *)> {}; -class FCImplBackward_cpu : public Registrable<FCImplBackward_cpu, - std::tuple<DataType, - DataType, - DataType, - DataType>, - void(const DimSize_t, - const DimSize_t, - const DimSize_t, - const void *, - const void *, - const void *, - void *, - void *, - void *)> {}; - -class FCImpl_cpu : public OperatorImpl { -public: - FCImpl_cpu(const FC_Op &op) : OperatorImpl(op, "cpu") {} - - static std::unique_ptr<FCImpl_cpu> create(const FC_Op &op) { - return std::make_unique<FCImpl_cpu>(op); - } - - void forward() override final; - void backward() override final; -}; - -namespace { -static Registrar<FC_Op> registrarFCImpl_cpu("cpu", Aidge::FCImpl_cpu::create); -} +// Operator implementation entry point for the backend +using FCImpl_cpu = OperatorImpl_cpu<FC_Op, + void(const DimSize_t, + const DimSize_t, + const DimSize_t, + const void *, + const void *, + const void *, + void *), + void(const DimSize_t, + const DimSize_t, + const DimSize_t, + const void *, + const void *, + const void *, + void *, + void *, + void *)>; + +// Implementation entry point registration to Operator +REGISTRAR(FC_Op, "cpu", Aidge::FCImpl_cpu::create); } // namespace Aidge #endif /* AIDGE_CPU_OPERATOR_FCIMPL_H_ */ diff --git a/include/aidge/backend/cpu/operator/FCImpl_backward_kernels.hpp b/include/aidge/backend/cpu/operator/FCImpl_backward_kernels.hpp deleted file mode 100644 index c93a44d922dce2dc18df94bf903134ddadf5256f..0000000000000000000000000000000000000000 --- a/include/aidge/backend/cpu/operator/FCImpl_backward_kernels.hpp +++ /dev/null @@ -1,92 +0,0 @@ -/******************************************************************************** - * Copyright (c) 2023 CEA-List - * - * This program and the accompanying materials 
are made available under the - * terms of the Eclipse Public License 2.0 which is available at - * http://www.eclipse.org/legal/epl-2.0. - * - * SPDX-License-Identifier: EPL-2.0 - * - ********************************************************************************/ - -#ifndef AIDGE_CPU_OPERATOR_FCIMPL_BACKWARD_KERNEL_H_ -#define AIDGE_CPU_OPERATOR_FCIMPL_BACKWARD_KERNEL_H_ - -#include "aidge/utils/Registrar.hpp" -#include <algorithm> - -#include "aidge/backend/cpu/operator/FCImpl.hpp" - -namespace Aidge { -template <class I, class O, class W, class B> -void FCImpl_cpu_backward_kernel(const DimSize_t batchSize, - const DimSize_t inputFeatureSize, - const DimSize_t outputFeatureSize, - const void* input_, - const void* originalInput_, - const void* weight_, - void* output_, - void* weightGrad_, - void* biasesGrad_) -{ - // FIXME: missing FC attributes as arguments - const I* input = static_cast<const I*>(input_); - const I* originalInput = static_cast<const I*>(originalInput_); - const W* weight = static_cast<const W*>(weight_); - O* output = static_cast<O*>(output_); - W* weightGrad = static_cast<W*>(weightGrad_); - B* biasesGrad = static_cast<B*>(biasesGrad_); - - - // bias grad - if (biasesGrad == nullptr) { // no bias - std::fill(biasesGrad, biasesGrad + outputFeatureSize, B(0)); - } else { - for (std::size_t o = 0; o < outputFeatureSize; ++o) { // nb outputs - B sum{0}; - for (std::size_t b = 0; b < batchSize; ++b) { - sum += input[b*outputFeatureSize + o]; - } - biasesGrad[o] = sum; - } - } - - // weight grad - for (std::size_t o = 0; o < outputFeatureSize; ++o) { - for (std::size_t c = 0; c < inputFeatureSize; ++c) { - W sum{0}; - for (std::size_t b = 0; b < batchSize; ++b) { - sum += originalInput[b*inputFeatureSize + c]*input[b*outputFeatureSize + o]; - } - weightGrad[o*inputFeatureSize + c] = sum; - } - } - - // input grad - for (std::size_t b = 0; b < batchSize; ++b) { - for (std::size_t c = 0; c < inputFeatureSize; ++c) { - O sum{0}; - for (std::size_t o 
= 0; o < outputFeatureSize; ++o) { - sum += weight[o*inputFeatureSize + c] * input[b*outputFeatureSize + o]; - } - output[b*inputFeatureSize + c] = sum; - } - } -} - - -namespace { -static Registrar<FCImplBackward_cpu> registrarFCImpl2DBackward_cpu_Float32( - {DataType::Float32, DataType::Float32, DataType::Float32, DataType::Float32}, - Aidge::FCImpl_cpu_backward_kernel<float, float, float, float>); -static Registrar<FCImplBackward_cpu> registrarFCImpl2DBackward_cpu_Int32( - {DataType::Int32, DataType::Int32, DataType::Int32, DataType::Int32}, - Aidge::FCImpl_cpu_backward_kernel<int, int, int, int>); -static Registrar<FCImplBackward_cpu> registrarFCImpl2DBackward_cpu_Float64( - {DataType::Float64, DataType::Float64, DataType::Float64, DataType::Float64}, - Aidge::FCImpl_cpu_backward_kernel<double, double, double, double>); -} // namespace - -} // namespace Aidge - -#endif /* AIDGE_CPU_OPERATOR_FCIMPL_BACKWARD_KERNEL_H_ */ diff --git a/include/aidge/backend/cpu/operator/FCImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/FCImpl_kernels.hpp similarity index 62% rename from include/aidge/backend/cpu/operator/FCImpl_forward_kernels.hpp rename to include/aidge/backend/cpu/operator/FCImpl_kernels.hpp index caeacd1bda2fde086fd649c50a733e790fc2c000..c57f86e6ac6e74acebb48f471991e7181920f7c3 100644 --- a/include/aidge/backend/cpu/operator/FCImpl_forward_kernels.hpp +++ b/include/aidge/backend/cpu/operator/FCImpl_kernels.hpp @@ -9,8 +9,8 @@ * ********************************************************************************/ -#ifndef AIDGE_CPU_OPERATOR_FCIMPL_FORWARD_KERNEL_H_ -#define AIDGE_CPU_OPERATOR_FCIMPL_FORWARD_KERNEL_H_ +#ifndef AIDGE_CPU_OPERATOR_FCIMPL_KERNELS_H_ +#define AIDGE_CPU_OPERATOR_FCIMPL_KERNELS_H_ #include <algorithm> @@ -115,19 +115,72 @@ void FCImpl_cpu_forward_kernel(const DimSize_t batchSize, } } +template <class I, class O, class W, class B> +void FCImpl_cpu_backward_kernel(const DimSize_t batchSize, + const DimSize_t inputFeatureSize, + 
const DimSize_t outputFeatureSize, + const void* input_, + const void* originalInput_, + const void* weight_, + void* output_, + void* weightGrad_, + void* biasesGrad_) +{ + // FIXME: missing FC attributes as arguments + const I* input = static_cast<const I*>(input_); + const I* originalInput = static_cast<const I*>(originalInput_); + const W* weight = static_cast<const W*>(weight_); + O* output = static_cast<O*>(output_); + W* weightGrad = static_cast<W*>(weightGrad_); + B* biasesGrad = static_cast<B*>(biasesGrad_); + + + // bias grad + if (biasesGrad == nullptr) { // no bias + std::fill(biasesGrad, biasesGrad + outputFeatureSize, B(0)); + } else { + for (std::size_t o = 0; o < outputFeatureSize; ++o) { // nb outputs + B sum{0}; + for (std::size_t b = 0; b < batchSize; ++b) { + sum += input[b*outputFeatureSize + o]; + } + biasesGrad[o] = sum; + } + } -namespace { -static Registrar<FCImplForward_cpu> registrarFCImpl2DForward_cpu_Float32( - {DataType::Float32, DataType::Float32, DataType::Float32, DataType::Float32}, - Aidge::FCImpl_cpu_forward_kernel<float, float, float, float>); -static Registrar<FCImplForward_cpu> registrarFCImpl2DForward_cpu_Int32( - {DataType::Int32, DataType::Int32, DataType::Int32, DataType::Int32}, - Aidge::FCImpl_cpu_forward_kernel<int, int, int, int>); -static Registrar<FCImplForward_cpu> registrarFCImpl2DForward_cpu_Float64( - {DataType::Float64, DataType::Float64, DataType::Float64, DataType::Float64}, - Aidge::FCImpl_cpu_forward_kernel<double, double, double, double>); -} // namespace + // weight grad + for (std::size_t o = 0; o < outputFeatureSize; ++o) { + for (std::size_t c = 0; c < inputFeatureSize; ++c) { + W sum{0}; + for (std::size_t b = 0; b < batchSize; ++b) { + sum += originalInput[b*inputFeatureSize + c]*input[b*outputFeatureSize + o]; + } + weightGrad[o*inputFeatureSize + c] = sum; + } + } + + // input grad + for (std::size_t b = 0; b < batchSize; ++b) { + for (std::size_t c = 0; c < inputFeatureSize; ++c) { + O sum{0}; + 
for (std::size_t o = 0; o < outputFeatureSize; ++o) { + sum += weight[o*inputFeatureSize + c] * input[b*outputFeatureSize + o]; + } + output[b*inputFeatureSize + c] = sum; + } + } +} +// Kernels registration to implementation entry point +REGISTRAR(FCImpl_cpu, + {ImplSpec::IOSpec{DataType::Any}, ImplSpec::IOSpec{DataType::Float32}}, + {ProdConso::defaultModel, Aidge::FCImpl_cpu_forward_kernel<float, float, float, float>, Aidge::FCImpl_cpu_backward_kernel<float, float, float, float>}); +REGISTRAR(FCImpl_cpu, + {ImplSpec::IOSpec{DataType::Any}, ImplSpec::IOSpec{DataType::Float64}}, + {ProdConso::defaultModel, Aidge::FCImpl_cpu_forward_kernel<double, double, double, double>, Aidge::FCImpl_cpu_backward_kernel<double, double, double, double>}); +REGISTRAR(FCImpl_cpu, + {ImplSpec::IOSpec{DataType::Any}, ImplSpec::IOSpec{DataType::Int32}}, + {ProdConso::defaultModel, Aidge::FCImpl_cpu_forward_kernel<int32_t, int32_t, int32_t, int32_t>, Aidge::FCImpl_cpu_backward_kernel<int32_t, int32_t, int32_t, int32_t>}); } // namespace Aidge -#endif /* AIDGE_CPU_OPERATOR_FCIMPL_FORWARD_KERNEL_H_ */ +#endif /* AIDGE_CPU_OPERATOR_FCIMPL_KERNELS_H_ */ diff --git a/include/aidge/backend/cpu/operator/FoldImpl.hpp b/include/aidge/backend/cpu/operator/FoldImpl.hpp new file mode 100644 index 0000000000000000000000000000000000000000..94ddbdcba8e33e12108968d536037ab1ccab2c8d --- /dev/null +++ b/include/aidge/backend/cpu/operator/FoldImpl.hpp @@ -0,0 +1,42 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. 
+ * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#ifndef AIDGE_CPU_OPERATOR_FOLDIMPL_H_ +#define AIDGE_CPU_OPERATOR_FOLDIMPL_H_ + +#include <array> +#include <memory> +#include <tuple> +#include <vector> + +#include "aidge/backend/cpu/operator/OperatorImpl.hpp" +#include "aidge/operator/Fold.hpp" +#include "aidge/utils/Registrar.hpp" +#include "aidge/utils/Types.h" +#include "aidge/backend/cpu/data/GetCPUPtr.h" + +namespace Aidge { +// Operator implementation entry point for the backend +using Fold2D_Op = Fold_Op<2>; +using FoldImpl2D_cpu = OperatorImpl_cpu<Fold_Op<2>, + void(const std::array<DimSize_t, 2>&, + const std::array<DimSize_t, 2>&, + const std::array<DimSize_t, 2>&, + const std::array<DimSize_t, 2>&, + const std::vector<DimSize_t> &, + const void *, + void *)>; + +// Implementation entry point registration to Operator +REGISTRAR(Fold2D_Op, "cpu", Aidge::FoldImpl2D_cpu::create); +} // namespace Aidge + +#endif /* AIDGE_CPU_OPERATOR_FOLDIMPL_H_ */ diff --git a/include/aidge/backend/cpu/operator/FoldImpl_kernels.hpp b/include/aidge/backend/cpu/operator/FoldImpl_kernels.hpp new file mode 100644 index 0000000000000000000000000000000000000000..8cced8958f49f1cc4215c7cf463cc3391fb29246 --- /dev/null +++ b/include/aidge/backend/cpu/operator/FoldImpl_kernels.hpp @@ -0,0 +1,86 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. 
+ * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#ifndef AIDGE_CPU_OPERATOR_FOLDIMPL_KERNELS_H_ +#define AIDGE_CPU_OPERATOR_FOLDIMPL_KERNELS_H_ + +#include "aidge/utils/Registrar.hpp" + +#include "aidge/backend/cpu/operator/FoldImpl.hpp" +#include "aidge/utils/Types.h" +#include "aidge/backend/cpu/data/GetCPUPtr.h" +#include <cmath> +#include <array> +#include <algorithm> + +namespace Aidge { +template <class I, class O> +void FoldImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& outputDims, + const std::array<DimSize_t, 2>& strideDims, + const std::array<DimSize_t, 2>& dilationDims, + const std::array<DimSize_t, 2>& kernelDims, + const std::vector<DimSize_t> &dims, + const void *input_, void *output_) +{ + const I *input = static_cast<const I *>(input_); + O *output = static_cast<O *>(output_); + + const DimSize_t inHeight = outputDims[0]; + const DimSize_t inWidth = outputDims[1]; + + const DimSize_t kernelExtentHeight = dilationDims[0] * + (kernelDims[0] - 1) + 1; + const DimSize_t outHeight = 1 + static_cast<DimSize_t>( + floor(static_cast<float>(inHeight - kernelExtentHeight) / + static_cast<float>(strideDims[0]))); + const DimSize_t kernelExtentWidth = dilationDims[1] * + (kernelDims[1] - 1) + 1; + const DimSize_t outWidth = 1 + static_cast<DimSize_t>( + floor(static_cast<float>(inWidth - kernelExtentWidth) / + static_cast<float>(strideDims[1]))); + const DimSize_t outChannels = dims[dims.size() - 2]; + const DimSize_t inChannels = outChannels / kernelDims[0] / kernelDims[1]; + + std::fill_n(output, dims[0] * outHeight * outWidth * outChannels, O(0)); + + for (DimSize_t n = 0; n < dims[0]; ++n) { + for (DimSize_t outC = 0; outC < outChannels; ++outC) { + const auto inOffsetW = outC % kernelDims[1]; + const auto inOffsetH = (outC / kernelDims[1]) % kernelDims[0]; + const auto inC = outC / kernelDims[0] / kernelDims[1]; + + for (DimSize_t outH = 0; outH < outHeight; 
++outH) { + const auto inH = outH * strideDims[0] + inOffsetH * dilationDims[0]; + + for (DimSize_t outW = 0; outW < outWidth; ++outW) { + const auto inW = outW * strideDims[1] + inOffsetW * dilationDims[1]; + + output[((n * inChannels + inC) * inHeight + inH) * inWidth + inW] += + input[((n * outChannels + outC) * outHeight + outH) * outWidth + outW]; + } + } + } + } +} + +// Kernels registration to implementation entry point +REGISTRAR(FoldImpl2D_cpu, + {DataType::Float32}, + {ProdConso::defaultModel, Aidge::FoldImpl2D_cpu_forward_kernel<float, float>, nullptr}); +REGISTRAR(FoldImpl2D_cpu, + {DataType::Float64}, + {ProdConso::defaultModel, Aidge::FoldImpl2D_cpu_forward_kernel<double, double>, nullptr}); +REGISTRAR(FoldImpl2D_cpu, + {DataType::Int32}, + {ProdConso::defaultModel, Aidge::FoldImpl2D_cpu_forward_kernel<int32_t, int32_t>, nullptr}); +} // namespace Aidge + +#endif /* AIDGE_CPU_OPERATOR_FOLDIMPL_KERNELS_H_ */ diff --git a/include/aidge/backend/cpu/operator/GlobalAveragePoolingImpl.hpp b/include/aidge/backend/cpu/operator/GlobalAveragePoolingImpl.hpp index 758535de4cc506b8de4adf7004afbbfdd8185941..4e04b1a595a8660b1528e49921e7e3e7a567829a 100644 --- a/include/aidge/backend/cpu/operator/GlobalAveragePoolingImpl.hpp +++ b/include/aidge/backend/cpu/operator/GlobalAveragePoolingImpl.hpp @@ -15,41 +15,18 @@ #include <memory> #include <vector> -#include "aidge/backend/OperatorImpl.hpp" +#include "aidge/backend/cpu/operator/OperatorImpl.hpp" #include "aidge/operator/GlobalAveragePooling.hpp" #include "aidge/utils/Registrar.hpp" #include "aidge/utils/Types.h" namespace Aidge { -// class GlobalAveragePooling_Op; +// Operator implementation entry point for the backend +using GlobalAveragePoolingImpl_cpu = OperatorImpl_cpu<GlobalAveragePooling_Op, + void(const std::vector<DimSize_t> &, const void *, void *)>; -class GlobalAveragePoolingImplForward_cpu - : public Registrable< - GlobalAveragePoolingImplForward_cpu, std::tuple<DataType, DataType>, - void(const 
std::vector<DimSize_t> &, const void *, void *)> {}; - -class GlobalAveragePoolingImplBackward_cpu - : public Registrable< - GlobalAveragePoolingImplBackward_cpu, std::tuple<DataType, DataType>, - void(const std::vector<DimSize_t> &, const void *, void *)> {}; - -class GlobalAveragePoolingImpl_cpu : public OperatorImpl { -public: - GlobalAveragePoolingImpl_cpu(const GlobalAveragePooling_Op &op) - : OperatorImpl(op, "cpu") {} - - static std::unique_ptr<GlobalAveragePoolingImpl_cpu> - create(const GlobalAveragePooling_Op &op) { - return std::make_unique<GlobalAveragePoolingImpl_cpu>(op); - } - - void forward() override; -}; - -namespace { -static Registrar<GlobalAveragePooling_Op> registrarGlobalAveragePoolingImpl_cpu( - "cpu", Aidge::GlobalAveragePoolingImpl_cpu::create); -} +// Implementation entry point registration to Operator +REGISTRAR(GlobalAveragePooling_Op, "cpu", Aidge::GlobalAveragePoolingImpl_cpu::create); } // namespace Aidge #endif /* _AIDGE_CPU_OPERATOR_GLOBALAVERAGEPOOLINGIMPL_H_ */ diff --git a/include/aidge/backend/cpu/operator/GlobalAveragePoolingImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/GlobalAveragePoolingImpl_kernels.hpp similarity index 68% rename from include/aidge/backend/cpu/operator/GlobalAveragePoolingImpl_forward_kernels.hpp rename to include/aidge/backend/cpu/operator/GlobalAveragePoolingImpl_kernels.hpp index 81f10975cc107a23448da3df14b88f6b31d55146..ed838a94cc0c0238a870427c3b774b29f7818b09 100644 --- a/include/aidge/backend/cpu/operator/GlobalAveragePoolingImpl_forward_kernels.hpp +++ b/include/aidge/backend/cpu/operator/GlobalAveragePoolingImpl_kernels.hpp @@ -9,8 +9,8 @@ * ********************************************************************************/ -#ifndef AIDGE_CPU_OPERATOR_GLOBALAVERAGEPOOLINGIMPL_FORWARD_KERNEL_H_ -#define AIDGE_CPU_OPERATOR_GLOBALAVERAGEPOOLINGIMPL_FORWARD_KERNEL_H_ +#ifndef AIDGE_CPU_OPERATOR_GLOBALAVERAGEPOOLINGIMPL_KERNELS_H_ +#define 
AIDGE_CPU_OPERATOR_GLOBALAVERAGEPOOLINGIMPL_KERNELS_H_ #include <cstddef> #include <functional> // std::multiplies @@ -59,21 +59,16 @@ void GlobalAveragePoolingImpl_cpu_forward_kernel( } } -// Then we add the Registrar declaration for different input/output types -namespace { -static Registrar<GlobalAveragePoolingImplForward_cpu> - registrarGlobalAveragePoolingImplForward_cpu_Float32( - {DataType::Float32, DataType::Float32}, - Aidge::GlobalAveragePoolingImpl_cpu_forward_kernel<float, float>); -static Registrar<GlobalAveragePoolingImplForward_cpu> - registrarGlobalAveragePoolingImplForward_cpu_Int32( - {DataType::Int32, DataType::Int32}, - Aidge::GlobalAveragePoolingImpl_cpu_forward_kernel<int, int>); -static Registrar<GlobalAveragePoolingImplForward_cpu> - registrarGlobalAveragePoolingImplForward_cpu_Float64( - {DataType::Float64, DataType::Float64}, - Aidge::GlobalAveragePoolingImpl_cpu_forward_kernel<double, double>); -} // namespace +// Kernels registration to implementation entry point +REGISTRAR(GlobalAveragePoolingImpl_cpu, + {DataType::Float32}, + {ProdConso::defaultModel, Aidge::GlobalAveragePoolingImpl_cpu_forward_kernel<float, float>, nullptr}); +REGISTRAR(GlobalAveragePoolingImpl_cpu, + {DataType::Float64}, + {ProdConso::defaultModel, Aidge::GlobalAveragePoolingImpl_cpu_forward_kernel<double, double>, nullptr}); +REGISTRAR(GlobalAveragePoolingImpl_cpu, + {DataType::Int32}, + {ProdConso::defaultModel, Aidge::GlobalAveragePoolingImpl_cpu_forward_kernel<int32_t, int32_t>, nullptr}); } // namespace Aidge -#endif /* AIDGE_CPU_OPERATOR_GLOBALAVERAGEPOOLINGIMPL_FORWARD_KERNEL_H_ */ +#endif /* AIDGE_CPU_OPERATOR_GLOBALAVERAGEPOOLINGIMPL_KERNELS_H_ */ diff --git a/include/aidge/backend/cpu/operator/GridSampleImpl.hpp b/include/aidge/backend/cpu/operator/GridSampleImpl.hpp new file mode 100644 index 0000000000000000000000000000000000000000..697bb35a983bc108c2a5d65db3c08ef462ffcdbd --- /dev/null +++ b/include/aidge/backend/cpu/operator/GridSampleImpl.hpp @@ -0,0 
+1,38 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. + * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#ifndef AIDGE_CPU_OPERATOR_GRIDSAMPLEIMPL_H_ +#define AIDGE_CPU_OPERATOR_GRIDSAMPLEIMPL_H_ + +#include <array> +#include <memory> +#include <tuple> +#include <vector> + +#include "aidge/backend/cpu/operator/OperatorImpl.hpp" +#include "aidge/operator/GridSample.hpp" +#include "aidge/utils/Registrar.hpp" +#include "aidge/utils/Types.h" +#include "aidge/backend/cpu/data/GetCPUPtr.h" + +namespace Aidge { +// Operator implementation entry point for the backend +using GridSampleImpl_cpu = OperatorImpl_cpu<GridSample_Op, + void(const GridSample_Op&, + const std::shared_ptr<Tensor>&, + const std::shared_ptr<Tensor>&, + const std::shared_ptr<Tensor>&)>; + +// Implementation entry point registration to Operator +REGISTRAR(GridSample_Op, "cpu", Aidge::GridSampleImpl_cpu::create); +} // namespace Aidge + +#endif /* AIDGE_CPU_OPERATOR_GRIDSAMPLEIMPL_H_ */ diff --git a/include/aidge/backend/cpu/operator/GridSampleImpl_kernels.hpp b/include/aidge/backend/cpu/operator/GridSampleImpl_kernels.hpp new file mode 100644 index 0000000000000000000000000000000000000000..ea62fd010db8c155a3ff86ff8396797da5ebb6be --- /dev/null +++ b/include/aidge/backend/cpu/operator/GridSampleImpl_kernels.hpp @@ -0,0 +1,477 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. 
+ * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#ifndef AIDGE_CPU_OPERATOR_GRIDSAMPLEIMPL_KERNELS_H_ +#define AIDGE_CPU_OPERATOR_GRIDSAMPLEIMPL_KERNELS_H_ + +#include <algorithm> // std::max, std::min +#include <cmath> // std::fabs, std::truncf, std::nearbyint +#include <cstddef> // std::size_t +#include <cstdint> // std::int64_t + +#include "aidge/backend/cpu/data/GetCPUPtr.h" +#include "aidge/backend/cpu/operator/GridSampleImpl.hpp" +#include "aidge/data/half.hpp" +#include "aidge/utils/Registrar.hpp" +#include "aidge/utils/Types.h" + +static bool in_bound(float coord, float lower_bound, float upper_bound) noexcept { + return (coord > lower_bound) && (coord < upper_bound); +} + +static float unnormalized_coord(float coord, float new_lower_bound, float new_upper_bound) noexcept { + return (coord + 1) / 2 * (new_upper_bound - new_lower_bound) + new_lower_bound; +} + +// unused +// static float normalized_coord(float coord, float prev_lower_bound, float prev_upper_bound) noexcept { +// return (coord + prev_lower_bound) / (prev_upper_bound-prev_lower_bound) * 2 - 1; +// } + +static float unnormalize_grid_sample_coord(float coord, std::size_t size, bool align_corners) noexcept { + return align_corners ? unnormalized_coord(coord, 0.0f, static_cast<float>(size) - 1.0f) + : unnormalized_coord(coord, -0.5f, static_cast<float>(size) - 0.5f); +} + +// unused +// static float normalize_grid_sample_coord(float coord, std::size_t size, bool align_corners) noexcept { +// return align_corners ? 
normalized_coord(coord, 0.0f, static_cast<float>(size) - 1.0f) +// : normalized_coord(coord, -0.5f, static_cast<float>(size) - 0.5f); +// } + +static float update_normalized_coord_with_padding(float coord, Aidge::GridSample_Op::PaddingMode padding_mode) { + if (!in_bound(coord, -1.0f, 1.0f)) { + if (padding_mode == Aidge::GridSample_Op::PaddingMode::Border) { + coord = std::min(std::max(-1.0f, coord), 1.0f); + } + else if (padding_mode == Aidge::GridSample_Op::PaddingMode::Reflection) { + float abs_coord = std::fabs(coord); + float int_coord = std::truncf(abs_coord); + std::int32_t nb_refl = static_cast<std::int32_t>((int_coord - 1) / 2); + float res = ((nb_refl + 1)*2) - abs_coord; + coord = (coord > 0) ? (nb_refl % 2 == 0 ? res : -res) \ + : (nb_refl % 2 == 0 ? -res : res); + } + + } + return coord; +} + +static inline std::int64_t update_unnormalized_coord_with_padding(std::int64_t coord, std::int64_t size, Aidge::GridSample_Op::PaddingMode padding_mode) { + if (!in_bound(coord, 0, size)) { + // out of bound. switch padding mode + if (padding_mode == Aidge::GridSample_Op::PaddingMode::Border) { + coord = std::min(std::max(std::int64_t(0), coord), size-std::int64_t(1)); + } else if (padding_mode == Aidge::GridSample_Op::PaddingMode::Reflection) { + const std::int64_t quotient = coord / (size-1); + const std::int64_t remainer = std::abs(coord - quotient*(size-1)); + coord = (quotient % 2 == 0) ? remainer : size - 1 - remainer; + } + } + return coord; +} + +namespace Aidge { +/** + * @brief Forward kernel for 1D GridSample on CPU backend. + * @tparam I Input data type. + * @tparam O Output data type. + * @param params tuple of Attributes from the Operator + * @param inputDims Array of input dimensions. + * @param input_ const input Tensor. + * @param grid_ const grid Tensor. + * @param output_ Output Tensor. 
+ */ +template <class I, class O> +void GridSampleImpl1D_cpu_forward_kernel(const GridSample_Op& op, + const std::shared_ptr<Tensor>& in0, + const std::shared_ptr<Tensor>& in1, + const std::shared_ptr<Tensor>& out) +{ + const I* const input = static_cast<const I * const>(in0->getImpl()->rawPtr()); + const I* input_ptr = input; + float* const grid = static_cast<float* const>(in1->getImpl()->rawPtr()); + float* grid_ptr = grid; + O* const output = static_cast<O* const>(out->getImpl()->rawPtr()); + O* output_ptr = output; + + const std::size_t N = in0->dim(0); + const std::size_t C = in0->dim(1); + const std::size_t in_H = in0->dim(2); + const std::size_t grid_H = in1->dim(1); + + const std::size_t in_N_s = in0->stride(0); + const std::size_t in_C_s = in0->stride(1); + const std::size_t in_H_s = in0->stride(2); + const std::size_t grid_N_s = in1->stride(0); + const std::size_t grid_H_s = in1->stride(1); + const std::size_t out_N_s = out->stride(0); + const std::size_t out_C_s = out->stride(1); + const std::size_t out_H_s = out->stride(2); + + float* grid_ptr_N = grid; + const I* input_ptr_N = input; + O* output_ptr_N = output; + for (std::size_t n = 0; n < N; ++n) { + grid_ptr = grid_ptr_N; + for (std::size_t grid_x = 0; grid_x < grid_H; ++grid_x) { + output_ptr = output_ptr_N + grid_x*out_H_s; + /* + * change grid_x coord to match padding_mode + * Change range from [-1, 1] to [0, H-1] or [-0.5, H-0.5] according to align_corners + * Handle computation of interpolation + * any value outside bounds is considered 0 + * if nearest: + * else if linear: + * else if cubic: + * else : nothing + */ + float x = *grid_ptr; + x = update_normalized_coord_with_padding(x, op.paddingMode()); + x = unnormalize_grid_sample_coord(x, in_H, op.alignCorners()); + if (op.mode() == GridSample_Op::Mode::Nearest) { + const std::int64_t x_rounded = std::nearbyintf(x); + + if (in_bound(x_rounded, 0, in_H)) { + input_ptr = input_ptr_N + x_rounded*in_H_s; + for (std::size_t c = 0; c < C; ++c) { + 
*output_ptr = *input_ptr; + input_ptr += in_C_s; + output_ptr += out_C_s; + } + } else { + for (std::size_t c = 0; c < C; ++c) { + *output_ptr = O(0); + output_ptr += out_C_s; + } + } + } else if (op.mode() == GridSample_Op::Mode::Linear) { + const std::int64_t x_inf = update_unnormalized_coord_with_padding(static_cast<std::int64_t>(std::floor(x)), in_H, op.paddingMode()); + const std::int64_t x_sup = update_unnormalized_coord_with_padding(x_inf + 1, in_H, op.paddingMode()); + + const I* input_ptr_NC = input_ptr_N; + for (std::size_t c = 0; c < C; ++c) { + const I f_inf = in_bound(x_inf, 0, in_H) ? + input_ptr_NC[static_cast<std::size_t>(x_inf)*in_H_s] : I(0); + const I f_sup = in_bound(x_sup, 0, in_H) ? + input_ptr_NC[static_cast<std::size_t>(x_sup)*in_H_s] : I(0); + + *output_ptr = static_cast<O>(static_cast<I>(x - x_inf)*f_inf \ + + static_cast<I>(x_sup - x)*f_sup); + + input_ptr_NC += in_C_s; + output_ptr += out_C_s; + } + } else if (op.mode() == GridSample_Op::Mode::Cubic) { + const std::int64_t x_inf = update_unnormalized_coord_with_padding(static_cast<std::int64_t>(std::floor(x)), in_H, op.paddingMode()); + const std::int64_t x_sup = update_unnormalized_coord_with_padding(x_inf + 1, in_H, op.paddingMode()); + const std::int64_t x_inf_inf = update_unnormalized_coord_with_padding(x_inf - 1, in_H, op.paddingMode()); + const std::int64_t x_sup_sup = update_unnormalized_coord_with_padding(x_sup + 1, in_H, op.paddingMode()); + + const I x1 = static_cast<I>(x - static_cast<float>(x_inf)); + const I x2 = x1 * x1; + const I x3 = x1 * x2; + + const I* input_ptr_NC = input_ptr_N; + for (std::size_t c = 0; c < C; ++c) { + const I f_inf_inf = in_bound(x_inf_inf, 0, in_H) ? input_ptr_NC[x_inf_inf*in_H_s] : I(0); + const I f_inf = in_bound(x_inf, 0, in_H) ? input_ptr_NC[x_inf*in_H_s] : I(0); + const I f_sup = in_bound(x_sup, 0, in_H) ? input_ptr_NC[x_sup*in_H_s] : I(0); + const I f_sup_sup = in_bound(x_sup_sup, 0, in_H) ? 
input_ptr_NC[x_sup_sup*in_H_s] : I(0); + + const I m_inf = (f_sup - f_inf_inf) / I(2); + const I m_sup = (f_sup_sup - f_inf) / I(2); + + *output_ptr = f_inf \ + + x1 * m_inf \ + + x2 * (3 * (f_sup - f_inf) - 2 * m_inf - m_sup) \ + + x3 * (2*(f_inf - f_sup) + m_inf + m_sup); + + input_ptr_NC += in_C_s; + output_ptr += out_C_s; + } + } + + grid_ptr += grid_H_s; + } + + input_ptr_N += in_N_s; + grid_ptr_N += grid_N_s; + output_ptr_N += out_N_s; + } +} + +// Kernels registration to implementation entry point +// only accept 1st input with only 1 spatial feat. (nb dims = 1) +REGISTRAR(GridSampleImpl_cpu, + {{{DataType::Any, DataFormat::Any, {{-1, -1}}}, {DataType::Any}}, {{DataType::Float16}}}, + {ProdConso::defaultModel, Aidge::GridSampleImpl1D_cpu_forward_kernel<half_float::half, half_float::half>, nullptr}); +REGISTRAR(GridSampleImpl_cpu, + {{{DataType::Any, DataFormat::Any, {{-1, -1}}}, {DataType::Any}}, {{DataType::Float32}}}, + {ProdConso::defaultModel, Aidge::GridSampleImpl1D_cpu_forward_kernel<float, float>, nullptr}); +REGISTRAR(GridSampleImpl_cpu, + {{{DataType::Any, DataFormat::Any, {{-1, -1}}}, {DataType::Any}}, {{DataType::Float64}}}, + {ProdConso::defaultModel, Aidge::GridSampleImpl1D_cpu_forward_kernel<double, double>, nullptr}); +REGISTRAR(GridSampleImpl_cpu, + {{{DataType::Any, DataFormat::Any, {{-1, -1}}}, {DataType::Any}}, {{DataType::Int32}}}, + {ProdConso::defaultModel, Aidge::GridSampleImpl1D_cpu_forward_kernel<int32_t, int32_t>, nullptr}); + + +/** + * @brief Forward kernel for 1D GridSample on CPU backend. + * @tparam I Input data type. + * @tparam O Output data type. + * @param params tuple of Attributes from the Operator + * @param inputDims Array of input dimensions. + * @param input_ const input Tensor. + * @param grid_ const grid Tensor. + * @param output_ Output Tensor. 
+ */ +template <class I, class O> +void GridSampleImpl2D_cpu_forward_kernel(const GridSample_Op& op, + const std::shared_ptr<Tensor>& in0, + const std::shared_ptr<Tensor>& in1, + const std::shared_ptr<Tensor>& out) +{ + const I* input = static_cast<const I *>(in0->getImpl()->rawPtr()); + const I* input_ptr = input; + float* const grid = static_cast<float* const>(in1->getImpl()->rawPtr()); + float* grid_ptr = grid; + O* const output = static_cast<O* const>(out->getImpl()->rawPtr()); + + const std::size_t N = in0->dim(0); + const std::size_t C = in0->dim(1); + const std::size_t in_H = in0->dim(2); + const std::size_t in_W = in0->dim(3); + const std::size_t grid_H = in1->dim(1); + const std::size_t grid_W = in1->dim(2); + + const std::size_t in_N_s = in0->stride(0); + const std::size_t in_C_s = in0->stride(1); + const std::size_t in_H_s = in0->stride(2); + const std::size_t in_W_s = in0->stride(3); + const std::size_t grid_N_s = in1->stride(0); + const std::size_t grid_H_s = in1->stride(1); + const std::size_t grid_W_s = in1->stride(2); + const std::size_t grid_Coord_s = in1->stride(3); + const std::size_t out_N_s = out->stride(0); + const std::size_t out_C_s = out->stride(1); + const std::size_t out_H_s = out->stride(2); + const std::size_t out_W_s = out->stride(3); + + + float* grid_ptr_N = grid; + const I* input_ptr_N = input; + O* output_ptr_N = output; + for (std::size_t n = 0; n < N; ++n) { + for (std::size_t grid_y = 0; grid_y < grid_H; ++grid_y) { + for (std::size_t grid_x = 0; grid_x < grid_W; ++grid_x) { + O* output_ptr = output_ptr_N + grid_y*out_H_s + grid_x*out_W_s; + grid_ptr = grid_ptr_N + grid_y*grid_H_s + grid_x*grid_W_s; + /* + * change grid_x coord to match padding_mode + * Change range from [-1, 1] to [0, H-1] or [-0.5, H-0.5] according to align_corners + * Handle computation of interpolation + * any value outside bounds is considered 0 + * if nearest: + * else if linear: + * else if cubic: + * else : nothing + */ + float x = *grid_ptr; + float y = 
grid_ptr[grid_Coord_s]; + x = update_normalized_coord_with_padding(x, op.paddingMode()); + x = unnormalize_grid_sample_coord(x, in_W, op.alignCorners()); + y = update_normalized_coord_with_padding(y, op.paddingMode()); + y = unnormalize_grid_sample_coord(y, in_H, op.alignCorners()); + if (op.mode() == GridSample_Op::Mode::Nearest) { + const std::int64_t x_rounded = std::nearbyintf(x); + const std::int64_t y_rounded = std::nearbyintf(y); + + if (in_bound(x_rounded, 0, in_W) && in_bound(y_rounded, 0, in_H)) { + input_ptr = input_ptr_N + y_rounded*in_H_s + x_rounded*in_W_s; + for (std::size_t c = 0; c < C; ++c) { + *output_ptr = *input_ptr; + input_ptr += in_C_s; + output_ptr += out_C_s; + } + } else { + for (std::size_t c = 0; c < C; ++c) { + *output_ptr = O(0); + output_ptr += out_C_s; + } + } + } else if (op.mode() == GridSample_Op::Mode::Linear) { + const std::int64_t x_r = update_unnormalized_coord_with_padding(static_cast<std::int64_t>(std::floor(x)), in_W, op.paddingMode()); // right + const std::int64_t x_l = update_unnormalized_coord_with_padding(x_r + 1, in_W, op.paddingMode()); // left + + const std::int64_t y_t = update_unnormalized_coord_with_padding(static_cast<std::int64_t>(std::floor(y)), in_H, op.paddingMode()); // top + const std::int64_t y_b = update_unnormalized_coord_with_padding(y_t + 1, in_H, op.paddingMode()); // bottom + + const I* input_ptr_NC = input_ptr_N; + for (std::size_t c = 0; c < C; ++c) { + + const I f_tr = (in_bound(x_r, 0, in_W) && in_bound(y_t, 0, in_H)) ? + input_ptr_NC[static_cast<std::size_t>(y_t)*in_H_s + + static_cast<std::size_t>(x_r)*in_W_s] + : I(0); + const I f_tl = (in_bound(x_l, 0, in_W) && in_bound(y_t, 0, in_H)) ? + input_ptr_NC[static_cast<std::size_t>(y_t)*in_H_s + + static_cast<std::size_t>(x_l)*in_W_s] + : I(0); + const I f_br = (in_bound(x_r, 0, in_W) && in_bound(y_b, 0, in_H)) ? 
+ input_ptr_NC[static_cast<std::size_t>(y_b)*in_H_s + + static_cast<std::size_t>(x_r)*in_W_s] + : I(0); + const I f_bl = (in_bound(x_l, 0, in_W) && in_bound(y_b, 0, in_H)) ? + input_ptr_NC[static_cast<std::size_t>(y_b)*in_H_s + + static_cast<std::size_t>(x_l)*in_W_s] + : I(0); + + // compute weighted sum of the 4 corners + const I w_tr = static_cast<I>((y - static_cast<float>(y_t))*(static_cast<float>(x_r) - x)); + const I w_tl = static_cast<I>((y - static_cast<float>(y_t))*(x - static_cast<float>(x_l))); + const I w_br = static_cast<I>((static_cast<float>(y_b) - y)*(static_cast<float>(x_r) - x)); + const I w_bl = static_cast<I>((static_cast<float>(y_b) - y)*(x - static_cast<float>(x_l))); + + *output_ptr = static_cast<O>(w_tr*f_tr + w_tl*f_tl + w_br*f_br + w_bl*f_bl); + + input_ptr_NC += in_C_s; + output_ptr += out_C_s; + } + } else if (op.mode() == GridSample_Op::Mode::Cubic) { + /* + * .. .. .. .. .. .. + * .. 00 01 02 03 .. + * .. 10 11 12 13 .. + * .. 20 21 22 23 .. + * .. 30 31 32 33 .. + * .. .. .. .. .. .. 
+ */ + const std::int64_t x_1 = update_unnormalized_coord_with_padding(static_cast<std::int64_t>(std::floor(x)), in_W, op.paddingMode()); + const std::int64_t x_0 = update_unnormalized_coord_with_padding(x_1 - 1, in_W, op.paddingMode()); + const std::int64_t x_2 = update_unnormalized_coord_with_padding(x_1 + 1, in_W, op.paddingMode()); + const std::int64_t x_3 = update_unnormalized_coord_with_padding(x_1 + 2, in_W, op.paddingMode()); + + const std::int64_t y_1 = update_unnormalized_coord_with_padding(static_cast<std::int64_t>(std::floor(y)), in_H, op.paddingMode()); + const std::int64_t y_0 = update_unnormalized_coord_with_padding(y_1 - 1, in_H, op.paddingMode()); + const std::int64_t y_2 = update_unnormalized_coord_with_padding(y_1 + 1, in_H, op.paddingMode()); + const std::int64_t y_3 = update_unnormalized_coord_with_padding(y_1 + 2, in_H, op.paddingMode()); + + const I* input_ptr_NC = input_ptr_N; + + for (std::size_t c = 0; c < C; ++c) { + const I f_00 = in_bound(x_0, 0, in_W) && in_bound(y_0, 0, in_H) ? + input_ptr_NC[x_0*in_W_s + y_0*in_H_s] : I(0); + const I f_01 = in_bound(x_0, 0, in_W) && in_bound(y_1, 0, in_H) ? + input_ptr_NC[x_0*in_W_s + y_1*in_H_s] : I(0); + const I f_02 = in_bound(x_0, 0, in_W) && in_bound(y_2, 0, in_H) ? + input_ptr_NC[x_0*in_W_s + y_2*in_H_s] : I(0); + const I f_03 = in_bound(x_0, 0, in_W) && in_bound(y_3, 0, in_H) ? + input_ptr_NC[x_0*in_W_s + y_3*in_H_s] : I(0); + const I f_10 = in_bound(x_1, 0, in_W) && in_bound(y_0, 0, in_H) ? + input_ptr_NC[x_1*in_W_s + y_0*in_H_s] : I(0); + const I f_20 = in_bound(x_2, 0, in_W) && in_bound(y_0, 0, in_H) ? + input_ptr_NC[x_2*in_W_s + y_0*in_H_s] : I(0); + const I f_30 = in_bound(x_3, 0, in_W) && in_bound(y_0, 0, in_H) ? + input_ptr_NC[x_3*in_W_s + y_0*in_H_s] : I(0); + const I f_11 = in_bound(x_1, 0, in_W) && in_bound(y_1, 0, in_H) ? + input_ptr_NC[x_1*in_W_s + y_1*in_H_s] : I(0); + const I f_12 = in_bound(x_1, 0, in_W) && in_bound(y_2, 0, in_H) ? 
+ input_ptr_NC[x_1*in_W_s + y_2*in_H_s] : I(0); + const I f_13 = in_bound(x_1, 0, in_W) && in_bound(y_3, 0, in_H) ? + input_ptr_NC[x_1*in_W_s + y_3*in_H_s] : I(0); + const I f_21 = in_bound(x_2, 0, in_W) && in_bound(y_1, 0, in_H) ? + input_ptr_NC[x_2*in_W_s + y_1*in_H_s] : I(0); + const I f_22 = in_bound(x_2, 0, in_W) && in_bound(y_2, 0, in_H) ? + input_ptr_NC[x_2*in_W_s + y_2*in_H_s] : I(0); + const I f_23 = in_bound(x_2, 0, in_W) && in_bound(y_3, 0, in_H) ? + input_ptr_NC[x_2*in_W_s + y_3*in_H_s] : I(0); + const I f_31 = in_bound(x_3, 0, in_W) && in_bound(y_1, 0, in_H) ? + input_ptr_NC[x_3*in_W_s + y_1*in_H_s] : I(0); + const I f_32 = in_bound(x_3, 0, in_W) && in_bound(y_2, 0, in_H) ? + input_ptr_NC[x_3*in_W_s + y_2*in_H_s] : I(0); + const I f_33 = in_bound(x_3, 0, in_W) && in_bound(y_3, 0, in_H) ? + input_ptr_NC[x_3*in_W_s + y_3*in_H_s] : I(0); + + const I mx_11 = (f_21 - f_01) / I(2); + const I mx_12 = (f_22 - f_02) / I(2); + const I mx_21 = (f_31 - f_11) / I(2); + const I mx_22 = (f_32 - f_12) / I(2); + + const I my_11 = (f_12 - f_10) / I(2); + const I my_12 = (f_13 - f_11) / I(2); + const I my_21 = (f_22 - f_20) / I(2); + const I my_22 = (f_23 - f_21) / I(2); + + const I mxy_11 = (f_22 - f_20 - f_02 - + f_00) / I(4); + const I mxy_12 = (f_23 - f_21 - f_03 - + f_01) / I(4); + const I mxy_21 = (f_32 - f_30 - f_12 - + f_10) / I(4); + const I mxy_22 = (f_33 - f_31 - f_13 - + f_11) / I(4); + + const I a_00 = f_11; + const I a_10 = mx_11; + const I a_20 = I(3)*(f_21 - f_11) - I(2)*mx_11 - mx_21; + const I a_30 = I(2)*(f_11 - f_21) + mx_11 + mx_21; + const I a_01 = my_11; + const I a_11 = mxy_11; + const I a_21 = I(3)*(my_21 - my_11) - I(2)*mxy_11 - mxy_21; + const I a_31 = I(2)*(my_11 - my_21) + mxy_11 + mxy_21; + const I a_02 = I(3)*(f_12 - f_11) - I(2)*my_11 - my_12; + const I a_12 = I(3)*(mx_12 - mx_11) - I(2)*mxy_11 - mxy_12; + const I a_22 = I(9)*(f_11 + f_22 - f_21 - f_12) + I(3)*(I(2)*(mx_11 - mx_12 + my_11 - my_21) + mx_21 - mx_22 + my_12 - my_22) + mxy_22 
+ I(2)*(mxy_12 + mxy_21 + I(2)*mxy_11); + const I a_32 = - mxy_12 - mxy_22 + I(2)*(my_22 - my_12 - mxy_11 - mxy_21 + I(2)*(my_21 - my_11) + I(3)*(f_21 + f_12 - f_11 - f_22)) + I(3)*(mx_12 + mx_22 - mx_11 - mx_21); + const I a_03 = I(2)*(f_11 - f_12) + my_11 + my_12; + const I a_13 = I(2)*(mx_11 - mx_12) + mxy_11 + mxy_12; + const I a_23 = - mxy_21 - mxy_22 + I(2)*(-mx_21 + mx_22 - mxy_11 - mxy_12 + I(2)*(mx_12 - mx_11) + I(3)*(f_12 + f_21 - f_11 - f_22)) + I(3)*(my_21 + my_22 - my_11 - my_12); + const I a_33 = mxy_11 + mxy_21 + mxy_12 + mxy_22 + I(2)*(mx_11 + mx_21 - mx_12 - mx_22 + my_11 - my_21 + my_12 - my_22 + I(2)*(f_11 - f_21 - f_12 + f_22)); + + const I x2 = static_cast<I>(x*x); + const I x3 = static_cast<I>(x*x*x); + const I y2 = static_cast<I>(y*y); + const I y3 = static_cast<I>(y*y*y); + + *output_ptr = static_cast<O>( \ + a_00 + a_10*x + a_20*x2 + a_30*x3 \ + + a_01*y + a_11*x*y + a_21*x2*y + a_31*x3*y \ + + a_02*y2 + a_12*x*y2 + a_22*x2*y2 + a_32*x3*y2 \ + + a_03*y3 + a_13*x*y3 + a_23*x2*y3 + a_33*x3*y3); + + input_ptr_NC += in_C_s; + output_ptr += out_C_s; + } + } + } + } + + input_ptr_N += in_N_s; + grid_ptr_N += grid_N_s; + output_ptr_N += out_N_s; + } +} + +// Kernels registration to implementation entry point +// only accept 1st input with only 2 spatial feat. 
(nb dims = 2) +REGISTRAR(GridSampleImpl_cpu, + {{{DataType::Any, DataFormat::Any, {{-1, -1}, {-1, -1}}}, {DataType::Any}}, {{DataType::Float16}}}, + {ProdConso::defaultModel, Aidge::GridSampleImpl2D_cpu_forward_kernel<half_float::half, half_float::half>, nullptr}); +REGISTRAR(GridSampleImpl_cpu, + {{{DataType::Any, DataFormat::Any, {{-1, -1}, {-1, -1}}}, {DataType::Any}}, {{DataType::Float32}}}, + {ProdConso::defaultModel, Aidge::GridSampleImpl2D_cpu_forward_kernel<float, float>, nullptr}); +REGISTRAR(GridSampleImpl_cpu, + {{{DataType::Any, DataFormat::Any, {{-1, -1}, {-1, -1}}}, {DataType::Any}}, {{DataType::Float64}}}, + {ProdConso::defaultModel, Aidge::GridSampleImpl2D_cpu_forward_kernel<double, double>, nullptr}); +REGISTRAR(GridSampleImpl_cpu, + {{{DataType::Any, DataFormat::Any, {{-1, -1}, {-1, -1}}}, {DataType::Any}}, {{DataType::Int32}}}, + {ProdConso::defaultModel, Aidge::GridSampleImpl2D_cpu_forward_kernel<int32_t, int32_t>, nullptr}); +} // namespace Aidge + +#endif /* AIDGE_CPU_OPERATOR_CONVIMPL_KERNELS_H_ */ diff --git a/include/aidge/backend/cpu/operator/LeakyReLUImpl.hpp b/include/aidge/backend/cpu/operator/LeakyReLUImpl.hpp index c9ad909eee631189a81067eda076c0b8cbb13377..1e8c1a14435f53ad7a63b327944e0bb8c70c8661 100644 --- a/include/aidge/backend/cpu/operator/LeakyReLUImpl.hpp +++ b/include/aidge/backend/cpu/operator/LeakyReLUImpl.hpp @@ -16,47 +16,26 @@ #include <tuple> #include <vector> -#include "aidge/backend/OperatorImpl.hpp" +#include "aidge/backend/cpu/operator/OperatorImpl.hpp" #include "aidge/operator/LeakyReLU.hpp" #include "aidge/utils/Registrar.hpp" #include "aidge/utils/Types.h" #include "aidge/backend/cpu/data/GetCPUPtr.h" namespace Aidge { -// compute kernel registry for forward and backward -class LeakyReLUImplForward_cpu - : public Registrable<LeakyReLUImplForward_cpu, - std::tuple<DataType, DataType>, - void(const float, - std::size_t, - const void*, - void*)> {}; -class LeakyReLUImplBackward_cpu - : public 
Registrable<LeakyReLUImplBackward_cpu, - std::tuple<DataType, DataType>, - void(const float, - std::size_t, - const void*, - void*)> {}; - -class LeakyReLUImpl_cpu : public OperatorImpl { -public: - LeakyReLUImpl_cpu(const LeakyReLU_Op& op) : OperatorImpl(op, "cpu") {} - - static std::unique_ptr<LeakyReLUImpl_cpu> create(const LeakyReLU_Op& op) { - return std::make_unique<LeakyReLUImpl_cpu>(op); - } - - Elts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final; - - void forward() override final; - - void backward() override final; -}; - -namespace { -static Registrar<LeakyReLU_Op> registrarLeakyReLUImpl_cpu("cpu", Aidge::LeakyReLUImpl_cpu::create); -} +// Operator implementation entry point for the backend +using LeakyReLUImpl_cpu = OperatorImpl_cpu<LeakyReLU_Op, + void(const float, + std::size_t, + const void*, + void*), + void(const float, + std::size_t, + const void*, + void*)>; + +// Implementation entry point registration to Operator +REGISTRAR(LeakyReLU_Op, "cpu", Aidge::LeakyReLUImpl_cpu::create); } // namespace Aidge #endif /* AIDGE_CPU_OPERATOR_LEAKYRELUIMPL_H_ */ diff --git a/include/aidge/backend/cpu/operator/LeakyReLUImpl_backward_kernels.hpp b/include/aidge/backend/cpu/operator/LeakyReLUImpl_backward_kernels.hpp deleted file mode 100644 index e308d940890101ad396c7ed20541bbc4f8b035cf..0000000000000000000000000000000000000000 --- a/include/aidge/backend/cpu/operator/LeakyReLUImpl_backward_kernels.hpp +++ /dev/null @@ -1,45 +0,0 @@ -/******************************************************************************** - * Copyright (c) 2023 CEA-List - * - * This program and the accompanying materials are made available under the - * terms of the Eclipse Public License 2.0 which is available at - * http://www.eclipse.org/legal/epl-2.0. 
- * - * SPDX-License-Identifier: EPL-2.0 - * - ********************************************************************************/ - -#ifndef AIDGE_CPU_OPERATOR_LEAKYRELUIMPL_BACKWARD_KERNEL_H_ -#define AIDGE_CPU_OPERATOR_LEAKYRELUIMPL_BACKWARD_KERNEL_H_ - -#include "aidge/utils/Registrar.hpp" - -#include "aidge/backend/cpu/operator/LeakyReLUImpl.hpp" - -namespace Aidge { -template <class I, class O> -void LeakyReLUImpl_cpu_backward_kernel(const float negativeSlope_, - std::size_t inputLenght, - const void* input_, - void* output_) { - - const I* input = static_cast<const I*>(input_); - O* output = static_cast<O*>(output_); - const I negativeSlope = static_cast<const I>(negativeSlope_); - - for (std::size_t i = 0; i < inputLenght; ++i) { - output[i] = (input[i] > 0) ? input[i] : negativeSlope*input[i]; - } -} - -namespace { -static Registrar<LeakyReLUImplBackward_cpu> registrarLeakyReLUImplBackward_cpu_Float32( - {DataType::Float32, DataType::Float32}, Aidge::LeakyReLUImpl_cpu_backward_kernel<float, float>); -static Registrar<LeakyReLUImplBackward_cpu> registrarLeakyReLUImplBackward_cpu_Int32( - {DataType::Int32, DataType::Int32}, Aidge::LeakyReLUImpl_cpu_backward_kernel<int, int>); -static Registrar<LeakyReLUImplBackward_cpu> registrarLeakyReLUImplBackward_cpu_Float64( - {DataType::Float64, DataType::Float64}, Aidge::LeakyReLUImpl_cpu_backward_kernel<double, double>); -} // namespace -} // namespace Aidge - -#endif /* AIDGE_CPU_OPERATOR_LEAKYRELUIMPL_BACKWARD_KERNEL_H_ */ diff --git a/include/aidge/backend/cpu/operator/LeakyReLUImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/LeakyReLUImpl_forward_kernels.hpp deleted file mode 100644 index 450d0bf4ace4879f90e0104e14b5bf61366e96c2..0000000000000000000000000000000000000000 --- a/include/aidge/backend/cpu/operator/LeakyReLUImpl_forward_kernels.hpp +++ /dev/null @@ -1,45 +0,0 @@ -/******************************************************************************** - * Copyright (c) 2023 CEA-List - * - * This 
program and the accompanying materials are made available under the - * terms of the Eclipse Public License 2.0 which is available at - * http://www.eclipse.org/legal/epl-2.0. - * - * SPDX-License-Identifier: EPL-2.0 - * - ********************************************************************************/ - -#ifndef AIDGE_CPU_OPERATOR_LEAKYRELUIMPL_FORWARD_KERNEL_H_ -#define AIDGE_CPU_OPERATOR_LEAKYRELUIMPL_FORWARD_KERNEL_H_ - -#include "aidge/utils/Registrar.hpp" - -#include "aidge/backend/cpu/operator/LeakyReLUImpl.hpp" - -namespace Aidge { -template <class I, class O> -void LeakyReLUImpl_cpu_forward_kernel(const float negativeSlope_, - std::size_t inputLenght, - const void* input_, - void* output_) { - - const I* input = static_cast<const I*>(input_); - O* output = static_cast<O*>(output_); - const I negativeSlope = static_cast<const I>(negativeSlope_); - - for (std::size_t i = 0; i < inputLenght; ++i) { - output[i] = (input[i] >= 0) ? input[i] : input[i] * negativeSlope; - } -} - -namespace { -static Registrar<LeakyReLUImplForward_cpu> registrarLeakyReLUImplForward_cpu_Float32( - {DataType::Float32, DataType::Float32}, Aidge::LeakyReLUImpl_cpu_forward_kernel<float, float>); -static Registrar<LeakyReLUImplForward_cpu> registrarLeakyReLUImplForward_cpu_Int32( - {DataType::Int32, DataType::Int32}, Aidge::LeakyReLUImpl_cpu_forward_kernel<int, int>); -static Registrar<LeakyReLUImplForward_cpu> registrarLeakyReLUImplForward_cpu_Float64( - {DataType::Float64, DataType::Float64}, Aidge::LeakyReLUImpl_cpu_forward_kernel<double, double>); -} // namespace -} // namespace Aidge - -#endif /* AIDGE_CPU_OPERATOR_LEAKYRELUIMPL_FORWARD_KERNEL_H_ */ diff --git a/include/aidge/backend/cpu/operator/LeakyReLUImpl_kernels.hpp b/include/aidge/backend/cpu/operator/LeakyReLUImpl_kernels.hpp new file mode 100644 index 0000000000000000000000000000000000000000..bc856f703aee8ba422887d43cb96db2132fc4603 --- /dev/null +++ b/include/aidge/backend/cpu/operator/LeakyReLUImpl_kernels.hpp @@ -0,0 
+1,62 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. + * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#ifndef AIDGE_CPU_OPERATOR_LEAKYRELUIMPL_KERNELS_H_ +#define AIDGE_CPU_OPERATOR_LEAKYRELUIMPL_KERNELS_H_ + +#include "aidge/utils/Registrar.hpp" + +#include "aidge/backend/cpu/operator/LeakyReLUImpl.hpp" + +namespace Aidge { +template <class I, class O> +void LeakyReLUImpl_cpu_forward_kernel(const float negativeSlope_, + std::size_t inputLenght, + const void* input_, + void* output_) { + + const I* input = static_cast<const I*>(input_); + O* output = static_cast<O*>(output_); + const I negativeSlope = static_cast<const I>(negativeSlope_); + + for (std::size_t i = 0; i < inputLenght; ++i) { + output[i] = (input[i] >= 0) ? input[i] : input[i] * negativeSlope; + } +} + +template <class I, class O> +void LeakyReLUImpl_cpu_backward_kernel(const float negativeSlope_, + std::size_t inputLenght, + const void* input_, + void* output_) { + + const I* input = static_cast<const I*>(input_); + O* output = static_cast<O*>(output_); + const I negativeSlope = static_cast<const I>(negativeSlope_); + + for (std::size_t i = 0; i < inputLenght; ++i) { + output[i] = (input[i] > 0) ? 
input[i] : negativeSlope*input[i]; + } +} + +// Kernels registration to implementation entry point +REGISTRAR(LeakyReLUImpl_cpu, + {DataType::Float32}, + {ProdConso::inPlaceModel, Aidge::LeakyReLUImpl_cpu_forward_kernel<float, float>, Aidge::LeakyReLUImpl_cpu_backward_kernel<float, float>}); +REGISTRAR(LeakyReLUImpl_cpu, + {DataType::Float64}, + {ProdConso::inPlaceModel, Aidge::LeakyReLUImpl_cpu_forward_kernel<double, double>, Aidge::LeakyReLUImpl_cpu_backward_kernel<double, double>}); +REGISTRAR(LeakyReLUImpl_cpu, + {DataType::Int32}, + {ProdConso::inPlaceModel, Aidge::LeakyReLUImpl_cpu_forward_kernel<int32_t, int32_t>, Aidge::LeakyReLUImpl_cpu_backward_kernel<int32_t, int32_t>}); +} // namespace Aidge + +#endif /* AIDGE_CPU_OPERATOR_LEAKYRELUIMPL_KERNELS_H_ */ diff --git a/include/aidge/backend/cpu/operator/LnImpl.hpp b/include/aidge/backend/cpu/operator/LnImpl.hpp index faa03855a4f881f2a644ebc4023871b7acd6275c..d48a7ae437d9ed1c7769d3628691993c1e9dcb90 100755 --- a/include/aidge/backend/cpu/operator/LnImpl.hpp +++ b/include/aidge/backend/cpu/operator/LnImpl.hpp @@ -12,7 +12,7 @@ #ifndef AIDGE_CPU_OPERATOR_LNIMPL_H_ #define AIDGE_CPU_OPERATOR_LNIMPL_H_ -#include "aidge/backend/OperatorImpl.hpp" +#include "aidge/backend/cpu/operator/OperatorImpl.hpp" #include "aidge/operator/Ln.hpp" #include "aidge/utils/Registrar.hpp" #include "aidge/utils/Types.h" @@ -21,34 +21,13 @@ #include <vector> namespace Aidge { -// class Ln_Op; +// Operator implementation entry point for the backend +using LnImpl_cpu = OperatorImpl_cpu<Ln_Op, + void(const std::size_t, const void*, void*), + void(const std::size_t, const void*, const void*, void*)>; -// compute kernel registry for forward and backward -class LnImplForward_cpu - : public Registrable<LnImplForward_cpu, std::tuple<DataType, DataType>, void(const std::size_t, const void*, void*)> { -}; -class LnImplBackward_cpu - : public Registrable<LnImplBackward_cpu, std::tuple<DataType, DataType, DataType>, void(const std::size_t, const 
void*, const void*, void*)> { -}; - -class LnImpl_cpu : public OperatorImpl { -public: - LnImpl_cpu(const Ln_Op& op) : OperatorImpl(op, "cpu") {} - - static std::unique_ptr<LnImpl_cpu> create(const Ln_Op& op) { - return std::make_unique<LnImpl_cpu>(op); - } - - Elts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final; - - void forward() override final; - - void backward() override final; -}; - -namespace { -static Registrar<Ln_Op> registrarLnImpl_cpu("cpu", Aidge::LnImpl_cpu::create); -} +// Implementation entry point registration to Operator +REGISTRAR(Ln_Op, "cpu", Aidge::LnImpl_cpu::create); } // namespace Aidge #endif /* AIDGE_CPU_OPERATOR_LNIMPL_H_ */ diff --git a/include/aidge/backend/cpu/operator/LnImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/LnImpl_forward_kernels.hpp deleted file mode 100755 index ebb975512a6e7c0f7225c305372f0ec6e7060786..0000000000000000000000000000000000000000 --- a/include/aidge/backend/cpu/operator/LnImpl_forward_kernels.hpp +++ /dev/null @@ -1,47 +0,0 @@ -/******************************************************************************** - * Copyright (c) 2023 CEA-List - * - * This program and the accompanying materials are made available under the - * terms of the Eclipse Public License 2.0 which is available at - * http://www.eclipse.org/legal/epl-2.0. 
- * - * SPDX-License-Identifier: EPL-2.0 - * - ********************************************************************************/ - -#ifndef AIDGE_CPU_OPERATOR_LNIMPL_FORWARD_KERNEL_H_ -#define AIDGE_CPU_OPERATOR_LNIMPL_FORWARD_KERNEL_H_ - -#include "aidge/utils/Registrar.hpp" - -#include "aidge/backend/cpu/operator/LnImpl.hpp" - -namespace Aidge { -template <class I, class O> -void LnImpl_cpu_forward_kernel(std::size_t inputLenght, - const void* input_, - void* output_) { - - const I* input = static_cast<const I*>(input_); - O* output = static_cast<O*>(output_); - const float eps = 1.0e-20f; - -//#pragma omp parallel for if (inputLenght > 1024) - for (std::size_t i = 0; i < inputLenght; ++i) { - if (input[i] > I(eps)) { - output[i] = std::log(input[i]); - } else { - output[i] = std::log(I(eps)); - } - } -} - -namespace { -static Registrar<LnImplForward_cpu> registrarLnImplForward_cpu_Float32( - {DataType::Float32, DataType::Float32}, Aidge::LnImpl_cpu_forward_kernel<float, float>); -static Registrar<LnImplForward_cpu> registrarLnImplForward_cpu_Float64( - {DataType::Float64, DataType::Float64}, Aidge::LnImpl_cpu_forward_kernel<double, double>); -} // namespace -} // namespace Aidge - -#endif /* AIDGE_CPU_OPERATOR_LNIMPL_FORWARD_KERNEL_H_ */ diff --git a/include/aidge/backend/cpu/operator/LnImpl_backward_kernels.hpp b/include/aidge/backend/cpu/operator/LnImpl_kernels.hpp similarity index 50% rename from include/aidge/backend/cpu/operator/LnImpl_backward_kernels.hpp rename to include/aidge/backend/cpu/operator/LnImpl_kernels.hpp index 5fb82e35f8855d9d6e2eb85e9ab380c9f1fc9b90..b30b05bb806de08d4e70c67e66979fb3138980df 100755 --- a/include/aidge/backend/cpu/operator/LnImpl_backward_kernels.hpp +++ b/include/aidge/backend/cpu/operator/LnImpl_kernels.hpp @@ -1,50 +1,67 @@ -/******************************************************************************** - * Copyright (c) 2023 CEA-List - * - * This program and the accompanying materials are made available under the - * 
terms of the Eclipse Public License 2.0 which is available at - * http://www.eclipse.org/legal/epl-2.0. - * - * SPDX-License-Identifier: EPL-2.0 - * - ********************************************************************************/ - -#ifndef AIDGE_CPU_OPERATOR_LNIMPL_BACKWARD_KERNEL_H_ -#define AIDGE_CPU_OPERATOR_LNIMPL_BACKWARD_KERNEL_H_ - -#include <cstddef> // std::size_t - -#include "aidge/backend/cpu/operator/LnImpl.hpp" -#include "aidge/utils/Registrar.hpp" - -namespace Aidge { -template <class I, class GI, class GO> -void LnImpl_cpu_backward_kernel(const std::size_t inputLenght, - const void* input_, const void* grad_output_, - void* grad_input_) { - - const I* input = static_cast<const I*>(input_); - const GO* grad_output = static_cast<const GO*>(grad_output_); - GI* grad_input = static_cast<GI*>(grad_input_); - const float eps = 1.0e-20f; - - for (std::size_t i = 0; i < inputLenght; ++i) { - if (input[i] > I(eps)) { - grad_input[i] = grad_output[i] / input[i]; - } else { - grad_input[i] = GI(0); - } - } -} - -namespace { -static Registrar<LnImplBackward_cpu> registrarLnImplBackward_cpu_Float32( - {DataType::Float32, DataType::Float32, DataType::Float32}, - Aidge::LnImpl_cpu_backward_kernel<float, float, float>); -static Registrar<LnImplBackward_cpu> registrarLnImplBackward_cpu_Float64( - {DataType::Float64, DataType::Float64, DataType::Float64}, - Aidge::LnImpl_cpu_backward_kernel<double, double, double>); -} // namespace -} // namespace Aidge - -#endif /* AIDGE_CPU_OPERATOR_LNIMPL_BACKWARD_KERNEL_H_ */ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. 
+ * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#ifndef AIDGE_CPU_OPERATOR_LNIMPL_KERNELS_H_ +#define AIDGE_CPU_OPERATOR_LNIMPL_KERNELS_H_ + +#include "aidge/utils/Registrar.hpp" + +#include "aidge/backend/cpu/operator/LnImpl.hpp" + +namespace Aidge { +template <class I, class O> +void LnImpl_cpu_forward_kernel(std::size_t inputLenght, + const void* input_, + void* output_) { + + const I* input = static_cast<const I*>(input_); + O* output = static_cast<O*>(output_); + const float eps = 1.0e-20f; + +//#pragma omp parallel for if (inputLenght > 1024) + for (std::size_t i = 0; i < inputLenght; ++i) { + if (input[i] > I(eps)) { + output[i] = std::log(input[i]); + } else { + output[i] = std::log(I(eps)); + } + } +} + +template <class I, class GI, class GO> +void LnImpl_cpu_backward_kernel(const std::size_t inputLenght, + const void* input_, const void* grad_output_, + void* grad_input_) { + + const I* input = static_cast<const I*>(input_); + const GO* grad_output = static_cast<const GO*>(grad_output_); + GI* grad_input = static_cast<GI*>(grad_input_); + const float eps = 1.0e-20f; + + for (std::size_t i = 0; i < inputLenght; ++i) { + if (input[i] > I(eps)) { + grad_input[i] = grad_output[i] / input[i]; + } else { + grad_input[i] = GI(0); + } + } +} + +// Kernels registration to implementation entry point +REGISTRAR(LnImpl_cpu, + {DataType::Float32}, + {ProdConso::inPlaceModel, Aidge::LnImpl_cpu_forward_kernel<float, float>, Aidge::LnImpl_cpu_backward_kernel<float, float, float>}); +REGISTRAR(LnImpl_cpu, + {DataType::Float64}, + {ProdConso::inPlaceModel, Aidge::LnImpl_cpu_forward_kernel<double, double>, Aidge::LnImpl_cpu_backward_kernel<double, double, double>}); +} // namespace Aidge + +#endif /* AIDGE_CPU_OPERATOR_LNIMPL_KERNELS_H_ */ diff --git a/include/aidge/backend/cpu/operator/MatMulImpl.hpp b/include/aidge/backend/cpu/operator/MatMulImpl.hpp index 
e4b76d64baadbcb1baa7d24180c4bb13ed47215b..c07aa5f8ffa62f5fffe3ca02638cc3c66cdaeedb 100644 --- a/include/aidge/backend/cpu/operator/MatMulImpl.hpp +++ b/include/aidge/backend/cpu/operator/MatMulImpl.hpp @@ -16,37 +16,20 @@ #include <memory> #include <vector> -#include "aidge/backend/OperatorImpl.hpp" +#include "aidge/backend/cpu/operator/OperatorImpl.hpp" #include "aidge/operator/MatMul.hpp" #include "aidge/utils/Registrar.hpp" #include "aidge/utils/Types.h" #include "aidge/backend/cpu/data/GetCPUPtr.h" namespace Aidge { +// Operator implementation entry point for the backend +using MatMulImpl_cpu = OperatorImpl_cpu<MatMul_Op, + void(const std::size_t, const std::size_t, const std::size_t, + const void *, const void *, void *)>; -class MatMulImplForward_cpu - : public Registrable<MatMulImplForward_cpu, std::tuple<DataType, DataType>, - void(const std::size_t, const std::size_t, const std::size_t, - const void *, const void *, void *)> {}; -class MatMulImplBackward_cpu - : public Registrable<MatMulImplBackward_cpu, std::tuple<DataType, DataType>, - void(const std::vector<DimSize_t>&, const std::vector<DimSize_t>&, - const void *, const void *, void *)> {}; - -class MatMulImpl_cpu : public OperatorImpl { -public: - MatMulImpl_cpu(const MatMul_Op &op): OperatorImpl(op, "cpu") {} - - static std::unique_ptr<MatMulImpl_cpu> create(const MatMul_Op &op) { - return std::make_unique<MatMulImpl_cpu>(op); - } - - void forward() override; -}; - -namespace { -static Registrar<MatMul_Op> registrarMatMulImpl_cpu("cpu", Aidge::MatMulImpl_cpu::create); -} +// Implementation entry point registration to Operator +REGISTRAR(MatMul_Op, "cpu", Aidge::MatMulImpl_cpu::create); } // namespace Aidge #endif /* AIDGE_CPU_OPERATOR_MATMULIMPL_H_ */ diff --git a/include/aidge/backend/cpu/operator/MatMulImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/MatMulImpl_forward_kernels.hpp deleted file mode 100644 index 
5045580fa599aac64f2c1414bfdf2b87ea57e313..0000000000000000000000000000000000000000 --- a/include/aidge/backend/cpu/operator/MatMulImpl_forward_kernels.hpp +++ /dev/null @@ -1,52 +0,0 @@ -/******************************************************************************** - * Copyright (c) 2023 CEA-List - * - * This program and the accompanying materials are made available under the - * terms of the Eclipse Public License 2.0 which is available at - * http://www.eclipse.org/legal/epl-2.0. - * - * SPDX-License-Identifier: EPL-2.0 - * - ********************************************************************************/ - -#ifndef AIDGE_CPU_OPERATOR_MATMULIMPL_FORWARD_KERNEL_H_ -#define AIDGE_CPU_OPERATOR_MATMULIMPL_FORWARD_KERNEL_H_ - -#include "aidge/backend/cpu/operator/MatMulImpl.hpp" - -namespace Aidge { - -template <class I, class O> -void MatMulImpl_cpu_forward_kernel(const std::size_t n, const std::size_t k, const std::size_t m, - const void* input1_, const void* input2_, void* output_) { - // FIXME: missing MatMul parameters as arguments - const I* input1 = static_cast<const I*>(input1_); - const I* input2 = static_cast<const I*>(input2_); - O* output = static_cast<O*>(output_); - - for (std::size_t i = 0; i < n; ++i) { - for (std::size_t j = 0; j < m; ++j) { - O sum = O(0); - for (std::size_t l = 0; l < k; ++l) { - sum += static_cast<O>(input1[i*k + l] * input2[l*m + j]); - } - output[i*m + j] = sum; - } - } -} - -namespace { -static Registrar<MatMulImplForward_cpu> registrarMatMulImpl2DForward_cpu_Float32( - {DataType::Float32, DataType::Float32}, - Aidge::MatMulImpl_cpu_forward_kernel<float, float>); -static Registrar<MatMulImplForward_cpu> registrarMatMulImpl2DForward_cpu_Int32( - {DataType::Int32, DataType::Int32}, - Aidge::MatMulImpl_cpu_forward_kernel<int, int>); -static Registrar<MatMulImplForward_cpu> registrarMatMulImpl2DForward_cpu_Float64( - {DataType::Float64, DataType::Float64}, - Aidge::MatMulImpl_cpu_forward_kernel<double, double>); -} // namespace 
- -} // namespace Aidge - -#endif /* AIDGE_CPU_OPERATOR_MATMULIMPL_FORWARD_KERNEL_H_ */ diff --git a/include/aidge/backend/cpu/operator/MatMulImpl_kernels.hpp b/include/aidge/backend/cpu/operator/MatMulImpl_kernels.hpp new file mode 100644 index 0000000000000000000000000000000000000000..5fc13baf49b1d0606eb4af5a54eec83fa5dce22a --- /dev/null +++ b/include/aidge/backend/cpu/operator/MatMulImpl_kernels.hpp @@ -0,0 +1,50 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. + * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#ifndef AIDGE_CPU_OPERATOR_MATMULIMPL_KERNELS_H_ +#define AIDGE_CPU_OPERATOR_MATMULIMPL_KERNELS_H_ + +#include "aidge/backend/cpu/operator/MatMulImpl.hpp" + +namespace Aidge { + +template <class I, class O> +void MatMulImpl_cpu_forward_kernel(const std::size_t n, const std::size_t k, const std::size_t m, + const void* input1_, const void* input2_, void* __restrict output_) { + // FIXME: missing MatMul parameters as arguments + const I* input1 = static_cast<const I*>(input1_); + const I* input2 = static_cast<const I*>(input2_); + O* __restrict output = static_cast<O* __restrict>(output_); + + std::memset(output, O(0), n * m * sizeof(O)); + + for (std::size_t i = 0; i < n; ++i) { + for (std::size_t l = 0; l < k; ++l) { + for (std::size_t j = 0; j < m; ++j) { + output[i*m + j] += static_cast<O>(input1[i*k + l] * input2[l*m + j]); + } + } + } +} + +// Kernels registration to implementation entry point +REGISTRAR(MatMulImpl_cpu, + {DataType::Float32}, + {ProdConso::defaultModel, Aidge::MatMulImpl_cpu_forward_kernel<float, float>, nullptr}); +REGISTRAR(MatMulImpl_cpu, + {DataType::Float64}, + {ProdConso::defaultModel, 
Aidge::MatMulImpl_cpu_forward_kernel<double, double>, nullptr}); +REGISTRAR(MatMulImpl_cpu, + {DataType::Int32}, + {ProdConso::defaultModel, Aidge::MatMulImpl_cpu_forward_kernel<int32_t, int32_t>, nullptr}); +} // namespace Aidge + +#endif /* AIDGE_CPU_OPERATOR_MATMULIMPL_KERNELS_H_ */ diff --git a/include/aidge/backend/cpu/operator/MaxPoolingImpl.hpp b/include/aidge/backend/cpu/operator/MaxPoolingImpl.hpp index 4dd30e1fb939837f6861313eda04d7d05f3c8110..68cc3621514de97d9837e10bcf90218abe559aaa 100644 --- a/include/aidge/backend/cpu/operator/MaxPoolingImpl.hpp +++ b/include/aidge/backend/cpu/operator/MaxPoolingImpl.hpp @@ -17,51 +17,25 @@ #include <tuple> #include <vector> -#include "aidge/backend/OperatorImpl.hpp" +#include "aidge/backend/cpu/operator/OperatorImpl.hpp" #include "aidge/operator/MaxPooling.hpp" #include "aidge/utils/Registrar.hpp" #include "aidge/utils/Types.h" #include "aidge/backend/cpu/data/GetCPUPtr.h" namespace Aidge { -// class MaxPooling_Op; - -// compute kernel registry for forward and backward -class MaxPoolingImpl2DForward_cpu - : public Registrable<MaxPoolingImpl2DForward_cpu, - std::tuple<DataType, DataType>, - void(const std::array<DimSize_t, 2>&, - const std::array<DimSize_t, 2>&, - const bool, - const std::array<DimSize_t, 4> &, - const void *, - void *)> {}; -class MaxPoolingImpl2DBackward_cpu - : public Registrable<MaxPoolingImpl2DBackward_cpu, - std::tuple<DataType, DataType>, - void(const std::array<DimSize_t, 2>&, +// Operator implementation entry point for the backend +using MaxPooling2D_Op = MaxPooling_Op<2>; +using MaxPoolingImpl2D_cpu = OperatorImpl_cpu<MaxPooling_Op<2>, + void(const std::array<DimSize_t, 2>&, const std::array<DimSize_t, 2>&, const bool, const std::array<DimSize_t, 4> &, const void *, - void *)> {}; - -class MaxPoolingImpl2D_cpu : public OperatorImpl { -public: - MaxPoolingImpl2D_cpu(const MaxPooling_Op<2> &op) : OperatorImpl(op, "cpu") {} - - static std::unique_ptr<MaxPoolingImpl2D_cpu> create(const 
MaxPooling_Op<2> &op) { - return std::make_unique<MaxPoolingImpl2D_cpu>(op); - } - - Elts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final; - void forward() override; -}; + void *)>; -namespace { -// add cpu backend to MaxPooling_Op<2> implementation registry -static Registrar<MaxPooling_Op<2>> registrarMaxPoolingImpl2D_cpu("cpu", Aidge::MaxPoolingImpl2D_cpu::create); -} // namespace +// Implementation entry point registration to Operator +REGISTRAR(MaxPooling2D_Op, "cpu", Aidge::MaxPoolingImpl2D_cpu::create); } // namespace Aidge #endif /* AIDGE_CPU_OPERATOR_MaxPOOLINGIMPL_H_ */ diff --git a/include/aidge/backend/cpu/operator/MaxPoolingImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/MaxPoolingImpl_kernels.hpp similarity index 91% rename from include/aidge/backend/cpu/operator/MaxPoolingImpl_forward_kernels.hpp rename to include/aidge/backend/cpu/operator/MaxPoolingImpl_kernels.hpp index 79a7bd154f4d4e19a71d719597992466c37c6a9f..7b6f04f141eb701849a8d436561bcf9e37471cfa 100644 --- a/include/aidge/backend/cpu/operator/MaxPoolingImpl_forward_kernels.hpp +++ b/include/aidge/backend/cpu/operator/MaxPoolingImpl_kernels.hpp @@ -9,8 +9,8 @@ * ********************************************************************************/ -#ifndef AIDGE_CPU_OPERATOR_MaxPOOLINGIMPL_FORWARD_KERNEL_H_ -#define AIDGE_CPU_OPERATOR_MaxPOOLINGIMPL_FORWARD_KERNEL_H_ +#ifndef AIDGE_CPU_OPERATOR_MaxPOOLINGIMPL_KERNELS_H_ +#define AIDGE_CPU_OPERATOR_MaxPOOLINGIMPL_KERNELS_H_ #include <array> #include <cmath> @@ -199,17 +199,16 @@ void N2D2::PoolCell_Frame_Kernels::forwardMax(const T* alpha, */ -namespace { -static Registrar<MaxPoolingImpl2DForward_cpu> registrarMaxPoolingImpl2DForward_cpu_Float32( - std::tuple<DataType, DataType>({DataType::Float32, DataType::Float32}), - Aidge::MaxPoolingImpl2D_cpu_forward_kernel<float, float>); -static Registrar<MaxPoolingImpl2DForward_cpu> registrarMaxPoolingImpl2DForward_cpu_Int32( - {DataType::Int32, DataType::Int32}, - 
Aidge::MaxPoolingImpl2D_cpu_forward_kernel<int, int>); -static Registrar<MaxPoolingImpl2DForward_cpu> registrarMaxPoolingImpl2DForward_cpu_Float64( - {DataType::Float64, DataType::Float64}, - Aidge::MaxPoolingImpl2D_cpu_forward_kernel<double, double>); -} // namespace +// Kernels registration to implementation entry point +REGISTRAR(MaxPoolingImpl2D_cpu, + {DataType::Float32}, + {ProdConso::inPlaceModel, Aidge::MaxPoolingImpl2D_cpu_forward_kernel<float, float>, nullptr}); +REGISTRAR(MaxPoolingImpl2D_cpu, + {DataType::Float64}, + {ProdConso::inPlaceModel, Aidge::MaxPoolingImpl2D_cpu_forward_kernel<double, double>, nullptr}); +REGISTRAR(MaxPoolingImpl2D_cpu, + {DataType::Int32}, + {ProdConso::inPlaceModel, Aidge::MaxPoolingImpl2D_cpu_forward_kernel<int32_t, int32_t>, nullptr}); } // namespace Aidge -#endif /* AIDGE_CPU_OPERATOR_MaxPOOLINGIMPL_FORWARD_KERNEL_H_ */ +#endif /* AIDGE_CPU_OPERATOR_MaxPOOLINGIMPL_KERNELS_H_ */ diff --git a/include/aidge/backend/cpu/operator/MulImpl.hpp b/include/aidge/backend/cpu/operator/MulImpl.hpp index 2d42194c417bd7d57c00f4325a4585cf59d95b24..05fceba17471229d83d9f8738614b2e747121b49 100644 --- a/include/aidge/backend/cpu/operator/MulImpl.hpp +++ b/include/aidge/backend/cpu/operator/MulImpl.hpp @@ -12,7 +12,7 @@ #ifndef AIDGE_CPU_OPERATOR_MULIMPL_H_ #define AIDGE_CPU_OPERATOR_MULIMPL_H_ -#include "aidge/backend/OperatorImpl.hpp" +#include "aidge/backend/cpu/operator/OperatorImpl.hpp" #include "aidge/operator/Mul.hpp" #include "aidge/utils/Registrar.hpp" #include "aidge/utils/Types.h" @@ -21,31 +21,27 @@ #include <vector> namespace Aidge { -// class Mul_Op; - -// compute kernel registry for forward and backward -class MulImplForward_cpu - : public Registrable<MulImplForward_cpu, std::tuple<DataType, DataType, DataType>, void(const std::vector<std::size_t>&, const std::vector<std::size_t>&, const std::vector<std::size_t>&, const void*, const void*,void*)> { -}; -class MulImplBackward_cpu - : public Registrable<MulImplBackward_cpu, 
std::tuple<DataType, DataType, DataType>, void(const std::vector<std::size_t>&, const std::vector<std::size_t>&, const std::vector<std::size_t>&, const void*, const void*, void*)> { -}; - -class MulImpl_cpu : public OperatorImpl { -public: - MulImpl_cpu(const Mul_Op& op) : OperatorImpl(op, "cpu") {} - - static std::unique_ptr<MulImpl_cpu> create(const Mul_Op& op) { - return std::make_unique<MulImpl_cpu>(op); - } - - Elts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final; - void forward() override; -}; - -namespace { -static Registrar<Mul_Op> registrarMulImpl_cpu("cpu", Aidge::MulImpl_cpu::create); -} +// Operator implementation entry point for the backend +using MulImpl_cpu = OperatorImpl_cpu<Mul_Op, + void(const std::vector<std::size_t>&, + const std::vector<std::size_t>&, + const std::vector<std::size_t>&, + const void*, + const void*, + void*), + void(const std::size_t, + const std::size_t, + const std::size_t, + const std::vector<std::size_t>, + const std::vector<std::size_t>, + const void*, + const void*, + const void*, + void*, + void*)>; + +// Implementation entry point registration to Operator +REGISTRAR(Mul_Op, "cpu", Aidge::MulImpl_cpu::create); } // namespace Aidge #endif /* AIDGE_CPU_OPERATOR_MULIMPL_H_ */ diff --git a/include/aidge/backend/cpu/operator/MulImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/MulImpl_forward_kernels.hpp deleted file mode 100644 index c44199ba4797682362f4a7cb223435d6d1585443..0000000000000000000000000000000000000000 --- a/include/aidge/backend/cpu/operator/MulImpl_forward_kernels.hpp +++ /dev/null @@ -1,67 +0,0 @@ -/******************************************************************************** - * Copyright (c) 2023 CEA-List - * - * This program and the accompanying materials are made available under the - * terms of the Eclipse Public License 2.0 which is available at - * http://www.eclipse.org/legal/epl-2.0. 
- * - * SPDX-License-Identifier: EPL-2.0 - * - ********************************************************************************/ - -#ifndef AIDGE_CPU_OPERATOR_MULIMPL_FORWARD_KERNEL_H_ -#define AIDGE_CPU_OPERATOR_MULIMPL_FORWARD_KERNEL_H_ - -#include "aidge/utils/Registrar.hpp" - -#include <cstdint> // std::int32_t, std::int64_t - -#include "aidge/backend/cpu/data/Broadcasting.hpp" -#include "aidge/backend/cpu/operator/MulImpl.hpp" - -namespace Aidge { -template <class I1, class I2, class O> -void MulImpl_cpu_forward_kernel(const std::vector<std::size_t>& input1Dims, - const std::vector<std::size_t>& input2Dims, - const std::vector<std::size_t>& outputDims, - const void* input1_, - const void* input2_, - void* output_) { - - const I1* input_1 = static_cast<const I1*>(input1_); - const I2* input_2 = static_cast<const I2*>(input2_); - O* output = static_cast<O*>(output_); - - size_t totalElements = 1; - for (size_t dimSize : outputDims) { - totalElements *= dimSize; - } - - for (std::size_t oIndex = 0; oIndex < totalElements; ++oIndex) - { - std::vector<size_t> indexes = getMultiDimIndices(outputDims, oIndex); - - std::size_t idx1 = getFlattenedIndex(input1Dims, indexes); - std::size_t idx2 = getFlattenedIndex(input2Dims, indexes); - - output[oIndex] = input_1[idx1] * input_2[idx2]; - } -} - -namespace { -static Registrar<MulImplForward_cpu> registrarMulImplForward_cpu_Float32( - {DataType::Float32, DataType::Float32, DataType::Float32}, - Aidge::MulImpl_cpu_forward_kernel<float, float, float>); -static Registrar<MulImplForward_cpu> registrarMulImplForward_cpu_Float64( - {DataType::Float64, DataType::Float64, DataType::Float64}, - Aidge::MulImpl_cpu_forward_kernel<double, double, double>); -static Registrar<MulImplForward_cpu> registrarMulImplForward_cpu_Int32( - {DataType::Int32, DataType::Int32, DataType::Int32}, - Aidge::MulImpl_cpu_forward_kernel<std::int32_t, std::int32_t, std::int32_t>); -static Registrar<MulImplForward_cpu> registrarMulImplForward_cpu_Int64( - 
{DataType::Int64, DataType::Int64, DataType::Int64}, - Aidge::MulImpl_cpu_forward_kernel<std::int64_t, std::int64_t, std::int64_t>); -} // namespace -} // namespace Aidge - -#endif /* AIDGE_CPU_OPERATOR_MULIMPL_FORWARD_KERNEL_H_ */ diff --git a/include/aidge/backend/cpu/operator/MulImpl_kernels.hpp b/include/aidge/backend/cpu/operator/MulImpl_kernels.hpp new file mode 100644 index 0000000000000000000000000000000000000000..c015b8f0182608fecd3da94220e9411decfd186c --- /dev/null +++ b/include/aidge/backend/cpu/operator/MulImpl_kernels.hpp @@ -0,0 +1,126 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. + * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#ifndef AIDGE_CPU_OPERATOR_MULIMPL_KERNELS_H_ +#define AIDGE_CPU_OPERATOR_MULIMPL_KERNELS_H_ + +#include "aidge/utils/Registrar.hpp" + +#include <cstdint> // std::int32_t, std::int64_t + +#include "aidge/backend/cpu/data/Broadcasting.hpp" +#include "aidge/backend/cpu/operator/MulImpl.hpp" + +namespace Aidge { +template <class I1, class I2, class O> +void MulImpl_cpu_forward_kernel(const std::vector<std::size_t>& input1Dims, + const std::vector<std::size_t>& input2Dims, + const std::vector<std::size_t>& outputDims, + const void* input1_, + const void* input2_, + void* output_) { + + const I1* input_1 = static_cast<const I1*>(input1_); + const I2* input_2 = static_cast<const I2*>(input2_); + O* output = static_cast<O*>(output_); + + size_t totalElements = 1; + for (size_t dimSize : outputDims) { + totalElements *= dimSize; + } + + for (std::size_t oIndex = 0; oIndex < totalElements; ++oIndex) + { + std::vector<size_t> indexes = getMultiDimIndices(outputDims, oIndex); + + std::size_t idx1 = 
getFlattenedIndex(input1Dims, indexes); + std::size_t idx2 = getFlattenedIndex(input2Dims, indexes); + + output[oIndex] = input_1[idx1] * input_2[idx2]; + } +} + +template <class I1, class I2, class O> +void MulImpl_cpu_backward_kernel(const std::size_t input0Length, + const std::size_t input1Length, + const std::size_t grad0Length, + const std::vector<std::size_t> input0Dims, + const std::vector<std::size_t> input1Dims, + const void* input0_, + const void* input1_, + const void* grad_output_, + void* gradientInput0, + void* gradientInput1) +{ + const auto* input0 = static_cast<const I1*>(input0_); + const auto* input1 = static_cast<const I1*>(input1_); + const auto* grad_output = static_cast<const O*>(grad_output_); + auto* grad_input_0 = static_cast<I1*>(gradientInput0); + auto* grad_input_1 = static_cast<I2*>(gradientInput1); + + + if(input0Dims.size() >= input1Dims.size()) + { + AIDGE_ASSERT(input0Length == grad0Length, "Incorrect dimensions between Mul input and output tensors"); + + for(auto i = 0U; i < input0Length; ++i) + { + const auto indices = getMultiDimIndices(input1Dims, i); + const auto flattenedIndex = getFlattenedIndex(input1Dims, indices); + + grad_input_0[i] = input1[flattenedIndex] * grad_output[i]; + } + + for(std::size_t i = 0 ; i < grad0Length; ++i) + { + const auto indices = getMultiDimIndices(input1Dims, i); + const auto flattenedIndex = getFlattenedIndex(input1Dims, indices); + + grad_input_1[flattenedIndex] += input0[i] * grad_output[i]; + } + + } else { + AIDGE_ASSERT(input1Length == grad0Length, "Incorrect dimensions between Mul input and output tensors"); + + for(auto i = 0U; i < input1Length; ++i) + { + const auto indices = getMultiDimIndices(input0Dims, i); + const auto flattenedIndex = getFlattenedIndex(input0Dims, indices); + + grad_input_1[i] = input0[flattenedIndex] * grad_output[i]; + } + + for(std::size_t i = 0 ; i < grad0Length; ++i) + { + const auto indices = getMultiDimIndices(input0Dims, i); + const auto flattenedIndex = 
getFlattenedIndex(input0Dims, indices); + + grad_input_0[flattenedIndex] += input1[i] * grad_output[i]; + } + } +} + +// Kernels registration to implementation entry point +REGISTRAR(MulImpl_cpu, + {DataType::Float32}, + {ProdConso::inPlaceModel, Aidge::MulImpl_cpu_forward_kernel<float, float, float>, Aidge::MulImpl_cpu_backward_kernel<float, float, float>}); +REGISTRAR(MulImpl_cpu, + {DataType::Float64}, + {ProdConso::inPlaceModel, Aidge::MulImpl_cpu_forward_kernel<double, double, double>, Aidge::MulImpl_cpu_backward_kernel<double, double, double>}); +REGISTRAR(MulImpl_cpu, + {DataType::Int32}, + {ProdConso::inPlaceModel, Aidge::MulImpl_cpu_forward_kernel<std::int32_t, std::int32_t, std::int32_t>, Aidge::MulImpl_cpu_backward_kernel<std::int32_t, std::int32_t, std::int32_t>}); +REGISTRAR(MulImpl_cpu, + {DataType::Int64}, + {ProdConso::inPlaceModel, Aidge::MulImpl_cpu_forward_kernel<std::int64_t, std::int64_t, std::int64_t>, Aidge::MulImpl_cpu_backward_kernel<std::int64_t, std::int64_t, std::int64_t>}); +} // namespace Aidge + +#endif /* AIDGE_CPU_OPERATOR_MULIMPL_KERNELS_H_ */ diff --git a/include/aidge/backend/cpu/operator/OperatorImpl.hpp b/include/aidge/backend/cpu/operator/OperatorImpl.hpp new file mode 100644 index 0000000000000000000000000000000000000000..abf94ab9069a07e8f87819cb29c027b1adbfd9c6 --- /dev/null +++ b/include/aidge/backend/cpu/operator/OperatorImpl.hpp @@ -0,0 +1,50 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. 
+ * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#ifndef AIDGE_CPU_OPERATOR_IMPL_H_ +#define AIDGE_CPU_OPERATOR_IMPL_H_ + +#include <cstddef> // std::size_t +#include <memory> +#include <tuple> // std::tuple +#include <vector> + +#include "aidge/backend/OperatorImpl.hpp" +#include "aidge/utils/Registrar.hpp" +#include "aidge/utils/Types.h" + +namespace Aidge { +template <class Op, class FwdFunc, class BwdFunc = void()> +class OperatorImpl_cpu : public OperatorImpl, + public Registrable<OperatorImpl_cpu<Op, FwdFunc, BwdFunc>, ImplSpec, Impl<FwdFunc, BwdFunc>> +{ +public: + OperatorImpl_cpu(const Op& op) : OperatorImpl(op, "cpu") {} + + static std::unique_ptr<OperatorImpl_cpu<Op, FwdFunc, BwdFunc>> create(const Op& op) { + return std::make_unique<OperatorImpl_cpu<Op, FwdFunc, BwdFunc>>(op); + } + + virtual std::shared_ptr<ProdConso> getProdConso() const override { + const auto impl = Registrar<OperatorImpl_cpu>::create(getBestMatch(getRequiredSpec())); + return impl.prodConso(mOp); + } + + virtual std::set<ImplSpec> getAvailableImplSpecs() const override { + return Registrar<OperatorImpl_cpu>::getKeys(); + } + + void forward() override; + void backward() override; +}; +} // namespace Aidge + +#endif /* AIDGE_CPU_OPERATOR_IMPL_H_ */ diff --git a/include/aidge/backend/cpu/operator/PadImpl.hpp b/include/aidge/backend/cpu/operator/PadImpl.hpp index c6e41c29fd203fdd80b2acb9ad0dfcac91a0f66c..bc0bd8cad3b630b89f728d78b59652f31bbcf410 100644 --- a/include/aidge/backend/cpu/operator/PadImpl.hpp +++ b/include/aidge/backend/cpu/operator/PadImpl.hpp @@ -17,79 +17,46 @@ #include <tuple> #include <vector> -#include "aidge/backend/OperatorImpl.hpp" +#include "aidge/backend/cpu/operator/OperatorImpl.hpp" #include "aidge/operator/Pad.hpp" #include "aidge/utils/Registrar.hpp" #include "aidge/utils/Types.h" #include "aidge/backend/cpu/data/GetCPUPtr.h" namespace Aidge { -// class Pad_Op; -// compute 
kernel registry for forward and backward -class PadImpl1DForward_cpu - : public Registrable<PadImpl1DForward_cpu, - std::tuple<DataType, DataType>, - void(const std::array<DimSize_t, 2>&, - const PadBorderType, - const double, - const std::array<DimSize_t, 3> &, - const void *, - void *)> {}; - -class PadImpl1D_cpu : public OperatorImpl { +class Pad_ProdConso_cpu : public ProdConso { public: - PadImpl1D_cpu(const Pad_Op<1> &op) : OperatorImpl(op, "cpu") {} + Pad_ProdConso_cpu(const Operator& op): ProdConso(op) {} - static std::unique_ptr<PadImpl1D_cpu> create(const Pad_Op<1> &op) { - return std::make_unique<PadImpl1D_cpu>(op); + static std::unique_ptr<ProdConso> defaultModel(const Operator& op) { + return std::make_unique<Pad_ProdConso_cpu>(op); } Elts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final; - void forward() override; }; -namespace { -// add cpu backend to Pad_Op<1> implementation registry -static Registrar<Pad_Op<1>> registrarPadImpl1D_cpu("cpu", Aidge::PadImpl1D_cpu::create); -} // namespace - - -// compute kernel registry for forward and backward -class PadImpl2DForward_cpu - : public Registrable<PadImpl2DForward_cpu, - std::tuple<DataType, DataType>, - void(const std::array<DimSize_t, 4>&, +// Operator implementation entry point for the backend +using Pad1D_Op = Pad_Op<1>; +using PadImpl1D_cpu = OperatorImpl_cpu<Pad_Op<1>, + void(const std::array<DimSize_t, 2>&, const PadBorderType, const double, - const std::array<DimSize_t, 4> &, + const std::array<DimSize_t, 3> &, const void *, - void *)> {}; -class PadImpl2DBackward_cpu - : public Registrable<PadImpl2DBackward_cpu, - std::tuple<DataType, DataType>, - void(const std::array<DimSize_t, 4>&, + void *)>; + +using Pad2D_Op = Pad_Op<2>; +using PadImpl2D_cpu = OperatorImpl_cpu<Pad_Op<2>, + void(const std::array<DimSize_t, 4>&, const PadBorderType, const double, const std::array<DimSize_t, 4> &, const void *, - void *)> {}; - -class PadImpl2D_cpu : public OperatorImpl { -public: - 
PadImpl2D_cpu(const Pad_Op<2> &op) : OperatorImpl(op, "cpu") {} - - static std::unique_ptr<PadImpl2D_cpu> create(const Pad_Op<2> &op) { - return std::make_unique<PadImpl2D_cpu>(op); - } - - Elts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final; - void forward() override; -}; + void *)>; -namespace { -// add cpu backend to Pad_Op<2> implementation registry -static Registrar<Pad_Op<2>> registrarPadImpl2D_cpu("cpu", Aidge::PadImpl2D_cpu::create); -} // namespace +// Implementation entry point registration to Operator +REGISTRAR(Pad1D_Op, "cpu", Aidge::PadImpl1D_cpu::create); +REGISTRAR(Pad2D_Op, "cpu", Aidge::PadImpl2D_cpu::create); } // namespace Aidge #endif /* AIDGE_CPU_OPERATOR_PADIMPL_H_ */ diff --git a/include/aidge/backend/cpu/operator/PadImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/PadImpl_kernels.hpp similarity index 78% rename from include/aidge/backend/cpu/operator/PadImpl_forward_kernels.hpp rename to include/aidge/backend/cpu/operator/PadImpl_kernels.hpp index 26c873c8fe7f140b09b31d0f1a9d4125acbcf50f..a362be0944aa18c36dd74a2f0066aaa21a1fc4c0 100644 --- a/include/aidge/backend/cpu/operator/PadImpl_forward_kernels.hpp +++ b/include/aidge/backend/cpu/operator/PadImpl_kernels.hpp @@ -9,8 +9,8 @@ * ********************************************************************************/ -#ifndef AIDGE_CPU_OPERATOR_PADIMPL_FORWARD_KERNEL_H_ -#define AIDGE_CPU_OPERATOR_PADIMPL_FORWARD_KERNEL_H_ +#ifndef AIDGE_CPU_OPERATOR_PADIMPL_KERNELS_H_ +#define AIDGE_CPU_OPERATOR_PADIMPL_KERNELS_H_ #include <algorithm> // std::max, std::min #include <array> @@ -88,17 +88,16 @@ void PadImpl1D_cpu_forward_kernel(const std::array<DimSize_t, 2>& beginEndBorder } } -namespace { -static Registrar<PadImpl1DForward_cpu> registrarPadImpl1DForward_cpu_Float32( - {DataType::Float32, DataType::Float32}, - PadImpl1D_cpu_forward_kernel<cpptype_t<DataType::Float32>, cpptype_t<DataType::Float32>>); -static Registrar<PadImpl1DForward_cpu> 
registrarPadImpl1DForward_cpu_Int32( - {DataType::Int32, DataType::Int32}, - PadImpl1D_cpu_forward_kernel<cpptype_t<DataType::Int32>, cpptype_t<DataType::Int32>>); -static Registrar<PadImpl1DForward_cpu> registrarPadImpl1DForward_cpu_Float64( - {DataType::Float64, DataType::Float64}, - PadImpl1D_cpu_forward_kernel<cpptype_t<DataType::Float64>, cpptype_t<DataType::Float64>>); -} // namespace +// Kernels registration to implementation entry point +REGISTRAR(PadImpl1D_cpu, + {{DataType::Float32, DataFormat::NCHW}, {DataType::Float32, DataFormat::NCHW}}, + {Pad_ProdConso_cpu::defaultModel, Aidge::PadImpl1D_cpu_forward_kernel<cpptype_t<DataType::Float32>, cpptype_t<DataType::Float32>>, nullptr}); +REGISTRAR(PadImpl1D_cpu, + {{DataType::Float64, DataFormat::NCHW}, {DataType::Float64, DataFormat::NCHW}}, + {Pad_ProdConso_cpu::defaultModel, Aidge::PadImpl1D_cpu_forward_kernel<cpptype_t<DataType::Float64>, cpptype_t<DataType::Float64>>, nullptr}); +REGISTRAR(PadImpl1D_cpu, + {{DataType::Int32, DataFormat::NCHW}, {DataType::Int32, DataFormat::NCHW}}, + {Pad_ProdConso_cpu::defaultModel, Aidge::PadImpl1D_cpu_forward_kernel<cpptype_t<DataType::Int32>, cpptype_t<DataType::Int32>>, nullptr}); /** @@ -131,7 +130,7 @@ void PadImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 4>& beginEndBorder for (std::uint32_t oy = 0; oy < oySize; ++oy) { for (std::uint32_t ox = 0; ox < oxSize; ++ox) { - const std::size_t oIndexFull = oIndex + ox*oySize + oy; + const std::size_t oIndexFull = oIndex + oy*oxSize + ox; O outputValue = static_cast<O>(borderValue); @@ -140,14 +139,14 @@ void PadImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 4>& beginEndBorder std::int32_t iy = static_cast<std::int32_t>(oy) - static_cast<std::int32_t>(beginEndBorders[1]); if (ix >= 0 && ix < static_cast<std::int32_t>(dims[3]) && iy >= 0 && iy < static_cast<std::int32_t>(dims[2])) { - outputValue = input[iIndex + static_cast<std::size_t>(ix)*dims[2] + static_cast<std::size_t>(iy)]; + outputValue = 
input[iIndex + static_cast<std::size_t>(iy)*dims[3] + static_cast<std::size_t>(ix)]; } } else if (borderType == PadBorderType::Edge) { std::int32_t ix = std::max(0, std::min(static_cast<std::int32_t>(dims[3]) - 1, static_cast<std::int32_t>(ox) - static_cast<std::int32_t>(beginEndBorders[3]))); std::int32_t iy = std::max(0, std::min(static_cast<std::int32_t>(dims[2]) - 1, static_cast<std::int32_t>(oy) - static_cast<std::int32_t>(beginEndBorders[1]))); - outputValue = input[iIndex + static_cast<std::size_t>(ix)*dims[2] + static_cast<std::size_t>(iy)]; + outputValue = input[iIndex + static_cast<std::size_t>(iy)*dims[3] + static_cast<std::size_t>(ix)]; } else if (borderType == PadBorderType::Reflect) { std::int32_t ix = static_cast<std::int32_t>(ox) - static_cast<std::int32_t>(beginEndBorders[3]); @@ -162,13 +161,13 @@ void PadImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 4>& beginEndBorder if (iy >= static_cast<std::int32_t>(dims[2])) iy = static_cast<std::int32_t>(dims[2]) - iy; - outputValue = input[iIndex + static_cast<std::size_t>(ix)*dims[2] + static_cast<std::size_t>(iy)]; + outputValue = input[iIndex + static_cast<std::size_t>(iy)*dims[3] + static_cast<std::size_t>(ix)]; } else if (borderType == PadBorderType::Wrap) { std::int32_t ix = (static_cast<std::int32_t>(dims[3]) + static_cast<std::int32_t>(ox) - static_cast<std::int32_t>(beginEndBorders[3])) % static_cast<std::int32_t>(dims[3]); std::int32_t iy = (static_cast<std::int32_t>(dims[2]) + static_cast<std::int32_t>(oy) - static_cast<std::int32_t>(beginEndBorders[1])) % static_cast<std::int32_t>(dims[2]); - outputValue = input[iIndex + static_cast<std::size_t>(ix)*dims[2] + static_cast<std::size_t>(iy)]; + outputValue = input[iIndex + static_cast<std::size_t>(iy)*dims[3] + static_cast<std::size_t>(ix)]; } output[oIndexFull] = outputValue; @@ -178,17 +177,16 @@ void PadImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 4>& beginEndBorder } } -namespace { -static Registrar<PadImpl2DForward_cpu> 
registrarPadImpl2DForward_cpu_Float32( - {DataType::Float32, DataType::Float32}, - Aidge::PadImpl2D_cpu_forward_kernel<float, float>); -static Registrar<PadImpl2DForward_cpu> registrarPadImpl2DForward_cpu_Int32( - {DataType::Int32, DataType::Int32}, - Aidge::PadImpl2D_cpu_forward_kernel<std::int32_t, std::int32_t>); -static Registrar<PadImpl2DForward_cpu> registrarPadImpl2DForward_cpu_Float64( - {DataType::Float64, DataType::Float64}, - Aidge::PadImpl2D_cpu_forward_kernel<double, double>); -} // namespace +// Kernels registration to implementation entry point +REGISTRAR(PadImpl2D_cpu, + {{DataType::Float32, DataFormat::NCHW}, {DataType::Float32, DataFormat::NCHW}}, + {Pad_ProdConso_cpu::defaultModel, Aidge::PadImpl2D_cpu_forward_kernel<cpptype_t<DataType::Float32>, cpptype_t<DataType::Float32>>, nullptr}); +REGISTRAR(PadImpl2D_cpu, + {{DataType::Float64, DataFormat::NCHW}, {DataType::Float64, DataFormat::NCHW}}, + {Pad_ProdConso_cpu::defaultModel, Aidge::PadImpl2D_cpu_forward_kernel<cpptype_t<DataType::Float64>, cpptype_t<DataType::Float64>>, nullptr}); +REGISTRAR(PadImpl2D_cpu, + {{DataType::Int32, DataFormat::NCHW}, {DataType::Int32, DataFormat::NCHW}}, + {Pad_ProdConso_cpu::defaultModel, Aidge::PadImpl2D_cpu_forward_kernel<cpptype_t<DataType::Int32>, cpptype_t<DataType::Int32>>, nullptr}); } // namespace Aidge -#endif /* AIDGE_CPU_OPERATOR_PADIMPL_FORWARD_KERNEL_H_ */ +#endif /* AIDGE_CPU_OPERATOR_PADIMPL_KERNELS_H_ */ diff --git a/include/aidge/backend/cpu/operator/PowImpl.hpp b/include/aidge/backend/cpu/operator/PowImpl.hpp index 514e63af5ae5d1d1d00f7f328f5367df2bfa163d..cfbb8173d1f83162519016a8f2b3c3166977a5b7 100644 --- a/include/aidge/backend/cpu/operator/PowImpl.hpp +++ b/include/aidge/backend/cpu/operator/PowImpl.hpp @@ -12,7 +12,7 @@ #ifndef AIDGE_CPU_OPERATOR_POWIMPL_H_ #define AIDGE_CPU_OPERATOR_POWIMPL_H_ -#include "aidge/backend/OperatorImpl.hpp" +#include "aidge/backend/cpu/operator/OperatorImpl.hpp" #include "aidge/operator/Pow.hpp" #include 
"aidge/utils/Registrar.hpp" #include "aidge/utils/Types.h" @@ -21,32 +21,14 @@ #include <vector> namespace Aidge { -// class Pow_Op; +// Operator implementation entry point for the backend +using PowImpl_cpu = OperatorImpl_cpu<Pow_Op, + void(const std::vector<std::size_t>&, const std::vector<std::size_t>&, const std::vector<std::size_t>&, const void*, const void*,void*), + void(const std::vector<std::size_t>&, const std::vector<std::size_t>&, const std::vector<std::size_t>&, const void*, const void*, const void*, void*, void*)>; -// compute kernel registry for forward and backward -class PowImplForward_cpu - : public Registrable<PowImplForward_cpu, std::tuple<DataType, DataType, DataType>, void(const std::vector<std::size_t>&, const std::vector<std::size_t>&, const std::vector<std::size_t>&, const void*, const void*,void*)> { -}; -class PowImplBackward_cpu - : public Registrable<PowImplBackward_cpu, std::tuple<DataType, DataType, DataType>, void(const std::vector<std::size_t>&, const std::vector<std::size_t>&, const std::vector<std::size_t>&, const void*, const void*, void*)> { -}; -class PowImpl_cpu : public OperatorImpl { -public: - PowImpl_cpu(const Pow_Op& op) : OperatorImpl(op, "cpu") {} - - static std::unique_ptr<PowImpl_cpu> create(const Pow_Op& op) { - return std::make_unique<PowImpl_cpu>(op); - } - - Elts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final; - void forward() override; - void backward() override; -}; - -namespace { -static Registrar<Pow_Op> registrarPowImpl_cpu("cpu", Aidge::PowImpl_cpu::create); -} +// Implementation entry point registration to Operator +REGISTRAR(Pow_Op, "cpu", Aidge::PowImpl_cpu::create); } // namespace Aidge #endif /* AIDGE_CPU_OPERATOR_POWIMPL_H_ */ diff --git a/include/aidge/backend/cpu/operator/PowImpl_kernels.hpp b/include/aidge/backend/cpu/operator/PowImpl_kernels.hpp new file mode 100644 index 0000000000000000000000000000000000000000..ab9b2ccc7b823842decd044b90a5c6364cedc9c9 --- /dev/null +++ 
b/include/aidge/backend/cpu/operator/PowImpl_kernels.hpp @@ -0,0 +1,95 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. + * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#ifndef AIDGE_CPU_OPERATOR_POWIMPL_KERNELS_H_ +#define AIDGE_CPU_OPERATOR_POWIMPL_KERNELS_H_ + +#include "aidge/utils/Registrar.hpp" +#include <cmath> + +#include "aidge/backend/cpu/data/Broadcasting.hpp" +#include "aidge/backend/cpu/operator/PowImpl.hpp" + +namespace Aidge { +template <class I1, class I2, class O> +void PowImpl_cpu_forward_kernel(const std::vector<std::size_t>& input1Dims, + const std::vector<std::size_t>& input2Dims, + const std::vector<std::size_t>& outputDims, + const void* input1_, + const void* input2_, + void* output_) { + + const I1* input_1 = static_cast<const I1*>(input1_); + const I2* input_2 = static_cast<const I2*>(input2_); + O* output = static_cast<O*>(output_); + + std::size_t totalElements = std::accumulate(outputDims.cbegin(), outputDims.cend(), std::size_t(1), std::multiplies<std::size_t>()); + for (std::size_t oIndex = 0; oIndex < totalElements; ++oIndex) + { + std::vector<std::size_t> indexes = getMultiDimIndices(outputDims, oIndex); + + std::size_t idx1 = getFlattenedIndex(input1Dims, indexes); + std::size_t idx2 = getFlattenedIndex(input2Dims, indexes); + + output[oIndex] = std::pow(input_1[idx1], input_2[idx2]); + } +} + +template <class I1, class I2, class O> +void PowImpl_cpu_backward_kernel(const std::vector<std::size_t>& input0Dims, + const std::vector<std::size_t>& input1Dims, + const std::vector<std::size_t>& outputDims, + const void* input0_, + const void* input1_, + const void* gradOutput_, + void* gradientInput0_, + 
void* gradientInput1_) { + const I1* input0 = static_cast<const I1*>(input0_); + I1* grad0 = static_cast<I1*>(gradientInput0_); + const I2* input1 = static_cast<const I2*>(input1_); + I2* grad1 = static_cast<I2*>(gradientInput1_); + const O* gradOut = static_cast<const O*>(gradOutput_); + + // Fill input grads with zeros + std::size_t input0Elements = std::accumulate(input0Dims.cbegin(), input0Dims.cend(), std::size_t(1), std::multiplies<std::size_t>()); + std::fill(grad0, grad0 + input0Elements, I1(0)); + std::size_t input1Elements = std::accumulate(input1Dims.cbegin(), input1Dims.cend(), std::size_t(1), std::multiplies<std::size_t>()); + std::fill(grad1, grad1 + input1Elements, I2(0)); + + std::size_t totalElements = std::accumulate(outputDims.cbegin(), outputDims.cend(), std::size_t(1), std::multiplies<std::size_t>()); + for (size_t oIndex = 0; oIndex < totalElements; ++oIndex) + { + // Compute indexes in inputs 0 and 1 to support broadcasting + std::vector<std::size_t> indexes = getMultiDimIndices(outputDims, oIndex); + std::size_t idx0 = getFlattenedIndex(input0Dims, indexes); + std::size_t idx1 = getFlattenedIndex(input1Dims, indexes); + + // grad0 = grad_output * (input1 * pow(input0, (input1 -1))) + grad0[idx0] += gradOut[oIndex]*input1[idx1]* std::pow(input0[idx0], input1[idx1]-1); + + // grad1 = grad_output * (output * ln(input0)) + grad1[idx1] += gradOut[oIndex] * std::pow(input0[idx0], input1[idx1]) * std::log(input0[idx0]); + } +} + +// Kernels registration to implementation entry point +REGISTRAR(PowImpl_cpu, + {DataType::Float32}, + {ProdConso::inPlaceModel, Aidge::PowImpl_cpu_forward_kernel<float, float, float>, Aidge::PowImpl_cpu_backward_kernel<float, float, float>}); +REGISTRAR(PowImpl_cpu, + {DataType::Float64}, + {ProdConso::inPlaceModel, Aidge::PowImpl_cpu_forward_kernel<double, double, double>, Aidge::PowImpl_cpu_backward_kernel<double, double, double>}); +REGISTRAR(PowImpl_cpu, + {DataType::Int32}, + {ProdConso::inPlaceModel, 
Aidge::PowImpl_cpu_forward_kernel<int32_t, int32_t, int32_t>, Aidge::PowImpl_cpu_backward_kernel<int32_t, int32_t, int32_t>}); +} // namespace Aidge + +#endif /* AIDGE_CPU_OPERATOR_POWIMPL_KERNELS_H_ */ diff --git a/include/aidge/backend/cpu/operator/ReLUImpl.hpp b/include/aidge/backend/cpu/operator/ReLUImpl.hpp index e2ebf44616db876b462157db650ff48362dd7bac..5b900618abce83ff1c3822d4f61cc62c93f5081f 100644 --- a/include/aidge/backend/cpu/operator/ReLUImpl.hpp +++ b/include/aidge/backend/cpu/operator/ReLUImpl.hpp @@ -17,40 +17,19 @@ #include <tuple> // std::tuple #include <vector> -#include "aidge/backend/OperatorImpl.hpp" +#include "aidge/backend/cpu/operator/OperatorImpl.hpp" #include "aidge/operator/ReLU.hpp" #include "aidge/utils/Registrar.hpp" #include "aidge/utils/Types.h" namespace Aidge { -// class ReLU_Op; +// Operator implementation entry point for the backend +using ReLUImpl_cpu = OperatorImpl_cpu<ReLU_Op, + void(const std::size_t, const void*, void*), + void(const std::size_t, const void*, const void*, void*)>; -// compute kernel registry for forward and backward -class ReLUImplForward_cpu - : public Registrable<ReLUImplForward_cpu, std::tuple<DataType, DataType>, void(const std::size_t, const void*, void*)> { -}; -class ReLUImplBackward_cpu - : public Registrable<ReLUImplBackward_cpu, std::tuple<DataType, DataType, DataType>, void(const std::size_t, const void*, const void*, void*)> { -}; - -class ReLUImpl_cpu : public OperatorImpl { -public: - ReLUImpl_cpu(const ReLU_Op& op) : OperatorImpl(op, "cpu") {} - - static std::unique_ptr<ReLUImpl_cpu> create(const ReLU_Op& op) { - return std::make_unique<ReLUImpl_cpu>(op); - } - - Elts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final; - - void forward() override final; - - void backward() override final; -}; - -namespace { -static Registrar<ReLU_Op> registrarReLUImpl_cpu("cpu", Aidge::ReLUImpl_cpu::create); -} +// Implementation entry point registration to Operator +REGISTRAR(ReLU_Op, 
"cpu", Aidge::ReLUImpl_cpu::create); } // namespace Aidge #endif /* AIDGE_CPU_OPERATOR_RELUIMPL_H_ */ diff --git a/include/aidge/backend/cpu/operator/ReLUImpl_backward_kernels.hpp b/include/aidge/backend/cpu/operator/ReLUImpl_backward_kernels.hpp deleted file mode 100644 index 1bd932e43608d98f737cc9046aed74b2fec6abc6..0000000000000000000000000000000000000000 --- a/include/aidge/backend/cpu/operator/ReLUImpl_backward_kernels.hpp +++ /dev/null @@ -1,46 +0,0 @@ -/******************************************************************************** - * Copyright (c) 2023 CEA-List - * - * This program and the accompanying materials are made available under the - * terms of the Eclipse Public License 2.0 which is available at - * http://www.eclipse.org/legal/epl-2.0. - * - * SPDX-License-Identifier: EPL-2.0 - * - ********************************************************************************/ - -#ifndef AIDGE_CPU_OPERATOR_RELUIMPL_BACKWARD_KERNEL_H_ -#define AIDGE_CPU_OPERATOR_RELUIMPL_BACKWARD_KERNEL_H_ - -#include <cstddef> // std::size_t - -#include "aidge/backend/cpu/operator/ReLUImpl.hpp" -#include "aidge/utils/Registrar.hpp" - -namespace Aidge { -template <class I, class GI, class GO> -void ReLUImpl_cpu_backward_kernel(const std::size_t inputLenght, - const void* input_, const void* grad_output_, - void* grad_input_) { - const I* input = static_cast<const I*>(input_); - const GO* grad_output = static_cast<const GO*>(grad_output_); - GI* grad_input = static_cast<GI*>(grad_input_); - for (std::size_t i = 0; i < inputLenght; ++i) { - grad_input[i] = (input[i] > 0) ? 
grad_output[i] : 0; - } -} - -namespace { -static Registrar<ReLUImplBackward_cpu> registrarReLUImplBackward_cpu_Float32( - {DataType::Float32, DataType::Float32, DataType::Float32}, - Aidge::ReLUImpl_cpu_backward_kernel<float, float, float>); -static Registrar<ReLUImplBackward_cpu> registrarReLUImplBackward_cpu_Int32( - {DataType::Int32, DataType::Int32, DataType::Int32}, - Aidge::ReLUImpl_cpu_backward_kernel<int, int, int>); -static Registrar<ReLUImplBackward_cpu> registrarReLUImplBackward_cpu_Float64( - {DataType::Float64, DataType::Float64, DataType::Float64}, - Aidge::ReLUImpl_cpu_backward_kernel<double, double, double>); -} // namespace -} // namespace Aidge - -#endif /* AIDGE_CPU_OPERATOR_RELUIMPL_BACKWARD_KERNEL_H_ */ diff --git a/include/aidge/backend/cpu/operator/ReLUImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/ReLUImpl_forward_kernels.hpp deleted file mode 100644 index af9c65590c7182185c9d79669dde49e592cbeb5d..0000000000000000000000000000000000000000 --- a/include/aidge/backend/cpu/operator/ReLUImpl_forward_kernels.hpp +++ /dev/null @@ -1,44 +0,0 @@ -/******************************************************************************** - * Copyright (c) 2023 CEA-List - * - * This program and the accompanying materials are made available under the - * terms of the Eclipse Public License 2.0 which is available at - * http://www.eclipse.org/legal/epl-2.0. 
- * - * SPDX-License-Identifier: EPL-2.0 - * - ********************************************************************************/ - -#ifndef AIDGE_CPU_OPERATOR_RELUIMPL_FORWARD_KERNEL_H_ -#define AIDGE_CPU_OPERATOR_RELUIMPL_FORWARD_KERNEL_H_ - -#include "aidge/utils/Registrar.hpp" - -#include "aidge/backend/cpu/operator/ReLUImpl.hpp" - -namespace Aidge { -template <class I, class O> -void ReLUImpl_cpu_forward_kernel(std::size_t inputLenght, - const void* input_, - void* output_) { - - const I* input = static_cast<const I*>(input_); - O* output = static_cast<O*>(output_); - -//#pragma omp parallel for if (inputLenght > 1024) - for (std::size_t i = 0; i < inputLenght; ++i) { - output[i] = (input[i] > 0) ? input[i] : 0; - } -} - -namespace { -static Registrar<ReLUImplForward_cpu> registrarReLUImplForward_cpu_Float32( - {DataType::Float32, DataType::Float32}, Aidge::ReLUImpl_cpu_forward_kernel<float, float>); -static Registrar<ReLUImplForward_cpu> registrarReLUImplForward_cpu_Int32( - {DataType::Int32, DataType::Int32}, Aidge::ReLUImpl_cpu_forward_kernel<int, int>); -static Registrar<ReLUImplForward_cpu> registrarReLUImplForward_cpu_Float64( - {DataType::Float64, DataType::Float64}, Aidge::ReLUImpl_cpu_forward_kernel<double, double>); -} // namespace -} // namespace Aidge - -#endif /* AIDGE_CPU_OPERATOR_RELUIMPL_FORWARD_KERNEL_H_ */ diff --git a/include/aidge/backend/cpu/operator/ReLUImpl_kernels.hpp b/include/aidge/backend/cpu/operator/ReLUImpl_kernels.hpp new file mode 100644 index 0000000000000000000000000000000000000000..e39e9b7decd91e392c5db7e9e9bc4ed0f366829d --- /dev/null +++ b/include/aidge/backend/cpu/operator/ReLUImpl_kernels.hpp @@ -0,0 +1,66 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. 
+ * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#ifndef AIDGE_CPU_OPERATOR_RELUIMPL_KERNELS_H_ +#define AIDGE_CPU_OPERATOR_RELUIMPL_KERNELS_H_ + +#include <cstddef> // std::size_t +#include <memory> +#include <tuple> // std::tuple +#include <vector> + +#include "aidge/backend/cpu/operator/OperatorImpl.hpp" +#include "aidge/backend/cpu/operator/ReLUImpl.hpp" +#include "aidge/operator/ReLU.hpp" +#include "aidge/utils/Registrar.hpp" +#include "aidge/utils/Types.h" + +namespace Aidge { +// Kernels +template <class I, class O> +void ReLUImpl_cpu_forward_kernel(std::size_t inputLenght, + const void* input_, + void* output_) { + + const I* input = static_cast<const I*>(input_); + O* output = static_cast<O*>(output_); + +//#pragma omp parallel for if (inputLenght > 1024) + for (std::size_t i = 0; i < inputLenght; ++i) { + output[i] = (input[i] > 0) ? input[i] : 0; + } +} + +template <class I, class GI, class GO> +void ReLUImpl_cpu_backward_kernel(const std::size_t inputLenght, + const void* input_, const void* grad_output_, + void* grad_input_) { + const I* input = static_cast<const I*>(input_); + const GO* grad_output = static_cast<const GO*>(grad_output_); + GI* grad_input = static_cast<GI*>(grad_input_); + for (std::size_t i = 0; i < inputLenght; ++i) { + grad_input[i] = (input[i] > 0) ? 
grad_output[i] : 0; + } +} + +// Kernels registration to implementation entry point +REGISTRAR(ReLUImpl_cpu, + {DataType::Float32}, + {ProdConso::inPlaceModel, Aidge::ReLUImpl_cpu_forward_kernel<float, float>, Aidge::ReLUImpl_cpu_backward_kernel<float, float, float>}); +REGISTRAR(ReLUImpl_cpu, + {DataType::Float64}, + {ProdConso::inPlaceModel, Aidge::ReLUImpl_cpu_forward_kernel<double, double>, Aidge::ReLUImpl_cpu_backward_kernel<double, double, double>}); +REGISTRAR(ReLUImpl_cpu, + {DataType::Int32}, + {ProdConso::inPlaceModel, Aidge::ReLUImpl_cpu_forward_kernel<int32_t, int32_t>, Aidge::ReLUImpl_cpu_backward_kernel<int32_t, int32_t, int32_t>}); +} // namespace Aidge + +#endif /* AIDGE_CPU_OPERATOR_RELUIMPL_KERNELS_H_ */ diff --git a/include/aidge/backend/cpu/operator/ReduceMeanImpl.hpp b/include/aidge/backend/cpu/operator/ReduceMeanImpl.hpp index 8d784c38dc006ea82f040dfe83b4bef05908dd68..1c50805d5af768dfc160488fda1e8fadfa798454 100644 --- a/include/aidge/backend/cpu/operator/ReduceMeanImpl.hpp +++ b/include/aidge/backend/cpu/operator/ReduceMeanImpl.hpp @@ -17,116 +17,22 @@ #include <tuple> #include <vector> -#include "aidge/backend/OperatorImpl.hpp" +#include "aidge/backend/cpu/operator/OperatorImpl.hpp" #include "aidge/operator/ReduceMean.hpp" #include "aidge/utils/Registrar.hpp" #include "aidge/utils/Types.h" namespace Aidge { -// class ReduceMean_Op; - -// Every DIM -class ReduceMeanImplForward_cpu - : public Registrable<ReduceMeanImplForward_cpu, - std::tuple<DataType, DataType>, - void(const std::vector<std::int32_t>&, +// Operator implementation entry point for the backend +using ReduceMeanImpl_cpu = OperatorImpl_cpu<ReduceMean_Op, + void(const std::vector<std::int32_t>&, DimSize_t, const std::vector<DimSize_t>&, const void *, - void *)> {}; -class ReduceMeanImpl1DBackward_cpu - : public Registrable<ReduceMeanImpl1DBackward_cpu, - std::tuple<DataType, DataType>, - void(const std::vector<std::int32_t>&, - DimSize_t, - const std::vector<DimSize_t>&, - const 
void *, - void *)> {}; - -class ReduceMeanImpl_cpu : public OperatorImpl { - public: - ReduceMeanImpl_cpu(const ReduceMean_Op& op) : OperatorImpl(op, "cpu") {} - - static std::unique_ptr<ReduceMeanImpl_cpu> create(const ReduceMean_Op &op) { - return std::make_unique<ReduceMeanImpl_cpu>(op); - } - - public: - void forward() override; -}; - -// // compute kernel registry for forward and backward -// // DIM 1 -// class ReduceMeanImpl1DForward_cpu -// : public Registrable<ReduceMeanImpl1DForward_cpu, -// std::tuple<DataType, DataType>, -// void(const ReduceMean_Op<1>::Attrs &, const std::vector<DimSize_t>&, const void *, void *)> {}; -// class ReduceMeanImpl1DBackward_cpu -// : public Registrable<ReduceMeanImpl1DBackward_cpu, -// std::tuple<DataType, DataType>, -// void(const ReduceMean_Op<1>::Attrs &, const std::vector<DimSize_t>&, const void *, void *)> {}; - -// // DIM 2 -// class ReduceMeanImpl2DForward_cpu -// : public Registrable<ReduceMeanImpl2DForward_cpu, -// std::tuple<DataType, DataType>, -// void(const ReduceMean_Op<2>::Attrs &, const std::vector<DimSize_t>&, const void *, void *)> {}; -// class ReduceMeanImpl2DBackward_cpu -// : public Registrable<ReduceMeanImpl2DBackward_cpu, -// std::tuple<DataType, DataType>, -// void(const ReduceMean_Op<2>::Attrs &, const std::vector<DimSize_t>&, const void *, void *)> {}; -// // DIM 3 -// class ReduceMeanImpl3DForward_cpu -// : public Registrable<ReduceMeanImpl3DForward_cpu, -// std::tuple<DataType, DataType>, -// void(const ReduceMean_Op<3>::Attrs &, const std::vector<DimSize_t>&, const void *, void *)> {}; -// class ReduceMeanImpl3DBackward_cpu -// : public Registrable<ReduceMeanImpl3DBackward_cpu, -// std::tuple<DataType, DataType>, -// void(const ReduceMean_Op<3>::Attrs &, const std::vector<DimSize_t>&, const void *, void *)> {}; - -// class ReduceMeanImpl1D_cpu : public OperatorImpl { -// public: -// ReduceMeanImpl1D_cpu(const ReduceMean_Op<1>& op) : OperatorImpl(op, "cpu") {} - -// static 
std::unique_ptr<ReduceMeanImpl1D_cpu> create(const ReduceMean_Op<1> &op) { -// return std::make_unique<ReduceMeanImpl1D_cpu>(op); -// } - -// public: -// void forward() override; -// }; - -// class ReduceMeanImpl2D_cpu : public OperatorImpl { -// public: -// ReduceMeanImpl2D_cpu(const ReduceMean_Op<2>& op) : OperatorImpl(op, "cpu") {} - -// static std::unique_ptr<ReduceMeanImpl2D_cpu> create(const ReduceMean_Op<2> &op) { -// return std::make_unique<ReduceMeanImpl2D_cpu>(op); -// } - -// public: -// void forward() override; -// }; - -// class ReduceMeanImpl3D_cpu : public OperatorImpl { -// public: -// ReduceMeanImpl3D_cpu(const ReduceMean_Op<3>& op) : OperatorImpl(op, "cpu") {} - -// static std::unique_ptr<ReduceMeanImpl3D_cpu> create(const ReduceMean_Op<3> &op) { -// return std::make_unique<ReduceMeanImpl3D_cpu>(op); -// } + void *)>; -// public: -// void forward() override; -// }; -namespace { -// add cpu backend to ReduceMean_Op<2> implementation registry -static Registrar<ReduceMean_Op> registrarReduceMeanImpl_cpu("cpu", Aidge::ReduceMeanImpl_cpu::create); -// static Registrar<ReduceMean_Op<1>> registrarReduceMeanImpl1D_cpu("cpu", Aidge::ReduceMeanImpl1D_cpu::create); -// static Registrar<ReduceMean_Op<2>> registrarReduceMeanImpl2D_cpu("cpu", Aidge::ReduceMeanImpl2D_cpu::create); -// static Registrar<ReduceMean_Op<3>> registrarReduceMeanImpl3D_cpu("cpu", Aidge::ReduceMeanImpl3D_cpu::create); -} // namespace +// Implementation entry point registration to Operator +REGISTRAR(ReduceMean_Op, "cpu", Aidge::ReduceMeanImpl_cpu::create); } // namespace Aidge #endif /* AIDGE_CPU_OPERATOR_REDUCEMEANIMPL_H_ */ diff --git a/include/aidge/backend/cpu/operator/ReduceMeanImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/ReduceMeanImpl_kernels.hpp similarity index 63% rename from include/aidge/backend/cpu/operator/ReduceMeanImpl_forward_kernels.hpp rename to include/aidge/backend/cpu/operator/ReduceMeanImpl_kernels.hpp index 
bba355e16958bb1a22bde1d24304d992a658ade8..5a143164d7e4fa2585ea72c38eaaa123f215d21a 100644 --- a/include/aidge/backend/cpu/operator/ReduceMeanImpl_forward_kernels.hpp +++ b/include/aidge/backend/cpu/operator/ReduceMeanImpl_kernels.hpp @@ -9,8 +9,8 @@ * ********************************************************************************/ -#ifndef AIDGE_CPU_OPERATOR_REDUCEMEANIMPL_FORWARD_KERNEL_H_ -#define AIDGE_CPU_OPERATOR_REDUCEMEANIMPL_FORWARD_KERNEL_H_ +#ifndef AIDGE_CPU_OPERATOR_REDUCEMEANIMPL_KERNELS_H_ +#define AIDGE_CPU_OPERATOR_REDUCEMEANIMPL_KERNELS_H_ #include <algorithm> // std::for_each #include <cstddef> // std::size_t @@ -38,7 +38,10 @@ void ReduceMeanImpl_cpu_forward_kernel(const std::vector<std::int32_t>& axes, const std::size_t nb_dims = inputDims.size(); const std::size_t totalElements = std::accumulate(inputDims.cbegin(), inputDims.cend(), 1, std::multiplies<std::size_t>()); - if (axes.size() == 1) { + if (axes.empty()){ + std::copy_n(input,totalElements, output); + } + else if (axes.size() == 1) { const std::size_t stride_pre = std::accumulate(inputDims.cbegin(), inputDims.cbegin() + axes[0], 1, std::multiplies<std::size_t>()); const std::size_t stride_post = std::accumulate(inputDims.crbegin(), inputDims.crbegin() + nb_dims -1 - axes[0], 1, std::multiplies<std::size_t>()); @@ -104,38 +107,16 @@ void ReduceMeanImpl_cpu_forward_kernel(const std::vector<std::int32_t>& axes, } } -namespace { -static Registrar<ReduceMeanImplForward_cpu> registrarReduceMeanImplForward_cpu_Float32( - {DataType::Float32, DataType::Float32}, Aidge::ReduceMeanImpl_cpu_forward_kernel<float, float>); -static Registrar<ReduceMeanImplForward_cpu> registrarReduceMeanImplForward_cpu_Int32( - {DataType::Int32, DataType::Int32}, Aidge::ReduceMeanImpl_cpu_forward_kernel<int, int>); -static Registrar<ReduceMeanImplForward_cpu> registrarReduceMeanImplForward_cpu_Float64( - {DataType::Float64, DataType::Float64}, Aidge::ReduceMeanImpl_cpu_forward_kernel<double, double>); - -// // DIM = 
1 -// static Registrar<ReduceMeanImpl1DForward_cpu> registrarReduceMeanImplForward_1D_cpu_Float32( -// {DataType::Float32, DataType::Float32}, Aidge::ReduceMeanImpl_cpu_forward_kernel<float, float,1>); -// static Registrar<ReduceMeanImpl1DForward_cpu> registrarReduceMeanImplForward_1D_cpu_Int32( -// {DataType::Int32, DataType::Int32}, Aidge::ReduceMeanImpl_cpu_forward_kernel<int, int,1>); -// static Registrar<ReduceMeanImpl1DForward_cpu> registrarReduceMeanImplForward_1D_cpu_Float64( -// {DataType::Float64, DataType::Float64}, Aidge::ReduceMeanImpl_cpu_forward_kernel<double, double,1>); - -// // DIM = 2 -// static Registrar<ReduceMeanImpl2DForward_cpu> registrarReduceMeanImplForward_2D_cpu_Float32( -// {DataType::Float32, DataType::Float32}, Aidge::ReduceMeanImpl_cpu_forward_kernel<float, float,2>); -// static Registrar<ReduceMeanImpl2DForward_cpu> registrarReduceMeanImplForward_2D_cpu_Int32( -// {DataType::Int32, DataType::Int32}, Aidge::ReduceMeanImpl_cpu_forward_kernel<int, int,2>); -// static Registrar<ReduceMeanImpl2DForward_cpu> registrarReduceMeanImplForward_2D_cpu_Float64( -// {DataType::Float64, DataType::Float64}, Aidge::ReduceMeanImpl_cpu_forward_kernel<double, double,2>); - -// // DIM = 3 -// static Registrar<ReduceMeanImpl3DForward_cpu> registrarReduceMeanImplForward_3D_cpu_Float32( -// {DataType::Float32, DataType::Float32}, Aidge::ReduceMeanImpl_cpu_forward_kernel<float, float,3>); -// static Registrar<ReduceMeanImpl3DForward_cpu> registrarReduceMeanImplForward_3D_cpu_Int32( -// {DataType::Int32, DataType::Int32}, Aidge::ReduceMeanImpl_cpu_forward_kernel<int, int,3>); -// static Registrar<ReduceMeanImpl3DForward_cpu> registrarReduceMeanImplForward_3D_cpu_Float64( -// {DataType::Float64, DataType::Float64}, Aidge::ReduceMeanImpl_cpu_forward_kernel<double, double,3>); -} // namespace +// Kernels registration to implementation entry point +REGISTRAR(ReduceMeanImpl_cpu, + {DataType::Float32}, + {ProdConso::inPlaceModel, 
Aidge::ReduceMeanImpl_cpu_forward_kernel<float, float>, nullptr}); +REGISTRAR(ReduceMeanImpl_cpu, + {DataType::Float64}, + {ProdConso::inPlaceModel, Aidge::ReduceMeanImpl_cpu_forward_kernel<double, double>, nullptr}); +REGISTRAR(ReduceMeanImpl_cpu, + {DataType::Int32}, + {ProdConso::inPlaceModel, Aidge::ReduceMeanImpl_cpu_forward_kernel<int32_t, int32_t>, nullptr}); } // namespace Aidge -#endif /* AIDGE_CPU_OPERATOR_REDUCEMEANIMPL_FORWARD_KERNEL_H_ */ +#endif /* AIDGE_CPU_OPERATOR_REDUCEMEANIMPL_KERNELS_H_ */ diff --git a/include/aidge/backend/cpu/operator/ReduceSumImpl.hpp b/include/aidge/backend/cpu/operator/ReduceSumImpl.hpp new file mode 100644 index 0000000000000000000000000000000000000000..4138c62c24149c15cfad5e85e8f50889b2b6a433 --- /dev/null +++ b/include/aidge/backend/cpu/operator/ReduceSumImpl.hpp @@ -0,0 +1,38 @@ +/******************************************************************************** + * Copyright (c) 2024 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. 
+ * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#ifndef AIDGE_CPU_OPERATOR_REDUCESUMIMPL_H_ +#define AIDGE_CPU_OPERATOR_REDUCESUMIMPL_H_ + +#include <array> +#include <memory> +#include <tuple> +#include <vector> + +#include "aidge/backend/cpu/operator/OperatorImpl.hpp" +#include "aidge/operator/ReduceSum.hpp" +#include "aidge/utils/Registrar.hpp" +#include "aidge/utils/Types.h" + +namespace Aidge { +// Operator implementation entry point for the backend +using ReduceSumImpl_cpu = OperatorImpl_cpu<ReduceSum_Op, + void(const std::vector<std::int32_t>&, + DimSize_t, + const std::vector<DimSize_t>&, + const void *, + void *)>; + +// Implementation entry point registration to Operator +REGISTRAR(ReduceSum_Op, "cpu", Aidge::ReduceSumImpl_cpu::create); +} // namespace Aidge + +#endif /* AIDGE_CPU_OPERATOR_REDUCESUMIMPL_H_ */ diff --git a/include/aidge/backend/cpu/operator/ReduceSumImpl_kernels.hpp b/include/aidge/backend/cpu/operator/ReduceSumImpl_kernels.hpp new file mode 100644 index 0000000000000000000000000000000000000000..72671421796a0d5e799e6f762dfcaf02457220f3 --- /dev/null +++ b/include/aidge/backend/cpu/operator/ReduceSumImpl_kernels.hpp @@ -0,0 +1,120 @@ +/******************************************************************************** + * Copyright (c) 2024 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. 
+ * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#ifndef AIDGE_CPU_OPERATOR_REDUCESUMIMPL_KERNELS_H_ +#define AIDGE_CPU_OPERATOR_REDUCESUMIMPL_KERNELS_H_ + +#include <algorithm> // std::for_each +#include <cstddef> // std::size_t +#include <cstdint> // std::int32_t +#include <functional> //std::multiplies +#include <numeric> //std::accumulate +#include <vector> + +#include "aidge/backend/cpu/operator/ReduceSumImpl.hpp" +#include "aidge/data/Data.hpp" +#include "aidge/operator/ReduceSum.hpp" +#include "aidge/utils/Registrar.hpp" + +namespace Aidge { +template <class I, class O> +void ReduceSumImpl_cpu_forward_kernel(const std::vector<std::int32_t>& axes, + DimSize_t /*keepDims*/, + const std::vector<DimSize_t>& inputDims, + const void* input_, + void* output_) { + + const I* input = static_cast<const I*>(input_); + O* output = static_cast<O*>(output_); + + const std::size_t nb_dims = inputDims.size(); + const std::size_t totalElements = std::accumulate(inputDims.cbegin(), inputDims.cend(), 1, std::multiplies<std::size_t>()); + + if (axes.empty()){ + std::copy_n(input,totalElements, output); + } + else if (axes.size() == 1) { + const std::size_t stride_pre = std::accumulate(inputDims.cbegin(), inputDims.cbegin() + axes[0], 1, std::multiplies<std::size_t>()); + const std::size_t stride_post = std::accumulate(inputDims.crbegin(), inputDims.crbegin() + nb_dims -1 - axes[0], 1, std::multiplies<std::size_t>()); + + const std::size_t dim_i = inputDims[axes[0]]; + for (std::size_t pre = 0; pre < stride_pre; ++pre) { + for (std::size_t post = 0; post < stride_post; ++post) { + const std::size_t idx_i = pre * dim_i * stride_post + post; + const std::size_t idx_o = pre * stride_post + post; + O sum = 0; + for (std::size_t i = 0; i < dim_i; ++i) { + sum +=input[idx_i + i*stride_post]; + } + output[idx_o] = sum; + } + } + } else { + std::size_t outputElements = totalElements; + + auto stride_post 
= std::unique_ptr<std::size_t[]>(new std::size_t[nb_dims]); + stride_post[nb_dims - 1] = 1; + for (std::size_t i = nb_dims-2; i != static_cast<std::size_t>(-1); --i) { + stride_post[i] = stride_post[i+1]*inputDims[i+1]; + } + auto stride_pre = std::unique_ptr<std::size_t[]>(new std::size_t[nb_dims]); + stride_pre[0] = 1; + for (std::size_t i = 1; i < nb_dims; ++i) { + stride_pre[i] = stride_pre[i-1]*inputDims[i-1]; + } + + const I* inputAccumulation = input; + I* outputAccumulation = nullptr; + + for (const auto& axisInt : axes) { + const std::size_t a = static_cast<std::size_t>(axisInt); + outputElements /= inputDims[a]; + outputAccumulation = new I[outputElements]; + const std::size_t dim_i = inputDims[a]; + for (std::size_t pre = 0; pre < stride_pre[a]; ++pre) { + for (std::size_t post = 0; post < stride_post[a]; ++post) { + const std::size_t idx_i = pre * dim_i * stride_post[a] + post; + const std::size_t idx_o = pre * stride_post[a] + post; + I sum = 0; + for (std::size_t i = 0; i < dim_i; ++i) { + sum += inputAccumulation[idx_i + i*stride_post[a]]; + } + outputAccumulation[idx_o] = sum; + } + } + std::for_each(stride_pre.get()+a+1, stride_pre.get()+nb_dims, [dim_i] (std::size_t& val) { val /= dim_i; }); + if (inputAccumulation != input) { + delete[] inputAccumulation; + } + inputAccumulation = outputAccumulation; + } + + // Copy elements from inputAccumulation to output while dividing by divisor + std::copy(inputAccumulation, inputAccumulation + outputElements, output); + if (outputAccumulation) { + delete[] outputAccumulation; + } + } +} + +// Kernels registration to implementation entry point +REGISTRAR(ReduceSumImpl_cpu, + {DataType::Float32}, + {ProdConso::inPlaceModel, Aidge::ReduceSumImpl_cpu_forward_kernel<float, float>, nullptr}); +REGISTRAR(ReduceSumImpl_cpu, + {DataType::Float64}, + {ProdConso::inPlaceModel, Aidge::ReduceSumImpl_cpu_forward_kernel<double, double>, nullptr}); +REGISTRAR(ReduceSumImpl_cpu, + {DataType::Int32}, + 
{ProdConso::inPlaceModel, Aidge::ReduceSumImpl_cpu_forward_kernel<int32_t, int32_t>, nullptr}); +} // namespace Aidge + +#endif /* AIDGE_CPU_OPERATOR_REDUCESUMIMPL_KERNELS_H_ */ diff --git a/include/aidge/backend/cpu/operator/ScalingImpl.hpp b/include/aidge/backend/cpu/operator/ScalingImpl.hpp index 8590169272818a225fe4299150f873733cdd9cd9..c1cc247c548701d43e01b1e92d02f42a11cfc710 100644 --- a/include/aidge/backend/cpu/operator/ScalingImpl.hpp +++ b/include/aidge/backend/cpu/operator/ScalingImpl.hpp @@ -12,7 +12,7 @@ #ifndef __AIDGE_CPU_OPERATOR_ScalingIMPL_H__ #define __AIDGE_CPU_OPERATOR_ScalingIMPL_H__ -#include "aidge/backend/OperatorImpl.hpp" +#include "aidge/backend/cpu/operator/OperatorImpl.hpp" #include "aidge/operator/Scaling.hpp" #include "aidge/utils/Registrar.hpp" #include "aidge/utils/Types.h" @@ -22,43 +22,17 @@ #include <array> namespace Aidge { -// class Scaling_Op; - -// compute kernel registry for forward and backward -class ScalingImplForward_cpu - : public Registrable<ScalingImplForward_cpu, - std::tuple<DataType, DataType>, - void(const float, - const std::size_t, - const bool, - std::size_t, - const void*, - void*)> {}; -class ScalingImplBackward_cpu - : public Registrable<ScalingImplBackward_cpu, - std::tuple<DataType, DataType>, - void(const float, - const std::size_t, - const bool, - std::size_t, - const void*, - void*)> {}; - -class ScalingImpl_cpu : public OperatorImpl { -public: - ScalingImpl_cpu(const Scaling_Op& op) : OperatorImpl(op, "cpu") {} - - static std::unique_ptr<ScalingImpl_cpu> create(const Scaling_Op& op) { - return std::make_unique<ScalingImpl_cpu>(op); - } - - Elts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final; - void forward() override; -}; - -namespace { -static Registrar<Scaling_Op> registrarScalingImpl_cpu("cpu", Aidge::ScalingImpl_cpu::create); -} +// Operator implementation entry point for the backend +using ScalingImpl_cpu = OperatorImpl_cpu<Scaling_Op, + void(const float, + const 
std::size_t, + const bool, + std::size_t, + const void*, + void*)>; + +// Implementation entry point registration to Operator +REGISTRAR(Scaling_Op, "cpu", Aidge::ScalingImpl_cpu::create); } // namespace Aidge #endif /* __AIDGE_CPU_OPERATOR_ScalingIMPL_H__ */ \ No newline at end of file diff --git a/include/aidge/backend/cpu/operator/ScalingImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/ScalingImpl_kernels.hpp similarity index 79% rename from include/aidge/backend/cpu/operator/ScalingImpl_forward_kernels.hpp rename to include/aidge/backend/cpu/operator/ScalingImpl_kernels.hpp index c654265dd6f650129201037976d89da4b0f39d96..c758c9cf39e76bb370c6d03c28e3a670c280eefc 100644 --- a/include/aidge/backend/cpu/operator/ScalingImpl_forward_kernels.hpp +++ b/include/aidge/backend/cpu/operator/ScalingImpl_kernels.hpp @@ -9,8 +9,8 @@ * ********************************************************************************/ -#ifndef AIDGE_CPU_OPERATOR_SCALINGIMPL_FORWARD_KERNEL_H_ -#define AIDGE_CPU_OPERATOR_SCALINGIMPL_FORWARD_KERNEL_H_ +#ifndef AIDGE_CPU_OPERATOR_SCALINGIMPL_KERNELS_H_ +#define AIDGE_CPU_OPERATOR_SCALINGIMPL_KERNELS_H_ #include <cmath> #include <cstddef> @@ -92,14 +92,16 @@ void ScalingImpl_cpu_forward_kernel(const float scalingFactor, } } -namespace { -static Registrar<ScalingImplForward_cpu> registrarScalingImplForward_cpu_Float32( - {DataType::Float32, DataType::Float32}, Aidge::ScalingImpl_cpu_forward_kernel<float, float>); -static Registrar<ScalingImplForward_cpu> registrarScalingImplForward_cpu_Int32( - {DataType::Int32, DataType::Int32}, Aidge::ScalingImpl_cpu_forward_kernel<int, int>); -static Registrar<ScalingImplForward_cpu> registrarScalingImplForward_cpu_Float64( - {DataType::Float64, DataType::Float64}, Aidge::ScalingImpl_cpu_forward_kernel<double, double>); -} // namespace +// Kernels registration to implementation entry point +REGISTRAR(ScalingImpl_cpu, + {DataType::Float32}, + {ProdConso::inPlaceModel, 
Aidge::ScalingImpl_cpu_forward_kernel<float, float>, nullptr}); +REGISTRAR(ScalingImpl_cpu, + {DataType::Float64}, + {ProdConso::inPlaceModel, Aidge::ScalingImpl_cpu_forward_kernel<double, double>, nullptr}); +REGISTRAR(ScalingImpl_cpu, + {DataType::Int32}, + {ProdConso::inPlaceModel, Aidge::ScalingImpl_cpu_forward_kernel<int32_t, int32_t>, nullptr}); } // namespace Aidge -#endif /* AIDGE_CPU_OPERATOR_SCALINGIMPL_FORWARD_KERNEL_H_ */ \ No newline at end of file +#endif /* AIDGE_CPU_OPERATOR_SCALINGIMPL_KERNELS_H_ */ \ No newline at end of file diff --git a/include/aidge/backend/cpu/operator/SigmoidImpl.hpp b/include/aidge/backend/cpu/operator/SigmoidImpl.hpp index 34340e6166a48b465c7723e85d91c195bfb42277..ee1c36edecbe50cc1765da59737509a2b6333caf 100644 --- a/include/aidge/backend/cpu/operator/SigmoidImpl.hpp +++ b/include/aidge/backend/cpu/operator/SigmoidImpl.hpp @@ -12,7 +12,7 @@ #ifndef AIDGE_CPU_OPERATOR_SIGMOIDIMPL_H_ #define AIDGE_CPU_OPERATOR_SIGMOIDIMPL_H_ -#include "aidge/backend/OperatorImpl.hpp" +#include "aidge/backend/cpu/operator/OperatorImpl.hpp" #include "aidge/operator/Sigmoid.hpp" #include "aidge/utils/Registrar.hpp" #include "aidge/utils/Types.h" @@ -21,34 +21,13 @@ #include <vector> namespace Aidge { -// class Sigmoid_Op; +// Operator implementation entry point for the backend +using SigmoidImpl_cpu = OperatorImpl_cpu<Sigmoid_Op, + void(const std::size_t, const void*, void*), + void(const std::size_t, const void*, const void*, void*)>; -// compute kernel registry for forward and backward -class SigmoidImplForward_cpu - : public Registrable<SigmoidImplForward_cpu, std::tuple<DataType, DataType>, void(const std::size_t, const void*, void*)> { -}; -class SigmoidImplBackward_cpu - : public Registrable<SigmoidImplBackward_cpu, std::tuple<DataType, DataType, DataType>, void(const std::size_t, const void*, const void*, void*)> { -}; - -class SigmoidImpl_cpu : public OperatorImpl { -public: - SigmoidImpl_cpu(const Sigmoid_Op& op) : OperatorImpl(op, 
"cpu") {} - - static std::unique_ptr<SigmoidImpl_cpu> create(const Sigmoid_Op& op) { - return std::make_unique<SigmoidImpl_cpu>(op); - } - - Elts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final; - - void forward() override final; - - void backward() override final; -}; - -namespace { -static Registrar<Sigmoid_Op> registrarSigmoidImpl_cpu("cpu", Aidge::SigmoidImpl_cpu::create); -} +// Implementation entry point registration to Operator +REGISTRAR(Sigmoid_Op, "cpu", Aidge::SigmoidImpl_cpu::create); } // namespace Aidge #endif /* AIDGE_CPU_OPERATOR_SIGMOIDIMPL_H_ */ diff --git a/include/aidge/backend/cpu/operator/SigmoidImpl_backward_kernels.hpp b/include/aidge/backend/cpu/operator/SigmoidImpl_backward_kernels.hpp deleted file mode 100644 index 4ceb3bd7ed9a3fb739591eee488f8035770fef18..0000000000000000000000000000000000000000 --- a/include/aidge/backend/cpu/operator/SigmoidImpl_backward_kernels.hpp +++ /dev/null @@ -1,43 +0,0 @@ -/******************************************************************************** - * Copyright (c) 2023 CEA-List - * - * This program and the accompanying materials are made available under the - * terms of the Eclipse Public License 2.0 which is available at - * http://www.eclipse.org/legal/epl-2.0. 
- * - * SPDX-License-Identifier: EPL-2.0 - * - ********************************************************************************/ - -#ifndef AIDGE_CPU_OPERATOR_SIGMOIDIMPL_BACKWARD_KERNEL_H_ -#define AIDGE_CPU_OPERATOR_SIGMOIDIMPL_BACKWARD_KERNEL_H_ - -#include <cstddef> // std::size_t - -#include "aidge/backend/cpu/operator/SigmoidImpl.hpp" -#include "aidge/utils/Registrar.hpp" - -namespace Aidge { -template <class O, class GI, class GO> -void SigmoidImpl_cpu_backward_kernel(const std::size_t inputLenght, - const void* output_, const void* grad_output_, - void* grad_input_) { - const O* output = static_cast<const O*>(output_); - const GO* grad_output = static_cast<const GO*>(grad_output_); - GI* grad_input = static_cast<GI*>(grad_input_); - for (std::size_t i = 0; i < inputLenght; ++i) { - grad_input[i] = output[i] * (O(1) - output[i]) * grad_output[i]; - } -} - -namespace { -static Registrar<SigmoidImplBackward_cpu> registrarSigmoidImplBackward_cpu_Float32( - {DataType::Float32, DataType::Float32, DataType::Float32}, - Aidge::SigmoidImpl_cpu_backward_kernel<float, float, float>); -static Registrar<SigmoidImplBackward_cpu> registrarSigmoidImplBackward_cpu_Float64( - {DataType::Float64, DataType::Float64, DataType::Float64}, - Aidge::SigmoidImpl_cpu_backward_kernel<double, double, double>); -} // namespace -} // namespace Aidge - -#endif /* AIDGE_CPU_OPERATOR_SIGMOIDIMPL_BACKWARD_KERNEL_H_ */ diff --git a/include/aidge/backend/cpu/operator/SigmoidImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/SigmoidImpl_forward_kernels.hpp deleted file mode 100644 index 24ba11a0bca7f3fa15f9ac1e2c13e29f88eaf074..0000000000000000000000000000000000000000 --- a/include/aidge/backend/cpu/operator/SigmoidImpl_forward_kernels.hpp +++ /dev/null @@ -1,46 +0,0 @@ -/******************************************************************************** - * Copyright (c) 2023 CEA-List - * - * This program and the accompanying materials are made available under the - * terms of the 
Eclipse Public License 2.0 which is available at - * http://www.eclipse.org/legal/epl-2.0. - * - * SPDX-License-Identifier: EPL-2.0 - * - ********************************************************************************/ - -#ifndef AIDGE_CPU_OPERATOR_SIGMOIDIMPL_FORWARD_KERNEL_H_ -#define AIDGE_CPU_OPERATOR_SIGMOIDIMPL_FORWARD_KERNEL_H_ - -#include "aidge/utils/Registrar.hpp" - -#include "aidge/backend/cpu/operator/SigmoidImpl.hpp" - -namespace Aidge { -template <class I, class O> -void SigmoidImpl_cpu_forward_kernel(std::size_t inputLenght, - const void* input_, - void* output_) { - - const I* input = static_cast<const I*>(input_); - O* output = static_cast<O*>(output_); - -//#pragma omp parallel for if (inputLenght > 1024) - for (std::size_t i = 0; i < inputLenght; ++i) { - if (input[i] > I(0)) { - output[i] = O(1) / (O(1) + std::exp(-input[i])); - } else { - output[i] = std::exp(input[i]) / (O(1) + std::exp(input[i])); - } - } -} - -namespace { -static Registrar<SigmoidImplForward_cpu> registrarSigmoidImplForward_cpu_Float32( - {DataType::Float32, DataType::Float32}, Aidge::SigmoidImpl_cpu_forward_kernel<float, float>); -static Registrar<SigmoidImplForward_cpu> registrarSigmoidImplForward_cpu_Float64( - {DataType::Float64, DataType::Float64}, Aidge::SigmoidImpl_cpu_forward_kernel<double, double>); -} // namespace -} // namespace Aidge - -#endif /* AIDGE_CPU_OPERATOR_SIGMOIDIMPL_FORWARD_KERNEL_H_ */ diff --git a/include/aidge/backend/cpu/operator/SigmoidImpl_kernels.hpp b/include/aidge/backend/cpu/operator/SigmoidImpl_kernels.hpp new file mode 100644 index 0000000000000000000000000000000000000000..dfd71ce0a878efbeb779f3a67ad4ccc762bb8363 --- /dev/null +++ b/include/aidge/backend/cpu/operator/SigmoidImpl_kernels.hpp @@ -0,0 +1,59 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 
2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. + * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#ifndef AIDGE_CPU_OPERATOR_SIGMOIDIMPL_KERNELS_H_ +#define AIDGE_CPU_OPERATOR_SIGMOIDIMPL_KERNELS_H_ + +#include "aidge/utils/Registrar.hpp" + +#include "aidge/backend/cpu/operator/SigmoidImpl.hpp" + +namespace Aidge { +template <class I, class O> +void SigmoidImpl_cpu_forward_kernel(std::size_t inputLenght, + const void* input_, + void* output_) { + + const I* input = static_cast<const I*>(input_); + O* output = static_cast<O*>(output_); + +//#pragma omp parallel for if (inputLenght > 1024) + for (std::size_t i = 0; i < inputLenght; ++i) { + if (input[i] > I(0)) { + output[i] = O(1) / (O(1) + std::exp(-input[i])); + } else { + output[i] = std::exp(input[i]) / (O(1) + std::exp(input[i])); + } + } +} + +template <class O, class GI, class GO> +void SigmoidImpl_cpu_backward_kernel(const std::size_t inputLenght, + const void* output_, const void* grad_output_, + void* grad_input_) { + const O* output = static_cast<const O*>(output_); + const GO* grad_output = static_cast<const GO*>(grad_output_); + GI* grad_input = static_cast<GI*>(grad_input_); + for (std::size_t i = 0; i < inputLenght; ++i) { + grad_input[i] = output[i] * (O(1) - output[i]) * grad_output[i]; + } +} + +// Kernels registration to implementation entry point +REGISTRAR(SigmoidImpl_cpu, + {DataType::Float32}, + {ProdConso::inPlaceModel, Aidge::SigmoidImpl_cpu_forward_kernel<float, float>, Aidge::SigmoidImpl_cpu_backward_kernel<float, float, float>}); +REGISTRAR(SigmoidImpl_cpu, + {DataType::Float64}, + {ProdConso::inPlaceModel, Aidge::SigmoidImpl_cpu_forward_kernel<double, double>, Aidge::SigmoidImpl_cpu_backward_kernel<double, double, double>}); +} // namespace Aidge + +#endif /* AIDGE_CPU_OPERATOR_SIGMOIDIMPL_KERNELS_H_ */ diff --git a/include/aidge/backend/cpu/operator/SliceImpl.hpp 
b/include/aidge/backend/cpu/operator/SliceImpl.hpp index 61aed1553bfbd2e67fc837ec6ea8d80b26ef3558..fd98b38d7117eaa14e35fe3cb89abf95b2913997 100644 --- a/include/aidge/backend/cpu/operator/SliceImpl.hpp +++ b/include/aidge/backend/cpu/operator/SliceImpl.hpp @@ -16,52 +16,25 @@ #include <vector> #include <array> -#include "aidge/backend/OperatorImpl.hpp" +#include "aidge/backend/cpu/operator/OperatorImpl.hpp" #include "aidge/operator/Slice.hpp" #include "aidge/utils/Registrar.hpp" #include "aidge/utils/Types.h" #include "aidge/backend/cpu/data/GetCPUPtr.h" namespace Aidge { -// class Slice_Op; - -// compute kernel registry for forward and backward -class SliceImplForward_cpu - : public Registrable<SliceImplForward_cpu, - std::tuple<DataType, DataType>, - void(const std::vector<std::int64_t>&, +// Operator implementation entry point for the backend +using SliceImpl_cpu = OperatorImpl_cpu<Slice_Op, + void(const std::vector<std::int64_t>&, const std::vector<std::int64_t>&, const std::vector<std::int8_t>&, const std::vector<std::int64_t>&, const std::vector<DimSize_t>&, const void*, - void*)> {}; -class SliceImplBackward_cpu - : public Registrable<SliceImplBackward_cpu, - std::tuple<DataType, DataType>, - void(const std::vector<std::int64_t>&, - const std::vector<std::int64_t>&, - const std::vector<std::int8_t>&, - const std::vector<std::int64_t>&, - const std::vector<DimSize_t>&, - const void*, - void*)> {}; - -class SliceImpl_cpu : public OperatorImpl { -public: - SliceImpl_cpu(const Slice_Op& op) : OperatorImpl(op, "cpu") {} - - static std::unique_ptr<SliceImpl_cpu> create(const Slice_Op& op) { - return std::make_unique<SliceImpl_cpu>(op); - } - - Elts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final; - void forward() override; -}; + void*)>; -namespace { -static Registrar<Slice_Op> registrarSliceImpl_cpu("cpu", Aidge::SliceImpl_cpu::create); -} +// Implementation entry point registration to Operator +REGISTRAR(Slice_Op, "cpu", 
Aidge::SliceImpl_cpu::create); } // namespace Aidge #endif /* __AIDGE_CPU_OPERATOR_SLICEIMPL_H__ */ diff --git a/include/aidge/backend/cpu/operator/SliceImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/SliceImpl_kernels.hpp similarity index 84% rename from include/aidge/backend/cpu/operator/SliceImpl_forward_kernels.hpp rename to include/aidge/backend/cpu/operator/SliceImpl_kernels.hpp index 31e409369cc640bbda9f54c54652af7f72b509b6..1bf4c491723c570fa8bfd1774beca1630d2de9be 100644 --- a/include/aidge/backend/cpu/operator/SliceImpl_forward_kernels.hpp +++ b/include/aidge/backend/cpu/operator/SliceImpl_kernels.hpp @@ -9,8 +9,8 @@ * ********************************************************************************/ -#ifndef AIDGE_CPU_OPERATOR_SLICEIMPL_FORWARD_KERNEL_H_ -#define AIDGE_CPU_OPERATOR_SLICEIMPL_FORWARD_KERNEL_H_ +#ifndef AIDGE_CPU_OPERATOR_SLICEIMPL_KERNELS_H_ +#define AIDGE_CPU_OPERATOR_SLICEIMPL_KERNELS_H_ #include <algorithm> #include <cmath> @@ -88,14 +88,15 @@ void SliceImpl_cpu_forward_kernel(const std::vector<std::int64_t>& starts, } } -namespace { -static Registrar<SliceImplForward_cpu> registrarSliceImplForward_cpu_Float32( - {DataType::Float32, DataType::Float32}, Aidge::SliceImpl_cpu_forward_kernel<float, float>); -static Registrar<SliceImplForward_cpu> registrarSliceImplForward_cpu_Int32( - {DataType::Int32, DataType::Int32}, Aidge::SliceImpl_cpu_forward_kernel<int, int>); -static Registrar<SliceImplForward_cpu> registrarSliceImplForward_cpu_Float64( - {DataType::Float64, DataType::Float64}, Aidge::SliceImpl_cpu_forward_kernel<double, double>); -} // namespace +REGISTRAR(SliceImpl_cpu, + {DataType::Float32}, + {ProdConso::inPlaceModel, Aidge::SliceImpl_cpu_forward_kernel<float, float>, nullptr}); +REGISTRAR(SliceImpl_cpu, + {DataType::Float64}, + {ProdConso::inPlaceModel, Aidge::SliceImpl_cpu_forward_kernel<double, double>, nullptr}); +REGISTRAR(SliceImpl_cpu, + {DataType::Int32}, + {ProdConso::inPlaceModel, 
Aidge::SliceImpl_cpu_forward_kernel<int32_t, int32_t>, nullptr}); } // namespace Aidge -#endif /* AIDGE_CPU_OPERATOR_SLICEIMPL_FORWARD_KERNEL_H_ */ +#endif /* AIDGE_CPU_OPERATOR_SLICEIMPL_KERNELS_H_ */ diff --git a/include/aidge/backend/cpu/operator/SoftmaxImpl.hpp b/include/aidge/backend/cpu/operator/SoftmaxImpl.hpp index 2b2fab485656efdc37ee134cb4ae574b6b403405..ec2c2696ed6e2ba8cad1536519298d9331921c07 100644 --- a/include/aidge/backend/cpu/operator/SoftmaxImpl.hpp +++ b/include/aidge/backend/cpu/operator/SoftmaxImpl.hpp @@ -12,7 +12,7 @@ #ifndef AIDGE_CPU_OPERATOR_SOFTMAXIMPL_H_ #define AIDGE_CPU_OPERATOR_SOFTMAXIMPL_H_ -#include "aidge/backend/OperatorImpl.hpp" +#include "aidge/backend/cpu/operator/OperatorImpl.hpp" #include "aidge/operator/Softmax.hpp" #include "aidge/utils/Registrar.hpp" #include "aidge/utils/Types.h" @@ -21,31 +21,12 @@ #include <vector> namespace Aidge { -// class Softmax_Op; +// Operator implementation entry point for the backend +using SoftmaxImpl_cpu = OperatorImpl_cpu<Softmax_Op, + void(std::size_t, const std::vector<DimSize_t>&, const void*, void*)>; -// compute kernel registry for forward and backward -class SoftmaxImplForward_cpu - : public Registrable<SoftmaxImplForward_cpu, std::tuple<DataType, DataType>, void(std::size_t, const std::vector<DimSize_t>&, const void*, void*)> { -}; -class SoftmaxImplBackward_cpu - : public Registrable<SoftmaxImplBackward_cpu, std::tuple<DataType, DataType>, void(std::size_t, const std::vector<DimSize_t>&, const void*, void*)> { -}; - -class SoftmaxImpl_cpu : public OperatorImpl { -public: - SoftmaxImpl_cpu(const Softmax_Op& op) : OperatorImpl(op, "cpu") {} - - static std::unique_ptr<SoftmaxImpl_cpu> create(const Softmax_Op& op) { - return std::make_unique<SoftmaxImpl_cpu>(op); - } - - Elts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final; - void forward() override; -}; - -namespace { -static Registrar<Softmax_Op> registrarSoftmaxImpl_cpu("cpu", Aidge::SoftmaxImpl_cpu::create); 
-} +// Implementation entry point registration to Operator +REGISTRAR(Softmax_Op, "cpu", Aidge::SoftmaxImpl_cpu::create); } // namespace Aidge #endif /* AIDGE_CPU_OPERATOR_SOFTMAXIMPL_H_ */ diff --git a/include/aidge/backend/cpu/operator/SoftmaxImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/SoftmaxImpl_kernels.hpp similarity index 64% rename from include/aidge/backend/cpu/operator/SoftmaxImpl_forward_kernels.hpp rename to include/aidge/backend/cpu/operator/SoftmaxImpl_kernels.hpp index cc384c38e34d01887fc328d11de383aeef39fb8e..07486a48f1b8cf29f6a6ef8aa934a9decdbafef7 100644 --- a/include/aidge/backend/cpu/operator/SoftmaxImpl_forward_kernels.hpp +++ b/include/aidge/backend/cpu/operator/SoftmaxImpl_kernels.hpp @@ -9,8 +9,8 @@ * ********************************************************************************/ -#ifndef AIDGE_CPU_OPERATOR_SOFTMAXIMPL_FORWARD_KERNEL_H_ -#define AIDGE_CPU_OPERATOR_SOFTMAXIMPL_FORWARD_KERNEL_H_ +#ifndef AIDGE_CPU_OPERATOR_SOFTMAXIMPL_KERNELS_H_ +#define AIDGE_CPU_OPERATOR_SOFTMAXIMPL_KERNELS_H_ #include "aidge/utils/Registrar.hpp" #include <cstddef> @@ -39,30 +39,37 @@ void SoftmaxImpl_cpu_forward_kernel(std::size_t axisIdx, const std::vector<DimSi for (std::size_t i = 0; i < preAxisElems; ++i) { for (std::size_t j = 0; j < postAxisElems; ++j) { + I maxVal = input[i * inputDims[axisIdx] * postAxisElems + j]; + for (std::size_t k = 1; k < inputDims[axisIdx]; ++k) { + std::size_t inIdx = i * inputDims[axisIdx] * postAxisElems + k * postAxisElems + j; + maxVal = std::max(maxVal, input[inIdx]); + } + // Calculate sum of exponentials within the axis I sumExp = 0; for (std::size_t k = 0; k < inputDims[axisIdx]; ++k) { std::size_t inIdx = i * inputDims[axisIdx] * postAxisElems + k * postAxisElems + j; - sumExp += std::exp(input[inIdx]); + sumExp += std::exp(input[inIdx] - maxVal); } // Calculate softmax for the current slice along the axis for (std::size_t k = 0; k < inputDims[axisIdx]; ++k) { std::size_t inIdx = i * 
inputDims[axisIdx] * postAxisElems + k * postAxisElems + j; - output[inIdx] = std::exp(input[inIdx]) / sumExp; + output[inIdx] = std::exp(input[inIdx] - maxVal) / sumExp; } } } } -namespace { -static Registrar<SoftmaxImplForward_cpu> registrarSoftmaxImplForward_cpu_Float32( - {DataType::Float32, DataType::Float32}, Aidge::SoftmaxImpl_cpu_forward_kernel<float, float>); -static Registrar<SoftmaxImplForward_cpu> registrarSoftmaxImplForward_cpu_Int32( - {DataType::Int32, DataType::Int32}, Aidge::SoftmaxImpl_cpu_forward_kernel<int, int>); -static Registrar<SoftmaxImplForward_cpu> registrarSoftmaxImplForward_cpu_Float64( - {DataType::Float64, DataType::Float64}, Aidge::SoftmaxImpl_cpu_forward_kernel<double, double>); -} // namespace +REGISTRAR(SoftmaxImpl_cpu, + {DataType::Float32}, + {ProdConso::inPlaceModel, Aidge::SoftmaxImpl_cpu_forward_kernel<float, float>, nullptr}); +REGISTRAR(SoftmaxImpl_cpu, + {DataType::Float64}, + {ProdConso::inPlaceModel, Aidge::SoftmaxImpl_cpu_forward_kernel<double, double>, nullptr}); +REGISTRAR(SoftmaxImpl_cpu, + {DataType::Int32}, + {ProdConso::inPlaceModel, Aidge::SoftmaxImpl_cpu_forward_kernel<int32_t, int32_t>, nullptr}); } // namespace Aidge -#endif /* AIDGE_CPU_OPERATOR_SOFTMAXIMPL_FORWARD_KERNEL_H_ */ +#endif /* AIDGE_CPU_OPERATOR_SOFTMAXIMPL_KERNELS_H_ */ diff --git a/include/aidge/backend/cpu/operator/SqrtImpl.hpp b/include/aidge/backend/cpu/operator/SqrtImpl.hpp index 1691d951678509274736d558360c8110958820a9..dba75d1c58fb19ab2284ee0e98a32bff7ac58557 100644 --- a/include/aidge/backend/cpu/operator/SqrtImpl.hpp +++ b/include/aidge/backend/cpu/operator/SqrtImpl.hpp @@ -17,39 +17,19 @@ #include <tuple> #include <vector> -#include "aidge/backend/OperatorImpl.hpp" +#include "aidge/backend/cpu/operator/OperatorImpl.hpp" #include "aidge/operator/Sqrt.hpp" #include "aidge/utils/Registrar.hpp" #include "aidge/utils/Types.h" namespace Aidge { +// Operator implementation entry point for the backend +using SqrtImpl_cpu = 
OperatorImpl_cpu<Sqrt_Op, + void(const std::size_t, const void*, void*), + void(const std::size_t, const void*, void*)>; -// compute kernel registry for forward and backward -class SqrtImplForward_cpu - : public Registrable<SqrtImplForward_cpu, std::tuple<DataType, DataType>, void(const std::size_t, const void*, void*)> { -}; -class SqrtImplBackward_cpu - : public Registrable<SqrtImplBackward_cpu, std::tuple<DataType, DataType>, void(const std::size_t, const void*, void*)> { -}; - -class SqrtImpl_cpu : public OperatorImpl { -public: - SqrtImpl_cpu(const Sqrt_Op& op) : OperatorImpl(op, "cpu") {} - - static std::unique_ptr<SqrtImpl_cpu> create(const Sqrt_Op& op) { - return std::make_unique<SqrtImpl_cpu>(op); - } - - Elts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final; - - void forward() override final; - - void backward() override final; -}; - -namespace { -static Registrar<Sqrt_Op> registrarSqrtImpl_cpu("cpu", Aidge::SqrtImpl_cpu::create); -} +// Implementation entry point registration to Operator +REGISTRAR(Sqrt_Op, "cpu", Aidge::SqrtImpl_cpu::create); } // namespace Aidge #endif /* AIDGE_CPU_OPERATOR_SQRTIMPL_H_ */ diff --git a/include/aidge/backend/cpu/operator/SqrtImpl_backward_kernels.hpp b/include/aidge/backend/cpu/operator/SqrtImpl_backward_kernels.hpp deleted file mode 100644 index 9cf5118a5ac81520d7a180b6aba22417ca512890..0000000000000000000000000000000000000000 --- a/include/aidge/backend/cpu/operator/SqrtImpl_backward_kernels.hpp +++ /dev/null @@ -1,46 +0,0 @@ -/******************************************************************************** - * Copyright (c) 2023 CEA-List - * - * This program and the accompanying materials are made available under the - * terms of the Eclipse Public License 2.0 which is available at - * http://www.eclipse.org/legal/epl-2.0. 
- * - * SPDX-License-Identifier: EPL-2.0 - * - ********************************************************************************/ - -#ifndef AIDGE_CPU_OPERATOR_SQRTIMPL_BACKWARD_KERNEL_H_ -#define AIDGE_CPU_OPERATOR_SQRTIMPL_BACKWARD_KERNEL_H_ - -#include <cmath> // std::sqrt -#include <cstddef> // std::size_t - -#include "aidge/utils/Registrar.hpp" - -#include "aidge/backend/cpu/operator/SqrtImpl.hpp" - -namespace Aidge { -template <class I, class O> -void SqrtImpl_cpu_backward_kernel(const std::size_t inputLenght, - const void* input_, - void* output_) { - - const I* input = static_cast<const I*>(input_); - O* output = static_cast<O*>(output_); - - for (std::size_t i = 0; i < inputLenght; ++i) { - output[i] = static_cast<O>(0.5/(std::sqrt(static_cast<float>(input[i])))); - } -} - -namespace { -static Registrar<SqrtImplBackward_cpu> registrarSqrtImplBackward_cpu_Float32( - {DataType::Float32, DataType::Float32}, Aidge::SqrtImpl_cpu_backward_kernel<float, float>); -static Registrar<SqrtImplBackward_cpu> registrarSqrtImplBackward_cpu_Int32( - {DataType::Int32, DataType::Int32}, Aidge::SqrtImpl_cpu_backward_kernel<int, int>); -static Registrar<SqrtImplBackward_cpu> registrarSqrtImplBackward_cpu_Float64( - {DataType::Float64, DataType::Float64}, Aidge::SqrtImpl_cpu_backward_kernel<double, double>); -} // namespace -} // namespace Aidge - -#endif /* AIDGE_CPU_OPERATOR_SQRTIMPL_BACKWARD_KERNEL_H_ */ diff --git a/include/aidge/backend/cpu/operator/SqrtImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/SqrtImpl_forward_kernels.hpp deleted file mode 100644 index 886b978c2345ce555d229d684ba83f952be9e00e..0000000000000000000000000000000000000000 --- a/include/aidge/backend/cpu/operator/SqrtImpl_forward_kernels.hpp +++ /dev/null @@ -1,46 +0,0 @@ -/******************************************************************************** - * Copyright (c) 2023 CEA-List - * - * This program and the accompanying materials are made available under the - * terms of the Eclipse 
Public License 2.0 which is available at - * http://www.eclipse.org/legal/epl-2.0. - * - * SPDX-License-Identifier: EPL-2.0 - * - ********************************************************************************/ - -#ifndef AIDGE_CPU_OPERATOR_SQRTIMPL_FORWARD_KERNEL_H_ -#define AIDGE_CPU_OPERATOR_SQRTIMPL_FORWARD_KERNEL_H_ - -#include <cmath> // std::sqrt -#include <cstddef> // std::size_t - -#include "aidge/utils/Registrar.hpp" - -#include "aidge/backend/cpu/operator/SqrtImpl.hpp" - -namespace Aidge { -template <class I, class O> -void SqrtImpl_cpu_forward_kernel(const std::size_t inputLenght, - const void* input_, - void* output_) { - - const I* input = static_cast<const I*>(input_); - O* output = static_cast<O*>(output_); - - for (std::size_t i = 0; i < inputLenght; ++i) { - output[i] = static_cast<O>(std::sqrt(static_cast<float>(input[i]))); - } -} - -namespace { -static Registrar<SqrtImplForward_cpu> registrarSqrtImplForward_cpu_Float32( - {DataType::Float32, DataType::Float32}, Aidge::SqrtImpl_cpu_forward_kernel<float, float>); -static Registrar<SqrtImplForward_cpu> registrarSqrtImplForward_cpu_Int32( - {DataType::Int32, DataType::Int32}, Aidge::SqrtImpl_cpu_forward_kernel<int, int>); -static Registrar<SqrtImplForward_cpu> registrarSqrtImplForward_cpu_Float64( - {DataType::Float64, DataType::Float64}, Aidge::SqrtImpl_cpu_forward_kernel<double, double>); -} // namespace -} // namespace Aidge - -#endif /* AIDGE_CPU_OPERATOR_SQRTIMPL_FORWARD_KERNEL_H_ */ diff --git a/include/aidge/backend/cpu/operator/SqrtImpl_kernels.hpp b/include/aidge/backend/cpu/operator/SqrtImpl_kernels.hpp new file mode 100644 index 0000000000000000000000000000000000000000..0464119cad60742bc58c79da984b30776bc7932f --- /dev/null +++ b/include/aidge/backend/cpu/operator/SqrtImpl_kernels.hpp @@ -0,0 +1,60 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available 
under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. + * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#ifndef AIDGE_CPU_OPERATOR_SQRTIMPL_KERNELS_H_ +#define AIDGE_CPU_OPERATOR_SQRTIMPL_KERNELS_H_ + +#include <cmath> // std::sqrt +#include <cstddef> // std::size_t + +#include "aidge/utils/Registrar.hpp" + +#include "aidge/backend/cpu/operator/SqrtImpl.hpp" + +namespace Aidge { +template <class I, class O> +void SqrtImpl_cpu_forward_kernel(const std::size_t inputLenght, + const void* input_, + void* output_) { + + const I* input = static_cast<const I*>(input_); + O* output = static_cast<O*>(output_); + + for (std::size_t i = 0; i < inputLenght; ++i) { + output[i] = static_cast<O>(std::sqrt(static_cast<float>(input[i]))); + } +} + +template <class I, class O> +void SqrtImpl_cpu_backward_kernel(const std::size_t inputLenght, + const void* input_, + void* output_) { + + const I* input = static_cast<const I*>(input_); + O* output = static_cast<O*>(output_); + + for (std::size_t i = 0; i < inputLenght; ++i) { + output[i] = static_cast<O>(0.5/(std::sqrt(static_cast<float>(input[i])))); + } +} + +REGISTRAR(SqrtImpl_cpu, + {DataType::Float32}, + {ProdConso::inPlaceModel, Aidge::SqrtImpl_cpu_forward_kernel<float, float>, Aidge::SqrtImpl_cpu_backward_kernel<float, float>}); +REGISTRAR(SqrtImpl_cpu, + {DataType::Float64}, + {ProdConso::inPlaceModel, Aidge::SqrtImpl_cpu_forward_kernel<double, double>, Aidge::SqrtImpl_cpu_backward_kernel<double, double>}); +REGISTRAR(SqrtImpl_cpu, + {DataType::Int32}, + {ProdConso::inPlaceModel, Aidge::SqrtImpl_cpu_forward_kernel<int32_t, int32_t>, Aidge::SqrtImpl_cpu_backward_kernel<int32_t, int32_t>}); +} // namespace Aidge + +#endif /* AIDGE_CPU_OPERATOR_SQRTIMPL_KERNELS_H_ */ diff --git a/include/aidge/backend/cpu/operator/SubImpl.hpp b/include/aidge/backend/cpu/operator/SubImpl.hpp index 
15c028ae6289f39e0b6e6fd74e51e138b1f2675c..2bb22bda74edf7db09404fd5613b6714ddcdf513 100644 --- a/include/aidge/backend/cpu/operator/SubImpl.hpp +++ b/include/aidge/backend/cpu/operator/SubImpl.hpp @@ -12,7 +12,7 @@ #ifndef AIDGE_CPU_OPERATOR_SUBIMPL_H_ #define AIDGE_CPU_OPERATOR_SUBIMPL_H_ -#include "aidge/backend/OperatorImpl.hpp" +#include "aidge/backend/cpu/operator/OperatorImpl.hpp" #include "aidge/operator/Sub.hpp" #include "aidge/utils/Registrar.hpp" #include "aidge/utils/Types.h" @@ -21,31 +21,12 @@ #include <vector> namespace Aidge { -// class Sub_Op; +// Operator implementation entry point for the backend +using SubImpl_cpu = OperatorImpl_cpu<Sub_Op, + void(const std::vector<std::size_t>&, const std::vector<std::size_t>&, const std::vector<std::size_t>&, const void*, const void*,void*)>; -// compute kernel registry for forward and backward -class SubImplForward_cpu - : public Registrable<SubImplForward_cpu, std::tuple<DataType, DataType, DataType>, void(const std::vector<std::size_t>&, const std::vector<std::size_t>&, const std::vector<std::size_t>&, const void*, const void*,void*)> { -}; -class SubImplBackward_cpu - : public Registrable<SubImplBackward_cpu, std::tuple<DataType, DataType, DataType>, void(const std::vector<std::size_t>&, const std::vector<std::size_t>&, const std::vector<std::size_t>&, const void*, const void*, void*)> { -}; - -class SubImpl_cpu : public OperatorImpl { -public: - SubImpl_cpu(const Sub_Op& op) : OperatorImpl(op, "cpu") {} - - static std::unique_ptr<SubImpl_cpu> create(const Sub_Op& op) { - return std::make_unique<SubImpl_cpu>(op); - } - - Elts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final; - void forward() override; -}; - -namespace { -static Registrar<Sub_Op> registrarSubImpl_cpu("cpu", Aidge::SubImpl_cpu::create); -} +// Implementation entry point registration to Operator +REGISTRAR(Sub_Op, "cpu", Aidge::SubImpl_cpu::create); } // namespace Aidge #endif /* AIDGE_CPU_OPERATOR_SUBIMPL_H_ */ diff 
--git a/include/aidge/backend/cpu/operator/SubImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/SubImpl_kernels.hpp similarity index 62% rename from include/aidge/backend/cpu/operator/SubImpl_forward_kernels.hpp rename to include/aidge/backend/cpu/operator/SubImpl_kernels.hpp index 10e6f58bb44b63f2d8712dc0aa64e0660f3356b2..0486ed2105b23e95f9cdfcda578e14900fcb2c8e 100644 --- a/include/aidge/backend/cpu/operator/SubImpl_forward_kernels.hpp +++ b/include/aidge/backend/cpu/operator/SubImpl_kernels.hpp @@ -9,8 +9,8 @@ * ********************************************************************************/ -#ifndef AIDGE_CPU_OPERATOR_SUBIMPL_FORWARD_KERNEL_H_ -#define AIDGE_CPU_OPERATOR_SUBIMPL_FORWARD_KERNEL_H_ +#ifndef AIDGE_CPU_OPERATOR_SUBIMPL_KERNELS_H_ +#define AIDGE_CPU_OPERATOR_SUBIMPL_KERNELS_H_ #include "aidge/utils/Registrar.hpp" @@ -49,20 +49,19 @@ void SubImpl_cpu_forward_kernel(const std::vector<std::size_t>& input1Dims, } } -namespace { -static Registrar<SubImplForward_cpu> registrarSubImplForward_cpu_Float32( - {DataType::Float32, DataType::Float32, DataType::Float32}, - Aidge::SubImpl_cpu_forward_kernel<float, float, float>); -static Registrar<SubImplForward_cpu> registrarSubImplForward_cpu_Float64( - {DataType::Float64, DataType::Float64, DataType::Float64}, - Aidge::SubImpl_cpu_forward_kernel<double, double, double>); -static Registrar<SubImplForward_cpu> registrarSubImplForward_cpu_Int32( - {DataType::Int32, DataType::Int32, DataType::Int32}, - Aidge::SubImpl_cpu_forward_kernel<std::int32_t, std::int32_t, std::int32_t>); -static Registrar<SubImplForward_cpu> registrarSubImplForward_cpu_Int64( - {DataType::Int64, DataType::Int64, DataType::Int64}, - Aidge::SubImpl_cpu_forward_kernel<std::int64_t, std::int64_t, std::int64_t>); -} // namespace +// Kernels registration to implementation entry point +REGISTRAR(SubImpl_cpu, + {DataType::Float32}, + {ProdConso::inPlaceModel, Aidge::SubImpl_cpu_forward_kernel<float, float, float>, nullptr}); 
+REGISTRAR(SubImpl_cpu, + {DataType::Float64}, + {ProdConso::inPlaceModel, Aidge::SubImpl_cpu_forward_kernel<double, double, double>, nullptr}); +REGISTRAR(SubImpl_cpu, + {DataType::Int32}, + {ProdConso::inPlaceModel, Aidge::SubImpl_cpu_forward_kernel<std::int32_t, std::int32_t, std::int32_t>, nullptr}); +REGISTRAR(SubImpl_cpu, + {DataType::Int64}, + {ProdConso::inPlaceModel, Aidge::SubImpl_cpu_forward_kernel<std::int64_t, std::int64_t, std::int64_t>, nullptr}); } // namespace Aidge -#endif /* AIDGE_CPU_OPERATOR_SUBIMPL_FORWARD_KERNEL_H_ */ +#endif /* AIDGE_CPU_OPERATOR_SUBIMPL_KERNELS_H_ */ diff --git a/include/aidge/backend/cpu/operator/TanhImpl.hpp b/include/aidge/backend/cpu/operator/TanhImpl.hpp index 0bf851e77d94c160c0362301df33d682347daf0c..b1c2217bd29805eca2cf7b7906316756b75a74e0 100644 --- a/include/aidge/backend/cpu/operator/TanhImpl.hpp +++ b/include/aidge/backend/cpu/operator/TanhImpl.hpp @@ -12,7 +12,7 @@ #ifndef AIDGE_CPU_OPERATOR_TANHIMPL_H_ #define AIDGE_CPU_OPERATOR_TANHIMPL_H_ -#include "aidge/backend/OperatorImpl.hpp" +#include "aidge/backend/cpu/operator/OperatorImpl.hpp" #include "aidge/operator/Tanh.hpp" #include "aidge/utils/Registrar.hpp" #include "aidge/utils/Types.h" @@ -21,34 +21,13 @@ #include <vector> namespace Aidge { -// class Tanh_Op; +// Operator implementation entry point for the backend +using TanhImpl_cpu = OperatorImpl_cpu<Tanh_Op, + void(const std::size_t, const void*, void*), + void(const std::size_t, const void*, const void*, void*)>; -// compute kernel registry for forward and backward -class TanhImplForward_cpu - : public Registrable<TanhImplForward_cpu, std::tuple<DataType, DataType>, void(const std::size_t, const void*, void*)> { -}; -class TanhImplBackward_cpu - : public Registrable<TanhImplBackward_cpu, std::tuple<DataType, DataType, DataType>, void(const std::size_t, const void*, const void*, void*)> { -}; - -class TanhImpl_cpu : public OperatorImpl { -public: - TanhImpl_cpu(const Tanh_Op& op) : OperatorImpl(op, "cpu") 
{} - - static std::unique_ptr<TanhImpl_cpu> create(const Tanh_Op& op) { - return std::make_unique<TanhImpl_cpu>(op); - } - - Elts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final; - - void forward() override final; - - void backward() override final; -}; - -namespace { -static Registrar<Tanh_Op> registrarTanhImpl_cpu("cpu", Aidge::TanhImpl_cpu::create); -} +// Implementation entry point registration to Operator +REGISTRAR(Tanh_Op, "cpu", Aidge::TanhImpl_cpu::create); } // namespace Aidge #endif /* AIDGE_CPU_OPERATOR_TANHIMPL_H_ */ diff --git a/include/aidge/backend/cpu/operator/TanhImpl_backward_kernels.hpp b/include/aidge/backend/cpu/operator/TanhImpl_kernels.hpp similarity index 51% rename from include/aidge/backend/cpu/operator/TanhImpl_backward_kernels.hpp rename to include/aidge/backend/cpu/operator/TanhImpl_kernels.hpp index 3a13c2cad21c35822fc6248590550e4716ee046d..fdcac210484b11f2220dcc2a6813efed503d1913 100644 --- a/include/aidge/backend/cpu/operator/TanhImpl_backward_kernels.hpp +++ b/include/aidge/backend/cpu/operator/TanhImpl_kernels.hpp @@ -9,15 +9,28 @@ * ********************************************************************************/ -#ifndef AIDGE_CPU_OPERATOR_TANHIMPL_BACKWARD_KERNEL_H_ -#define AIDGE_CPU_OPERATOR_TANHIMPL_BACKWARD_KERNEL_H_ +#ifndef AIDGE_CPU_OPERATOR_TANHIMPL_KERNELS_H_ +#define AIDGE_CPU_OPERATOR_TANHIMPL_KERNELS_H_ -#include <cstddef> // std::size_t +#include "aidge/utils/Registrar.hpp" #include "aidge/backend/cpu/operator/TanhImpl.hpp" -#include "aidge/utils/Registrar.hpp" namespace Aidge { +template <class I, class O> +void TanhImpl_cpu_forward_kernel(std::size_t inputLenght, + const void* input_, + void* output_) { + + const I* input = static_cast<const I*>(input_); + O* output = static_cast<O*>(output_); + +//#pragma omp parallel for if (inputLenght > 1024) + for (std::size_t i = 0; i < inputLenght; ++i) { + output[i] = std::tanh(input[i]); + } +} + template <class O, class GI, class GO> void 
TanhImpl_cpu_backward_kernel(const std::size_t inputLenght, const void* output_, const void* grad_output_, @@ -30,14 +43,13 @@ void TanhImpl_cpu_backward_kernel(const std::size_t inputLenght, } } -namespace { -static Registrar<TanhImplBackward_cpu> registrarTanhImplBackward_cpu_Float32( - {DataType::Float32, DataType::Float32, DataType::Float32}, - Aidge::TanhImpl_cpu_backward_kernel<float, float, float>); -static Registrar<TanhImplBackward_cpu> registrarTanhImplBackward_cpu_Float64( - {DataType::Float64, DataType::Float64, DataType::Float64}, - Aidge::TanhImpl_cpu_backward_kernel<double, double, double>); -} // namespace +// Kernels registration to implementation entry point +REGISTRAR(TanhImpl_cpu, + {DataType::Float32}, + {ProdConso::inPlaceModel, Aidge::TanhImpl_cpu_forward_kernel<float, float>, Aidge::TanhImpl_cpu_backward_kernel<float, float, float>}); +REGISTRAR(TanhImpl_cpu, + {DataType::Float64}, + {ProdConso::inPlaceModel, Aidge::TanhImpl_cpu_forward_kernel<double, double>, Aidge::TanhImpl_cpu_backward_kernel<double, double, double>}); } // namespace Aidge -#endif /* AIDGE_CPU_OPERATOR_TANHIMPL_BACKWARD_KERNEL_H_ */ +#endif /* AIDGE_CPU_OPERATOR_TANHIMPL_KERNELS_H_ */ diff --git a/project_name.txt b/project_name.txt deleted file mode 100644 index f8a086fc063978638db5a0fcfe1dc2e5c9d0c1b7..0000000000000000000000000000000000000000 --- a/project_name.txt +++ /dev/null @@ -1 +0,0 @@ -aidge_backend_cpu \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000000000000000000000000000000000000..9dbdbede6083ea2ededd5a861449a2dfbea6f40e --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,605 @@ +[project] +name = "aidge_backend_cpu" +description="CPU implementation of operators of the AIDGE framework" +dependencies = [ + "numpy", +] +requires-python = ">= 3.7" +readme = "README.md" +license = { file = "LICENSE" } +classifiers = [ + "Development Status :: 2 - Pre-Alpha", + "Programming Language :: Python :: 3" + ] +dynamic 
= ["version"] # defined in tool.setuptools_scm + +[build-system] +requires = [ + "setuptools>=64", + "setuptools_scm[toml]==7.1.0", + "cmake>=3.18.4.post1" +] +build-backend = "setuptools.build_meta" + +##################################################### +# SETUPTOOLS +[tool.setuptools] +[tool.setuptools.packages.find] +where = ["."] # list of folders that contain the packages (["."] by default) +include = ["aidge_backend_cpu*"] # package names should match these glob patterns (["*"] by default) +exclude = ["aidge_backend_cpu.unit_tests*"] # exclude packages matching these glob patterns (empty by default) +namespaces = false # to disable scanning PEP 420 namespaces (true by default) +# SETUPTOOLS_SCM +[tool.setuptools_scm] +write_to = "aidge_backend_cpu/_version.py" + +##################################################### +# CIBUILDWHEEL +[tool.cibuildwheel] +build-frontend = "build" +test-requires = "pytest" +test-command = "pytest {project}/aidge_backend_cpu/unit_tests" +# uncomment to run cibuildwheel locally on selected distros +#build=[ +# "cp38-manylinux_x86_64", +# "cp39-manylinux_x86_64", +# "cp310-manylinux_x86_64" +# "cp38-win_amd64", +#] +### AIDGE DEPENDENCIES DECLARATION +[tool.cibuildwheel.environment] +AIDGE_DEPENDENCIES = "aidge_core" # format => "dep_1 dep_2 ... dep_n" +AIDGE_INSTALL="/AIDGE_INSTALL_CIBUILDWHEEL" +[tool.cibuildwheel.linux] +before-build = [ + "bash .gitlab/ci/cibuildwheel_build_deps_before_build_wheel.sh /host" +] +before-test = [ + "bash .gitlab/ci/cibuildwheel_build_deps_before_build_wheel.sh /host" +] +[tool.cibuildwheel.windows] +before-build = [ + "powershell -File .\\.gitlab\\ci\\cibuildwheel_build_deps_before_build_wheel.ps1" +] +before-test = [ + "powershell -File .\\.gitlab\\ci\\cibuildwheel_build_deps_before_build_wheel.ps1" +] + + +##################################################### +# PYLINT +[tool.pylint.main] +# Analyse import fallback blocks. 
This can be used to support both Python 2 and 3 +# compatible code, which means that the block might have code that exists only in +# one or another interpreter, leading to false positives when analysed. +# analyse-fallback-blocks = + +# Clear in-memory caches upon conclusion of linting. Useful if running pylint in +# a server-like mode. +# clear-cache-post-run = + +# Always return a 0 (non-error) status code, even if lint errors are found. This +# is primarily useful in continuous integration scripts. +# exit-zero = + +# A comma-separated list of package or module names from where C extensions may +# be loaded. Extensions are loading into the active Python interpreter and may +# run arbitrary code. +extension-pkg-allow-list = ["aidge_core", "aidge_backend_cpu", "torch", "tensorflow"] + +# A comma-separated list of package or module names from where C extensions may +# be loaded. Extensions are loading into the active Python interpreter and may +# run arbitrary code. (This is an alternative name to extension-pkg-allow-list +# for backward compatibility.) +# extension-pkg-whitelist = + +# Return non-zero exit code if any of these messages/categories are detected, +# even if score is above --fail-under value. Syntax same as enable. Messages +# specified are enabled, while categories only check already-enabled messages. +# fail-on = + +# Specify a score threshold under which the program will exit with error. +# fail-under = + +# Interpret the stdin as a python script, whose filename needs to be passed as +# the module_or_package argument. +# from-stdin = + +# Files or directories to be skipped. They should be base names, not paths. +ignore = ["CVS"] + +# Add files or directories matching the regular expressions patterns to the +# ignore-list. The regex matches against paths and can be in Posix or Windows +# format. Because '\\' represents the directory delimiter on Windows systems, it +# can't be used as an escape character. 
+# ignore-paths = + +# Files or directories matching the regular expression patterns are skipped. The +# regex matches against base names, not paths. The default value ignores Emacs +# file locks +# ignore-patterns = + +# List of module names for which member attributes should not be checked (useful +# for modules/projects where namespaces are manipulated during runtime and thus +# existing member attributes cannot be deduced by static analysis). It supports +# qualified module names, as well as Unix pattern matching. +ignored-modules = ["aidge_core", "aidge_backend_cpu"] + +# Python code to execute, usually for sys.path manipulation such as +# pygtk.require(). +# init-hook = + +# Use multiple processes to speed up Pylint. Specifying 0 will auto-detect the +# number of processors available to use, and will cap the count on Windows to +# avoid hangs. +jobs = 1 +# Control the amount of potential inferred values when inferring a single object. +# This can help the performance when dealing with large functions or complex, +# nested conditions. +limit-inference-results = 100 + +# List of plugins (as comma separated values of python module names) to load, +# usually to register additional checkers. +# load-plugins = + +# Pickle collected data for later comparisons. +persistent = true + +# Minimum Python version to use for version dependent checks. Will default to the +# version used to run pylint. +py-version = "3.11" + +# Discover python modules and packages in the file system subtree. +# recursive = + +# Add paths to the list of the source roots. Supports globbing patterns. The +# source root is an absolute path or a path relative to the current working +# directory used to determine a package namespace for modules located under the +# source root. +# source-roots = + +# When enabled, pylint would attempt to guess common misconfiguration and emit +# user-friendly hints instead of false-positive error messages. 
+suggestion-mode = true + +# Allow loading of arbitrary C extensions. Extensions are imported into the +# active Python interpreter and may run arbitrary code. +# unsafe-load-any-extension = + +[tool.pylint.basic] +# Naming style matching correct argument names. +argument-naming-style = "snake_case" + +# Regular expression matching correct argument names. Overrides argument-naming- +# style. If left empty, argument names will be checked with the set naming style. +# argument-rgx = + +# Naming style matching correct attribute names. +attr-naming-style = "snake_case" + +# Regular expression matching correct attribute names. Overrides attr-naming- +# style. If left empty, attribute names will be checked with the set naming +# style. +# attr-rgx = + +# Bad variable names which should always be refused, separated by a comma. +bad-names = ["foo", "bar", "baz", "toto", "tutu", "tata"] + +# Bad variable names regexes, separated by a comma. If names match any regex, +# they will always be refused +# bad-names-rgxs = + +# Naming style matching correct class attribute names. +class-attribute-naming-style = "any" + +# Regular expression matching correct class attribute names. Overrides class- +# attribute-naming-style. If left empty, class attribute names will be checked +# with the set naming style. +# class-attribute-rgx = + +# Naming style matching correct class constant names. +class-const-naming-style = "UPPER_CASE" + +# Regular expression matching correct class constant names. Overrides class- +# const-naming-style. If left empty, class constant names will be checked with +# the set naming style. +# class-const-rgx = + +# Naming style matching correct class names. +class-naming-style = "PascalCase" + +# Regular expression matching correct class names. Overrides class-naming-style. +# If left empty, class names will be checked with the set naming style. +# class-rgx = + +# Naming style matching correct constant names. 
+const-naming-style = "UPPER_CASE" + +# Regular expression matching correct constant names. Overrides const-naming- +# style. If left empty, constant names will be checked with the set naming style. +# const-rgx = + +# Minimum line length for functions/classes that require docstrings, shorter ones +# are exempt. +docstring-min-length = -1 + +# Naming style matching correct function names. +function-naming-style = "snake_case" + +# Regular expression matching correct function names. Overrides function-naming- +# style. If left empty, function names will be checked with the set naming style. +# function-rgx = + +# Good variable names which should always be accepted, separated by a comma. +good-names = ["i", "j", "k", "ex", "Run", "_"] + +# Good variable names regexes, separated by a comma. If names match any regex, +# they will always be accepted +# good-names-rgxs = + +# Include a hint for the correct naming format with invalid-name. +# include-naming-hint = + +# Naming style matching correct inline iteration names. +inlinevar-naming-style = "any" + +# Regular expression matching correct inline iteration names. Overrides +# inlinevar-naming-style. If left empty, inline iteration names will be checked +# with the set naming style. +# inlinevar-rgx = + +# Naming style matching correct method names. +method-naming-style = "snake_case" + +# Regular expression matching correct method names. Overrides method-naming- +# style. If left empty, method names will be checked with the set naming style. +# method-rgx = + +# Naming style matching correct module names. +module-naming-style = "snake_case" + +# Regular expression matching correct module names. Overrides module-naming- +# style. If left empty, module names will be checked with the set naming style. +# module-rgx = + +# Colon-delimited sets of names that determine each other's naming style when the +# name regexes allow several styles. 
+# name-group = + +# Regular expression which should only match function or class names that do not +# require a docstring. +no-docstring-rgx = "^_" + +# List of decorators that produce properties, such as abc.abstractproperty. Add +# to this list to register other decorators that produce valid properties. These +# decorators are taken in consideration only for invalid-name. +property-classes = ["abc.abstractproperty"] + +# Regular expression matching correct type alias names. If left empty, type alias +# names will be checked with the set naming style. +# typealias-rgx = + +# Regular expression matching correct type variable names. If left empty, type +# variable names will be checked with the set naming style. +# typevar-rgx = + +# Naming style matching correct variable names. +variable-naming-style = "snake_case" + +# Regular expression matching correct variable names. Overrides variable-naming- +# style. If left empty, variable names will be checked with the set naming style. +# variable-rgx = + +[tool.pylint.classes] +# Warn about protected attribute access inside special methods +# check-protected-access-in-special-methods = + +# List of method names used to declare (i.e. assign) instance attributes. +defining-attr-methods = ["__init__", "__new__", "setUp", "__post_init__"] + +# List of member names, which should be excluded from the protected access +# warning. +exclude-protected = ["_asdict", "_fields", "_replace", "_source", "_make"] + +# List of valid names for the first argument in a class method. +valid-classmethod-first-arg = ["cls"] + +# List of valid names for the first argument in a metaclass class method. 
+valid-metaclass-classmethod-first-arg = ["cls"] + +[tool.pylint.design] +# List of regular expressions of class ancestor names to ignore when counting +# public methods (see R0903) +# exclude-too-few-public-methods = + +# List of qualified class names to ignore when counting class parents (see R0901) +# ignored-parents = + +# Maximum number of arguments for function / method. +max-args = 5 + +# Maximum number of attributes for a class (see R0902). +max-attributes = 7 + +# Maximum number of boolean expressions in an if statement (see R0916). +max-bool-expr = 5 + +# Maximum number of branch for function / method body. +max-branches = 12 + +# Maximum number of locals for function / method body. +max-locals = 15 + +# Maximum number of parents for a class (see R0901). +max-parents = 7 + +# Maximum number of public methods for a class (see R0904). +max-public-methods = 20 + +# Maximum number of return / yield for function / method body. +max-returns = 6 + +# Maximum number of statements in function / method body. +max-statements = 50 + +# Minimum number of public methods for a class (see R0903). +min-public-methods = 2 + +[tool.pylint.exceptions] +# Exceptions that will emit a warning when caught. +overgeneral-exceptions = ["BaseException", "Exception"] + +[tool.pylint.format] +# Expected format of line ending, e.g. empty (any line ending), LF or CRLF. +# expected-line-ending-format = + +# Regexp for a line that is allowed to be longer than the limit. +ignore-long-lines = "^\\s*(# )?<?https?://\\S+>?$" + +# Number of spaces of indent required inside a hanging or continued line. +indent-after-paren = 4 + +# String used as indentation unit. This is usually " " (4 spaces) or "\t" (1 +# tab). +indent-string = " " + +# Maximum number of characters on a single line. +max-line-length = 200 + +# Maximum number of lines in a module. +max-module-lines = 1000 + +# Allow the body of a class to be on the same line as the declaration if body +# contains single statement. 
+# single-line-class-stmt = + +# Allow the body of an if to be on the same line as the test if there is no else. +# single-line-if-stmt = + +[tool.pylint.imports] +# List of modules that can be imported at any level, not just the top level one. +# allow-any-import-level = + +# Allow explicit reexports by alias from a package __init__. +# allow-reexport-from-package = + +# Allow wildcard imports from modules that define __all__. +# allow-wildcard-with-all = + +# Deprecated modules which should not be used, separated by a comma. +# deprecated-modules = + +# Output a graph (.gv or any supported image format) of external dependencies to +# the given file (report RP0402 must not be disabled). +# ext-import-graph = + +# Output a graph (.gv or any supported image format) of all (i.e. internal and +# external) dependencies to the given file (report RP0402 must not be disabled). +# import-graph = + +# Output a graph (.gv or any supported image format) of internal dependencies to +# the given file (report RP0402 must not be disabled). +# int-import-graph = + +# Force import order to recognize a module as part of the standard compatibility +# libraries. +# known-standard-library = + +# Force import order to recognize a module as part of a third party library. +known-third-party = ["enchant"] + +# Couples of modules and preferred modules, separated by a comma. +# preferred-modules = + +[tool.pylint.logging] +# The type of string formatting that logging methods do. `old` means using % +# formatting, `new` is for `{}` formatting. +logging-format-style = "old" + +# Logging modules to check that the string format arguments are in logging +# function parameter format. +logging-modules = ["logging"] + +[tool.pylint."messages control"] +# Only show warnings with the listed confidence levels. Leave empty to show all. +# Valid levels: HIGH, CONTROL_FLOW, INFERENCE, INFERENCE_FAILURE, UNDEFINED. 
+confidence = ["HIGH", "CONTROL_FLOW", "INFERENCE", "INFERENCE_FAILURE", "UNDEFINED"] +# Disable the message, report, category or checker with the given id(s). You can +# either give multiple identifiers separated by comma (,) or put this option +# multiple times (only on the command line, not in the configuration file where +# it should appear only once). You can also use "--disable=all" to disable +# everything first and then re-enable specific checks. For example, if you want +# to run only the similarities checker, you can use "--disable=all +# --enable=similarities". If you want to run only the classes checker, but have +# no Warning level messages displayed, use "--disable=all --enable=classes +# --disable=W". +disable = ["raw-checker-failed", "bad-inline-option", "locally-disabled", "file-ignored", "suppressed-message", "useless-suppression", "deprecated-pragma", "use-symbolic-message-instead", "use-implicit-booleaness-not-comparison-to-string", "use-implicit-booleaness-not-comparison-to-zero", "too-many-locals", "missing-class-docstring", "missing-function-docstring", "too-many-arguments", "protected-access", "too-many-branches", "too-many-ancestors", "wrong-import-order", "wrong-import-position"] +# Enable the message, report, category or checker with the given id(s). You can +# either give multiple identifier separated by comma (,) or put this option +# multiple time (only on the command line, not in the configuration file where it +# should appear only once). See also the "--disable" option for examples. +enable = ["c-extension-no-member"] +[tool.pylint.method_args] +# List of qualified names (i.e., library.method) which require a timeout +# parameter e.g. 
'requests.api.get,requests.api.post' +timeout-methods = ["requests.api.delete", "requests.api.get", "requests.api.head", "requests.api.options", "requests.api.patch", "requests.api.post", "requests.api.put", "requests.api.request"] + +[tool.pylint.miscellaneous] +# List of note tags to take in consideration, separated by a comma. +notes = ["FIXME", "XXX", "TODO"] + +# Regular expression of note tags to take in consideration. +# notes-rgx = + +[tool.pylint.refactoring] +# Maximum number of nested blocks for function / method body +max-nested-blocks = 5 + +# Complete name of functions that never returns. When checking for inconsistent- +# return-statements if a never returning function is called then it will be +# considered as an explicit return statement and no message will be printed. +never-returning-functions = ["sys.exit", "argparse.parse_error"] + +# Let 'consider-using-join' be raised when the separator to join on would be non- +# empty (resulting in expected fixes of the type: ``"- " + " - ".join(items)``) +suggest-join-with-non-empty-separator = true + +[tool.pylint.reports] +# Python expression which should return a score less than or equal to 10. You +# have access to the variables 'fatal', 'error', 'warning', 'refactor', +# 'convention', and 'info' which contain the number of messages in each category, +# as well as 'statement' which is the total number of statements analyzed. This +# score is used by the global evaluation report (RP0004). +evaluation = "10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10)" + +# Template used to display messages. This is a python new-style format string +# used to format the message information. See doc for all details. +# msg-template = + +# Set the output format. Available formats are: text, parseable, colorized, json2 +# (improved json format), json (old json format) and msvs (visual studio). You +# can also give a reporter class, e.g. mypackage.mymodule.MyReporterClass. 
+# output-format = + +# Tells whether to display a full report or only the messages. +# reports = + +# Activate the evaluation score. +score = true + +[tool.pylint.similarities] +# Comments are removed from the similarity computation +ignore-comments = true + +# Docstrings are removed from the similarity computation +ignore-docstrings = true + +# Imports are removed from the similarity computation +# ignore-imports = + +# Signatures are removed from the similarity computation +# ignore-signatures = + +# Minimum lines number of a similarity. +min-similarity-lines = 4 + +[tool.pylint.spelling] +# Limits count of emitted suggestions for spelling mistakes. +max-spelling-suggestions = 4 + +# Spelling dictionary name. No available dictionaries : You need to install both +# the python package and the system dependency for enchant to work. +# spelling-dict = + +# List of comma separated words that should be considered directives if they +# appear at the beginning of a comment and should not be checked. +spelling-ignore-comment-directives = "fmt: on,fmt: off,noqa:,noqa,nosec,isort:skip,mypy:" + +# List of comma separated words that should not be checked. +# spelling-ignore-words = + +# A path to a file that contains the private dictionary; one word per line. +# spelling-private-dict-file = + +# Tells whether to store unknown words to the private dictionary (see the +# --spelling-private-dict-file option) instead of raising a message. +# spelling-store-unknown-words = + +[tool.pylint.typecheck] +# List of decorators that produce context managers, such as +# contextlib.contextmanager. Add to this list to register other decorators that +# produce valid context managers. +contextmanager-decorators = ["contextlib.contextmanager"] + +# List of members which are set dynamically and missed by pylint inference +# system, and so shouldn't trigger E1101 when accessed. Python regular +# expressions are accepted. 
+# generated-members = + +# Tells whether missing members accessed in mixin class should be ignored. A +# class is considered mixin if its name matches the mixin-class-rgx option. +# Tells whether to warn about missing members when the owner of the attribute is +# inferred to be None. +ignore-none = true +# This flag controls whether pylint should warn about no-member and similar +# checks whenever an opaque object is returned when inferring. The inference can +# return multiple potential results while evaluating a Python object, but some +# branches might not be evaluated, which results in partial inference. In that +# case, it might be useful to still emit no-member and other checks for the rest +# of the inferred objects. +ignore-on-opaque-inference = true + +# List of symbolic message names to ignore for Mixin members. +ignored-checks-for-mixins = ["no-member", "not-async-context-manager", "not-context-manager", "attribute-defined-outside-init"] + +# List of class names for which member attributes should not be checked (useful +# for classes with dynamically set attributes). This supports the use of +# qualified names. +ignored-classes = ["optparse.Values", "thread._local", "_thread._local", "aidge.global_variables", "aidge.cells.abstract_cell.Trainable", "torch", "tensorflow"] + +# Show a hint with possible names when a member name was not found. The aspect of +# finding the hint is based on edit distance. +missing-member-hint = true + +# The minimum edit distance a name should have in order to be considered a +# similar match for a missing member name. +missing-member-hint-distance = 1 + +# The total number of similar names that should be taken in consideration when +# showing a hint for a missing member. +missing-member-max-choices = 1 + +# Regex pattern to define which classes are considered mixins. +mixin-class-rgx = ".*[Mm]ixin" + +# List of decorators that change the signature of a decorated function. 
+# signature-mutators = + +[tool.pylint.variables] +# List of additional names supposed to be defined in builtins. Remember that you +# should avoid defining new builtins when possible. +# additional-builtins = + +# Tells whether unused global variables should be treated as a violation. +allow-global-unused-variables = true + +# List of names allowed to shadow builtins +# allowed-redefined-builtins = + +# List of strings which can identify a callback function by name. A callback name +# must start or end with one of those strings. +callbacks = ["cb_", "_cb"] + +# A regular expression matching the name of dummy variables (i.e. expected to not +# be used). +dummy-variables-rgx = "_+$|(_[a-zA-Z0-9_]*[a-zA-Z0-9]+?$)|dummy|^ignored_|^unused_" + +# Argument names that match this expression will be ignored. +ignored-argument-names = "_.*|^ignored_|^unused_" + +# Tells whether we should check for unused import in __init__ files. +# init-import = + +# List of qualified module names which can have objects that can redefine +# builtins. +redefining-builtins-modules = ["six.moves", "past.builtins", "future.builtins", "builtins", "io"] diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index 24ce15ab7ead32f98c7ac3edcd34bb2010ff4326..0000000000000000000000000000000000000000 --- a/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -numpy diff --git a/setup.py b/setup.py index 80500f3165dd87eb7b6dd73c78b89806cc8a874a..22cbd9732c8b9e1099c3e322032e8377f6d4506b 100644 --- a/setup.py +++ b/setup.py @@ -1,51 +1,39 @@ -#!/usr/bin/env python3 -""" Aidge - -#TODO To change -POC of the next framework named Aidge -""" - -DOCLINES = (__doc__ or '').split("\n") - +#! 
/usr/bin/env python3 import sys import os -# Python supported version checks -if sys.version_info[:2] < (3, 7): - raise RuntimeError("Python version >= 3.7 required.") - - -CLASSIFIERS = """\ -Development Status :: 2 - Pre-Alpha -""" - import shutil import pathlib -import subprocess import multiprocessing from math import ceil from setuptools import setup, Extension -from setuptools import find_packages from setuptools.command.build_ext import build_ext -def get_project_name() -> str: - return open(pathlib.Path().absolute() / "project_name.txt", "r").read() -def get_project_version() -> str: - aidge_root = pathlib.Path().absolute() - version = open(aidge_root / "version.txt", "r").read().strip() - return version +PROJECT_NAME = "aidge_backend_cpu" +SETUP_DIR = pathlib.Path(__file__).parent -class CMakeExtension(Extension): +class AidgeBuildExtension(Extension): def __init__(self, name): super().__init__(name, sources=[]) -class CMakeBuild(build_ext): + +class AidgePkgBuild(build_ext): + def __init__(self, dist, *args, **kwargs): + super().__init__(dist, *args, **kwargs) + # Detect editable_mode for old versions of setuptools + if not hasattr(self, "editable_mode"): + if hasattr(dist, "commands"): + self.editable_mode = "develop" in dist.commands + else: + self.editable_mode = False def run(self): + #################################### + # BUILD PACKAGE # This lists the number of processors available on the machine # The compilation will use half of them max_jobs = str(ceil(multiprocessing.cpu_count() / 2)) @@ -60,55 +48,54 @@ class CMakeBuild(build_ext): if not build_lib.exists(): build_lib.mkdir(parents=True, exist_ok=True) - os.chdir(str(build_temp)) + package_prefix = build_lib if not self.editable_mode else SETUP_DIR + pybind_install_prefix = (package_prefix / PROJECT_NAME).absolute() - # Impose to use the executable of the python - # used to launch setup.py to setup PythonInterp - param_py = "-DPYTHON_EXECUTABLE=" + sys.executable + os.chdir(str(build_temp)) - 
compile_type = 'Debug' - install_path = os.path.join(sys.prefix, "lib", "libAidge") if "AIDGE_INSTALL" not in os.environ else os.environ["AIDGE_INSTALL"] + compile_type = os.environ.get("AIDGE_PYTHON_BUILD_TYPE", "Release") + install_path = ( + os.path.join(sys.prefix, "lib", "libAidge") + if "AIDGE_INSTALL" not in os.environ + else os.environ["AIDGE_INSTALL"] + ) + build_gen = os.environ.get("AIDGE_BUILD_GEN", "") + build_gen_opts = ( + ["-G", build_gen] + if build_gen + else [] + ) + test_onoff = os.environ.get("AIDGE_BUILD_TEST", "OFF") + + self.spawn( + [ + "cmake", + *build_gen_opts, + str(cwd), + f"-DTEST={test_onoff}", + f"-DCMAKE_INSTALL_PREFIX:PATH={install_path}", + f"-DCMAKE_BUILD_TYPE={compile_type}", + "-DPYBIND=ON", + f"-DPYBIND_INSTALL_PREFIX:PATH={pybind_install_prefix}", + "-DCMAKE_EXPORT_COMPILE_COMMANDS=ON", + "-DCOVERAGE=OFF", + ] + ) - self.spawn(['cmake', str(cwd), param_py, '-DTEST=OFF', f'-DCMAKE_INSTALL_PREFIX:PATH={install_path}', f'-DCMAKE_BUILD_TYPE={compile_type}']) if not self.dry_run: - self.spawn(['cmake', '--build', '.', '--config', compile_type, '-j', max_jobs]) - self.spawn(['cmake', '--install', '.', '--config', compile_type]) + self.spawn( + ["cmake", "--build", ".", "--config", compile_type, "-j", max_jobs] + ) + self.spawn(["cmake", "--install", ".", "--config", compile_type]) os.chdir(str(cwd)) - aidge_package = build_lib / (get_project_name()) - - # Get "aidge core" package - # ext_lib = build_temp - print(build_temp.absolute()) - # Copy all shared object files from build_temp/lib to aidge_package - for root, _, files in os.walk(build_temp.absolute()): - for file in files: - if (file.endswith('.so') or file.endswith('.pyd')) and (root != str(aidge_package.absolute())): - currentFile=os.path.join(root, file) - shutil.copy(currentFile, str(aidge_package.absolute())) - - # Copy version.txt in aidge_package - os.chdir(os.path.dirname(__file__)) - shutil.copy("version.txt", str(aidge_package.absolute())) - - -if __name__ == 
'__main__': +if __name__ == "__main__": setup( - name=get_project_name(), - version=get_project_version(), - python_requires='>=3.7', - description=DOCLINES[0], - long_description_content_type="text/markdown", - long_description="\n".join(DOCLINES[2:]), - classifiers=[c for c in CLASSIFIERS.split('\n') if c], - packages=find_packages(where="."), include_package_data=True, - ext_modules=[CMakeExtension(get_project_name())], + ext_modules=[AidgeBuildExtension(PROJECT_NAME)], cmdclass={ - 'build_ext': CMakeBuild, + "build_ext": AidgePkgBuild, }, - install_requires=['aidge_core'], zip_safe=False, - ) diff --git a/src/operator/AbsImpl.cpp b/src/operator/AbsImpl.cpp new file mode 100644 index 0000000000000000000000000000000000000000..130d6cf7a64e1e75b8ef128974101a477f802caf --- /dev/null +++ b/src/operator/AbsImpl.cpp @@ -0,0 +1,40 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. 
+ * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#include "aidge/backend/cpu/operator/AbsImpl.hpp" + +#include <memory> +#include <vector> + +#include "aidge/backend/cpu/operator/AbsImpl_kernels.hpp" +#include "aidge/data/Tensor.hpp" +#include "aidge/operator/Abs.hpp" +#include "aidge/utils/Types.h" + +template <> +void Aidge::AbsImpl_cpu::forward() { + const Abs_Op& op = static_cast<const Abs_Op&>(mOp); + + // Find the correct kernel type + const auto impl = Registrar<AbsImpl_cpu>::create(getBestMatch(getRequiredSpec())); + + // Call kernel + impl.forward( + op.getInput(0)->size(), + op.getInput(0)->getImpl()->rawPtr(), + op.getOutput(0)->getImpl()->rawPtr() + ); +} + +template <> +void Aidge::AbsImpl_cpu::backward() { + AIDGE_THROW_OR_ABORT(std::runtime_error, "Backward not yet implemented for Abs_Op on backend cpu"); +} diff --git a/src/operator/AddImpl.cpp b/src/operator/AddImpl.cpp index d6d75a608e4da7d8b9ed8a28912ff2eb1751e042..457a0b17e531fac35ff873f9eedca7bbbe82d459 100644 --- a/src/operator/AddImpl.cpp +++ b/src/operator/AddImpl.cpp @@ -16,69 +16,57 @@ #include <vector> #include "aidge/backend/cpu/data/GetCPUPtr.h" -#include "aidge/backend/cpu/operator/AddImpl_forward_kernels.hpp" +#include "aidge/backend/cpu/operator/AddImpl_kernels.hpp" #include "aidge/data/Data.hpp" #include "aidge/data/Tensor.hpp" #include "aidge/utils/Types.h" #include "aidge/utils/ErrorHandling.hpp" -Aidge::Elts_t Aidge::AddImpl_cpu::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const { - // this implementation can be in-place - return Elts_t::DataElts(0); -} - +template <> void Aidge::AddImpl_cpu::forward() { - const auto& opTensor = static_cast<const OperatorTensor&>(mOp); - AIDGE_ASSERT(opTensor.getInput(0)->hasImpl(), "cannot run Add forward because the 0-th input has no implementation."); - assert(opTensor.getInput(0) && "missing input in Add operator"); - DataType datatypeFirstInput 
= opTensor.getInput(0)->dataType();
-    for (IOIndex_t i = 1; i < opTensor.nbInputs(); ++i) {
-        AIDGE_ASSERT(opTensor.getInput(i)->hasImpl(), "cannot run Add forward because the {}-th input has no implementation.", i);
-        assert(opTensor.getInput(i) && "missing input in Add operator");
-        assert(opTensor.getInput(i)->dataType() == datatypeFirstInput);
+    const Add_Op& op = static_cast<const Add_Op&>(mOp);
+    // Check inputs
+    AIDGE_ASSERT(op.getInput(0), "missing input in Add operator");
+    AIDGE_ASSERT(op.getInput(0)->hasImpl(), "cannot run Add forward because the 0-th input has no implementation.");
+    DataType datatypeFirstInput = op.getInput(0)->dataType();
+    for (IOIndex_t i = 1; i < op.nbInputs(); ++i) {
+        AIDGE_ASSERT(op.getInput(i), "missing input in Add operator");
+        AIDGE_ASSERT(op.getInput(i)->hasImpl(), "cannot run Add forward because the {}-th input has no implementation.", i);
+        AIDGE_ASSERT(op.getInput(i)->dataType() == datatypeFirstInput, "Cannot add inputs with two different data types.");
     }
 
     // Find the correct kernel type
-    const auto outputDataType = opTensor.getOutput(0)->dataType();
-    const Registrar<AddImplForward_cpu>::registrar_key registrarKey = {
-        datatypeFirstInput,
-        outputDataType};
-
-    Registrar<AddImplForward_cpu>::registrar_type kernelFunc;
-    if (Registrar<AddImplForward_cpu>::exists(registrarKey)) {
-        // One exists with the right inputs/output types
-        kernelFunc = Registrar<AddImplForward_cpu>::create(registrarKey);
-    }
-    else {
-        // Otherwise, fallback to the kernel with all types matching output type
-        kernelFunc = Registrar<AddImplForward_cpu>::create({
-            outputDataType, outputDataType});
-    }
+    const auto impl = Registrar<AddImpl_cpu>::create(getBestMatch(getRequiredSpec()));
 
     // Convert input data (no overhead if not needed!)
     // TODO: right now, if needed, memory will be allocated/deallocated at each
     // call to forward(). We might put the following shared_ptr as members of
     // this class to avoid that. 
- const std::size_t nbDims = opTensor.getOutput(0)->nbDims(); + const std::size_t nbDims = op.getOutput(0)->nbDims(); std::vector<std::vector<std::size_t>> inputsDims; std::vector<const void*> opInputs; - std::vector<std::shared_ptr<Tensor>> inputsFallback(opTensor.nbInputs()); - for (IOIndex_t i = 0; i < opTensor.nbInputs(); ++i) { + std::vector<std::shared_ptr<Tensor>> inputsFallback(op.nbInputs()); + for (IOIndex_t i = 0; i < op.nbInputs(); ++i) { std::vector<std::size_t> inputDims(nbDims, 1); - auto dims = opTensor.getInput(i)->dims(); + auto dims = op.getInput(i)->dims(); for(std::size_t j=dims.size()-1; j+1>0; --j) { std::size_t idx = nbDims - (dims.size()-j); inputDims[idx] = dims[j]; } inputsDims.push_back(inputDims); - const auto& input = opTensor.getInput(i)->refCastFrom(inputsFallback[i], *opTensor.getOutput(0)); + const auto& input = op.getInput(i)->refCastFrom(inputsFallback[i], *op.getOutput(0)); opInputs.push_back(input.getImpl()->rawPtr()); } - kernelFunc(opInputs, + impl.forward(opInputs, inputsDims, - opTensor.getOutput(0)->size(), - opTensor.getOutput(0)->dims(), - getCPUPtr(opTensor.getRawOutput(0))); + op.getOutput(0)->size(), + op.getOutput(0)->dims(), + getCPUPtr(op.getRawOutput(0))); +} + +template <> +void Aidge::AddImpl_cpu::backward() { + AIDGE_THROW_OR_ABORT(std::runtime_error, "Backward not yet implemented for Add_Op on backend cpu"); } diff --git a/src/operator/AndImpl.cpp b/src/operator/AndImpl.cpp new file mode 100644 index 0000000000000000000000000000000000000000..2e0f59769ad86f6e4143ab59d089706e34792244 --- /dev/null +++ b/src/operator/AndImpl.cpp @@ -0,0 +1,49 @@ +/******************************************************************************** + * Copyright (c) 2024 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. 
+ * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#include <cassert> +#include <chrono> // std::chrono::milliseconds +#include <numeric> // std::accumulate +#include <thread> // std::this_thread::sleep_for +#include <vector> + +#include "aidge/operator/And.hpp" +#include "aidge/utils/Types.h" +#include "aidge/backend/cpu/data/Broadcasting.hpp" +#include "aidge/backend/cpu/data/GetCPUPtr.h" + +#include "aidge/backend/cpu/operator/AndImpl.hpp" +#include "aidge/backend/cpu/operator/AndImpl_kernels.hpp" + +template <> +void Aidge::AndImpl_cpu::forward() { + const std::vector<std::size_t> inputDims0 = getBroadcastedDims(std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dims(), + std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dims()); + const std::vector<std::size_t> inputDims1 = getBroadcastedDims(std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dims(), + std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->dims()); + + + // Find the correct kernel type + const auto impl = Registrar<AndImpl_cpu>::create(getBestMatch(getRequiredSpec())); + + // Call kernel + impl.forward(inputDims0, + inputDims1, + std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dims(), + getCPUPtr(mOp.getRawInput(0)), + getCPUPtr(mOp.getRawInput(1)), + getCPUPtr(mOp.getRawOutput(0))); +} + +template <> +void Aidge::AndImpl_cpu::backward() { + AIDGE_THROW_OR_ABORT(std::runtime_error, "Backward not yet implemented for And_Op on backend cpu"); +} diff --git a/src/operator/ArgMaxImpl.cpp b/src/operator/ArgMaxImpl.cpp new file mode 100644 index 0000000000000000000000000000000000000000..b8fb85a7cd86a788cda69307d5ed8f363619f9f0 --- /dev/null +++ b/src/operator/ArgMaxImpl.cpp @@ -0,0 +1,39 @@ +/******************************************************************************** + * Copyright (c) 2024 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the 
Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. + * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#include "aidge/backend/cpu/operator/ArgMaxImpl.hpp" + +#include <memory> +#include <vector> + +#include "aidge/utils/Types.h" +#include "aidge/operator/ArgMax.hpp" +#include "aidge/backend/cpu/operator/ArgMaxImpl_kernels.hpp" + +template <> +void Aidge::ArgMaxImpl_cpu::forward() { + const ArgMax_Op& op_ = dynamic_cast<const ArgMax_Op&>(mOp); + + // Find the correct kernel type + const auto impl = Registrar<ArgMaxImpl_cpu>::create(getBestMatch(getRequiredSpec())); + + // Call kernel + impl.forward(op_.axis(), + op_.selectLastIndex(), + op_.getInput(0)->dims(), + op_.getInput(0)->getImpl()->rawPtr(), + op_.getOutput(0)->getImpl()->rawPtr()); +} + +template <> +void Aidge::ArgMaxImpl_cpu::backward() { + AIDGE_THROW_OR_ABORT(std::runtime_error, "Backward not yet implemented for ArgMax_Op on backend cpu"); +} diff --git a/src/operator/AvgPoolingImpl.cpp b/src/operator/AvgPoolingImpl.cpp index feaa7e67a8d0bc726462aed99e557493d3b8d0c6..01a5e8cf1772161f5cf98d3a8bd52f43ac7a1d0d 100644 --- a/src/operator/AvgPoolingImpl.cpp +++ b/src/operator/AvgPoolingImpl.cpp @@ -16,29 +16,29 @@ #include <vector> #include "aidge/backend/cpu/data/GetCPUPtr.h" -#include "aidge/backend/cpu/operator/AvgPoolingImpl_forward_kernels.hpp" +#include "aidge/backend/cpu/operator/AvgPoolingImpl_kernels.hpp" #include "aidge/data/Tensor.hpp" #include "aidge/operator/AvgPooling.hpp" #include "aidge/utils/Types.h" -Aidge::Elts_t Aidge::AvgPoolingImpl2D_cpu::getNbRequiredProtected(IOIndex_t /*inputIdx*/) const { - // this implementation can be in-place - return Elts_t::DataElts(0); -} - +template <> void Aidge::AvgPoolingImpl2D_cpu::forward() { const auto& op_ = dynamic_cast<const AvgPooling_Op<2>&>(mOp); assert(op_.getInput(0) && "missing input #0"); // Find the correct kernel type - 
auto kernelFunc = Registrar<AvgPoolingImpl2DForward_cpu>::create( - {op_.getInput(0)->dataType(), - op_.getOutput(0)->dataType()}); + const auto impl = Registrar<AvgPoolingImpl2D_cpu>::create(getBestMatch(getRequiredSpec())); // Call kernel - kernelFunc(op_.strideDims(), + impl.forward(op_.strideDims(), op_.kernelDims(), op_.getInput(0)->template dims<4>(), getCPUPtr(op_.getInput(0)), getCPUPtr(op_.getOutput(0))); } + +template <> +void Aidge::AvgPoolingImpl2D_cpu::backward() { + AIDGE_THROW_OR_ABORT(std::runtime_error, "Backward not yet implemented for AvgPooling_Op<2> on backend cpu"); +} + diff --git a/src/operator/BatchNormImpl.cpp b/src/operator/BatchNormImpl.cpp index 3046eea9bd241732daf39cce1783b5ee50de01c7..9f1d986e63f14e6038c80054e5e3bc631ec24224 100644 --- a/src/operator/BatchNormImpl.cpp +++ b/src/operator/BatchNormImpl.cpp @@ -19,13 +19,9 @@ #include "aidge/backend/cpu/data/GetCPUPtr.h" #include "aidge/operator/BatchNorm.hpp" -#include "aidge/backend/cpu/operator/BatchNormImpl_forward_kernels.hpp" - -Aidge::Elts_t Aidge::BatchNormImpl2D_cpu::getNbRequiredProtected(IOIndex_t /*inputIdx*/) const { - // this implementation can be in-place - return Elts_t::DataElts(0); -} +#include "aidge/backend/cpu/operator/BatchNormImpl_kernels.hpp" +template <> void Aidge::BatchNormImpl2D_cpu::forward() { const auto& op_ = dynamic_cast<const BatchNorm_Op<2>&>(mOp); AIDGE_ASSERT(op_.getInput(0), "missing input #0 for BatchNorm Operator"); @@ -35,14 +31,12 @@ void Aidge::BatchNormImpl2D_cpu::forward() { AIDGE_ASSERT(op_.getInput(4), "missing input #4 for BatchNorm Operator"); AIDGE_ASSERT(op_.getOutput(0)->nbDims() == 4, ""); + // Find the correct kernel type - auto kernelFunc = - Registrar<BatchNormImpl2DForward_cpu>::create({op_.getInput(0)->dataType(), - op_.getInput(1)->dataType(), - op_.getOutput(0)->dataType()}); + const auto impl = Registrar<BatchNormImpl2D_cpu>::create(getBestMatch(getRequiredSpec())); // Call kernel - kernelFunc(op_.epsilon(), + 
impl.forward(op_.epsilon(), op_.momentum(), op_.getInput(0)->template dims<4>(), getCPUPtr(op_.getRawInput(0)), @@ -53,3 +47,8 @@ void Aidge::BatchNormImpl2D_cpu::forward() { getCPUPtr(op_.getRawOutput(0)), true); } + +template <> +void Aidge::BatchNormImpl2D_cpu::backward() { + AIDGE_THROW_OR_ABORT(std::runtime_error, "Backward not yet implemented for BatchNorm_Op<2> on backend cpu"); +} diff --git a/src/operator/BitShiftImpl.cpp b/src/operator/BitShiftImpl.cpp new file mode 100644 index 0000000000000000000000000000000000000000..1e0f79fd29fd140f0b41c64d245b9b240da80028 --- /dev/null +++ b/src/operator/BitShiftImpl.cpp @@ -0,0 +1,57 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. + * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#include <cassert> +#include <chrono> // std::chrono::milliseconds +#include <numeric> +#include <thread> // std::this_thread::sleep_for +#include <vector> + + +#include "aidge/utils/Types.h" +#include "aidge/backend/cpu/data/Broadcasting.hpp" +#include "aidge/backend/cpu/data/GetCPUPtr.h" + +#include "aidge/backend/cpu/operator/BitShiftImpl.hpp" +#include "aidge/backend/cpu/operator/BitShiftImpl_kernels.hpp" + +template<> +void Aidge::BitShiftImpl_cpu::forward() { + + const auto& op_ = dynamic_cast<const BitShift_Op&>(mOp); + + + const auto impl = Registrar<BitShiftImpl_cpu>::create(getBestMatch(getRequiredSpec())); + + + const std::vector<std::size_t> inputDims0 = getBroadcastedDims(std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dims(), + std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dims()); + const std::vector<std::size_t> inputDims1 = 
getBroadcastedDims(std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dims(), + std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->dims()); + + BitShift_Op::BitShiftDirection direction = op_.direction(); + + // Call kernel + impl.forward( + direction, + inputDims0, + inputDims1, + std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dims(), + getCPUPtr(mOp.getRawInput(0)), + getCPUPtr(mOp.getRawInput(1)), + getCPUPtr(mOp.getRawOutput(0))); + +} + +template <> +void Aidge::BitShiftImpl_cpu::backward() { + AIDGE_THROW_OR_ABORT(std::runtime_error, "Backward not yet implemented for BitShift_Op on backend cpu"); +} \ No newline at end of file diff --git a/src/operator/ConstantOfShapeImpl.cpp b/src/operator/ConstantOfShapeImpl.cpp new file mode 100644 index 0000000000000000000000000000000000000000..16e4b762ba04e5f01bfccf965f6de3650fa2e734 --- /dev/null +++ b/src/operator/ConstantOfShapeImpl.cpp @@ -0,0 +1,44 @@ +/******************************************************************************** + * Copyright (c) 2024 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. 
+ * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#include "aidge/backend/cpu/operator/ConstantOfShapeImpl.hpp" + +#include <functional> +#include <memory> +#include <vector> + +#include "aidge/backend/cpu/operator/ConstantOfShapeImpl_kernels.hpp" +#include "aidge/data/Data.hpp" +#include "aidge/data/Tensor.hpp" +#include "aidge/operator/ConstantOfShape.hpp" +#include "aidge/utils/ErrorHandling.hpp" +#include "aidge/utils/Registrar.hpp" +#include "aidge/utils/Types.h" + +template <> +void Aidge::ConstantOfShapeImpl_cpu::forward() { + const ConstantOfShape_Op &op_ = static_cast<const ConstantOfShape_Op &>(mOp); + // Check if input is provided + AIDGE_ASSERT(op_.getInput(0), "{} : Missing input 0", __func__); + + // Find the correct kernel type + const auto impl = Registrar<ConstantOfShapeImpl_cpu>::create(getBestMatch(getRequiredSpec())); + + // Call kernel + impl.forward(op_.getOutput(0)->dims(), + op_.value(), + op_.getOutput(0)->getImpl()->rawPtr()); +} + +template <> +void Aidge::ConstantOfShapeImpl_cpu::backward() { + AIDGE_THROW_OR_ABORT(std::runtime_error, "Backward not yet implemented for ConstantOfShape_Op on backend cpu"); +} diff --git a/src/operator/ConvDepthWiseImpl.cpp b/src/operator/ConvDepthWiseImpl.cpp index 591e8a0637d1e52c75193ac1750a210a08815ccc..d86bba8d1abf348eb25e2d9c69d04b5c33a8a176 100644 --- a/src/operator/ConvDepthWiseImpl.cpp +++ b/src/operator/ConvDepthWiseImpl.cpp @@ -15,18 +15,13 @@ #include <vector> #include "aidge/backend/cpu/data/GetCPUPtr.h" -#include "aidge/backend/cpu/operator/ConvDepthWiseImpl_forward_kernels.hpp" +#include "aidge/backend/cpu/operator/ConvDepthWiseImpl_kernels.hpp" #include "aidge/data/Tensor.hpp" #include "aidge/operator/ConvDepthWise.hpp" #include "aidge/utils/Log.hpp" #include "aidge/utils/Types.h" - -Aidge::Elts_t Aidge::ConvDepthWiseImpl1D_cpu::getNbRequiredProtected(Aidge::IOIndex_t /*inputIdx*/) const { - // this 
implementation can be in-place - return Elts_t::DataElts(0); -} - +template <> void Aidge::ConvDepthWiseImpl1D_cpu::forward() { const auto& op_ = dynamic_cast<const ConvDepthWise_Op<1>&>(mOp); @@ -36,23 +31,7 @@ void Aidge::ConvDepthWiseImpl1D_cpu::forward() { AIDGE_ASSERT((op_.getInput(0)->nbDims() == 3), "support for 4-dimensions tensors only"); // Find the correct kernel type - const auto outputDataType = op_.getOutput(0)->dataType(); - const Registrar<ConvDepthWiseImpl1DForward_cpu>::registrar_key registrarKey = { - op_.getInput(0)->dataType(), - op_.getInput(1)->dataType(), - ((op_.getInput(2)) ? op_.getInput(2)->dataType() : op_.getInput(1)->dataType()), - outputDataType}; - - Registrar<ConvDepthWiseImpl1DForward_cpu>::registrar_type kernelFunc; - if (Registrar<ConvDepthWiseImpl1DForward_cpu>::exists(registrarKey)) { - // One exists with the right inputs/output types - kernelFunc = Registrar<ConvDepthWiseImpl1DForward_cpu>::create(registrarKey); - } - else { - // Otherwise, fallback to the kernel with all types matching output type - kernelFunc = Registrar<ConvDepthWiseImpl1DForward_cpu>::create({ - outputDataType, outputDataType, outputDataType, outputDataType}); - } + const auto impl = Registrar<ConvDepthWiseImpl1D_cpu>::create(getBestMatch(getRequiredSpec())); // Convert input data (no overhead if not needed!) // TODO: right now, if needed, memory will be allocated/deallocated at each @@ -64,7 +43,7 @@ void Aidge::ConvDepthWiseImpl1D_cpu::forward() { const auto& input2 = (op_.getInput(2)) ? 
op_.getInput(2)->refCastFrom(input2Fallback, *op_.getOutput(0)) : Tensor(); // Call kernel - kernelFunc(op_.strideDims(), + impl.forward(op_.strideDims(), op_.dilationDims(), op_.kernelDims(), // Conv attributes op_.getInput(0)->template dims<3>(), // input dimensions @@ -75,11 +54,12 @@ void Aidge::ConvDepthWiseImpl1D_cpu::forward() { ); } -Aidge::Elts_t Aidge::ConvDepthWiseImpl2D_cpu::getNbRequiredProtected(IOIndex_t /*inputIdx*/) const { - // this implementation can be in-place - return Elts_t::DataElts(0); +template <> +void Aidge::ConvDepthWiseImpl1D_cpu::backward() { + AIDGE_THROW_OR_ABORT(std::runtime_error, "Backward not yet implemented for ConvDepthWise_Op<1> on backend cpu"); } +template <> void Aidge::ConvDepthWiseImpl2D_cpu::forward() { const auto& op_ = dynamic_cast<const ConvDepthWise_Op<2>&>(mOp); @@ -90,11 +70,7 @@ void Aidge::ConvDepthWiseImpl2D_cpu::forward() { AIDGE_ASSERT((op_.getInput(0)->nbDims() == 4), "support for 4-dimensions tensors only"); // Find the correct kernel type - auto kernelFunc = Registrar<ConvDepthWiseImpl2DForward_cpu>::create( - {op_.getInput(0)->dataType(), - op_.getInput(1)->dataType(), - op_.getInput(2)->dataType(), - op_.getOutput(0)->dataType()}); + const auto impl = Registrar<ConvDepthWiseImpl2D_cpu>::create(getBestMatch(getRequiredSpec())); // Convert input data (no overhead if not needed!) // TODO: right now, if needed, memory will be allocated/deallocated at each @@ -106,7 +82,7 @@ void Aidge::ConvDepthWiseImpl2D_cpu::forward() { const auto& input2 = op_.getInput(2) ? op_.getInput(2)->refCastFrom(input2Fallback, *op_.getOutput(0)) : Tensor(); // Call kernel - kernelFunc(op_.strideDims(), + impl.forward(op_.strideDims(), op_.dilationDims(), op_.kernelDims(), op_.getInput(0)->template dims<4>(), @@ -115,3 +91,8 @@ void Aidge::ConvDepthWiseImpl2D_cpu::forward() { op_.getInput(2) ? 
input2.getImpl()->rawPtr() : nullptr, getCPUPtr(op_.getRawOutput(0))); } + +template <> +void Aidge::ConvDepthWiseImpl2D_cpu::backward() { + AIDGE_THROW_OR_ABORT(std::runtime_error, "Backward not yet implemented for ConvDepthWise_Op<2> on backend cpu"); +} diff --git a/src/operator/ConvImpl.cpp b/src/operator/ConvImpl.cpp index 0be31befe2019d70b628db878443f14b1d622f1c..fdfe19fbf4bf3e71c86aa28b966cfb21a1b5ba40 100644 --- a/src/operator/ConvImpl.cpp +++ b/src/operator/ConvImpl.cpp @@ -10,6 +10,7 @@ ********************************************************************************/ #include "aidge/backend/cpu/operator/ConvImpl.hpp" +#include "aidge/backend/cpu/operator/ConvImpl_kernels.hpp" #include <cassert> #include <chrono> // std::chrono::milliseconds @@ -18,40 +19,19 @@ #include <vector> #include "aidge/backend/cpu/data/GetCPUPtr.h" -#include "aidge/backend/cpu/operator/ConvImpl_forward_kernels.hpp" #include "aidge/operator/Conv.hpp" #include "aidge/utils/Types.h" -Aidge::Elts_t Aidge::ConvImpl1D_cpu::getNbRequiredProtected(IOIndex_t /*inputIdx*/) const { - // this implementation can be in-place - return Elts_t::DataElts(0); -} - +template <> void Aidge::ConvImpl1D_cpu::forward() { const auto& op_ = static_cast<const Conv_Op<1>&>(mOp); // FIXME: uncomment the following code once memory handling will work -AIDGE_ASSERT(op_.getInput(0), "missing input #0 in Conv Operator."); + AIDGE_ASSERT(op_.getInput(0), "missing input #0 in Conv Operator."); AIDGE_ASSERT(op_.getInput(1), "missing input #1 in Conv Operator."); // Find the correct kernel type - const auto outputDataType = op_.getOutput(0)->dataType(); - const Registrar<ConvImpl1DForward_cpu>::registrar_key registrarKey = { - op_.getInput(0)->dataType(), - op_.getInput(1)->dataType(), - (op_.getInput(2) ? 
op_.getInput(2)->dataType() : op_.getInput(1)->dataType()), - outputDataType}; - - Registrar<ConvImpl1DForward_cpu>::registrar_type kernelFunc; - if (Registrar<ConvImpl1DForward_cpu>::exists(registrarKey)) { - // One exists with the right inputs/output types - kernelFunc = Registrar<ConvImpl1DForward_cpu>::create(registrarKey); - } - else { - // Otherwise, fallback to the kernel with all types matching output type - kernelFunc = Registrar<ConvImpl1DForward_cpu>::create({ - outputDataType, outputDataType, outputDataType, outputDataType}); - } + const auto impl = Registrar<ConvImpl1D_cpu>::create(getBestMatch(getRequiredSpec())); // Convert input data (no overhead if not needed!) // TODO: right now, if needed, memory will be allocated/deallocated at each @@ -63,11 +43,11 @@ AIDGE_ASSERT(op_.getInput(0), "missing input #0 in Conv Operator."); const auto& input2 = (op_.getInput(2)) ? op_.getInput(2)->refCastFrom(input2Fallback, *op_.getOutput(0)) : Tensor(); // Call kernel - kernelFunc(op_.strideDims(), + impl.forward(op_.strideDims(), op_.dilationDims(), op_.kernelDims(), op_.getInput(0)->template dims<3>(), // input dimensions - dynamic_cast<const Conv_Op<2>&>(mOp).outChannels(), // outChannels + dynamic_cast<const Conv_Op<1>&>(mOp).outChannels(), // outChannels input0.getImpl()->rawPtr(), // input input1.getImpl()->rawPtr(), // weight op_.getInput(2) ? 
input2.getImpl()->rawPtr() : nullptr, // bias @@ -75,11 +55,12 @@ AIDGE_ASSERT(op_.getInput(0), "missing input #0 in Conv Operator."); ); } -Aidge::Elts_t Aidge::ConvImpl2D_cpu::getNbRequiredProtected(IOIndex_t /*inputIdx*/) const { - // this implementation can be in-place - return Elts_t::DataElts(0); +template <> +void Aidge::ConvImpl1D_cpu::backward() { + AIDGE_THROW_OR_ABORT(std::runtime_error, "Backward not yet implemented for Conv_Op<1> on backend cpu"); } +template <> void Aidge::ConvImpl2D_cpu::forward() { const auto& op_ = dynamic_cast<const Conv_Op<2>&>(mOp); @@ -88,23 +69,7 @@ void Aidge::ConvImpl2D_cpu::forward() { AIDGE_ASSERT(op_.getInput(1), "missing input #1 in Conv Operator."); // Find the correct kernel type - const auto outputDataType = op_.getOutput(0)->dataType(); - const Registrar<ConvImpl2DForward_cpu>::registrar_key registrarKey = { - op_.getInput(0)->dataType(), - op_.getInput(1)->dataType(), - (op_.getInput(2) ? op_.getInput(2)->dataType() : op_.getInput(1)->dataType()), - outputDataType}; - - Registrar<ConvImpl2DForward_cpu>::registrar_type kernelFunc; - if (Registrar<ConvImpl2DForward_cpu>::exists(registrarKey)) { - // One exists with the right inputs/output types - kernelFunc = Registrar<ConvImpl2DForward_cpu>::create(registrarKey); - } - else { - // Otherwise, fallback to the kernel with all types matching output type - kernelFunc = Registrar<ConvImpl2DForward_cpu>::create({ - outputDataType, outputDataType, outputDataType, outputDataType}); - } + const auto impl = Registrar<ConvImpl2D_cpu>::create(getBestMatch(getRequiredSpec())); // Convert input data (no overhead if not needed!) // TODO: right now, if needed, memory will be allocated/deallocated at each @@ -116,7 +81,7 @@ void Aidge::ConvImpl2D_cpu::forward() { const auto& input2 = (op_.getInput(2)) ? 
op_.getInput(2)->refCastFrom(input2Fallback, *op_.getOutput(0)) : Tensor(); // Call kernel - kernelFunc(op_.strideDims(), + impl.forward(op_.strideDims(), op_.dilationDims(), op_.kernelDims(), op_.getInput(0)->template dims<4>(), // input dimensions @@ -127,3 +92,8 @@ void Aidge::ConvImpl2D_cpu::forward() { getCPUPtr(mOp.getRawOutput(0)) // output ); } + +template <> +void Aidge::ConvImpl2D_cpu::backward() { + AIDGE_THROW_OR_ABORT(std::runtime_error, "Backward not yet implemented for Conv_Op<2> on backend cpu"); +} diff --git a/src/operator/DivImpl.cpp b/src/operator/DivImpl.cpp index cfd74be45b29852c89e4a27035ce2d38fc7266cc..135b32b5005a961e55910e758f9b7102ca51b63c 100644 --- a/src/operator/DivImpl.cpp +++ b/src/operator/DivImpl.cpp @@ -15,15 +15,11 @@ #include "aidge/backend/cpu/data/Broadcasting.hpp" #include "aidge/backend/cpu/data/GetCPUPtr.h" #include "aidge/backend/cpu/operator/DivImpl.hpp" -#include "aidge/backend/cpu/operator/DivImpl_forward_kernels.hpp" +#include "aidge/backend/cpu/operator/DivImpl_kernels.hpp" #include "aidge/data/Tensor.hpp" #include "aidge/utils/Types.h" -Aidge::Elts_t Aidge::DivImpl_cpu::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const { - // this implementation can be in-place - return Elts_t::DataElts(0); -} - +template <> void Aidge::DivImpl_cpu::forward() { // Find the correct kernel type // auto kernelFunc = Registrar<DivImplForward_cpu>::create({ @@ -60,10 +56,7 @@ void Aidge::DivImpl_cpu::forward() { const auto& opTensor = static_cast<const Div_Op&>(mOp); // Find the correct kernel type - auto kernelFunc = Registrar<DivImplForward_cpu>::create({ - opTensor.getInput(0)->dataType(), - opTensor.getInput(1)->dataType(), - opTensor.getOutput(0)->dataType()}); + const auto impl = Registrar<DivImpl_cpu>::create(getBestMatch(getRequiredSpec())); // Compute compatible input dimensions std::vector<std::size_t> dims0 = opTensor.getInput(0)->dims(); @@ -73,7 +66,7 @@ void Aidge::DivImpl_cpu::forward() { // special case for 
equal dimensions, the kernel is called with the entire arrays at once if (dims0 == dims1) { const std::size_t input0_contiguous_size = std::accumulate(dims0.cbegin(), dims0.cend(), std::size_t(1), std::multiplies<std::size_t>()); - kernelFunc(input0_contiguous_size, input0_contiguous_size, input0_contiguous_size, + impl.forward(input0_contiguous_size, input0_contiguous_size, input0_contiguous_size, getCPUPtr(mOp.getRawInput(0)), getCPUPtr(mOp.getRawInput(1)), getCPUPtr(mOp.getRawOutput(0))); @@ -139,7 +132,7 @@ void Aidge::DivImpl_cpu::forward() { std::size_t dim = contiguousIdx - 1; const std::size_t nbStacks = std::accumulate(outDims.cbegin(), outDims.cbegin() + contiguousIdx, std::size_t(1), std::multiplies<std::size_t>()); for (std::size_t stack = 0; stack < nbStacks;) { - kernelFunc(input0_contiguous_size, input1_contiguous_size, output_contiguous_size, + impl.forward(input0_contiguous_size, input1_contiguous_size, output_contiguous_size, getCPUPtr(mOp.getRawInput(0), offsetIn0*input0_contiguous_size), getCPUPtr(mOp.getRawInput(1), offsetIn1*input1_contiguous_size), getCPUPtr(mOp.getRawOutput(0), offsetOut*output_contiguous_size)); @@ -156,3 +149,8 @@ void Aidge::DivImpl_cpu::forward() { } } } + +template <> +void Aidge::DivImpl_cpu::backward() { + AIDGE_THROW_OR_ABORT(std::runtime_error, "Backward not yet implemented for Div_Op on backend cpu"); +} diff --git a/src/operator/ErfImpl.cpp b/src/operator/ErfImpl.cpp index ace098468c05b80c4116e6f85d00b5fabaf754cd..42c6ce878abe227f74d7df4a9bf31ebc4c63eb88 100644 --- a/src/operator/ErfImpl.cpp +++ b/src/operator/ErfImpl.cpp @@ -14,29 +14,27 @@ #include <memory> #include <vector> -#include "aidge/backend/cpu/operator/ErfImpl_forward_kernels.hpp" +#include "aidge/backend/cpu/operator/ErfImpl_kernels.hpp" #include "aidge/data/Tensor.hpp" #include "aidge/operator/Erf.hpp" #include "aidge/utils/Types.h" -Aidge::Elts_t Aidge::ErfImpl_cpu::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const { - // this 
implementation can be in-place - return Elts_t::DataElts(0); -} - +template <> void Aidge::ErfImpl_cpu::forward() { const Erf_Op& op = static_cast<const Erf_Op&>(mOp); // Find the correct kernel type - auto kernelFunc = Registrar<ErfImplForward_cpu>::create({ - op.getInput(0)->dataType(), - op.getOutput(0)->dataType() - }); + const auto impl = Registrar<ErfImpl_cpu>::create(getBestMatch(getRequiredSpec())); // Call kernel - kernelFunc( + impl.forward( op.getInput(0)->size(), op.getInput(0)->getImpl()->rawPtr(), op.getOutput(0)->getImpl()->rawPtr() ); } + +template <> +void Aidge::ErfImpl_cpu::backward() { + AIDGE_THROW_OR_ABORT(std::runtime_error, "Backward not yet implemented for Erf_Op on backend cpu"); +} diff --git a/src/operator/FCImpl.cpp b/src/operator/FCImpl.cpp index f7eebb7b21512fb3b388b6927409fba9a1d92b34..359452712f94be078122266089cc1da89baf50d5 100644 --- a/src/operator/FCImpl.cpp +++ b/src/operator/FCImpl.cpp @@ -17,37 +17,20 @@ #include <tuple> #include "aidge/backend/cpu/data/GetCPUPtr.h" -#include "aidge/backend/cpu/operator/FCImpl_backward_kernels.hpp" -#include "aidge/backend/cpu/operator/FCImpl_forward_kernels.hpp" +#include "aidge/backend/cpu/operator/FCImpl_kernels.hpp" #include "aidge/operator/FC.hpp" #include "aidge/utils/ErrorHandling.hpp" #include "aidge/utils/Types.h" +template <> void Aidge::FCImpl_cpu::forward() { const FC_Op& op_ = dynamic_cast<const FC_Op&>(mOp); AIDGE_ASSERT(op_.getInput(0), "missing input #0"); AIDGE_ASSERT(op_.getInput(1), "missing input #1"); - // Find the correct kernel type - const auto outputDataType = op_.getOutput(0)->dataType(); - const Registrar<FCImplForward_cpu>::registrar_key registrarKey = { - op_.getInput(0)->dataType(), - op_.getInput(1)->dataType(), - ((op_.getInput(2)) ? 
op_.getInput(2)->dataType() : op_.getInput(1)->dataType()), - outputDataType}; - - Registrar<FCImplForward_cpu>::registrar_type kernelFunc; - if (Registrar<FCImplForward_cpu>::exists(registrarKey)) { - // One exists with the right inputs/output types - kernelFunc = Registrar<FCImplForward_cpu>::create(registrarKey); - } - else { - // Otherwise, fallback to the kernel with all types matching output type - kernelFunc = Registrar<FCImplForward_cpu>::create({ - outputDataType, outputDataType, outputDataType, outputDataType}); - } + const auto impl = Registrar<FCImpl_cpu>::create(getBestMatch(getRequiredSpec())); // Convert input data (no overhead if not needed!) // TODO: right now, if needed, memory will be allocated/deallocated at each @@ -60,7 +43,7 @@ void Aidge::FCImpl_cpu::forward() // Call kernel const auto batchSize = (input0.dims().size() > 1) ? input0.dims()[0] : 1; - kernelFunc(batchSize, + impl.forward(batchSize, input1.dims()[1], // nb input features input1.dims()[0], // nb output features input0.getImpl()->rawPtr(), @@ -69,6 +52,7 @@ void Aidge::FCImpl_cpu::forward() getCPUPtr(mOp.getRawOutput(0))); } +template <> void Aidge::FCImpl_cpu::backward() { const FC_Op& op_ = dynamic_cast<const FC_Op&>(mOp); @@ -77,23 +61,7 @@ void Aidge::FCImpl_cpu::backward() AIDGE_ASSERT(op_.getInput(0)->grad(), "missing input #0 gradient"); AIDGE_ASSERT(op_.getInput(1)->grad(), "missing input #1 gradient"); - // Find the correct kernel type - const Registrar<FCImplBackward_cpu>::registrar_key registrarKey = { - fc_grad->dataType(), - op_.getInput(1)->grad()->dataType(), - (op_.getInput(2)) ? 
op_.getInput(2)->grad()->dataType() : op_.getInput(1)->grad()->dataType(), - op_.getInput(0)->grad()->dataType()}; - - Registrar<FCImplBackward_cpu>::registrar_type kernelFunc; - if (Registrar<FCImplBackward_cpu>::exists(registrarKey)) { - // One exists with the right inputs/output types - kernelFunc = Registrar<FCImplBackward_cpu>::create(registrarKey); - } - else { - // Otherwise, fallback to the kernel with all types matching output type - kernelFunc = Registrar<FCImplBackward_cpu>::create({ - fc_grad->dataType(), fc_grad->dataType(), fc_grad->dataType(), fc_grad->dataType()}); - } + const auto impl = Registrar<FCImpl_cpu>::create(getBestMatch(getRequiredSpec())); // Convert input data (no overhead if not needed!) // TODO: right now, if needed, memory will be allocated/deallocated at each @@ -106,7 +74,7 @@ void Aidge::FCImpl_cpu::backward() // Call kernel const auto batchSize = (input0grad.dims().size() > 1) ? input0grad.dims()[0] : 1; - kernelFunc(batchSize, + impl.backward(batchSize, input1grad.dims()[1], // nb input features input1grad.dims()[0], // nb output features getCPUPtr(fc_grad), diff --git a/src/operator/FoldImpl.cpp b/src/operator/FoldImpl.cpp new file mode 100644 index 0000000000000000000000000000000000000000..10f3d7b50bac9a1fbfc403609bdccb67a79cceac --- /dev/null +++ b/src/operator/FoldImpl.cpp @@ -0,0 +1,46 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. 
+ * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#include <cassert> +#include <chrono> // std::chrono::milliseconds +#include <numeric> // std::accumulate +#include <thread> // std::this_thread::sleep_for +#include <vector> + +#include "aidge/utils/Types.h" +#include "aidge/backend/cpu/data/GetCPUPtr.h" +#include "aidge/operator/Conv.hpp" + +#include "aidge/backend/cpu/operator/FoldImpl.hpp" +#include "aidge/backend/cpu/operator/FoldImpl_kernels.hpp" + +template <> +void Aidge::FoldImpl2D_cpu::forward() { + const auto& op_ = static_cast<const Fold_Op<2>&>(mOp); + assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(0)) && "missing input #0"); + + // Find the correct kernel type + const auto impl = Registrar<FoldImpl2D_cpu>::create(getBestMatch(getRequiredSpec())); + + // Call kernel + impl.forward(op_.outputDims(), + op_.strideDims(), + op_.dilationDims(), + op_.kernelDims(), + std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dims(), + getCPUPtr(mOp.getRawInput(0)), + getCPUPtr(mOp.getRawOutput(0))); +} + +template <> +void Aidge::FoldImpl2D_cpu::backward() { + AIDGE_THROW_OR_ABORT(std::runtime_error, "Backward not yet implemented for Fold_Op<2> on backend cpu"); +} diff --git a/src/operator/GlobalAveragePoolingImpl.cpp b/src/operator/GlobalAveragePoolingImpl.cpp index f7280360a4486fe5db6c4dfdd4c492bbe6ba302b..c53f92e199aee30d55ddafe39b5ef121979acbf7 100644 --- a/src/operator/GlobalAveragePoolingImpl.cpp +++ b/src/operator/GlobalAveragePoolingImpl.cpp @@ -15,7 +15,7 @@ #include <memory> #include <vector> -#include "aidge/backend/cpu/operator/GlobalAveragePoolingImpl_forward_kernels.hpp" +#include "aidge/backend/cpu/operator/GlobalAveragePoolingImpl_kernels.hpp" #include "aidge/data/Data.hpp" #include "aidge/data/Tensor.hpp" #include "aidge/operator/GlobalAveragePooling.hpp" @@ -24,18 +24,23 @@ #include "aidge/utils/Types.h" +template <> void 
Aidge::GlobalAveragePoolingImpl_cpu::forward() { const GlobalAveragePooling_Op& op_ = static_cast<const GlobalAveragePooling_Op&>(mOp); // Check if input is provided AIDGE_ASSERT(op_.getInput(0), "missing input 0"); - // Create the forward kernal with the wanted types - auto kernelFunc = Registrar<GlobalAveragePoolingImplForward_cpu>::create({op_.getInput(0)->dataType(), - op_.getOutput(0)->dataType()}); + // Find the correct kernel type + const auto impl = Registrar<GlobalAveragePoolingImpl_cpu>::create(getBestMatch(getRequiredSpec())); // Call kernel - kernelFunc(op_.getInput(0)->dims(), + impl.forward(op_.getInput(0)->dims(), op_.getInput(0)->getImpl()->rawPtr(), op_.getOutput(0)->getImpl()->rawPtr()); -} \ No newline at end of file +} + +template <> +void Aidge::GlobalAveragePoolingImpl_cpu::backward() { + AIDGE_THROW_OR_ABORT(std::runtime_error, "Backward not yet implemented for GlobalAveragePooling_Op on backend cpu"); +} diff --git a/src/operator/GridSampleImpl.cpp b/src/operator/GridSampleImpl.cpp new file mode 100644 index 0000000000000000000000000000000000000000..5b87390fc3de21d5d406d893e4827e80cce06c35 --- /dev/null +++ b/src/operator/GridSampleImpl.cpp @@ -0,0 +1,48 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. 
+ * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#include "aidge/backend/cpu/operator/GridSampleImpl.hpp" + +#include <functional> +#include <vector> + +#include "aidge/backend/cpu/data/GetCPUPtr.h" +#include "aidge/backend/cpu/operator/GridSampleImpl_kernels.hpp" +#include "aidge/operator/GridSample.hpp" +#include "aidge/utils/Types.h" + +template <> +void Aidge::GridSampleImpl_cpu::forward() { + const auto& op_ = static_cast<const GridSample_Op&>(mOp); + + // Find the correct kernel type + const auto impl = Registrar<GridSampleImpl_cpu>::create(getBestMatch(getRequiredSpec())); + + // Convert input data (no overhead if not needed!) + // TODO: right now, if needed, memory will be allocated/deallocated at each + // call to forward(). We might put the following shared_ptr as members of + // this class to avoid that. + std::shared_ptr<Tensor> input0Fallback, input1Fallback; + const auto& input0 = std::make_shared<Tensor>(op_.getInput(0)->refCastFrom(input0Fallback, *op_.getOutput(0))); + const auto& input1 = std::make_shared<Tensor>(op_.getInput(1)->refCastFrom(input1Fallback, *op_.getOutput(0))); + + // Call kernel + impl.forward(op_, + input0, // input + input1, // grid + op_.getOutput(0) // output + ); +} + +template <> +void Aidge::GridSampleImpl_cpu::backward() { + AIDGE_THROW_OR_ABORT(std::runtime_error, "Backward not yet implemented for GridSample_Op on backend cpu"); +} diff --git a/src/operator/LeakyReLUImpl.cpp b/src/operator/LeakyReLUImpl.cpp index 9d4f2a7edcdf263751ec1d9cea10cd4d60055610..6c0802dd967d2a20b34a2f1ca91fc0640c063c83 100644 --- a/src/operator/LeakyReLUImpl.cpp +++ b/src/operator/LeakyReLUImpl.cpp @@ -14,20 +14,14 @@ #include <vector> #include "aidge/backend/cpu/data/GetCPUPtr.h" -#include "aidge/backend/cpu/operator/LeakyReLUImpl_forward_kernels.hpp" -#include "aidge/backend/cpu/operator/LeakyReLUImpl_backward_kernels.hpp" +#include 
"aidge/backend/cpu/operator/LeakyReLUImpl_kernels.hpp" #include "aidge/data/Tensor.hpp" #include "aidge/operator/LeakyReLU.hpp" #include "aidge/utils/Log.hpp" #include "aidge/utils/Types.h" #include "aidge/utils/Registrar.hpp" - -Aidge::Elts_t Aidge::LeakyReLUImpl_cpu::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const { - // this implementation can be in-place - return Elts_t::DataElts(0); -} - +template <> void Aidge::LeakyReLUImpl_cpu::forward() { const LeakyReLU_Op& op_ = dynamic_cast<const LeakyReLU_Op&>(mOp); @@ -36,17 +30,16 @@ void Aidge::LeakyReLUImpl_cpu::forward() { AIDGE_ASSERT(in0, "missing input #0"); // Find the correct kernel type - auto kernelFunc = Registrar<LeakyReLUImplForward_cpu>::create({ - in0->dataType(), - out0->dataType()}); + const auto impl = Registrar<LeakyReLUImpl_cpu>::create(getBestMatch(getRequiredSpec())); // Call kernel - kernelFunc(op_.negativeSlope(), + impl.forward(op_.negativeSlope(), in0->size(), getCPUPtr(mOp.getRawInput(0)), getCPUPtr(mOp.getRawOutput(0))); } +template <> void Aidge::LeakyReLUImpl_cpu::backward() { // reversing in and out Data for backprop const LeakyReLU_Op& op_ = dynamic_cast<const LeakyReLU_Op&>(mOp); @@ -55,12 +48,10 @@ void Aidge::LeakyReLUImpl_cpu::backward() { AIDGE_ASSERT(in0, "missing input #0"); // Find the correct kernel type - auto kernelFunc = Registrar<LeakyReLUImplForward_cpu>::create({ - in0->dataType(), - out0->dataType()}); + const auto impl = Registrar<LeakyReLUImpl_cpu>::create(getBestMatch(getRequiredSpec())); // Call kernel - kernelFunc(op_.negativeSlope(), + impl.backward(op_.negativeSlope(), in0->size(), getCPUPtr(in0), getCPUPtr(out0)); diff --git a/src/operator/LnImpl.cpp b/src/operator/LnImpl.cpp index 12885a944be46a977463e900af4047319bb1c8b2..79df733963ea8826439530d3adccde6affc9dfa8 100644 --- a/src/operator/LnImpl.cpp +++ b/src/operator/LnImpl.cpp @@ -20,14 +20,9 @@ #include "aidge/backend/cpu/data/GetCPUPtr.h" #include "aidge/backend/cpu/operator/LnImpl.hpp" 
-#include "aidge/backend/cpu/operator/LnImpl_forward_kernels.hpp" -#include "aidge/backend/cpu/operator/LnImpl_backward_kernels.hpp" - -Aidge::Elts_t Aidge::LnImpl_cpu::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const { - // this implementation can be in-place - return Elts_t::DataElts(0); -} +#include "aidge/backend/cpu/operator/LnImpl_kernels.hpp" +template <> void Aidge::LnImpl_cpu::forward() { const Ln_Op& op_ = static_cast<const Ln_Op&>(mOp); std::shared_ptr<Tensor> in0 = op_.getInput(0); @@ -35,16 +30,15 @@ void Aidge::LnImpl_cpu::forward() { AIDGE_ASSERT(in0, "missing input #0"); // Find the correct kernel type - auto kernelFunc = Registrar<LnImplForward_cpu>::create({ - in0->dataType(), - out0->dataType()}); + const auto impl = Registrar<LnImpl_cpu>::create(getBestMatch(getRequiredSpec())); // Call kernel - kernelFunc(in0->size(), + impl.forward(in0->size(), getCPUPtr(mOp.getRawInput(0)), getCPUPtr(mOp.getRawOutput(0))); } +template <> void Aidge::LnImpl_cpu::backward() { const Ln_Op& op_ = dynamic_cast<const Ln_Op&>(mOp); std::shared_ptr<Tensor> in0 = op_.getInput(0); @@ -54,12 +48,8 @@ void Aidge::LnImpl_cpu::backward() { AIDGE_ASSERT(out0, "missing output #0 for current {} operator", op_.type()); // Find the correct kernel type - auto kernelFunc = Registrar<LnImplBackward_cpu>::create({ - in0->dataType(), - gra_int0->dataType(), - gra_out0->dataType() - }); + const auto impl = Registrar<LnImpl_cpu>::create(getBestMatch(getRequiredSpec())); // Call kernel - kernelFunc(gra_int0->size(), getCPUPtr(in0), getCPUPtr(gra_out0), getCPUPtr(gra_int0)); + impl.backward(gra_int0->size(), getCPUPtr(in0), getCPUPtr(gra_out0), getCPUPtr(gra_int0)); } diff --git a/src/operator/MatMulImpl.cpp b/src/operator/MatMulImpl.cpp index e716726886225f703e7cf482d0bfcfb9ec733948..ccd3265ed230e4f9cdc5ad85785a6473d9f131f0 100644 --- a/src/operator/MatMulImpl.cpp +++ b/src/operator/MatMulImpl.cpp @@ -19,17 +19,16 @@ #include "aidge/utils/Types.h" #include 
"aidge/backend/cpu/operator/MatMulImpl.hpp" -#include "aidge/backend/cpu/operator/MatMulImpl_forward_kernels.hpp" +#include "aidge/backend/cpu/operator/MatMulImpl_kernels.hpp" +template <> void Aidge::MatMulImpl_cpu::forward() { assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(0)) && "missing input #0"); assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(1)) && "missing input #1"); // Find the correct kernel type - auto kernelFunc = Registrar<MatMulImplForward_cpu>::create( - {std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType(), - std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType()}); + const auto impl = Registrar<MatMulImpl_cpu>::create(getBestMatch(getRequiredSpec())); // Compute compatible input dimensions std::vector<std::size_t> dims0 = static_cast<const MatMul_Op&>(mOp).getInput(0)->dims(); @@ -91,7 +90,7 @@ void Aidge::MatMulImpl_cpu::forward() const std::size_t matrix1Size = k*m; const std::size_t matrixOutSize = n*m; for (std::size_t stack = 0; stack < nbMatrices;) { - kernelFunc(n, k, m, + impl.forward(n, k, m, getCPUPtr(mOp.getRawInput(0), offsetIn0*matrix0Size), getCPUPtr(mOp.getRawInput(1), offsetIn1*matrix1Size), getCPUPtr(mOp.getRawOutput(0), offsetOut*matrixOutSize)); @@ -126,3 +125,8 @@ void Aidge::MatMulImpl_cpu::forward() // getCPUPtr(mOp.getRawInput(1)), // getCPUPtr(mOp.getRawOutput(0))); // } + +template <> +void Aidge::MatMulImpl_cpu::backward() { + AIDGE_THROW_OR_ABORT(std::runtime_error, "Backward not yet implemented for MatMul_Op on backend cpu"); +} diff --git a/src/operator/MaxPoolingImpl.cpp b/src/operator/MaxPoolingImpl.cpp index 2e6d67abbdd6776a1f75449a0f4562143cbaae87..90075a397be3f082ef95fd4df074c99d926fd385 100644 --- a/src/operator/MaxPoolingImpl.cpp +++ b/src/operator/MaxPoolingImpl.cpp @@ -14,32 +14,29 @@ #include <vector> #include "aidge/backend/cpu/data/GetCPUPtr.h" -#include "aidge/backend/cpu/operator/MaxPoolingImpl_forward_kernels.hpp" +#include 
"aidge/backend/cpu/operator/MaxPoolingImpl_kernels.hpp" #include "aidge/operator/MaxPooling.hpp" #include "aidge/utils/Log.hpp" #include "aidge/utils/Types.h" - -Aidge::Elts_t Aidge::MaxPoolingImpl2D_cpu::getNbRequiredProtected(IOIndex_t /*inputIdx*/) const { - // this implementation can be in-place - return Elts_t::DataElts(0); -} - +template <> void Aidge::MaxPoolingImpl2D_cpu::forward() { const auto& op_ = dynamic_cast<const MaxPooling_Op<2>&>(mOp); AIDGE_ASSERT(op_.getInput(0), "missing input #0 in MaxPooling Operator."); // Find the correct kernel type - auto kernelFunc = Registrar<MaxPoolingImpl2DForward_cpu>::create({ - op_.getInput(0)->dataType(), - op_.getOutput(0)->dataType() - }); + const auto impl = Registrar<MaxPoolingImpl2D_cpu>::create(getBestMatch(getRequiredSpec())); // Call kernel - kernelFunc(op_.strideDims(), + impl.forward(op_.strideDims(), op_.kernelDims(), op_.ceilMode(), op_.getInput(0)->template dims<4>(), getCPUPtr(mOp.getRawInput(0)), getCPUPtr(mOp.getRawOutput(0))); } + +template <> +void Aidge::MaxPoolingImpl2D_cpu::backward() { + AIDGE_THROW_OR_ABORT(std::runtime_error, "Backward not yet implemented for MaxPooling_Op<2> on backend cpu"); +} diff --git a/src/operator/MulImpl.cpp b/src/operator/MulImpl.cpp index d7feb9b76e25a0e874b3682cdc5b3e53bf8e9228..ea5e3d3ab8ac24934a0cb6f9042858fa094700af 100644 --- a/src/operator/MulImpl.cpp +++ b/src/operator/MulImpl.cpp @@ -21,30 +21,49 @@ #include "aidge/backend/cpu/data/GetCPUPtr.h" #include "aidge/backend/cpu/operator/MulImpl.hpp" -#include "aidge/backend/cpu/operator/MulImpl_forward_kernels.hpp" - -Aidge::Elts_t Aidge::MulImpl_cpu::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const { - // this implementation can be in-place - return Elts_t::DataElts(0); -} +#include "aidge/backend/cpu/operator/MulImpl_kernels.hpp" +template <> void Aidge::MulImpl_cpu::forward() { - // Find the correct kernel type - auto kernelFunc = Registrar<MulImplForward_cpu>::create({ - 
std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType(), - std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->dataType(), - std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType()}); - const std::vector<std::size_t> inputDims0 = getBroadcastedDims(std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dims(), std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dims()); const std::vector<std::size_t> inputDims1 = getBroadcastedDims(std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dims(), std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->dims()); + // Find the correct kernel type + const auto impl = Registrar<MulImpl_cpu>::create(getBestMatch(getRequiredSpec())); + // Call kernel - kernelFunc(inputDims0, + impl.forward(inputDims0, inputDims1, std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dims(), getCPUPtr(mOp.getRawInput(0)), getCPUPtr(mOp.getRawInput(1)), getCPUPtr(mOp.getRawOutput(0))); } + +template <> +void Aidge::MulImpl_cpu::backward() { + const Mul_Op& op_ = dynamic_cast<const Mul_Op&>(mOp); + + auto in0 = op_.getInput(0); + auto in1 = op_.getInput(1); + auto in0grad = op_.getInput(0)->grad(); + auto in1grad = op_.getInput(1)->grad(); + auto out0grad = op_.getOutput(0)->grad(); + + // Find the correct kernel type + const auto impl = Registrar<MulImpl_cpu>::create(getBestMatch(getRequiredSpec())); + + // Call kernel + impl.backward(/* input0Length */ in0grad->size(), + /* input1Length */ in1grad->size(), + /* grad0Length */ out0grad->size(), + /* input0Dims */ in0->dims(), + /* input1Dims */ in1->dims(), + getCPUPtr(in0), + getCPUPtr(in1), + getCPUPtr(out0grad), + getCPUPtr(in0grad), + getCPUPtr(in1grad)); +} diff --git a/src/operator/PadImpl.cpp b/src/operator/PadImpl.cpp index b4b52d6be855b6a1f8c0a71a6a9169ee9690f34c..cdae21f8ed2757128f6a36b661b0897a4ba65f89 100644 --- a/src/operator/PadImpl.cpp +++ b/src/operator/PadImpl.cpp @@ -16,9 +16,9 @@ #include "aidge/operator/Conv.hpp" #include 
"aidge/backend/cpu/operator/PadImpl.hpp" -#include "aidge/backend/cpu/operator/PadImpl_forward_kernels.hpp" +#include "aidge/backend/cpu/operator/PadImpl_kernels.hpp" -Aidge::Elts_t Aidge::PadImpl1D_cpu::getNbRequiredProtected(Aidge::IOIndex_t inputIdx) const { +Aidge::Elts_t Aidge::Pad_ProdConso_cpu::getNbRequiredProtected(Aidge::IOIndex_t inputIdx) const { AIDGE_ASSERT(inputIdx == 0, "input index out of range." "{} Operator has only one input", mOp.type()); (void) inputIdx; @@ -31,17 +31,16 @@ Aidge::Elts_t Aidge::PadImpl1D_cpu::getNbRequiredProtected(Aidge::IOIndex_t inpu return Elts_t::DataElts(outputSize - inputSize); } +template <> void Aidge::PadImpl1D_cpu::forward() { const auto& op_ = dynamic_cast<const Pad_Op<1>&>(mOp); AIDGE_ASSERT(op_.getInput(0), "missing input #0 in Pad Operator."); // Find the correct kernel type - auto kernelFunc = Registrar<PadImpl1DForward_cpu>::create({ - op_.getInput(0)->dataType(), - op_.getOutput(0)->dataType()}); + const auto impl = Registrar<PadImpl1D_cpu>::create(getBestMatch(getRequiredSpec())); // Call kernel - kernelFunc(op_.beginEndBorders(), + impl.forward(op_.beginEndBorders(), op_.borderType(), op_.borderValue(), op_.getInput(0)->template dims<3>(), @@ -49,32 +48,29 @@ void Aidge::PadImpl1D_cpu::forward() { getCPUPtr(mOp.getRawOutput(0))); } -Aidge::Elts_t Aidge::PadImpl2D_cpu::getNbRequiredProtected(Aidge::IOIndex_t inputIdx) const { - AIDGE_ASSERT(inputIdx == 0, "input index out of range." - "{} Operator has only one input", mOp.type()); - (void) inputIdx; - - // Padding cannot be in-place! - // We must ensure that we do not override data that has not been consummed yet. 
- const auto inputSize = std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->size(); - const auto outputSize = std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->size(); - return Elts_t::DataElts(outputSize - inputSize); +template <> +void Aidge::PadImpl1D_cpu::backward() { + AIDGE_THROW_OR_ABORT(std::runtime_error, "Backward not yet implemented for Pad_Op<1> on backend cpu"); } +template <> void Aidge::PadImpl2D_cpu::forward() { const auto& op_ = dynamic_cast<const Pad_Op<2>&>(mOp); AIDGE_ASSERT(op_.getInput(0), "missing input #0 in Pad Operator."); // Find the correct kernel type - auto kernelFunc = Registrar<PadImpl2DForward_cpu>::create({ - op_.getInput(0)->dataType(), - op_.getOutput(0)->dataType()}); + const auto impl = Registrar<PadImpl2D_cpu>::create(getBestMatch(getRequiredSpec())); // Call kernel - kernelFunc(op_.beginEndBorders(), + impl.forward(op_.beginEndBorders(), op_.borderType(), op_.borderValue(), op_.getInput(0)->template dims<4>(), getCPUPtr(mOp.getRawInput(0)), getCPUPtr(mOp.getRawOutput(0))); } + +template <> +void Aidge::PadImpl2D_cpu::backward() { + AIDGE_THROW_OR_ABORT(std::runtime_error, "Backward not yet implemented for Pad_Op<2> on backend cpu"); +} diff --git a/src/operator/PowImpl.cpp b/src/operator/PowImpl.cpp index 811d13804cffdd2477fc830f1779b0fb6271eb0b..74a7be71e176ba8e1cb8851050e575d6aa7465df 100644 --- a/src/operator/PowImpl.cpp +++ b/src/operator/PowImpl.cpp @@ -21,27 +21,20 @@ #include "aidge/backend/cpu/data/GetCPUPtr.h" #include "aidge/backend/cpu/operator/PowImpl.hpp" -#include "aidge/backend/cpu/operator/PowImpl_forward_kernels.hpp" - -Aidge::Elts_t Aidge::PowImpl_cpu::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const { - // this implementation can be in-place - return Elts_t::DataElts(0); -} +#include "aidge/backend/cpu/operator/PowImpl_kernels.hpp" +template <> void Aidge::PowImpl_cpu::forward() { - // Find the correct kernel type - auto kernelFunc = Registrar<PowImplForward_cpu>::create({ - 
std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType(), - std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->dataType(), - std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType()}); - const std::vector<std::size_t> inputDims0 = getBroadcastedDims(std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dims(), std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dims()); const std::vector<std::size_t> inputDims1 = getBroadcastedDims(std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dims(), std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->dims()); + // Find the correct kernel type + const auto impl = Registrar<PowImpl_cpu>::create(getBestMatch(getRequiredSpec())); + // Call kernel - kernelFunc(inputDims0, + impl.forward(inputDims0, inputDims1, std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dims(), getCPUPtr(mOp.getRawInput(0)), @@ -49,24 +42,31 @@ void Aidge::PowImpl_cpu::forward() { getCPUPtr(mOp.getRawOutput(0))); } +template <> void Aidge::PowImpl_cpu::backward() { - // Find the correct kernel type const Pow_Op& op_ = dynamic_cast<const Pow_Op&>(mOp); - auto kernelFunc = Registrar<PowImplForward_cpu>::create({ - op_.getOutput(0)->grad()->dataType(), - op_.getInput(0)->grad()->dataType(), - op_.getInput(1)->grad()->dataType()}); - const std::vector<std::size_t> input0gradDims = getBroadcastedDims(op_.getInput(0)->grad()->dims(), - op_.getOutput(0)->grad()->dims()); - const std::vector<std::size_t> input1gradDims = getBroadcastedDims(op_.getInput(1)->grad()->dims(), - op_.getOutput(0)->grad()->dims()); + auto in0 = op_.getInput(0); + auto in1 = op_.getInput(1); + auto in0grad = op_.getInput(0)->grad(); + auto in1grad = op_.getInput(1)->grad(); + auto out0grad = op_.getOutput(0)->grad(); + + const std::vector<std::size_t> input0gradDims = getBroadcastedDims(std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->grad()->dims(), + std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->grad()->dims()); + const 
std::vector<std::size_t> input1gradDims = getBroadcastedDims(std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->grad()->dims(), + std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->grad()->dims()); + + // Find the correct kernel type + const auto impl = Registrar<PowImpl_cpu>::create(getBestMatch(getRequiredSpec())); // Call kernel - kernelFunc(op_.getOutput(0)->grad()->dims(), - input0gradDims, - input1gradDims, - getCPUPtr(mOp.getRawOutput(0)), - getCPUPtr(mOp.getRawInput(0)), - getCPUPtr(mOp.getRawInput(1))); + impl.backward(input0gradDims, + input1gradDims, + out0grad->dims(), + getCPUPtr(in0), + getCPUPtr(in1), + getCPUPtr(out0grad), + getCPUPtr(in0grad), + getCPUPtr(in1grad)); } \ No newline at end of file diff --git a/src/operator/ReLUImpl.cpp b/src/operator/ReLUImpl.cpp index 4a0fb9f5d929e2ce731a21b5553e1b9257a32daa..832f91aad347fc081439ec487d06b14b0e2fe8da 100644 --- a/src/operator/ReLUImpl.cpp +++ b/src/operator/ReLUImpl.cpp @@ -19,14 +19,9 @@ #include "aidge/utils/ErrorHandling.hpp" #include "aidge/backend/cpu/operator/ReLUImpl.hpp" -#include "aidge/backend/cpu/operator/ReLUImpl_forward_kernels.hpp" -#include "aidge/backend/cpu/operator/ReLUImpl_backward_kernels.hpp" - -Aidge::Elts_t Aidge::ReLUImpl_cpu::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const { - // this implementation can be in-place - return Elts_t::DataElts(0); -} +#include "aidge/backend/cpu/operator/ReLUImpl_kernels.hpp" +template <> void Aidge::ReLUImpl_cpu::forward() { const ReLU_Op& op_ = dynamic_cast<const ReLU_Op&>(mOp); std::shared_ptr<Tensor> in0 = op_.getInput(0); @@ -34,16 +29,15 @@ void Aidge::ReLUImpl_cpu::forward() { AIDGE_ASSERT(in0, "missing input #0"); // Find the correct kernel type - auto kernelFunc = Registrar<ReLUImplForward_cpu>::create({ - in0->dataType(), - out0->dataType()}); + const auto impl = Registrar<ReLUImpl_cpu>::create(getBestMatch(getRequiredSpec())); // Call kernel - kernelFunc(in0->size(), + impl.forward(in0->size(), 
getCPUPtr(mOp.getRawInput(0)), getCPUPtr(mOp.getRawOutput(0))); } +template <> void Aidge::ReLUImpl_cpu::backward() { const ReLU_Op& op_ = dynamic_cast<const ReLU_Op&>(mOp); std::shared_ptr<Tensor> in0 = op_.getInput(0); @@ -53,12 +47,8 @@ void Aidge::ReLUImpl_cpu::backward() { AIDGE_ASSERT(out0, "missing output #0 for current {} operator", op_.type()); // Find the correct kernel type - auto kernelFunc = Registrar<ReLUImplBackward_cpu>::create({ - in0->dataType(), - gra_int0->dataType(), - gra_out0->dataType() - }); + const auto impl = Registrar<ReLUImpl_cpu>::create(getBestMatch(getRequiredSpec())); // Call kernel - kernelFunc(gra_int0->size(), getCPUPtr(in0), getCPUPtr(gra_out0), getCPUPtr(gra_int0)); + impl.backward(gra_int0->size(), getCPUPtr(in0), getCPUPtr(gra_out0), getCPUPtr(gra_int0)); } diff --git a/src/operator/ReduceMeanImpl.cpp b/src/operator/ReduceMeanImpl.cpp index b4cd8ffa9b46aaa1c1d7a2eca947ed0254947fef..622672569372ff4e9f135e36255095f4246d5920 100644 --- a/src/operator/ReduceMeanImpl.cpp +++ b/src/operator/ReduceMeanImpl.cpp @@ -16,23 +16,29 @@ #include "aidge/utils/Types.h" #include "aidge/operator/ReduceMean.hpp" -#include "aidge/backend/cpu/operator/ReduceMeanImpl_forward_kernels.hpp" +#include "aidge/backend/cpu/operator/ReduceMeanImpl_kernels.hpp" +template <> void Aidge::ReduceMeanImpl_cpu::forward() { const ReduceMean_Op& op_ = dynamic_cast<const ReduceMean_Op&>(mOp); + // Find the correct kernel type - auto kernelFunc = Registrar<ReduceMeanImplForward_cpu>::create({ - op_.getInput(0)->dataType(), - op_.getOutput(0)->dataType()}); + const auto impl = Registrar<ReduceMeanImpl_cpu>::create(getBestMatch(getRequiredSpec())); // Call kernel - kernelFunc(op_.axes(), + impl.forward(op_.axes(), op_.keepDims(), op_.getInput(0)->dims(), op_.getInput(0)->getImpl()->rawPtr(), op_.getOutput(0)->getImpl()->rawPtr()); } +template <> +void Aidge::ReduceMeanImpl_cpu::backward() { + AIDGE_THROW_OR_ABORT(std::runtime_error, "Backward not yet implemented for 
ReduceMean_Op on backend cpu"); +} + + // void Aidge::ReduceMeanImpl1D_cpu::forward() { // // Find the correct kernel type diff --git a/src/operator/ReduceSumImpl.cpp b/src/operator/ReduceSumImpl.cpp new file mode 100644 index 0000000000000000000000000000000000000000..aad0801835a74ecefb046f3dc64729ae1f8bd8bb --- /dev/null +++ b/src/operator/ReduceSumImpl.cpp @@ -0,0 +1,39 @@ +/******************************************************************************** + * Copyright (c) 2024 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. + * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#include "aidge/backend/cpu/operator/ReduceSumImpl.hpp" + +#include <memory> +#include <vector> + +#include "aidge/utils/Types.h" +#include "aidge/operator/ReduceSum.hpp" +#include "aidge/backend/cpu/operator/ReduceSumImpl_kernels.hpp" + +template <> +void Aidge::ReduceSumImpl_cpu::forward() { + const ReduceSum_Op& op_ = dynamic_cast<const ReduceSum_Op&>(mOp); + + // Find the correct kernel type + const auto impl = Registrar<ReduceSumImpl_cpu>::create(getBestMatch(getRequiredSpec())); + + // Call kernel + impl.forward(op_.axes(), + op_.keepDims(), + op_.getInput(0)->dims(), + op_.getInput(0)->getImpl()->rawPtr(), + op_.getOutput(0)->getImpl()->rawPtr()); +} + +template <> +void Aidge::ReduceSumImpl_cpu::backward() { + AIDGE_THROW_OR_ABORT(std::runtime_error, "Backward not yet implemented for ReduceSum_Op on backend cpu"); +} diff --git a/src/operator/ScalingImpl.cpp b/src/operator/ScalingImpl.cpp index db4670836e702f536243aadec36c5ba85b2344c8..1e7a408f267c5eb2d60d188f0ed2ba0394222561 100644 --- a/src/operator/ScalingImpl.cpp +++ b/src/operator/ScalingImpl.cpp @@ -17,29 +17,28 @@ #include "aidge/operator/Scaling.hpp" #include 
"aidge/backend/cpu/operator/ScalingImpl.hpp" -#include "aidge/backend/cpu/operator/ScalingImpl_forward_kernels.hpp" +#include "aidge/backend/cpu/operator/ScalingImpl_kernels.hpp" #include "aidge/utils/Types.h" #include "aidge/backend/cpu/data/GetCPUPtr.h" -Aidge::Elts_t Aidge::ScalingImpl_cpu::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const { - // this implementation can be in-place - return Elts_t::DataElts(0); -} - +template <> void Aidge::ScalingImpl_cpu::forward() { const auto& op_ = dynamic_cast<const Scaling_Op&>(mOp); AIDGE_ASSERT(op_.getInput(0), "missing input #0 in Scaling Operator."); // Find the correct kernel type - auto kernelFunc = Registrar<ScalingImplForward_cpu>::create({ - op_.getInput(0)->dataType(), - op_.getOutput(0)->dataType()}); + const auto impl = Registrar<ScalingImpl_cpu>::create(getBestMatch(getRequiredSpec())); // Call kernel - kernelFunc(op_.scalingFactor(), + impl.forward(op_.scalingFactor(), op_.quantizedNbBits(), op_.isOutputUnsigned(), op_.getInput(0)->size(), getCPUPtr(mOp.getRawInput(0)), getCPUPtr(mOp.getRawOutput(0))); } + +template <> +void Aidge::ScalingImpl_cpu::backward() { + AIDGE_THROW_OR_ABORT(std::runtime_error, "Backward not yet implemented for Scaling_Op on backend cpu"); +} diff --git a/src/operator/SigmoidImpl.cpp b/src/operator/SigmoidImpl.cpp index ad69935c02e392d7aa1c9601acb827c5baf8970f..cdcbac85df3a38fea9b7100324e0618949262fc9 100644 --- a/src/operator/SigmoidImpl.cpp +++ b/src/operator/SigmoidImpl.cpp @@ -20,14 +20,9 @@ #include "aidge/backend/cpu/data/GetCPUPtr.h" #include "aidge/backend/cpu/operator/SigmoidImpl.hpp" -#include "aidge/backend/cpu/operator/SigmoidImpl_forward_kernels.hpp" -#include "aidge/backend/cpu/operator/SigmoidImpl_backward_kernels.hpp" - -Aidge::Elts_t Aidge::SigmoidImpl_cpu::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const { - // this implementation can be in-place - return Elts_t::DataElts(0); -} +#include 
"aidge/backend/cpu/operator/SigmoidImpl_kernels.hpp" +template <> void Aidge::SigmoidImpl_cpu::forward() { const Sigmoid_Op& op_ = dynamic_cast<const Sigmoid_Op&>(mOp); std::shared_ptr<Tensor> in0 = op_.getInput(0); @@ -35,16 +30,15 @@ void Aidge::SigmoidImpl_cpu::forward() { AIDGE_ASSERT(in0, "missing input #0"); // Find the correct kernel type - auto kernelFunc = Registrar<SigmoidImplForward_cpu>::create({ - in0->dataType(), - out0->dataType()}); + const auto impl = Registrar<SigmoidImpl_cpu>::create(getBestMatch(getRequiredSpec())); // Call kernel - kernelFunc(in0->size(), + impl.forward(in0->size(), getCPUPtr(mOp.getRawInput(0)), getCPUPtr(mOp.getRawOutput(0))); } +template <> void Aidge::SigmoidImpl_cpu::backward() { const Sigmoid_Op& op_ = dynamic_cast<const Sigmoid_Op&>(mOp); std::shared_ptr<Tensor> out0 = op_.getOutput(0); @@ -53,12 +47,8 @@ void Aidge::SigmoidImpl_cpu::backward() { AIDGE_ASSERT(out0, "missing output #0 for current {} operator", op_.type()); // Find the correct kernel type - auto kernelFunc = Registrar<SigmoidImplBackward_cpu>::create({ - out0->dataType(), - gra_int0->dataType(), - gra_out0->dataType() - }); + const auto impl = Registrar<SigmoidImpl_cpu>::create(getBestMatch(getRequiredSpec())); // Call kernel - kernelFunc(gra_int0->size(), getCPUPtr(out0), getCPUPtr(gra_out0), getCPUPtr(gra_int0)); + impl.backward(gra_int0->size(), getCPUPtr(out0), getCPUPtr(gra_out0), getCPUPtr(gra_int0)); } diff --git a/src/operator/SliceImpl.cpp b/src/operator/SliceImpl.cpp index 8ffe4dcdd97b58758885b013d0c1770bd98a83ba..945c1bc752feb8e6a194b1aff99b26f01a6a0e69 100644 --- a/src/operator/SliceImpl.cpp +++ b/src/operator/SliceImpl.cpp @@ -14,27 +14,21 @@ #include <vector> #include "aidge/backend/cpu/data/GetCPUPtr.h" -#include "aidge/backend/cpu/operator/SliceImpl_forward_kernels.hpp" +#include "aidge/backend/cpu/operator/SliceImpl_kernels.hpp" #include "aidge/operator/Slice.hpp" #include "aidge/utils/Log.hpp" #include "aidge/utils/Types.h" -Aidge::Elts_t 
Aidge::SliceImpl_cpu::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const { - // this implementation can be in-place - return Elts_t::DataElts(0); -} - +template <> void Aidge::SliceImpl_cpu::forward() { const auto& op_ = dynamic_cast<const Slice_Op&>(mOp); AIDGE_ASSERT(op_.getInput(0), "missing input #0 in Slice Operator."); // Find the correct kernel type - auto kernelFunc = Registrar<SliceImplForward_cpu>::create({ - op_.getInput(0)->dataType(), - op_.getOutput(0)->dataType()}); + const auto impl = Registrar<SliceImpl_cpu>::create(getBestMatch(getRequiredSpec())); // Call kernel - kernelFunc(op_.starts(), + impl.forward(op_.starts(), op_.ends(), op_.axes(), op_.steps(), @@ -42,3 +36,8 @@ void Aidge::SliceImpl_cpu::forward() { getCPUPtr(mOp.getRawInput(0)), getCPUPtr(mOp.getRawOutput(0))); } + +template <> +void Aidge::SliceImpl_cpu::backward() { + AIDGE_THROW_OR_ABORT(std::runtime_error, "Backward not yet implemented for Slice_Op on backend cpu"); +} diff --git a/src/operator/SoftmaxImpl.cpp b/src/operator/SoftmaxImpl.cpp index 5bc3699e2146e36a63b4a1602ca1cb86e3ff1e2f..8b6933f22f3673476f4a9f1e261fbcdc09857300 100644 --- a/src/operator/SoftmaxImpl.cpp +++ b/src/operator/SoftmaxImpl.cpp @@ -20,27 +20,25 @@ #include "aidge/backend/cpu/data/GetCPUPtr.h" #include "aidge/backend/cpu/operator/SoftmaxImpl.hpp" -#include "aidge/backend/cpu/operator/SoftmaxImpl_forward_kernels.hpp" - -Aidge::Elts_t Aidge::SoftmaxImpl_cpu::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const { - // this implementation can be in-place - return Elts_t::DataElts(0); -} +#include "aidge/backend/cpu/operator/SoftmaxImpl_kernels.hpp" +template <> void Aidge::SoftmaxImpl_cpu::forward() { const auto& op_ = dynamic_cast<const Softmax_Op&>(mOp); AIDGE_ASSERT(!op_.getInput(0)->empty(), "Softmax input empty"); + std::int32_t axis = (op_.axis() >= 0) ? 
op_.axis() : op_.getInput(0)->nbDims() + op_.axis(); // Find the correct kernel type - auto kernelFunc = Registrar<SoftmaxImplForward_cpu>::create({ - op_.getInput(0)->dataType(), - op_.getOutput(0)->dataType()}); - - std::int32_t axis = (op_.axis() >= 0) ? op_.axis() : op_.getInput(0)->nbDims() + op_.axis(); + const auto impl = Registrar<SoftmaxImpl_cpu>::create(getBestMatch(getRequiredSpec())); // Call kernel - kernelFunc(static_cast<std::size_t>(axis), // axisIdx + impl.forward(static_cast<std::size_t>(axis), // axisIdx std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dims(), std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->getImpl()->rawPtr(), std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->getImpl()->rawPtr()); } + +template <> +void Aidge::SoftmaxImpl_cpu::backward() { + AIDGE_THROW_OR_ABORT(std::runtime_error, "Backward not yet implemented for Softmax_Op on backend cpu"); +} diff --git a/src/operator/SqrtImpl.cpp b/src/operator/SqrtImpl.cpp index edb8858fc4ac07fa5725d24688b22d64134afb0e..25bdb42fd5140ef4f64d704fc3a5ccf237f17f81 100644 --- a/src/operator/SqrtImpl.cpp +++ b/src/operator/SqrtImpl.cpp @@ -19,30 +19,24 @@ #include "aidge/utils/Types.h" #include "aidge/backend/cpu/operator/SqrtImpl.hpp" -#include "aidge/backend/cpu/operator/SqrtImpl_forward_kernels.hpp" -#include "aidge/backend/cpu/operator/SqrtImpl_backward_kernels.hpp" - -Aidge::Elts_t Aidge::SqrtImpl_cpu::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const { - // this implementation can be in-place - return Elts_t::DataElts(0); -} +#include "aidge/backend/cpu/operator/SqrtImpl_kernels.hpp" +template <> void Aidge::SqrtImpl_cpu::forward() { std::shared_ptr<Tensor> in0 = std::static_pointer_cast<Tensor>(mOp.getRawInput(0)); std::shared_ptr<Tensor> out0 = std::static_pointer_cast<Tensor>(mOp.getRawOutput(0)); AIDGE_ASSERT(in0, "missing input #0"); // Find the correct kernel type - auto kernelFunc = Registrar<SqrtImplForward_cpu>::create({ - in0->dataType(), - 
out0->dataType()}); + const auto impl = Registrar<SqrtImpl_cpu>::create(getBestMatch(getRequiredSpec())); // Call kernel - kernelFunc(in0->size(), + impl.forward(in0->size(), getCPUPtr(mOp.getRawInput(0)), getCPUPtr(mOp.getRawOutput(0))); } +template <> void Aidge::SqrtImpl_cpu::backward() { // reversing in and out Data for backprop const Sqrt_Op& op_ = dynamic_cast<const Sqrt_Op&>(mOp); @@ -51,12 +45,10 @@ void Aidge::SqrtImpl_cpu::backward() { AIDGE_ASSERT(out0grad, "missing output #0"); // Find the correct kernel type - auto kernelFunc = Registrar<SqrtImplForward_cpu>::create({ - out0grad->dataType(), - in0grad->dataType()}); + const auto impl = Registrar<SqrtImpl_cpu>::create(getBestMatch(getRequiredSpec())); // Call kernel - kernelFunc(out0grad->size(), + impl.backward(out0grad->size(), getCPUPtr(out0grad), getCPUPtr(in0grad)); } \ No newline at end of file diff --git a/src/operator/SubImpl.cpp b/src/operator/SubImpl.cpp index ffddb59ee3373c4a0a6c2653747744a43fd471d9..d43771b967889183801cb93418c967ce9d9c8453 100644 --- a/src/operator/SubImpl.cpp +++ b/src/operator/SubImpl.cpp @@ -21,31 +21,28 @@ #include "aidge/backend/cpu/data/GetCPUPtr.h" #include "aidge/backend/cpu/operator/SubImpl.hpp" -#include "aidge/backend/cpu/operator/SubImpl_forward_kernels.hpp" - -Aidge::Elts_t Aidge::SubImpl_cpu::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const { - // this implementation can be in-place - return Elts_t::DataElts(0); -} +#include "aidge/backend/cpu/operator/SubImpl_kernels.hpp" +template <> void Aidge::SubImpl_cpu::forward() { - - // Find the correct kernel type - auto kernelFunc = Registrar<SubImplForward_cpu>::create({ - std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType(), - std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->dataType(), - std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType()}); - const std::vector<std::size_t> inputDims0 = getBroadcastedDims(std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dims(), 
std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dims()); const std::vector<std::size_t> inputDims1 = getBroadcastedDims(std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dims(), std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->dims()); + // Find the correct kernel type + const auto impl = Registrar<SubImpl_cpu>::create(getBestMatch(getRequiredSpec())); + // Call kernel - kernelFunc(inputDims0, + impl.forward(inputDims0, inputDims1, std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dims(), getCPUPtr(mOp.getRawInput(0)), getCPUPtr(mOp.getRawInput(1)), getCPUPtr(mOp.getRawOutput(0))); } + +template <> +void Aidge::SubImpl_cpu::backward() { + AIDGE_THROW_OR_ABORT(std::runtime_error, "Backward not yet implemented for Sub_Op on backend cpu"); +} diff --git a/src/operator/TanhImpl.cpp b/src/operator/TanhImpl.cpp index a2469ed9b83679c0edf8d0a761abf9d3d046db6e..ed8dce08b9f710c9e5830b2c72ffef71013edb6e 100644 --- a/src/operator/TanhImpl.cpp +++ b/src/operator/TanhImpl.cpp @@ -20,14 +20,9 @@ #include "aidge/backend/cpu/data/GetCPUPtr.h" #include "aidge/backend/cpu/operator/TanhImpl.hpp" -#include "aidge/backend/cpu/operator/TanhImpl_forward_kernels.hpp" -#include "aidge/backend/cpu/operator/TanhImpl_backward_kernels.hpp" - -Aidge::Elts_t Aidge::TanhImpl_cpu::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const { - // this implementation can be in-place - return Elts_t::DataElts(0); -} +#include "aidge/backend/cpu/operator/TanhImpl_kernels.hpp" +template <> void Aidge::TanhImpl_cpu::forward() { const Tanh_Op& op_ = dynamic_cast<const Tanh_Op&>(mOp); std::shared_ptr<Tensor> in0 = op_.getInput(0); @@ -35,16 +30,15 @@ void Aidge::TanhImpl_cpu::forward() { AIDGE_ASSERT(in0, "missing input #0"); // Find the correct kernel type - auto kernelFunc = Registrar<TanhImplForward_cpu>::create({ - in0->dataType(), - out0->dataType()}); + const auto impl = Registrar<TanhImpl_cpu>::create(getBestMatch(getRequiredSpec())); // Call kernel - 
kernelFunc(in0->size(), + impl.forward(in0->size(), getCPUPtr(mOp.getRawInput(0)), getCPUPtr(mOp.getRawOutput(0))); } +template <> void Aidge::TanhImpl_cpu::backward() { const Tanh_Op& op_ = dynamic_cast<const Tanh_Op&>(mOp); std::shared_ptr<Tensor> out0 = op_.getOutput(0); @@ -53,13 +47,9 @@ void Aidge::TanhImpl_cpu::backward() { AIDGE_ASSERT(out0, "missing output #0 for current {} operator", op_.type()); // Find the correct kernel type - auto kernelFunc = Registrar<TanhImplBackward_cpu>::create({ - out0->dataType(), - gra_int0->dataType(), - gra_out0->dataType() - }); + const auto impl = Registrar<TanhImpl_cpu>::create(getBestMatch(getRequiredSpec())); // Call kernel - kernelFunc(gra_int0->size(), getCPUPtr(out0), getCPUPtr(gra_out0), getCPUPtr(gra_int0)); + impl.backward(gra_int0->size(), getCPUPtr(out0), getCPUPtr(gra_out0), getCPUPtr(gra_int0)); } diff --git a/unit_tests/CMakeLists.txt b/unit_tests/CMakeLists.txt index 671cdd5ac1262ab61b35a70a234236aff4a3cc15..8178df93beb96a3a7538dae8d9a706380c06ecf8 100644 --- a/unit_tests/CMakeLists.txt +++ b/unit_tests/CMakeLists.txt @@ -12,7 +12,7 @@ file(GLOB_RECURSE src_files "*.cpp") add_executable(tests${module_name} ${src_files}) -target_link_libraries(tests${module_name} PUBLIC ${module_name}) +target_link_libraries(tests${module_name} PRIVATE ${module_name}) target_link_libraries(tests${module_name} PRIVATE Catch2::Catch2WithMain) diff --git a/unit_tests/operator/Test_AndImpl.cpp b/unit_tests/operator/Test_AndImpl.cpp new file mode 100644 index 0000000000000000000000000000000000000000..053bb3ea4ed913bd388f3ae049c4d6402ad58d59 --- /dev/null +++ b/unit_tests/operator/Test_AndImpl.cpp @@ -0,0 +1,205 @@ +/******************************************************************************** + * Copyright (c) 2024 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. 
+ * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#include <catch2/catch_test_macros.hpp> +#include <random> // std::random_device, std::mt19937, std::uniform_real_distribution + +#include "aidge/data/Tensor.hpp" +#include "aidge/operator/And.hpp" + +#include "aidge/backend/cpu.hpp" + +using namespace Aidge; + +TEST_CASE("[cpu/operator] And(forward)", "[And][CPU]") { + SECTION("ForwardDims") + { + constexpr std::uint16_t NBTRIALS = 10; + // Create a random number generator + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_real_distribution<float> valueDist(0.1f, 1.1f); // Random float distribution between 0 and 1 + std::uniform_int_distribution<std::size_t> dimSizeDist(std::size_t(2), std::size_t(10)); + std::uniform_int_distribution<std::size_t> nbDimsDist(std::size_t(1), std::size_t(5)); + std::uniform_int_distribution<int> boolDist(0,1); + + SECTION("Same dimensions") { + for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) { + DimSize_t nbDims = nbDimsDist(gen); + std::vector<DimSize_t> dims(nbDims); + for (std::size_t i = 0; i < nbDims; i++) { + dims[i] = dimSizeDist(gen); + } + + std::shared_ptr<Tensor> myInput1 = std::make_shared<Tensor>(dims); + myInput1->setBackend("cpu"); + myInput1->setDataType(DataType::Float32); + myInput1->zeros(); + std::shared_ptr<Tensor> myInput2 = std::make_shared<Tensor>(dims); + myInput2->setBackend("cpu"); + myInput2->setDataType(DataType::Float32); + myInput2->zeros(); + std::shared_ptr<Node> myAnd = And(); + auto op = std::static_pointer_cast<OperatorTensor>(myAnd -> getOperator()); + op->associateInput(0,myInput1); + op->associateInput(1,myInput2); + op->setDataType(DataType::Float32); + op->setBackend("cpu"); + op->forwardDims(); + + const auto outputDims = op->getOutput(0)->dims(); + REQUIRE(outputDims == dims); + } + } + SECTION("Broadcasting") { + for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) { + DimSize_t 
nbDims = nbDimsDist(gen); + std::vector<DimSize_t> dims1(nbDims, 1); + std::vector<DimSize_t> dims2(nbDims, 1); + std::vector<DimSize_t> expectedOutDims; + for (std::size_t i = 0; i < nbDims; i++) { + DimSize_t dim = dimSizeDist(gen); + if (boolDist(gen)) { + dims1[i] = dim; + } + if (boolDist(gen)) { + dims2[i] = dim; + } + expectedOutDims.push_back(std::max(dims1[i],dims2[i])); + } + + + std::shared_ptr<Tensor> myInput1 = std::make_shared<Tensor>(dims1); + myInput1->setBackend("cpu"); + myInput1->setDataType(DataType::Float32); + myInput1->zeros(); + std::shared_ptr<Tensor> myInput2 = std::make_shared<Tensor>(dims2); + myInput2->setBackend("cpu"); + myInput2->setDataType(DataType::Float32); + myInput2->zeros(); + std::shared_ptr<Node> myAnd = And(); + auto op = std::static_pointer_cast<OperatorTensor>(myAnd -> getOperator()); + op->associateInput(0,myInput1); + op->associateInput(1,myInput2); + op->setDataType(DataType::Float32); + op->setBackend("cpu"); + + op->forwardDims(); + + const auto outputDims = op->getOutput(0)->dims(); + REQUIRE(outputDims == expectedOutDims); + } + } + } + SECTION("Same size inputs") { + std::shared_ptr<Tensor> input1 = std::make_shared<Tensor>(Array4D<int,3,3,3,2> { + { // + { // + {{20, 15},{31, 11},{22, 49}}, // + {{41, 10},{24, 51},{27, 52}}, // + {{26, 53},{27, 54},{28, 55}} // + }, // + { // + {{29, 56},{30, 57},{31, 58}}, // + {{32, 59},{33, 60},{34, 61}}, // + {{35, 62},{36, 63},{37, 64}} // + }, // + { // + {{38, 65},{39, 66},{40, 67}}, // + {{41, 68},{42, 69},{43, 70}}, // + {{44, 71},{45, 72},{46, 73}} // + } // + } // + }); // + std::shared_ptr<Tensor> input2 = std::make_shared<Tensor>(Array4D<int,3,3,3,2> { + { // + { // + {{20, 47},{21, 48},{22, 49}}, // + {{23, 50},{24, 51},{25, 52}}, // + {{17, 53},{27, 26},{14, 33}} // + }, // + { // + {{29, 56},{30, 57},{31, 58}}, // + {{72, 44},{33, 20},{27, 55}}, // + {{35, 24},{25, 63},{28, 64}} // + }, // + { // + {{32, 65},{39, 66},{40, 70}}, // + {{41, 53},{42, 60},{34, 70}}, 
// + {{44, 71},{30, 12},{46, 73}} // + } // + } // + }); // + std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array4D<int,3,3,3,2> { + { + { + {{1, 0},{0, 0},{1, 1}}, + {{0, 0},{1, 1},{0, 1}}, + {{0, 1},{1, 0},{0, 0}} + }, + { + {{1, 1},{1, 1},{1, 1}}, + {{0, 0},{1, 0},{0, 0}}, + {{1, 0},{0, 1},{0, 1}} + }, + { + {{0, 1},{1, 1},{1, 0}}, + {{1, 0},{1, 0},{0, 1}}, + {{1, 1},{0, 0},{1, 1}} + } + } + }); + + std::shared_ptr<Node> myAnd = And(); + auto op = std::static_pointer_cast<OperatorTensor>(myAnd -> getOperator()); + op->associateInput(0, input1); + op->associateInput(1, input2); + op->setBackend("cpu"); + op->setDataType(DataType::Int32); + myAnd->forward(); + + REQUIRE(*(op->getOutput(0)) == *expectedOutput); + } + + SECTION("Broadcasting") { + std::shared_ptr<Tensor> input_1 = std::make_shared<Tensor>(Array4D<int,1,3,3,2> { + { // + { // + {{10, 20},{22, 23},{20, 20}}, // + {{10, 15},{10, 29},{20, 20}}, // + {{26, 25},{33, 20},{10, 20}} // + } // + } // + }); // + + std::shared_ptr<Tensor> input_2 = std::make_shared<Tensor>(Array1D<int,2> {{10, 20}}); + std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array4D<int,1,3,3,2> { + { // + { // + {{ 1, 1},{ 0, 0},{ 0, 1}}, // + {{ 1, 0},{ 1, 0},{ 0, 1}}, // + {{ 0, 0},{ 0, 1},{ 1, 1}} // + } // + } // + }); // + + std::shared_ptr<Node> myAnd = And(); + auto op = std::static_pointer_cast<OperatorTensor>(myAnd -> getOperator()); + op->associateInput(0, input_1); + op->associateInput(1, input_2); + op->setDataType(DataType::Int32); + op->setBackend("cpu"); + myAnd->forward(); + op->getOutput(0)->print(); + expectedOutput->print(); + REQUIRE(*op->getOutput(0) == *expectedOutput); + } +} \ No newline at end of file diff --git a/unit_tests/operator/Test_ArgMaxImpl.cpp b/unit_tests/operator/Test_ArgMaxImpl.cpp new file mode 100644 index 0000000000000000000000000000000000000000..9915d90423e976db1bdd2a694a2cfd7beb380cee --- /dev/null +++ b/unit_tests/operator/Test_ArgMaxImpl.cpp @@ -0,0 
+1,227 @@ +/******************************************************************************** + * Copyright (c) 2024 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. + * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#include <catch2/catch_test_macros.hpp> +#include <memory> +#include <numeric> // std::accumulate +#include <random> // std::random_device, std::mt19937, std::uniform_real_distribution + +#include "aidge/data/Tensor.hpp" +#include "aidge/operator/ArgMax.hpp" +#include "aidge/operator/Conv.hpp" + +#include "aidge/backend/cpu.hpp" +#include "aidge/utils/TensorUtils.hpp" + +using namespace Aidge; + +TEST_CASE("[cpu/operator] ArgMax(forward)", "[ArgMax][CPU]") { + SECTION("ForwardDims") + { + constexpr std::uint16_t NBTRIALS = 10; + // Create a random number generator + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_real_distribution<float> valueDist(0.1f, 1.1f); // Random float distribution between 0 and 1 + std::uniform_int_distribution<std::size_t> dimSizeDist(std::size_t(2), std::size_t(10)); + std::uniform_int_distribution<std::size_t> nbDimsDist(std::size_t(1), std::size_t(5)); + std::uniform_int_distribution<int> boolDist(0,1); + + SECTION("KeepDims") { + for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) { + DimSize_t nbDims = nbDimsDist(gen); + std::vector<DimSize_t> dims(nbDims); + std::vector<DimSize_t> expectedOutDims(nbDims); + std::uniform_int_distribution<std::int32_t> axisDist(std::int32_t(0), std::int32_t(nbDims-1)); + std::int32_t axis = axisDist(gen); + for (std::size_t i = 0; i < nbDims; i++) { + dims[i] = dimSizeDist(gen); + if (i == axis) { + expectedOutDims[i] = 1; + } + else { + expectedOutDims[i] = dims[i]; + } + } + + std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(dims); 
+ myInput->setBackend("cpu"); + myInput->setDataType(DataType::Float32); + myInput->zeros(); + std::shared_ptr<Node> myArgMax = ArgMax(axis); + auto op = std::static_pointer_cast<OperatorTensor>(myArgMax -> getOperator()); + op->associateInput(0,myInput); + op->setDataType(DataType::Float32); + op->setBackend("cpu"); + op->forwardDims(); + + const auto outputDims = op->getOutput(0)->dims(); + REQUIRE(outputDims == expectedOutDims); + } + } + SECTION("Not KeepDims") { + for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) { + DimSize_t nbDims = nbDimsDist(gen); + std::vector<DimSize_t> dims(nbDims); + std::vector<DimSize_t> expectedOutDims; + std::uniform_int_distribution<std::int32_t> axisDist(std::int32_t(0), std::int32_t(nbDims-1)); + std::int32_t axis = axisDist(gen); + for (std::size_t i = 0; i < nbDims; i++) { + dims[i] = dimSizeDist(gen); + if(i != axis) { + expectedOutDims.push_back(dims[i]); + } + } + if(expectedOutDims.empty()) { + expectedOutDims.push_back(1); + } + + std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(dims); + myInput->setBackend("cpu"); + myInput->setDataType(DataType::Float32); + std::shared_ptr<Node> myArgMax = ArgMax(axis, false); + auto op = std::static_pointer_cast<OperatorTensor>(myArgMax -> getOperator()); + op->associateInput(0,myInput); + op->setDataType(DataType::Float32); + op->setBackend("cpu"); + + op->forwardDims(); + + const auto outputDims = op->getOutput(0)->dims(); + REQUIRE(outputDims == expectedOutDims); + } + } + } + SECTION("3D Tensor") { + std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array3D<float,2,3,4> { + { + { + { 1.0, 2.0, 3.0, 4.0}, + { 8.0, 0.0, 17.0, 1.0}, + { 5.0, 10.0, 6.0, 0.0} + }, + { + { 7.0, 1.0, 9.0, 4.0}, + { 0.0, 8.0, 4.0, 2.0}, + { 9.0, 2.0, 0.0, 5.0} + } + } + }); + SECTION("Axis 2") { + + Tensor myOutput = Tensor(Array3D<float,2,3, 1> { + { + { + {3.0}, + {2.0}, + {1.0} + }, + { + {2.0}, + {1.0}, + {0.0} + } + } + }); + + std::shared_ptr<Node> myArgMax = ArgMax(2); + 
auto op = std::static_pointer_cast<OperatorTensor>(myArgMax -> getOperator()); + op->associateInput(0,myInput); + op->setDataType(DataType::Float32); + op->setBackend("cpu"); + myArgMax->forward(); + + REQUIRE(*(op->getOutput(0)) == myOutput); + } + SECTION("Axis 2 with keep_dims false") { + + Tensor myOutput = Tensor(Array2D<float,2,3> { + { + { 3.0, 2.0, 1.0 }, + { 2.0, 1.0, 0.0 } + } + }); + + std::shared_ptr<Node> myArgMax = ArgMax(2,0); + auto op = std::static_pointer_cast<OperatorTensor>(myArgMax -> getOperator()); + op->associateInput(0,myInput); + op->setDataType(DataType::Float32); + op->setBackend("cpu"); + myArgMax->forward(); + + REQUIRE(*(op->getOutput(0)) == myOutput); + } + SECTION("Axis 1") { + Tensor myOutput = Tensor(Array3D<float,2,1,4> { + { + { + { 1.0, 2.0, 1.0, 0.0 } + }, + { + { 2.0, 1.0, 0.0, 2.0 } + } + } + }); + + std::shared_ptr<Node> myArgMax = ArgMax(1); + auto op = std::static_pointer_cast<OperatorTensor>(myArgMax -> getOperator()); + op->associateInput(0,myInput); + op->setDataType(DataType::Float32); + op->setBackend("cpu"); + myArgMax->forward(); + + REQUIRE(*(op->getOutput(0)) == myOutput); + } + SECTION("Axis 0") { + Tensor myOutput = Tensor(Array3D<float,1,3,4> { + { + { + { 1.0, 0.0, 1.0, 0.0 }, + { 0.0, 1.0, 0.0, 1.0 }, + { 1.0, 0.0, 0.0, 1.0 } + } + } + }); + + std::shared_ptr<Node> myArgMax = ArgMax(0); + auto op = std::static_pointer_cast<OperatorTensor>(myArgMax -> getOperator()); + op->associateInput(0,myInput); + op->setDataType(DataType::Float32); + op->setBackend("cpu"); + std::cout << " ............... 
"<< std::endl; + myArgMax->forward(); + op->getOutput(0)->print(); + std::cout <<"------"<<std::endl; + myOutput.print(); + + REQUIRE(*(op->getOutput(0)) == myOutput); + } + } + SECTION("Select_Last_Index") { + std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array1D<float,10> { + { + 1.0, 5.0, 9.0, 0.0, 6.0, 2.0, 9.0, 4.0, 3.0, 9.0 + } + }); + std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>(Array1D<float,1> {{9}}); + + std::shared_ptr<Node> myArgMax = ArgMax(0, 1, 1); + auto op = std::static_pointer_cast<OperatorTensor>(myArgMax -> getOperator()); + op->associateInput(0,myInput); + op->setDataType(DataType::Float32); + op->setBackend("cpu"); + myArgMax->forward(); + op->getOutput(0)->print(); + + REQUIRE(*(op->getOutput(0)) == *myOutput); + + } +} \ No newline at end of file diff --git a/unit_tests/operator/Test_BitShift.cpp b/unit_tests/operator/Test_BitShift.cpp new file mode 100644 index 0000000000000000000000000000000000000000..a52990bc7991a325ce151cf6634b0d5a831992c8 --- /dev/null +++ b/unit_tests/operator/Test_BitShift.cpp @@ -0,0 +1,245 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. 
+ * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#include <catch2/catch_test_macros.hpp> +#include <cstddef> // std::size_t +#include <cstdint> // std::uint16_t +#include <chrono> +#include <iostream> +#include <memory> +#include <numeric> +#include <random> // std::random_device, std::mt19937, std::uniform_real_distribution +#include <iomanip> +#include "aidge/data/Tensor.hpp" +#include "aidge/operator/BitShift.hpp" +#include "aidge/utils/TensorUtils.hpp" + +namespace Aidge { + +TEST_CASE("[cpu/operator] BitShift_TEST", "[BitShift][CPU]") { + constexpr std::uint16_t NBTRIALS = 15; + // Create a random number generator + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution<int> valueDist(-15, 15); + std::uniform_int_distribution<std::size_t> dimSizeDist(std::size_t(2), std::size_t(5)); + std::uniform_int_distribution<std::size_t> nbDimsDist(std::size_t(1), std::size_t(3)); + std::uniform_int_distribution<int> boolDist(0,1); + + BitShift_Op::BitShiftDirection direction = BitShift_Op::BitShiftDirection::left; + + if(valueDist(gen) % 2 == 0) + { + direction = BitShift_Op::BitShiftDirection::right; + } + + // Create BitShift Operator + std::shared_ptr<Node> myBitShift = BitShift(direction); + auto op = std::static_pointer_cast<OperatorTensor>(myBitShift-> getOperator()); + op->setDataType(DataType::Int32); + op->setBackend("cpu"); + + // Create 2 input Tensors + std::shared_ptr<Tensor> T0 = std::make_shared<Tensor>(); + op->associateInput(0,T0); + T0->setDataType(DataType::Int32); + T0->setBackend("cpu"); + std::shared_ptr<Tensor> T1 = std::make_shared<Tensor>(); + op -> associateInput(1,T1); + T1->setDataType(DataType::Int32); + T1->setBackend("cpu"); + + // Create results Tensor + std::shared_ptr<Tensor> Tres = std::make_shared<Tensor>(); + Tres->setDataType(DataType::Int32); + Tres->setBackend("cpu"); + + // To measure execution time of 
'BitShift_Op::forward()' member function call + std::chrono::time_point<std::chrono::system_clock> start; + + std::chrono::time_point<std::chrono::system_clock> end; + std::chrono::duration<double, std::micro> duration{}; + + SECTION("BitShiftImpl_cpu::forward()") { + SECTION("Test Forward Kernel with same dimensions") { + std::size_t number_of_operation = 0; + + for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) { + // generate 2 random Tensors + const std::size_t nbDims = nbDimsDist(gen); + std::vector<std::size_t> dims; + for (std::size_t i = 0; i < nbDims; ++i) { + dims.push_back(dimSizeDist(gen)); + } + const std::size_t nb_elements = std::accumulate(dims.cbegin(), dims.cend(), std::size_t(1), std::multiplies<std::size_t>()); + number_of_operation += nb_elements; + + // without broadcasting + int* array0 = new int[nb_elements]; + int* array1 = new int[nb_elements]; + int* result = new int[nb_elements]; + + for (std::size_t i = 0; i < nb_elements; ++i) { + array0[i] = valueDist(gen); + array1[i] = std::abs(valueDist(gen)); // bitshift is impossible with negative value + if(direction == BitShift_Op::BitShiftDirection::left) + { + result[i] = array0[i] << array1[i]; + } + else + { + result[i] = array0[i] >> array1[i]; + } + } + + // input0 + T0->resize(dims); + T0 -> getImpl() -> setRawPtr(array0, nb_elements); + + // input1 + T1->resize(dims); + T1 -> getImpl() -> setRawPtr(array1, nb_elements); + + // results + Tres->resize(dims); + Tres -> getImpl() -> setRawPtr(result, nb_elements); + + op->forwardDims(); + start = std::chrono::system_clock::now(); + myBitShift->forward(); + end = std::chrono::system_clock::now(); + duration += std::chrono::duration_cast<std::chrono::microseconds>(end - start); + + bool is_eq = approxEq<int>(*(op->getOutput(0)), *Tres); + + auto Output = *(op->getOutput(0)); + auto prt = Output.getImpl()->rawPtr(); + + REQUIRE(is_eq); + + delete[] array0; + delete[] array1; + delete[] result; + + + } + std::cout << "number of elements 
over time spent: " << (number_of_operation / duration.count())<< std::endl; + std::cout << "total time: " << duration.count() << "μs" << std::endl; + } + SECTION("Test BitShift kernels with Broadcasting") { + std::size_t number_of_operation = 0; + + for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) { + // generate 2 random Tensors + // handle dimensions, replace some dimensions with '1' to get broadcasting + constexpr std::size_t nbDims = 4; + std::vector<std::size_t> dims; + for (std::size_t i = 0; i < nbDims; ++i) { + dims.push_back(dimSizeDist(gen)); + } + std::vector<std::size_t> dims0 = dims; + std::vector<std::size_t> dims1 = dims; + std::vector<std::size_t> dimsOut = dims; + for (std::size_t i = 0; i < nbDims; ++i) { + if (boolDist(gen)) { + dims0[i] = 1; + } + if (boolDist(gen)) { + dims1[i] = 1; + } + dimsOut[i] = (dims0[i] == 1) ? dims1[i] : dims0[i]; + } + + // create arrays and fill them with random values + int* array0 = new int[dims0[0]*dims0[1]*dims0[2]*dims0[3]]; + int* array1 = new int[dims1[0]*dims1[1]*dims1[2]*dims1[3]]; + int* result = new int[dimsOut[0]*dimsOut[1]*dimsOut[2]*dimsOut[3]]; + + for (std::size_t i = 0; i < dims0[0]*dims0[1]*dims0[2]*dims0[3]; ++i) { + array0[i] = valueDist(gen); + } + for (std::size_t i = 0; i < dims1[0]*dims1[1]*dims1[2]*dims1[3]; ++i) { + array1[i] = std::abs(valueDist(gen)); + } + + //True result with broadcast + const std::size_t strides0[nbDims] = {dims0[1]*dims0[2]*dims0[3], dims0[2]*dims0[3], dims0[3], 1}; + const std::size_t strides1[nbDims] = {dims1[1]*dims1[2]*dims1[3], dims1[2]*dims1[3], dims1[3], 1}; + for (std::size_t a = 0; a < dimsOut[0]; ++a) { + for (std::size_t b = 0; b < dimsOut[1]; ++b) { + const std::size_t idx0_0 = strides0[0] * ((dims0[0] > 1) ? a : 0) + + strides0[1] * ((dims0[1] > 1) ? b : 0); + const std::size_t idx1_0 = strides1[0] * ((dims1[0] > 1) ? a : 0) + + strides1[1] * ((dims1[1] > 1) ? 
b : 0); + for (std::size_t c = 0; c < dimsOut[2]; ++c) { + const std::size_t idx_out = dimsOut[3] * (c + dimsOut[2] * (b + dimsOut[1] * a)); + for (std::size_t d = 0; d < dimsOut[3]; ++d) { + std::size_t idx0 = idx0_0 + + strides0[2] * ((dims0[2] > 1) ? c : 0) + + ((dims0[3] > 1) ? d : 0); + std::size_t idx1 = idx1_0 + + strides1[2] * ((dims1[2] > 1) ? c : 0) + + ((dims1[3] > 1) ? d : 0); + if(direction == BitShift_Op::BitShiftDirection::left) + { + result[idx_out + d] = array0[idx0] << array1[idx1]; + } + else + { + result[idx_out + d] = array0[idx0] >> array1[idx1]; + } + } + } + } + } + + // conversion to Aidge::Tensors + // input0 + T0->resize(dims0); + T0 -> getImpl() -> setRawPtr(array0, dims0[0]*dims0[1]*dims0[2]*dims0[3]); + + // input1 + T1->resize(dims1); + T1 -> getImpl() -> setRawPtr(array1, dims1[0]*dims1[1]*dims1[2]*dims1[3]); + + // results + Tres->resize(dimsOut); + Tres -> getImpl() -> setRawPtr(result, dimsOut[0]*dimsOut[1]*dimsOut[2]*dimsOut[3]); + + // compute result + op->forwardDims(); + start = std::chrono::system_clock::now(); + myBitShift->forward(); + end = std::chrono::system_clock::now(); + duration += std::chrono::duration_cast<std::chrono::microseconds>(end - start); + + // comparison between truth and computed result + bool equiv = (approxEq<int>(*(op->getOutput(0)), *Tres)); + if(equiv == false) + { + std::cout << "Problem\n"; + } + REQUIRE(equiv); + + delete[] array0; + delete[] array1; + delete[] result; + + const std::size_t nb_elements = std::accumulate(dimsOut.cbegin(), dimsOut.cend(), std::size_t(1), std::multiplies<std::size_t>()); + number_of_operation += nb_elements; + } + std::cout << "number of elements over time spent: " << (number_of_operation / duration.count())<< std::endl; + std::cout << "total time: " << duration.count() << "μs" << std::endl; + } + +} +} // namespace Aidge +} \ No newline at end of file diff --git a/unit_tests/operator/Test_ConstantOfShapeImpl.cpp b/unit_tests/operator/Test_ConstantOfShapeImpl.cpp 
new file mode 100644 index 0000000000000000000000000000000000000000..42505d385fde7e72e09531f1607287ffc6978f75 --- /dev/null +++ b/unit_tests/operator/Test_ConstantOfShapeImpl.cpp @@ -0,0 +1,120 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. + * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#include <algorithm> +#include <chrono> +#include <cmath> +#include <cstddef> // std::size_t +#include <cstdint> // std::uint16_t +#include <iostream> +#include <memory> +#include <numeric> // std::accumulate +#include <ostream> +#include <random> // std::random_device, std::mt19937, std::uniform_real_distribution + +#include "catch2/internal/catch_compiler_capabilities.hpp" +#include "catch2/internal/catch_enforce.hpp" +#include <catch2/catch_test_macros.hpp> +#include <catch2/generators/catch_generators_random.hpp> + +#include "aidge/data/Tensor.hpp" +#include "aidge/operator/ConstantOfShape.hpp" +#include "aidge/utils/TensorUtils.hpp" +#include <aidge/data/Data.hpp> +#include <aidge/data/half.hpp> +#include <aidge/filler/Filler.hpp> +#include <aidge/operator/OperatorTensor.hpp> +#include <aidge/operator/Reshape.hpp> +#include <aidge/utils/TensorUtils.hpp> +#include <aidge/utils/Types.h> + +namespace Aidge { +TEST_CASE("[cpu/operator] ConstantOfShape", "[ConstantOfShape][CPU]") { + constexpr std::uint16_t NBTRIALS = 10; + // Create a random number generator + auto random_seed = Catch::Generators::Detail::getSeed; + std::mt19937 gen(random_seed()); + std::uniform_real_distribution<float> valueDist( + 0.1f, 1.1f); // Random float distribution between 0 and 1 + std::uniform_int_distribution<DimSize_t> input_tensor_size_dist( + std::size_t(1), 
std::size_t(10)); + std::uniform_int_distribution<int64_t> input_tensor_values_dist( + std::size_t(1), std::size_t(7)); + std::uniform_real_distribution<double> operator_attr_value_dist(-100., 100.); + + /////////////////////////////////////////////// + // SETUP FUNCTIONS + auto generate_input_tensor = + [&gen, &input_tensor_size_dist, + &input_tensor_values_dist]() -> std::shared_ptr<Tensor> { + std::vector<DimSize_t> input_dims; + input_dims.push_back(input_tensor_size_dist(gen)); + + auto result = std::make_shared<Tensor>(input_dims); + result->setDataType(DataType::Int64); + result->setBackend("cpu"); + for (DimSize_t i = 0; i < result->size(); ++i) { + result->set<int64_t>(i, input_tensor_values_dist(gen)); + } + return result; + }; + + auto generate_random_operator = + [&gen, + &operator_attr_value_dist]() -> std::shared_ptr<ConstantOfShape_Op> { + auto node = ConstantOfShape(Tensor(operator_attr_value_dist(gen))); + auto op = std::static_pointer_cast<ConstantOfShape_Op>(node->getOperator()); + op->setDataType(DataType::Float64); + op->setBackend("cpu"); + return op; + }; + + auto generate_output_tensor = [](std::shared_ptr<Tensor> input_tensor, + std::shared_ptr<ConstantOfShape_Op> op) { + std::vector<DimSize_t> output_dims; + output_dims.reserve(input_tensor->size()); + for (DimSize_t i = 0; i < input_tensor->size(); ++i) { + output_dims.push_back(input_tensor->get<int64_t>(i)); + } + auto result = std::make_shared<Tensor>(output_dims); + result->setDataType(op->value().dataType()); + result->setBackend("cpu"); + constantFiller(result, op->value().get<double>(0)); + return result; + }; + + ///////////////////////////////////// + // BENCHMARKING + std::chrono::time_point<std::chrono::system_clock> start; + std::chrono::time_point<std::chrono::system_clock> end; + std::chrono::duration<double, std::micro> duration{}; + int number_of_operation{0}; + + SECTION("ConstantOfShapeImpl_cpu::forward()") { + for (int i = 0; i < NBTRIALS; ++i) { + auto input_T = 
generate_input_tensor(); + std::shared_ptr<ConstantOfShape_Op> op = generate_random_operator(); + auto output_T = generate_output_tensor(input_T, op); + op->associateInput(0, input_T); + + REQUIRE(op->forwardDims(true)); + REQUIRE_NOTHROW(op->forward()); + + CHECK(output_T->nbDims() == op->getOutput(0)->nbDims()); + for (DimIdx_t i = 0; i < output_T->nbDims(); ++i) { + CHECK(output_T->dims().at(i) == op->getOutput(0)->dims().at(i)); + } + CHECK(approxEq<double>(*output_T, *op->getOutput(0))); + } + } +} +} // namespace Aidge + diff --git a/unit_tests/operator/Test_FoldImpl.cpp b/unit_tests/operator/Test_FoldImpl.cpp new file mode 100644 index 0000000000000000000000000000000000000000..6832f5a42d796d9261495794e0758ce1b6df0346 --- /dev/null +++ b/unit_tests/operator/Test_FoldImpl.cpp @@ -0,0 +1,178 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. 
+ * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#include <catch2/catch_test_macros.hpp> +#include <cstdlib> +#include <memory> + +#include "aidge/data/Tensor.hpp" +#include "aidge/graph/GraphView.hpp" +#include "aidge/scheduler/SequentialScheduler.hpp" +#include "aidge/operator/Fold.hpp" +#include "aidge/operator/Unfold.hpp" +#include "aidge/operator/MatMul.hpp" +#include "aidge/operator/Reshape.hpp" + +#include "aidge/backend/cpu.hpp" + +using namespace Aidge; + +TEST_CASE("[cpu/operator] Fold(forward)", "[Fold][CPU]") { + std::shared_ptr<Node> myUnfold = Unfold({3,3}, "myunfold"); + std::shared_ptr<Node> myReshape = Reshape({4, 27}, "myreshape"); + std::shared_ptr<Node> myMatMul = MatMul("mymatmul"); + std::shared_ptr<Node> myFold = Fold({3,3}, {1,1}, "myfold"); + myUnfold->addChild(myMatMul, 0, 1); + myReshape->addChild(myMatMul, 0, 0); + myMatMul->addChild(myFold, 0, 0); + + std::shared_ptr<Tensor> myWeights = std::make_shared<Tensor>(Array4D<int,4,3,3,3> { + { + { + {{ 0, 1, 2}, + { 3, 4, 5}, + { 6, 7, 8}}, + {{ 9, 10, 11}, + { 12, 13, 14}, + { 15, 16, 17}}, + {{ 18, 19, 20}, + { 21, 22, 23}, + { 24, 25, 26}} + }, + { + {{ 27, 28, 29}, + { 30, 31, 32}, + { 33, 34, 35}}, + {{ 36, 37, 38}, + { 39, 40, 41}, + { 42, 43, 44}}, + {{ 45, 46, 47}, + { 48, 49, 50}, + { 51, 52, 53}} + }, + { + {{ 54, 55, 56}, + { 57, 58, 59}, + { 60, 61, 62}}, + {{ 63, 64, 65}, + { 66, 67, 68}, + { 69, 70, 71}}, + {{ 72, 73, 74}, + { 75, 76, 77}, + { 78, 79, 80}} + }, + { + {{ 81, 82, 83}, + { 84, 85, 86}, + { 87, 88, 89}}, + {{ 90, 91, 92}, + { 93, 94, 95}, + { 96, 97, 98}}, + {{ 99, 100, 101}, + {102, 103, 104}, + {105, 106, 107}} + } + } + }); + std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array4D<int,2,3,5,5> { //NCHW + { + { + {{ 0, 1, 2, 3, 4}, + { 5, 6, 7, 8, 9}, + { 10, 11, 12, 13, 14}, + { 15, 16, 17, 18, 19}, + { 20, 21, 22, 23, 24}}, + + {{ 25, 26, 27, 28, 29}, + { 30, 31, 32, 
33, 34}, + { 35, 36, 37, 38, 39}, + { 40, 41, 42, 43, 44}, + { 45, 46, 47, 48, 49}}, + + {{ 50, 51, 52, 53, 54}, + { 55, 56, 57, 58, 59}, + { 60, 61, 62, 63, 64}, + { 65, 66, 67, 68, 69}, + { 70, 71, 72, 73, 74}} + }, + { + {{ 75, 76, 77, 78, 79}, + { 80, 81, 82, 83, 84}, + { 85, 86, 87, 88, 89}, + { 90, 91, 92, 93, 94}, + { 95, 96, 97, 98, 99}}, + + {{100, 101, 102, 103, 104}, + {105, 106, 107, 108, 109}, + {110, 111, 112, 113, 114}, + {115, 116, 117, 118, 119}, + {120, 121, 122, 123, 124}}, + + {{125, 126, 127, 128, 129}, + {130, 131, 132, 133, 134}, + {135, 136, 137, 138, 139}, + {140, 141, 142, 143, 144}, + {145, 146, 147, 148, 149}} + } + } + }); + std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>(Array4D<int,2,4,3,3> { + { + { + {{ 15219, 15570, 15921}, + { 16974, 17325, 17676}, + { 18729, 19080, 19431}}, + {{ 37818, 38898, 39978}, + { 43218, 44298, 45378}, + { 48618, 49698, 50778}}, + {{ 60417, 62226, 64035}, + { 69462, 71271, 73080}, + { 78507, 80316, 82125}}, + {{ 83016, 85554, 88092}, + { 95706, 98244, 100782}, + { 108396, 110934, 113472}} + }, + { + {{ 41544, 41895, 42246}, + { 43299, 43650, 44001}, + { 45054, 45405, 45756}}, + {{ 118818, 119898, 120978}, + { 124218, 125298, 126378}, + { 129618, 130698, 131778}}, + {{ 196092, 197901, 199710}, + { 205137, 206946, 208755}, + { 214182, 215991, 217800}}, + {{ 273366, 275904, 278442}, + { 286056, 288594, 291132}, + { 298746, 301284, 303822}} + } + } + }); + + auto opUnfold = std::static_pointer_cast<OperatorTensor>(myUnfold -> getOperator()); + auto opReshape = std::static_pointer_cast<OperatorTensor>(myReshape -> getOperator()); + auto opMatMul = std::static_pointer_cast<OperatorTensor>(myMatMul -> getOperator()); + auto opFold = std::static_pointer_cast<OperatorTensor>(myFold -> getOperator()); + opUnfold->associateInput(0,myInput); + opReshape->associateInput(0,myWeights); + + auto g = getConnectedGraphView(myMatMul); + g->setDataType(DataType::Int32); + g->setBackend("cpu"); + + 
g->forwardDims(); + g->save("unfold_matmul_fold"); + + SequentialScheduler scheduler(g); + scheduler.forward(); + //opFold->getOutput(0)->print(); + REQUIRE(*(opFold->getOutput(0)) == *myOutput); +} \ No newline at end of file diff --git a/unit_tests/operator/Test_MulImpl.cpp b/unit_tests/operator/Test_MulImpl.cpp index 9d592d31e1999f63fb0ebe3f5ad9d19e85c8645c..3378861d0d3d7e74e7867c2765a0b09069fa8caf 100644 --- a/unit_tests/operator/Test_MulImpl.cpp +++ b/unit_tests/operator/Test_MulImpl.cpp @@ -24,6 +24,337 @@ namespace Aidge { + TEST_CASE("[CPU/Operator] Mul Backward", "[Mul][CPU][Backward]") + { + std::shared_ptr<Node> myMul = Mul(); + auto op = std::static_pointer_cast<OperatorTensor>(myMul->getOperator()); + op->setDataType(DataType::Float32); + op->setBackend("cpu"); + + SECTION("Case 1: 2D and 1D tensors") { + const auto T0 = std::make_shared<Tensor>(Array2D<float,2,3>( + { + { + {1,2,3},{4,5,6} + } + } + )); + + const auto T1 = std::make_shared<Tensor>(Array1D<float,3>( + {0.1,0.2,0.3} + )); + + T0->setDataType(DataType::Float32); + T0->setBackend("cpu"); + T1->setDataType(DataType::Float32); + T1->setBackend("cpu"); + + op->getOutput(0)->setGrad(std::make_shared<Tensor>(Array2D<float,2,3>({{{1.0,1.0,1.0},{1.0,1.0,1.0}}}))); + + op->associateInput(0,T0); + op->associateInput(1,T1); + op->forwardDims(); + + myMul->forward(); + myMul->backward(); + + auto T0Grad = std::make_shared<Tensor>(Array2D<float, 2,3>({{{0.1,0.2,0.3},{0.1, 0.2, 0.3}}})); + auto T1Grad = std::make_shared<Tensor>(Array1D<float, 3>({5,7,9})); + + REQUIRE(approxEq<float>(*(op->getInput(0)->grad()), *T0Grad)); + REQUIRE(approxEq<float>(*(op->getInput(1)->grad()), *T1Grad)); + } + + SECTION("Case 2: 3D and 1D tensors") { + const auto T0 = std::make_shared<Tensor>(Array3D<float,2,2,3>( + { + { + { + {1.0, 2.0, 3.0}, + {4.0, 5.0, 6.0} + }, + { + {7.0, 8.0, 9.0}, + {10.0, 11.0, 12.0} + } + } + } + )); + + const auto T1 = std::make_shared<Tensor>(Array1D<float, 3>({0.3,0.2,0.1})); + + const 
auto newGrad = std::make_shared<Tensor>(Array3D<float,2,2,3>( + { + { + { + {1, 1, 1}, + {1, 1, 1} + }, + { + {1, 1, 1}, + {1, 1, 1} + } + } + } + )); + + const auto expectedGrad0 = std::make_shared<Tensor>(Array3D<float,2,2,3>( + { + { + { + {0.3, 0.2, 0.1}, + {0.3, 0.2, 0.1} + }, + { + {0.3, 0.2, 0.1}, + {0.3, 0.2, 0.1} + } + } + } + )); + + const auto expectedGrad1 = std::make_shared<Tensor>(Array1D<float,3>( + {22.0, 26.0, 30.0} + )); + + for(auto T: {T0, T1, newGrad, expectedGrad0, expectedGrad1}) + { + T->setBackend("cpu") ; + T->setDataType(DataType::Float32); + } + + op->associateInput(0, T0); + op->associateInput(1, T1); + op->getOutput(0)->setGrad(newGrad); + op->forwardDims(); + + myMul->backward(); + + REQUIRE(approxEq<float>(*(op->getInput(0)->grad()), *expectedGrad0)); + REQUIRE(approxEq<float>(*(op->getInput(1)->grad()), *expectedGrad1)); + } + + SECTION("Case 3: 4D and 2D tensors") { + const auto T0 = std::make_shared<Tensor>(Array4D<float,2, 2, 3, 3>( + { + { + { + { + {1.0, 2.0, 3.0}, + {4.0, 5.0, 6.0}, + {7.0, 8.0, 9.0} + }, + { + {10.0, 11.0, 12.0}, + {13.0, 14.0, 15.0}, + {16.0, 17.0, 18.0} + } + }, + { + { + {19.0, 20.0, 21.0}, + {22.0, 23.0, 24.0}, + {25.0, 26.0, 27.0} + }, + { + {28.0, 29.0, 30.0}, + {31.0, 32.0, 33.0}, + {34.0, 35.0, 36.0} + } + } + } + } + )); + + const auto T1 = std::make_shared<Tensor>(Array2D<float, 3,3>( + { + { + {0.5,0.3,0.1}, + {0.4,0.2,0.6}, + {0.7,0.8,0.9} + } + } + )); + + const auto newGrad = std::make_shared<Tensor>(Array4D<float,2, 2, 3, 3>( + { + { + { + { + {1.0, 1.0, 1.0}, + {1.0, 1.0, 1.0}, + {1.0, 1.0, 1.0} + }, + { + {1.0, 1.0, 1.0}, + {1.0, 1.0, 1.0}, + {1.0, 1.0, 1.0} + } + }, + { + { + {1.0, 1.0, 1.0}, + {1.0, 1.0, 1.0}, + {1.0, 1.0, 1.0} + }, + { + {1.0, 1.0, 1.0}, + {1.0, 1.0, 1.0}, + {1.0, 1.0, 1.0} + } + } + } + } + )); + + const auto expectedGrad0 = std::make_shared<Tensor>(Array4D<float,2,2,3,3>( + { + { + { + { + {0.5, 0.3, 0.1}, + {0.4, 0.2, 0.6}, + {0.7, 0.8, 0.9} + }, + { + {0.5, 0.3, 0.1}, 
+ {0.4, 0.2, 0.6}, + {0.7, 0.8, 0.9} + } + }, + { + { + {0.5, 0.3, 0.1}, + {0.4, 0.2, 0.6}, + {0.7, 0.8, 0.9} + }, + { + {0.5, 0.3, 0.1}, + {0.4, 0.2, 0.6}, + {0.7, 0.8, 0.9} + } + } + } + } + )); + + const auto expectedGrad1 = std::make_shared<Tensor>(Array2D<float,3, 3>( + { + { + {58.0, 62.0, 66.0}, + {70.0, 74.0, 78.0}, + {82.0, 86.0, 90.0} + } + } + )); + + for(const auto T: {T0, T1, newGrad, expectedGrad0, expectedGrad1}) + { + T->setBackend("cpu") ; + T->setDataType(DataType::Float32); + } + + op->associateInput(0, T0); + op->associateInput(1, T1); + op->getOutput(0)->setGrad(newGrad); + op->forwardDims(); + + myMul->backward(); + + REQUIRE(approxEq<float>(*(op->getInput(0)->grad()), *expectedGrad0)); + REQUIRE(approxEq<float>(*(op->getInput(1)->grad()), *expectedGrad1)); + } + + SECTION("Case 4: 3D and 2D tensors") { + const auto T0 = std::make_shared<Tensor>(Array3D<float, 2, 3, 4>( + { + { + { + {1.0, 2.0, 3.0, 4.0}, + {5.0, 6.0, 7.0, 8.0}, + {9.0, 10.0, 11.0, 12.0}, + }, + { + {13.0, 14.0, 15.0, 16.0}, + {17.0, 18.0, 19.0, 20.0}, + {21.0, 22.0, 23.0, 24.0}, + } + } + } + )); + + const auto T1 = std::make_shared<Tensor>(Array2D<float, 3, 4>( + { + { + {0.1, 0.2, 0.3, 0.4}, + {0.5, 0.6, 0.7, 0.8}, + {0.9, 1.0, 1.1, 1.2} + } + } + )); + + const auto newGrad = std::make_shared<Tensor>(Array3D<float, 2,3,4>( + { + { + { + {1.0, 1.0, 1.0, 1.0}, + {1.0, 1.0, 1.0, 1.0}, + {1.0, 1.0, 1.0, 1.0}, + }, + { + {1.0, 1.0, 1.0, 1.0}, + {1.0, 1.0, 1.0, 1.0}, + {1.0, 1.0, 1.0, 1.0}, + } + } + } + )); + + const auto expectedGrad0 = std::make_shared<Tensor>(Array3D<float,2,3,4>( + { + { + { + {0.1, 0.2, 0.3, 0.4}, + {0.5, 0.6, 0.7, 0.8}, + {0.9, 1.0, 1.1, 1.2} + }, + { + {0.1, 0.2, 0.3, 0.4}, + {0.5, 0.6, 0.7, 0.8}, + {0.9, 1.0, 1.1, 1.2} + } + } + } + )); + + const auto expectedGrad1 = std::make_shared<Tensor>(Array2D<float,3, 4>( + { + { + {14.0, 16.0, 18.0, 20.0}, + {22.0, 24.0, 26.0, 28.0}, + {30.0, 32.0, 34.0, 36.0} + } + } + )); + + for(const auto T: {T0, T1, newGrad, 
expectedGrad0, expectedGrad1}) + { + T->setBackend("cpu") ; + T->setDataType(DataType::Float32); + } + + op->associateInput(0, T0); + op->associateInput(1, T1); + op->getOutput(0)->setGrad(newGrad); + op->forwardDims(); + + myMul->backward(); + + REQUIRE(approxEq<float>(*(op->getInput(0)->grad()), *expectedGrad0)); + REQUIRE(approxEq<float>(*(op->getInput(1)->grad()), *expectedGrad1)); + } + } + TEST_CASE("[cpu/operator] Mul", "[Mul][CPU]") { constexpr std::uint16_t NBTRIALS = 10; // Create a random number generator @@ -31,7 +362,7 @@ TEST_CASE("[cpu/operator] Mul", "[Mul][CPU]") { std::mt19937 gen(rd()); std::uniform_real_distribution<float> valueDist(0.1f, 1.1f); // Random float distribution between 0 and 1 std::uniform_int_distribution<std::size_t> dimSizeDist(std::size_t(2), std::size_t(10)); - std::uniform_int_distribution<std::size_t> nbDimsDist(std::size_t(1), std::size_t(5)); + std::uniform_int_distribution<std::size_t> nbDimsDist(std::size_t(1), std::size_t(3)); std::uniform_int_distribution<int> boolDist(0,1); // Create MatMul Operator @@ -60,6 +391,7 @@ TEST_CASE("[cpu/operator] Mul", "[Mul][CPU]") { std::chrono::time_point<std::chrono::system_clock> end; std::chrono::duration<double, std::micro> duration{}; + SECTION("MulImpl_cpu::forward()") { SECTION("Scalar / Scalar") { @@ -68,16 +400,20 @@ TEST_CASE("[cpu/operator] Mul", "[Mul][CPU]") { } SECTION("+1-D Tensor / +1-D Tensor - same dimensions") { + std::size_t number_of_operation = 0; for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) { + // generate 2 random Tensors - const std::size_t nbDims = nbDimsDist(gen); - std::vector<std::size_t> dims; + const auto nbDims = nbDimsDist(gen); + auto dims = std::vector<std::size_t>{}; + for (std::size_t i = 0; i < nbDims; ++i) { dims.push_back(dimSizeDist(gen)); } - const std::size_t nb_elements = std::accumulate(dims.cbegin(), dims.cend(), std::size_t(1), std::multiplies<std::size_t>()); + + const auto nb_elements = std::accumulate(dims.cbegin(), 
dims.cend(), std::size_t(1), std::multiplies<std::size_t>()); number_of_operation += nb_elements; // without broadcasting @@ -114,67 +450,101 @@ TEST_CASE("[cpu/operator] Mul", "[Mul][CPU]") { delete[] array0; delete[] array1; delete[] result; - - // with broadcasting } std::cout << "number of elements over time spent: " << (number_of_operation / duration.count())<< std::endl; std::cout << "total time: " << duration.count() << "μs" << std::endl; } + SECTION("+1-D Tensor / +1-D Tensor - broadcasting") { std::size_t number_of_operation = 0; for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) { + // generate 2 random Tensors // handle dimensions, replace some dimensions with '1' to get broadcasting + constexpr std::size_t nbDims = 4; - std::vector<std::size_t> dims; - for (std::size_t i = 0; i < nbDims; ++i) { - dims.push_back(dimSizeDist(gen)); + std::vector<std::size_t> dimensions; + + for (std::size_t i = 0; i < nbDims; ++i) + { + dimensions.push_back(dimSizeDist(gen)); } - std::vector<std::size_t> dims0 = dims; - std::vector<std::size_t> dims1 = dims; - std::vector<std::size_t> dimsOut = dims; - for (std::size_t i = 0; i < nbDims; ++i) { - if (boolDist(gen)) { + + auto dims0 = dimensions; + auto dims1 = dimensions; + auto dimsOut = dimensions; + + for (std::size_t i = 0; i < nbDims; ++i) + { + if (boolDist(gen)) + { dims0[i] = 1; } - if (boolDist(gen)) { + + if (boolDist(gen)) + { dims1[i] = 1; } + dimsOut[i] = (dims0[i] == 1) ? 
dims1[i] : dims0[i]; } + for(auto dim : dims0) + { + Log::info("Dimension of input 0 : {}", dim); + } + + for(auto dim : dims1) + { + Log::info("Dimension of input 1 : {}", dim); + } + // create arrays and fill them with random values float* array0 = new float[dims0[0]*dims0[1]*dims0[2]*dims0[3]]; float* array1 = new float[dims1[0]*dims1[1]*dims1[2]*dims1[3]]; float* result = new float[dimsOut[0]*dimsOut[1]*dimsOut[2]*dimsOut[3]]; - for (std::size_t i = 0; i < dims0[0]*dims0[1]*dims0[2]*dims0[3]; ++i) { + + for (std::size_t i = 0; i < dims0[0]*dims0[1]*dims0[2]*dims0[3]; ++i) + { array0[i] = valueDist(gen); } - for (std::size_t i = 0; i < dims1[0]*dims1[1]*dims1[2]*dims1[3]; ++i) { + + for (std::size_t i = 0; i < dims1[0]*dims1[1]*dims1[2]*dims1[3]; ++i) + { array1[i] = valueDist(gen); } // compute true result const std::size_t strides0[nbDims] = {dims0[1]*dims0[2]*dims0[3], dims0[2]*dims0[3], dims0[3], 1}; const std::size_t strides1[nbDims] = {dims1[1]*dims1[2]*dims1[3], dims1[2]*dims1[3], dims1[3], 1}; - for (std::size_t a = 0; a < dimsOut[0]; ++a) { - for (std::size_t b = 0; b < dimsOut[1]; ++b) { + + for (std::size_t a = 0; a < dimsOut[0]; ++a) + { + for (std::size_t b = 0; b < dimsOut[1]; ++b) + { const std::size_t idx0_0 = strides0[0] * ((dims0[0] > 1) ? a : 0) + strides0[1] * ((dims0[1] > 1) ? b : 0); + const std::size_t idx1_0 = strides1[0] * ((dims1[0] > 1) ? a : 0) + strides1[1] * ((dims1[1] > 1) ? b : 0); - for (std::size_t c = 0; c < dimsOut[2]; ++c) { + + for (std::size_t c = 0; c < dimsOut[2]; ++c) + { const std::size_t idx_out = dimsOut[3] * (c + dimsOut[2] * (b + dimsOut[1] * a)); - for (std::size_t d = 0; d < dimsOut[3]; ++d) { + + for (std::size_t d = 0; d < dimsOut[3]; ++d) + { std::size_t idx0 = idx0_0 + strides0[2] * ((dims0[2] > 1) ? c : 0) + ((dims0[3] > 1) ? d : 0); + std::size_t idx1 = idx1_0 + strides1[2] * ((dims1[2] > 1) ? c : 0) + ((dims1[3] > 1) ? 
d : 0); + result[idx_out + d] = array0[idx0] * array1[idx1]; // std::cout << "(" << idx0 << ", " << idx1 << ") -> " << array0[idx0] << " * " << array1[idx1] << " -> " << idx_out + d << std::endl; } diff --git a/unit_tests/operator/Test_PadImpl.cpp b/unit_tests/operator/Test_PadImpl.cpp index cdd3a5f979085f3782776ce69ddd92c0d53150c4..75233c0b97fc6f9812020d0e3d3c695d8cd388f0 100644 --- a/unit_tests/operator/Test_PadImpl.cpp +++ b/unit_tests/operator/Test_PadImpl.cpp @@ -134,7 +134,7 @@ TEST_CASE("[cpu/operator] Pad(forward)", "[Pad][CPU]") { SECTION("Asymmetric Pad") { const int pv = 0; // pad value - std::shared_ptr<Node> myPad = Pad<2>({1, 0, 0, 1}, "mypad", PadBorderType::Constant, static_cast<double>(pv)); + std::shared_ptr<Node> myPad = Pad<2>({0, 1, 1, 0}, "mypad", PadBorderType::Constant, static_cast<double>(pv)); auto op = std::static_pointer_cast<OperatorTensor>(myPad -> getOperator()); std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array4D<int,2,3,5,5> { //NCHW { diff --git a/unit_tests/operator/Test_PowImpl.cpp b/unit_tests/operator/Test_PowImpl.cpp index 3b85defb37ff76439b658faa84c3c7457a152d2f..cb5d8872c9c7242bb4aa4efca388d53b578417f9 100644 --- a/unit_tests/operator/Test_PowImpl.cpp +++ b/unit_tests/operator/Test_PowImpl.cpp @@ -313,5 +313,171 @@ TEST_CASE("[cpu/operator] Pow", "[Pow][CPU]") { std::cout << "total time: " << duration.count() << "μs" << std::endl; } } + + + SECTION("PowImpl_cpu::backward()") { + SECTION("3D Tensors") { + const auto input0 = std::make_shared<Tensor>(Array3D<float, 2, 2, 2>( + { + { + { + {2.0, 3.0}, + {4.0, 5.0} + }, + { + {6.0, 7.0}, + {8.0, 9.0} + } + } + } + )); + const auto input1 = std::make_shared<Tensor>(Array3D<float, 2, 2, 2>( + { + { + { + {1.0, 2.0}, + {3.0, 2.0} + }, + { + {2.0, 3.0}, + {1.0, 0.5} + } + } + } + )); + const auto gradOut = std::make_shared<Tensor>(Array3D<float, 2, 2, 2>( + { + { + { + {0.5, 1.0}, + {1.5, 2.0} + }, + { + {2.5, 3.0}, + {3.5, 4.0} + } + } + } + )); + const auto 
expectedGrad0 = std::make_shared<Tensor>(Array3D<float, 2, 2, 2>( + { + { + { + {0.50000000, 6.00000000}, + {72.00000000, 20.00000000} + }, + { + {30.00000000, 441.00000000}, + {3.50000000, 0.66666669} + } + } + } + )); + const auto expectedGrad1 = std::make_shared<Tensor>(Array3D<float, 2, 2, 2>( + { + { + { + { 0.693147182, 9.88751030}, + {1.33084259e+02, 8.04718933e+01} + }, + { + {1.61258362e+02, 2.00234143e+03}, + {5.82243652e+01, 2.63666954e+01} + } + } + } + )); + for(const auto T: {input0, input1, gradOut, expectedGrad0, expectedGrad1}) + { + T->setBackend("cpu") ; + T->setDataType(DataType::Float32); + } + std::shared_ptr<Node> powOp = Pow(); + auto opr = std::static_pointer_cast<OperatorTensor>(powOp-> getOperator()); + opr->setDataType(DataType::Float32); + opr->setBackend("cpu"); + opr->associateInput(0, input0); + opr->associateInput(1, input1); + opr->getOutput(0)->setGrad(gradOut); + opr->forward(); + + powOp->backward(); + REQUIRE(approxEq<float>(*(opr->getInput(0)->grad()), *expectedGrad0)); + REQUIRE(approxEq<float>(*(opr->getInput(1)->grad()), *expectedGrad1)); + } + SECTION("Broadcasting") { + const auto input0 = std::make_shared<Tensor>(Array3D<float, 2, 2, 3>( + { + { + { + {1.0, 2.0, 3.0}, + {4.0, 5.0, 6.0} + }, + { + {1.5, 2.5, 3.5}, + {4.5, 5.5, 6.5} + } + } + } + )); + const auto input1 = std::make_shared<Tensor>(Array1D<float, 3>( + { + {0.1, 0.2, 0.3} + } + )); + + const auto gradOut = std::make_shared<Tensor>(Array3D<float, 2, 2, 3>( + { + { + { + {1.0, 2.0, 3.0}, + {4.0, 5.0, 6.0} + }, + { + {6.0, 5.0, 4.0}, + {3.0, 2.0, 1.0} + } + } + } + )); + const auto expectedGrad0 = std::make_shared<Tensor>(Array3D<float, 2, 2, 3>( + { + { + { + {0.10000000, 0.22973967, 0.41711676}, + {0.11486985, 0.27594593, 0.51353097} + }, + { + {0.41655189, 0.48044977, 0.49926791}, + {0.07748720, 0.10227509, 0.08092485} + } + } + } + )); + const auto expectedGrad1 = std::make_shared<Tensor>(Array1D<float, 3>( + { + {14.14779854, 22.99299049, 33.56402588} + } 
+ )); + + for(const auto T: {input0, input1, gradOut, expectedGrad0, expectedGrad1}) + { + T->setBackend("cpu") ; + T->setDataType(DataType::Float32); + } + std::shared_ptr<Node> powOp = Pow(); + auto opr = std::static_pointer_cast<OperatorTensor>(powOp-> getOperator()); + opr->setDataType(DataType::Float32); + opr->setBackend("cpu"); + opr->associateInput(0, input0); + opr->associateInput(1, input1); + opr->getOutput(0)->setGrad(gradOut); + powOp->forward(); + + powOp->backward(); + REQUIRE(approxEq<float>(*(opr->getInput(0)->grad()), *expectedGrad0)); + REQUIRE(approxEq<float>(*(opr->getInput(1)->grad()), *expectedGrad1)); + } + } } } // namespace Aidge diff --git a/unit_tests/operator/Test_ReduceMeanImpl.cpp b/unit_tests/operator/Test_ReduceMeanImpl.cpp index 0269622740b5a0282a093d509d4b565f7acc3e76..dd647c7ba3f90fe7f3554aae7133e97ffa9c99ba 100644 --- a/unit_tests/operator/Test_ReduceMeanImpl.cpp +++ b/unit_tests/operator/Test_ReduceMeanImpl.cpp @@ -11,6 +11,8 @@ #include <catch2/catch_test_macros.hpp> #include <memory> +#include <numeric> // std::accumulate +#include <random> // std::random_device, std::mt19937, std::uniform_real_distribution #include "aidge/data/Tensor.hpp" #include "aidge/operator/ReduceMean.hpp" @@ -22,6 +24,129 @@ using namespace Aidge; TEST_CASE("[cpu/operator] ReduceMean(forward)", "[ReduceMean][CPU]") { + SECTION("ForwardDims") + { + constexpr std::uint16_t NBTRIALS = 10; + // Create a random number generator + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_real_distribution<float> valueDist(0.1f, 1.1f); // Random float distribution between 0 and 1 + std::uniform_int_distribution<std::size_t> dimSizeDist(std::size_t(2), std::size_t(10)); + std::uniform_int_distribution<std::size_t> nbDimsDist(std::size_t(1), std::size_t(5)); + std::uniform_int_distribution<int> boolDist(0,1); + + SECTION("KeepDims") { + for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) { + DimSize_t nbDims = nbDimsDist(gen); + 
std::vector<DimSize_t> dims(nbDims); + std::vector<DimSize_t> expectedOutDims(nbDims); + std::vector<std::int32_t> axes; + for (std::size_t i = 0; i < nbDims; i++) { + dims[i] = dimSizeDist(gen); + expectedOutDims[i] = dims[i]; + if(boolDist(gen)) { + axes.push_back(i); + expectedOutDims[i] = 1; + } + } + if (axes.empty()) { // Default behaviour if no axes are provided is to reduce all dimensions + std::fill(expectedOutDims.begin(), expectedOutDims.end(), 1); + } + + std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(dims); + myInput->setBackend("cpu"); + myInput->setDataType(DataType::Float32); + myInput->zeros(); + std::shared_ptr<Node> myReduceMean = ReduceMean(axes, true); + auto op = std::static_pointer_cast<OperatorTensor>(myReduceMean -> getOperator()); + op->associateInput(0,myInput); + op->setDataType(DataType::Float32); + op->setBackend("cpu"); + op->forwardDims(); + + const auto outputDims = op->getOutput(0)->dims(); + REQUIRE(outputDims == expectedOutDims); + } + } + SECTION("Not KeepDims") { + for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) { + DimSize_t nbDims = nbDimsDist(gen); + std::vector<DimSize_t> dims(nbDims); + std::vector<DimSize_t> expectedOutDims; + std::vector<std::int32_t> axes; + for (std::size_t i = 0; i < nbDims; i++) { + dims[i] = dimSizeDist(gen); + if(boolDist(gen)) { + axes.push_back(i); + } + else { + expectedOutDims.push_back(dims[i]); + } + } + if (axes.empty() || expectedOutDims.empty()) { // Default behaviour if no axes are provided is to reduce all dimensions + expectedOutDims = std::vector<DimSize_t>{1}; + } + + std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(dims); + myInput->setBackend("cpu"); + myInput->setDataType(DataType::Float32); + std::shared_ptr<Node> myReduceMean = ReduceMean(axes, false); + auto op = std::static_pointer_cast<OperatorTensor>(myReduceMean -> getOperator()); + op->associateInput(0,myInput); + op->setDataType(DataType::Float32); + op->setBackend("cpu"); + + 
op->forwardDims(); + + const auto outputDims = op->getOutput(0)->dims(); + REQUIRE(outputDims == expectedOutDims); + } + } + SECTION("NoopWithEmptyAxes") { + for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) { + DimSize_t nbDims = nbDimsDist(gen); + std::vector<DimSize_t> dims(nbDims); + for (std::size_t i = 0; i < nbDims; i++) { + dims[i] = dimSizeDist(gen); + } + std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(dims); + myInput->setBackend("cpu"); + myInput->setDataType(DataType::Float32); + std::shared_ptr<Node> myReduceMean = ReduceMean(std::vector<int32_t>{}, false, true); + auto op = std::static_pointer_cast<OperatorTensor>(myReduceMean -> getOperator()); + op->associateInput(0,myInput); + op->setDataType(DataType::Float32); + op->setBackend("cpu"); + + op->forwardDims(); + + const auto outputDims = op->getOutput(0)->dims(); + REQUIRE(outputDims == dims); + } + } + SECTION("Not NoopWithEmptyAxes") { + for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) { + DimSize_t nbDims = nbDimsDist(gen); + std::vector<DimSize_t> dims(nbDims); + for (std::size_t i = 0; i < nbDims; i++) { + dims[i] = dimSizeDist(gen); + } + std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(dims); + myInput->setBackend("cpu"); + myInput->setDataType(DataType::Float32); + std::shared_ptr<Node> myReduceMean = ReduceMean({}, false, false); + auto op = std::static_pointer_cast<OperatorTensor>(myReduceMean -> getOperator()); + op->associateInput(0,myInput); + op->setDataType(DataType::Float32); + op->setBackend("cpu"); + + op->forwardDims(); + + REQUIRE(op->getOutput(0)->nbDims() == 1); + REQUIRE(op->getOutput(0)->size() == 1); + } + } + } SECTION("KeepDims") { SECTION("test 1") { std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array3D<float,3,2,2> { @@ -157,7 +282,7 @@ TEST_CASE("[cpu/operator] ReduceMean(forward)", "[ReduceMean][CPU]") { {18.25} }); - std::shared_ptr<Node> myReduceMean = ReduceMean({0, 1, 2}, 0); + std::shared_ptr<Node> myReduceMean = 
ReduceMean({}, 0); auto op = std::static_pointer_cast<OperatorTensor>(myReduceMean -> getOperator()); op->associateInput(0,myInput); op->setDataType(DataType::Float32); @@ -179,15 +304,42 @@ TEST_CASE("[cpu/operator] ReduceMean(forward)", "[ReduceMean][CPU]") { {0.1293547f} }); - std::shared_ptr<Node> myReduceMean = ReduceMean({0, 1}, 0); + std::shared_ptr<Node> myReduceMean = ReduceMean({}, 0); auto op = std::static_pointer_cast<OperatorTensor>(myReduceMean -> getOperator()); op->associateInput(0,myInput); op->setDataType(DataType::Float32); op->setBackend("cpu"); myReduceMean->forward(); - op->getOutput(0)->print(); - // approxEq<float>(*(op->getOutput(0)), *myOutput); + REQUIRE(approxEq<float>(*(op->getOutput(0)), *myOutput)); } + SECTION("noop_with_empty_axes") { + std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array3D<float,3,2,2> { + { + { + { 5.0, 1.0 }, + { 20.0, 2.0 } + }, + { + { 30.0, 1.0 }, + { 40.0, 2.0 } + }, + { + { 55.0, 1.0 }, + { 60.0, 2.0 } + } + } + }); + + std::shared_ptr<Node> myReduceMean = ReduceMean({}, 0, 1); + auto op = std::static_pointer_cast<OperatorTensor>(myReduceMean -> getOperator()); + op->associateInput(0,myInput); + op->setDataType(DataType::Float32); + op->setBackend("cpu"); + myReduceMean->forward(); + op->getOutput(0)->print(); + + REQUIRE(*(op->getOutput(0)) == *myInput); + } } } \ No newline at end of file diff --git a/unit_tests/operator/Test_ReduceSumImpl.cpp b/unit_tests/operator/Test_ReduceSumImpl.cpp new file mode 100644 index 0000000000000000000000000000000000000000..49569d1f65ff6c51f9681632b16375605ab326e7 --- /dev/null +++ b/unit_tests/operator/Test_ReduceSumImpl.cpp @@ -0,0 +1,345 @@ +/******************************************************************************** + * Copyright (c) 2024 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. 
+ * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#include <catch2/catch_test_macros.hpp> +#include <memory> +#include <numeric> // std::accumulate +#include <random> // std::random_device, std::mt19937, std::uniform_real_distribution + +#include "aidge/data/Tensor.hpp" +#include "aidge/operator/ReduceSum.hpp" +#include "aidge/operator/Conv.hpp" + +#include "aidge/backend/cpu.hpp" +#include "aidge/utils/TensorUtils.hpp" + +using namespace Aidge; + +TEST_CASE("[cpu/operator] ReduceSum(forward)", "[ReduceSum][CPU]") { + SECTION("ForwardDims") + { + constexpr std::uint16_t NBTRIALS = 10; + // Create a random number generator + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_real_distribution<float> valueDist(0.1f, 1.1f); // Random float distribution between 0 and 1 + std::uniform_int_distribution<std::size_t> dimSizeDist(std::size_t(2), std::size_t(10)); + std::uniform_int_distribution<std::size_t> nbDimsDist(std::size_t(1), std::size_t(5)); + std::uniform_int_distribution<int> boolDist(0,1); + + SECTION("KeepDims") { + for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) { + DimSize_t nbDims = nbDimsDist(gen); + std::vector<DimSize_t> dims(nbDims); + std::vector<DimSize_t> expectedOutDims(nbDims); + std::vector<std::int32_t> axes; + for (std::size_t i = 0; i < nbDims; i++) { + dims[i] = dimSizeDist(gen); + expectedOutDims[i] = dims[i]; + if(boolDist(gen)) { + axes.push_back(i); + expectedOutDims[i] = 1; + } + } + if (axes.empty()) { // Default behaviour if no axes are provided is to reduce all dimensions + std::fill(expectedOutDims.begin(), expectedOutDims.end(), 1); + } + + std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(dims); + myInput->setBackend("cpu"); + myInput->setDataType(DataType::Float32); + myInput->zeros(); + std::shared_ptr<Node> myReduceSum = ReduceSum(axes, true); + auto op = std::static_pointer_cast<OperatorTensor>(myReduceSum -> 
getOperator()); + op->associateInput(0,myInput); + op->setDataType(DataType::Float32); + op->setBackend("cpu"); + op->forwardDims(); + + const auto outputDims = op->getOutput(0)->dims(); + REQUIRE(outputDims == expectedOutDims); + } + } + SECTION("Not KeepDims") { + for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) { + DimSize_t nbDims = nbDimsDist(gen); + std::vector<DimSize_t> dims(nbDims); + std::vector<DimSize_t> expectedOutDims; + std::vector<std::int32_t> axes; + for (std::size_t i = 0; i < nbDims; i++) { + dims[i] = dimSizeDist(gen); + if(boolDist(gen)) { + axes.push_back(i); + } + else { + expectedOutDims.push_back(dims[i]); + } + } + if (axes.empty() || expectedOutDims.empty()) { // Default behaviour if no axes are provided is to reduce all dimensions + expectedOutDims = std::vector<DimSize_t>{1}; + } + + std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(dims); + myInput->setBackend("cpu"); + myInput->setDataType(DataType::Float32); + std::shared_ptr<Node> myReduceSum = ReduceSum(axes, false); + auto op = std::static_pointer_cast<OperatorTensor>(myReduceSum -> getOperator()); + op->associateInput(0,myInput); + op->setDataType(DataType::Float32); + op->setBackend("cpu"); + + op->forwardDims(); + + const auto outputDims = op->getOutput(0)->dims(); + REQUIRE(outputDims == expectedOutDims); + } + } + SECTION("NoopWithEmptyAxes") { + for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) { + DimSize_t nbDims = nbDimsDist(gen); + std::vector<DimSize_t> dims(nbDims); + for (std::size_t i = 0; i < nbDims; i++) { + dims[i] = dimSizeDist(gen); + } + std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(dims); + myInput->setBackend("cpu"); + myInput->setDataType(DataType::Float32); + std::shared_ptr<Node> myReduceSum = ReduceSum(std::vector<int32_t>{}, false, true); + auto op = std::static_pointer_cast<OperatorTensor>(myReduceSum -> getOperator()); + op->associateInput(0,myInput); + op->setDataType(DataType::Float32); + op->setBackend("cpu"); + 
+ op->forwardDims(); + + const auto outputDims = op->getOutput(0)->dims(); + REQUIRE(outputDims == dims); + } + } + SECTION("Not NoopWithEmptyAxes") { + for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) { + DimSize_t nbDims = nbDimsDist(gen); + std::vector<DimSize_t> dims(nbDims); + for (std::size_t i = 0; i < nbDims; i++) { + dims[i] = dimSizeDist(gen); + } + std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(dims); + myInput->setBackend("cpu"); + myInput->setDataType(DataType::Float32); + std::shared_ptr<Node> myReduceSum = ReduceSum({}, false, false); + auto op = std::static_pointer_cast<OperatorTensor>(myReduceSum -> getOperator()); + op->associateInput(0,myInput); + op->setDataType(DataType::Float32); + op->setBackend("cpu"); + + op->forwardDims(); + + REQUIRE(op->getOutput(0)->nbDims() == 1); + REQUIRE(op->getOutput(0)->size() == 1); + } + } + } + SECTION("KeepDims") { + SECTION("test 1") { + std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array3D<float,3,2,2> { + { + { + { 5.0, 1.0 }, + { 20.0, 2.0 } + }, + { + { 30.0, 1.0 }, + { 40.0, 2.0 } + }, + { + { 55.0, 1.0 }, + { 60.0, 2.0 } + } + } + }); + Tensor myOutput = Tensor(Array3D<float,3,1,2> { + { + + {{ 25.0, 3.0 }}, + {{ 70.0, 3.0 }}, + {{ 115.0, 3.0 }} + } + }); + + std::shared_ptr<Node> myReduceSum = ReduceSum({1}, 1); + auto op = std::static_pointer_cast<OperatorTensor>(myReduceSum -> getOperator()); + op->associateInput(0,myInput); + op->setDataType(DataType::Float32); + op->setBackend("cpu"); + myReduceSum->forward(); + op->getOutput(0)->print(); + + REQUIRE(*(op->getOutput(0)) == myOutput); + } + SECTION("test 2") { + std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array3D<float,3,3,2> { + { + { + { 0.0, 0.0 }, + { 1.0, 1.0 }, + { 2.0, 2.0 } + }, + { + { 3.0, 3.0 }, + { 4.0, 4.0 }, + { 5.0, 5.0 } + }, + { + { 6.0, 6.0 }, + { 7.0, 7.0 }, + { 8.0, 8.0 } + } + } + }); + Tensor myOutput = Tensor(Array3D<float,3,1,1> { + { + + {{ 6.0 }}, + {{ 24.0 }}, + {{ 42.0 }} + 
} + }); + + std::shared_ptr<Node> myReduceSum = ReduceSum({1, 2}, 1); + auto op = std::static_pointer_cast<OperatorTensor>(myReduceSum -> getOperator()); + op->associateInput(0,myInput); + op->setDataType(DataType::Float32); + op->setBackend("cpu"); + myReduceSum->forward(); + myOutput.print(); + op->getOutput(0)->print(); + REQUIRE(*(op->getOutput(0)) == myOutput); + } + } + SECTION("not_KeepDims") { + std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array3D<float,3,2,2> { + { + { + { 5.0, 1.0 }, + { 20.0, 2.0 } + }, + { + { 30.0, 1.0 }, + { 40.0, 2.0 } + }, + { + { 55.0, 1.0 }, + { 60.0, 2.0 } + } + } + }); + std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>(Array2D<float,3,2> { + { + { 25.0, 3.0 }, + { 70.0, 3.0 }, + { 115.0, 3.0 } + } + }); + + std::shared_ptr<Node> myReduceSum = ReduceSum({1}, 0); + auto op = std::static_pointer_cast<OperatorTensor>(myReduceSum -> getOperator()); + op->associateInput(0,myInput); + op->setDataType(DataType::Float32); + op->setBackend("cpu"); + myReduceSum->forward(); + op->getOutput(0)->print(); + + REQUIRE(*(op->getOutput(0)) == *myOutput); + + } + SECTION("all_axes") { + SECTION("1") { + std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array3D<float,3,2,2> { + { + { + { 5.0, 1.0 }, + { 20.0, 2.0 } + }, + { + { 30.0, 1.0 }, + { 40.0, 2.0 } + }, + { + { 55.0, 1.0 }, + { 60.0, 2.0 } + } + } + }); + std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>(Array1D<float,1> { + {219.0} + }); + + std::shared_ptr<Node> myReduceSum = ReduceSum({}, 0); + auto op = std::static_pointer_cast<OperatorTensor>(myReduceSum -> getOperator()); + op->associateInput(0,myInput); + op->setDataType(DataType::Float32); + op->setBackend("cpu"); + myReduceSum->forward(); + op->getOutput(0)->print(); + + REQUIRE(*(op->getOutput(0)) == *myOutput); + } + SECTION("2") { + std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array2D<float,5,4> { + {{ 0.004232f, 0.105120f, 0.045124f, 0.009205f}, + { 0.000766f, 
0.272162f, 0.503560f, 0.044163f}, + { 0.049755f, 0.000305f, 0.143634f, 0.013253f}, + { 0.096258f, 0.311231f, 0.358143f, 0.000452f}, + { 0.468617f, 0.015693f, 0.145316f, 0.000105f}} + }); + std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>(Array1D<float,1> { + {2.587094f} + }); + + std::shared_ptr<Node> myReduceSum = ReduceSum({0, 1}, 0); + auto op = std::static_pointer_cast<OperatorTensor>(myReduceSum -> getOperator()); + op->associateInput(0,myInput); + op->setDataType(DataType::Float32); + op->setBackend("cpu"); + myReduceSum->forward(); + op->getOutput(0)->print(); + REQUIRE(approxEq<float>(*(op->getOutput(0)), *myOutput)); + } + SECTION("noop_with_empty_axes") { + std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array3D<float,3,2,2> { + { + { + { 5.0, 1.0 }, + { 20.0, 2.0 } + }, + { + { 30.0, 1.0 }, + { 40.0, 2.0 } + }, + { + { 55.0, 1.0 }, + { 60.0, 2.0 } + } + } + }); + + std::shared_ptr<Node> myReduceSum = ReduceSum({}, 0, 1); + auto op = std::static_pointer_cast<OperatorTensor>(myReduceSum -> getOperator()); + op->associateInput(0,myInput); + op->setDataType(DataType::Float32); + op->setBackend("cpu"); + myReduceSum->forward(); + op->getOutput(0)->print(); + + REQUIRE(*(op->getOutput(0)) == *myInput); + } + } +} \ No newline at end of file diff --git a/unit_tests/recipies/Test_ConvToMatMul.cpp b/unit_tests/recipies/Test_ConvToMatMul.cpp new file mode 100644 index 0000000000000000000000000000000000000000..05c5eef83394ba8c965dfabae2bcd8c2b4502c79 --- /dev/null +++ b/unit_tests/recipies/Test_ConvToMatMul.cpp @@ -0,0 +1,76 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. 
+ * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#include <catch2/catch_test_macros.hpp> + +#include "aidge/recipes/Recipes.hpp" +#include "aidge/operator/Conv.hpp" +#include "aidge/operator/Producer.hpp" +#include "aidge/scheduler/SequentialScheduler.hpp" +#include "aidge/filler/Filler.hpp" +#include "aidge/graph/OpArgs.hpp" +#include <cstddef> + +using namespace Aidge; + +TEST_CASE("[ConvToMatMul] conv") { + auto conv1 = Conv(3, 4, {3, 3}, "conv1"); + auto conv2 = Conv(4, 7, {3, 3}, "conv2", {1, 1}, {1, 1}, true); + auto conv3 = Conv(7, 10, {1, 1}, "conv3", {2, 2}); + + auto g1 = Sequential({ + Producer({2, 3, 13, 24}, "dataProvider"), + conv1, + conv2, + conv3 + }); + + g1->setBackend("cpu"); + g1->forwardDims(); + + // Random initialization of input and weights + uniformFiller<float>(std::static_pointer_cast<OperatorTensor>(conv1->getOperator())->getInput(0), -10.0, 10.0); + uniformFiller<float>(std::static_pointer_cast<OperatorTensor>(conv1->getOperator())->getInput(1), -10.0, 10.0); + uniformFiller<float>(std::static_pointer_cast<OperatorTensor>(conv1->getOperator())->getInput(2), -10.0, 10.0); + uniformFiller<float>(std::static_pointer_cast<OperatorTensor>(conv2->getOperator())->getInput(1), -10.0, 10.0); + uniformFiller<float>(std::static_pointer_cast<OperatorTensor>(conv3->getOperator())->getInput(1), -10.0, 10.0); + uniformFiller<float>(std::static_pointer_cast<OperatorTensor>(conv3->getOperator())->getInput(2), -10.0, 10.0); + + auto s1 = SequentialScheduler(g1); + s1.forward(); + + g1->save("convToMatMul_before"); + + auto g2 = g1->clone(); + g2->forwardDims(); + REQUIRE(convToMatMul(g2) == 3); + + g2->setBackend("cpu"); + + auto s2 = SequentialScheduler(g2); + s2.forward(); + + g2->save("convToMatMul_after"); + + auto g1OutOp = std::static_pointer_cast<OperatorTensor>((*g1->outputNodes().cbegin())->getOperator()); + auto g2OutOp = 
std::static_pointer_cast<OperatorTensor>((*g2->outputNodes().cbegin())->getOperator()); + REQUIRE(*(g1OutOp->getOutput(0)) == *(g2OutOp->getOutput(0))); + + // Simplify the graph: freeze parameters to allow reshaping of the Producers + for (auto node : g2->getNodes()) { + if (node->type() == Producer_Op::Type && node->name() != "dataProvider") { + std::static_pointer_cast<Producer_Op>(node->getOperator())->constant() = true; + } + } + + constantFolding(g2); + g2->save("convToMatMul_after_folding"); +} diff --git a/version.txt b/version.txt index 7179039691ce07a214e7a815893fee97a97b1422..0d91a54c7d439e84e3dd17d3594f1b2b6737f430 100644 --- a/version.txt +++ b/version.txt @@ -1 +1 @@ -0.2.3 +0.3.0