Compare revisions — eclipse/aidge/aidge_backend_cuda

Changes are shown as if the source revision was being merged into the target revision: 446 commits on source, 699 additions and 570 deletions.
# common
.cache

# C++ Build
build*/
install*/
include/aidge/backend/cuda_version.h

# VSCode
.vscode

@@ -10,6 +14,9 @@ install*/
__pycache__
*.pyc
*.egg-info
dist*/
wheelhouse/*
_version.py

# Mermaid
*.mmd

@@ -18,4 +25,4 @@ __pycache__
xml*/

# ONNX
*.onnx
###############################################################################
# Aidge Continuous Integration and Deployment                                 #
#                                                                             #
###############################################################################

stages:
  - static_analysis
  - build
  - test
  - coverage
  - release
  - deploy

include:
  - project: 'eclipse/aidge/gitlab_shared_files'
    ref: 'main'
    file:
      # choose which jobs to run by including the corresponding files.
      - '.gitlab/ci/ubuntu_cpp.gitlab-ci.yml'
      - '.gitlab/ci/ubuntu_python.gitlab-ci.yml'
      - '.gitlab/ci/release/cibuildwheel_ubuntu.gitlab-ci.yml'
      # - '.gitlab/ci/windows_cpp.gitlab-ci.yml'
      # - '.gitlab/ci/windows_python.gitlab-ci.yml'
      # - '.gitlab/ci/release/cibuildwheel_windows.gitlab-ci.yml'
build:ubuntu_python:
  # Use the cudnn image instead of the cuda one
  image: nvidia/cuda:12.4.1-cudnn-devel-ubuntu22.04

test:ubuntu_python:
  # Use the cudnn image instead of the cuda one
  image: nvidia/cuda:12.4.1-cudnn-devel-ubuntu22.04

coverage:ubuntu_python:
  # Use the cudnn image instead of the cuda one
  image: nvidia/cuda:12.4.1-cudnn-devel-ubuntu22.04

.build:ubuntu_cpp:template:
  # Use the cudnn image instead of the cuda one
  image: nvidia/cuda:12.4.1-cudnn-devel-ubuntu22.04

test:ubuntu_cpp:
  # Use the cudnn image instead of the cuda one
  image: nvidia/cuda:12.4.1-cudnn-devel-ubuntu22.04

coverage:ubuntu_cpp:
  # Use the cudnn image instead of the cuda one
  image: nvidia/cuda:12.4.1-cudnn-devel-ubuntu22.04

release:pip:ubuntu:
  tags:
    - release:cuda
  variables:
    DOCKER_HOST: unix:///var/run/docker.sock
    CIBW_ENVIRONMENT: >-
      BUILD_WITH_CUDA=1
      AIDGE_DEPENDENCIES='aidge_core aidge_backend_cpu'
      AIDGE_INSTALL='/AIDGE_INSTALL_CIBUILDWHEEL'
      DOCKER_HOST='unix:///var/run/docker.sock'
      ARCH='x86_64'
      CUDNN_VERSION='9'
      CUDA_MAJOR_VERSION='12'
      CUDA_MINOR_VERSION='8'
      SEARCH_PATH='/home/gitlab-runner/builds/$CI_RUNNER_SHORT_TOKEN/$CI_CONCURRENT_ID'
      CIBW_REPAIR_WHEEL_COMMAND='auditwheel --verbose repair {wheel} -w {dest_dir} --exclude libcudart.so.12 --exclude libcudnn.so.9 --exclude libcublas.so.12 --exclude libcublasLt.so.12'
  parallel:
    matrix:
      # - CIBW_BUILD: "cp38-manylinux_x86_64"
      # - CIBW_BUILD: "cp39-manylinux_x86_64"
      - CIBW_BUILD: "cp310-manylinux_x86_64"
      - CIBW_BUILD: "cp311-manylinux_x86_64"
      - CIBW_BUILD: "cp312-manylinux_x86_64"
      - CIBW_BUILD: "cp313-manylinux_x86_64"
  before_script:
    # retrieve aidge dependencies
    - DEPENDENCY_JOB="build:ubuntu_python"
    - !reference [.ubuntu:download:repositories, before_script] # located in common.gitlab-ci.yml
  script:
    # - /home/ubuntu/.local/bin/cibuildwheel --output-dir wheelhouse
    - cibuildwheel --output-dir wheelhouse
  # after_script:
  #   # Ensure all files are owned by the correct user at the end of the job
  #   # Note: sudo requires the job to have privileged = true
  #   - sudo chown -R $(whoami):$(whoami) .
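The release job above drives cibuildwheel entirely through environment variables. As a rough local equivalent, here is a sketch (not the CI's exact invocation; it assumes cibuildwheel and Docker are installed, and picks one cell of the build matrix — the variable values are copied from the job):

```python
# Sketch: reproduce one cell of the release matrix locally.
import os
import subprocess

os.environ["CIBW_BUILD"] = "cp310-manylinux_x86_64"  # one matrix entry
os.environ["CIBW_ENVIRONMENT"] = (
    "BUILD_WITH_CUDA=1 "
    "AIDGE_DEPENDENCIES='aidge_core aidge_backend_cpu' "
    "AIDGE_INSTALL='/AIDGE_INSTALL_CIBUILDWHEEL'"
)
subprocess.run(["cibuildwheel", "--output-dir", "wheelhouse"], check=True)
```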
################################################################################
# Centralized definitions of common job parameter values.                     #
# Parameters with many optional configurations may be in separate files.      #
#                                                                             #
################################################################################
variables:
  GIT_SUBMODULE_STRATEGY: recursive
  OMP_NUM_THREADS: 4
  GIT_SSL_NO_VERIFY: 1
  DEBIAN_FRONTEND: noninteractive

# See https://docs.gitlab.com/ee/ci/yaml/workflow.html#switch-between-branch-pipelines-and-merge-request-pipelines
workflow:
  rules:
    - if: $CI_PIPELINE_SOURCE == "merge_request_event"
    - if: $CI_COMMIT_BRANCH && $CI_OPEN_MERGE_REQUESTS
      when: never
    - if: $CI_COMMIT_BRANCH

default:
  image: nvidia/cuda:12.2.0-devel-ubuntu22.04
  before_script:
    - apt update
    - apt install -y cmake cppcheck python-is-python3 pip git gcovr unzip curl
    - apt install -y libcudnn8-dev

include:
  - remote: 'https://gitlab.eclipse.org/eclipse/aidge/gitlab_shared_files/-/raw/main/.gitlab/ci/shared_script.gitlab-ci.yml'
build:ubuntu_cpp:
  stage: build
  needs: []
  tags:
    - docker
  script:
    # Download dependencies
    - DEPENDENCY_JOB="build:ubuntu_cpp"
    # aidge_core
    - DEPENDENCY_NAME="aidge_core"
    - !reference [.download_dependency, script]
    # aidge_backend_cpu
    - DEPENDENCY_NAME="aidge_backend_cpu"
    - !reference [.download_dependency, script]
    # Build current module
    - export CMAKE_PREFIX_PATH=../install_cpp
    - mkdir -p build_cpp
    - cd build_cpp
    - cmake -DCMAKE_INSTALL_PREFIX:PATH=../install_cpp -DCMAKE_BUILD_TYPE=Debug -DWERROR=ON -DCOVERAGE=ON ..
    - make -j4 all install
  artifacts:
    expire_in: 1 week
    paths:
      - build_cpp/
      - install_cpp/
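Outside the runner, the job's build phase boils down to a plain configure-and-make sequence. A minimal local sketch (assuming aidge_core and aidge_backend_cpu are already installed under ../install_cpp):

```python
# Sketch of the job's build steps, runnable from the repository root.
import os
import subprocess

os.makedirs("build_cpp", exist_ok=True)
env = dict(os.environ, CMAKE_PREFIX_PATH="../install_cpp")
subprocess.run(["cmake", "-DCMAKE_INSTALL_PREFIX:PATH=../install_cpp",
                "-DCMAKE_BUILD_TYPE=Debug", "-DWERROR=ON", "-DCOVERAGE=ON", ".."],
               cwd="build_cpp", env=env, check=True)
subprocess.run(["make", "-j4", "all", "install"], cwd="build_cpp", check=True)
```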
build:ubuntu_cpp_g++10:
  stage: build
  needs: []
  tags:
    - docker
  script:
    # Download dependencies
    - DEPENDENCY_JOB="build:ubuntu_cpp"
    # aidge_core
    - DEPENDENCY_NAME="aidge_core"
    - !reference [.download_dependency, script]
    # aidge_backend_cpu
    - DEPENDENCY_NAME="aidge_backend_cpu"
    - !reference [.download_dependency, script]
    # Build current module
    - export CMAKE_PREFIX_PATH=../install_cpp
    - apt install -y g++-10
    - mkdir -p build_cpp
    - mkdir -p install_cpp
    - cd build_cpp
    - export CXX=/usr/bin/g++-10
    - cmake -DCMAKE_INSTALL_PREFIX:PATH=../install_cpp -DCMAKE_BUILD_TYPE=Debug -DWERROR=ON -DCOVERAGE=ON ..
    - make -j4 all install
build:ubuntu_cpp_g++12:
  stage: build
  needs: []
  tags:
    - docker
  script:
    # Download dependencies
    - DEPENDENCY_JOB="build:ubuntu_cpp"
    # aidge_core
    - DEPENDENCY_NAME="aidge_core"
    - !reference [.download_dependency, script]
    # aidge_backend_cpu
    - DEPENDENCY_NAME="aidge_backend_cpu"
    - !reference [.download_dependency, script]
    # Build current module
    - export CMAKE_PREFIX_PATH=../install_cpp
    - apt install -y g++-12
    - mkdir -p build_cpp
    - mkdir -p install_cpp
    - cd build_cpp
    - export CXX=/usr/bin/g++-12
    - cmake -DCMAKE_INSTALL_PREFIX:PATH=../install_cpp -DCMAKE_BUILD_TYPE=Debug -DWERROR=ON -DCOVERAGE=ON ..
    - make -j4 all install
build:ubuntu_cpp_clang12:
  stage: build
  needs: []
  tags:
    - docker
  script:
    # Download dependencies
    - DEPENDENCY_JOB="build:ubuntu_cpp"
    # aidge_core
    - DEPENDENCY_NAME="aidge_core"
    - !reference [.download_dependency, script]
    # aidge_backend_cpu
    - DEPENDENCY_NAME="aidge_backend_cpu"
    - !reference [.download_dependency, script]
    # Build current module
    - export CMAKE_PREFIX_PATH=../install_cpp
    - apt install -y clang-12
    - mkdir -p build_cpp
    - mkdir -p install_cpp
    - cd build_cpp
    - export CXX=/usr/bin/clang++-12
    - cmake -DCMAKE_INSTALL_PREFIX:PATH=../install_cpp -DCMAKE_BUILD_TYPE=Debug -DWERROR=ON -DCOVERAGE=ON ..
    - make -j4 all install
build:ubuntu_cpp_clang15:
  stage: build
  needs: []
  tags:
    - docker
  script:
    # Download dependencies
    - DEPENDENCY_JOB="build:ubuntu_cpp"
    # aidge_core
    - DEPENDENCY_NAME="aidge_core"
    - !reference [.download_dependency, script]
    # aidge_backend_cpu
    - DEPENDENCY_NAME="aidge_backend_cpu"
    - !reference [.download_dependency, script]
    # Build current module
    - export CMAKE_PREFIX_PATH=../install_cpp
    - apt install -y clang-15
    - mkdir -p build_cpp
    - mkdir -p install_cpp
    - cd build_cpp
    - export CXX=/usr/bin/clang++-15
    - cmake -DCMAKE_INSTALL_PREFIX:PATH=../install_cpp -DCMAKE_BUILD_TYPE=Debug -DWERROR=ON -DCOVERAGE=ON ..
    - make -j4 all install
build:ubuntu_python:
  stage: build
  needs: []
  tags:
    - docker
  script:
    # Download dependencies
    - DEPENDENCY_JOB="build:ubuntu_python"
    # aidge_core (python)
    - DEPENDENCY_NAME="aidge_core"
    - !reference [.download_dependency, script]
    # aidge_backend_cpu (python)
    - DEPENDENCY_NAME="aidge_backend_cpu"
    - !reference [.download_dependency, script]
    - python3 -m pip install virtualenv
    - virtualenv venv
    - source venv/bin/activate
    - python3 -m pip install -r requirements.txt
    - python3 -m pip install .
  artifacts:
    expire_in: 1 week
    paths:
      - venv/
# build:windows_cpp:
#   stage: build
#   needs: []
#   tags:
#     - windows
#   image: buildtools
#   before_script:
#     # Install Chocolatey
#     - Set-ExecutionPolicy Bypass -Scope Process -Force; [System.Net.ServicePointManager]::SecurityProtocol = [System.Net.ServicePointManager]::SecurityProtocol -bor 3072; iex ((New-Object System.Net.WebClient).DownloadString('https://community.chocolatey.org/install.ps1'))
#     # Install dependencies
#     - choco install cmake.install --installargs '"ADD_CMAKE_TO_PATH=System"' -Y
#     - choco install git -Y
#     - choco install python -Y
#     - choco install cuda -Y
#     # Update PATH
#     - $env:Path = [System.Environment]::GetEnvironmentVariable("Path","Machine") + ";" + [System.Environment]::GetEnvironmentVariable("Path","User")
#   script:
#     # Download dependencies
#     # aidge_core
#     - 'curl "https://gitlab.eclipse.org/api/v4/projects/5139/jobs/artifacts/main/download?job=build:windows_cpp" -o build_artifacts.zip'
#     - Expand-Archive -Path .\build_artifacts.zip -DestinationPath . -Force
#     - Remove-Item .\build_cpp\ -Recurse
#     # aidge_backend_cpu
#     - 'curl "https://gitlab.eclipse.org/api/v4/projects/5140/jobs/artifacts/master/download?job=build:windows_cpp" -o build_artifacts.zip'
#     - Expand-Archive -Path .\build_artifacts.zip -DestinationPath . -Force
#     - Remove-Item .\build_cpp\ -Recurse
#     - $env:CMAKE_PREFIX_PATH = '../install_cpp'
#     - mkdir -p build_cpp
#     - cd build_cpp
#     - cmake -DCMAKE_INSTALL_PREFIX:PATH=../install_cpp -DCMAKE_BUILD_TYPE=Debug ..
#     - cmake --build . -j2
#     - cmake --install . --config Debug
#   artifacts:
#     expire_in: 1 week
#     paths:
#       - build_cpp/
#       - install_cpp/

# build:windows_python:
#   stage: build
#   needs: []
#   tags:
#     - windows
#   image: buildtools
#   before_script:
#     # Install Chocolatey
#     - Set-ExecutionPolicy Bypass -Scope Process -Force; [System.Net.ServicePointManager]::SecurityProtocol = [System.Net.ServicePointManager]::SecurityProtocol -bor 3072; iex ((New-Object System.Net.WebClient).DownloadString('https://community.chocolatey.org/install.ps1'))
#     # Install dependencies
#     - choco install cmake.install --installargs '"ADD_CMAKE_TO_PATH=System"' -Y
#     - choco install git -Y
#     - choco install python -Y
#     - choco install cuda -Y
#     # Update PATH
#     - $env:Path = [System.Environment]::GetEnvironmentVariable("Path","Machine") + ";" + [System.Environment]::GetEnvironmentVariable("Path","User")
#   script:
#     # Download dependencies
#     # aidge_core (Python)
#     - 'curl "https://gitlab.eclipse.org/api/v4/projects/5139/jobs/artifacts/main/download?job=build:windows_python" -o build_artifacts.zip'
#     - Expand-Archive -Path .\build_artifacts.zip -DestinationPath . -Force
#     # aidge_backend_cpu (Python)
#     - 'curl "https://gitlab.eclipse.org/api/v4/projects/5140/jobs/artifacts/master/download?job=build:windows_python" -o build_artifacts.zip'
#     - Expand-Archive -Path .\build_artifacts.zip -DestinationPath . -Force
#     - python -m pip install virtualenv
#     - virtualenv venv
#     - venv\Scripts\Activate.ps1
#     - python -m pip install -r requirements.txt
#     - python -m pip install .
#   artifacts:
#     expire_in: 1 week
#     paths:
#       - venv/
$ErrorActionPreference = "Stop"

# Retrieve and clean the dependencies string from the environment variable
$AIDGE_DEPENDENCIES = $env:AIDGE_DEPENDENCIES -split ' '
Write-Host "Aidge dependencies : $AIDGE_DEPENDENCIES"
if ($($AIDGE_DEPENDENCIES.Length) -eq 0) {
    Write-Host "- No dependencies provided for current repository"
    New-Item -ItemType Directory -Force -Path ".\build" | Out-Null
    Remove-Item -Path ".\build\*" -Recurse -Force
} else {
    Write-Host "Retrieving given dependencies to build current package : $AIDGE_DEPENDENCIES"
    foreach ($dep in $($AIDGE_DEPENDENCIES -split " ")) {
        Write-Host "Retrieving : $dep"
        $curr_loc = $(Get-Location)
        Set-Location ../$dep
        Get-Location
        Get-ChildItem .
        New-Item -Path ".\build" -ItemType Directory -Force | Out-Null
        Get-ChildItem -Path ".\build" -File | Remove-Item -Force
        python -m pip install . -v
        Set-Location $curr_loc
    }
}
#!/bin/bash
set -e
if [[ "$1" == "" ]]; then
    echo "Build aidge dependencies in the cibuildwheel container before building the wheel."
    echo "The search path defines where the dependencies will be searched."
    echo "Hint: in wheel containers, files are mounted on /host by default."
    echo ""
    echo "usage: ./cibuildwheel_build_deps_before_build_wheel.sh search_path"
fi
set -x
if [[ $AIDGE_DEPENDENCIES == "" ]]; then # case for aidge_core
    mkdir -p build # create the build folder if it is not already there to hold the C++ build
    rm -rf build/* # build from scratch
else
    for repo in $AIDGE_DEPENDENCIES; do # case for other projects
        search_path=$1
        REPO_PATH=$(find $search_path ! -writable -prune -o -type d \
            -name "$repo" \
            -not -path "*/install/*" \
            -not -path "*/.git/*" \
            -not -path "*/.mypy_cache/*" \
            -not -path "*/miniconda/*" \
            -not -path "*/conda/*" \
            -not -path "*/.local/*" \
            -not -path "*/lib/*" \
            -not -path "*/$repo/$repo/*" \
            -not -path "*/proc/*" \
            -print -quit)
        if [[ -z "$REPO_PATH" ]]; then
            echo "ERROR: dependency $repo not found in search_path \"$search_path\". ABORTING."
            exit 1
        fi
        cd $REPO_PATH
        mkdir -p build # create the build folder if it is not already there to hold the C++ build
        rm -rf build/* # build from scratch
        pip install . -v
        # Give all rights on the generated build folder to avoid root-owned files once out of the Docker container
        chmod -R a+rwX build/
        chmod -R a+rwX *.egg-info/
        cd -
    done
fi
set +x
set +e
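In CI this helper runs inside the cibuildwheel container before the wheel build. A hedged sketch of how it might be invoked (the script name comes from its own usage message above; the search path value mirrors the release job's SEARCH_PATH variable, with /host as the default cibuildwheel mount):

```python
# Sketch: invoke the dependency-build helper with an explicit search path.
import os
import subprocess

search_path = os.environ.get("SEARCH_PATH", "/host")  # assumption: /host mount
subprocess.run(["bash", "cibuildwheel_build_deps_before_build_wheel.sh", search_path],
               check=True)
```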
coverage:ubuntu_cpp:
  stage: coverage
  needs: ["build:ubuntu_cpp"]
  tags:
    - docker
  script:
    - cd build_cpp
    - ctest --output-on-failure
    # HTML report for visualization
    - gcovr --html-details --exclude-unreachable-branches -o coverage.html --root ${CI_PROJECT_DIR} --filter '\.\./include/' --filter '\.\./src/'
    # Cobertura XML report for Gitlab integration
    - gcovr --xml-pretty --exclude-unreachable-branches --print-summary -o coverage.xml --root ${CI_PROJECT_DIR} --filter '\.\./include/' --filter '\.\./src/'
  coverage: /^\s*lines:\s*\d+.\d+\%/
  artifacts:
    name: ${CI_JOB_NAME}-${CI_COMMIT_REF_NAME}-${CI_COMMIT_SHA}
    expire_in: 2 days
    reports:
      coverage_report:
        coverage_format: cobertura
        path: build_cpp/coverage.xml
coverage:ubuntu_python:
  stage: coverage
  needs: ["build:ubuntu_python"]
  tags:
    - docker
  script:
    - source venv/bin/activate
    - python3 -m pip install numpy coverage
    - cd ${CI_PROJECT_NAME}
    # Retrieve the installation path of the module, since it is installed with pip.
    - export MODULE_LOCATION=`python -c "import ${CI_PROJECT_NAME} as _; print(_.__path__[0])"`
    - python3 -m coverage run --source=$MODULE_LOCATION -m unittest discover -s unit_tests/ -v -b
    - python3 -m coverage report
    - python3 -m coverage xml
  coverage: '/(?i)total.*? (100(?:\.0+)?\%|[1-9]?\d(?:\.\d+)?\%)$/'
  artifacts:
    reports:
      coverage_report:
        coverage_format: cobertura
        path: ${CI_PROJECT_NAME}/coverage.xml
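The MODULE_LOCATION indirection exists because coverage must point at the pip-installed copy of the package, not the source tree. What the job's `python -c` one-liner does, written out:

```python
# Resolve where pip installed the package, so that
# `coverage run --source=...` measures that copy.
import aidge_backend_cuda as m

module_location = m.__path__[0]
print(module_location)
```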
test:ubuntu_cpp:
  stage: test
  needs: ["build:ubuntu_cpp"]
  tags:
    - docker
  script:
    - cd build_cpp
    - ctest --output-junit ctest-results.xml --output-on-failure
  artifacts:
    reports:
      junit: build_cpp/ctest-results.xml

test:ubuntu_python:
  stage: test
  needs: ["build:ubuntu_python"]
  tags:
    - docker
  script:
    - source venv/bin/activate
    - cd ${CI_PROJECT_NAME}
    - python3 -m pip install numpy unittest-xml-reporting
    - python3 -m pip list
    # Run on discovery all tests located in core/unit_tests/python
    - python3 -m xmlrunner discover -s unit_tests/ -v -b --output-file xmlrunner-results.xml
  artifacts:
    reports:
      junit: ${CI_PROJECT_NAME}/xmlrunner-results.xml
# test:windows_cpp:
#   stage: test
#   needs: ["build:windows_cpp"]
#   tags:
#     - windows
#   image: buildtools
#   before_script:
#     # Install Chocolatey
#     - Set-ExecutionPolicy Bypass -Scope Process -Force; [System.Net.ServicePointManager]::SecurityProtocol = [System.Net.ServicePointManager]::SecurityProtocol -bor 3072; iex ((New-Object System.Net.WebClient).DownloadString('https://community.chocolatey.org/install.ps1'))
#     # Install dependencies
#     - choco install cmake.install --installargs '"ADD_CMAKE_TO_PATH=System"' -Y
#     - choco install python -Y
#     # Update PATH
#     - $env:Path = [System.Environment]::GetEnvironmentVariable("Path","Machine") + ";" + [System.Environment]::GetEnvironmentVariable("Path","User")
#   script:
#     - cd build_cpp
#     - ctest --output-junit ctest-results.xml --output-on-failure
#   artifacts:
#     reports:
#       junit: build_cpp/ctest-results.xml
# Version 0.5.0 (January 31, 2025)

# Version 0.4.0 (December 6, 2024)

# Version 0.1.0 (January 23, 2024)

Initial release
# CMake >= 3.18 is required for good support of FindCUDAToolkit
cmake_minimum_required(VERSION 3.18)
set(CMAKE_CXX_STANDARD 14)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)

file(STRINGS "${CMAKE_SOURCE_DIR}/version.txt" version)

# Parse version.txt to retrieve Major, Minor and Patch
string(REGEX MATCH "([0-9]+)\\.([0-9]+)\\.([0-9]+)" _ MATCHES ${version})
set(PROJECT_VERSION_MAJOR ${CMAKE_MATCH_1})
set(PROJECT_VERSION_MINOR ${CMAKE_MATCH_2})
set(PROJECT_VERSION_PATCH ${CMAKE_MATCH_3})

project(aidge_backend_cuda
        VERSION ${version}
        DESCRIPTION "CUDA implementations of the operators of aidge framework."
        LANGUAGES CXX)

message(STATUS "Project name: ${CMAKE_PROJECT_NAME}")
message(STATUS "Project version: ${version}")

execute_process(
    COMMAND git rev-parse --short HEAD
    WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}
    OUTPUT_VARIABLE GIT_COMMIT_HASH
    OUTPUT_STRIP_TRAILING_WHITESPACE
    ERROR_QUIET
)
message(STATUS "Latest git commit: ${GIT_COMMIT_HASH}")

# Define a preprocessor macro with the Git commit version
add_definitions(-DGIT_COMMIT_HASH="${GIT_COMMIT_HASH}")

# Note : project name is ${CMAKE_PROJECT_NAME} and python module name is also ${CMAKE_PROJECT_NAME}
set(module_name _${CMAKE_PROJECT_NAME}) # target name

##############################################
# Define options
option(PYBIND "python binding (Default: OFF)" OFF)
option(WERROR "Warning as error (Default: OFF)" OFF)
option(TEST "Enable tests (Default: ON)" ON)
option(COVERAGE "Enable coverage (Default: OFF)" OFF)
option(ENABLE_ASAN "Enable ASan (AddressSanitizer) for runtime analysis of memory use (over/underflow, memory leak, ...) (Default: OFF)" OFF)

##############################################
# Import utils CMakeLists
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake")

if(CMAKE_COMPILER_IS_GNUCXX AND COVERAGE)
    Include(CodeCoverage)
endif()

##############################################
# Find system dependencies
##############################################
# FIND AIDGE Dependencies
if(NOT $ENV{AIDGE_INSTALL} STREQUAL "")
    set(CMAKE_INSTALL_PREFIX $ENV{AIDGE_INSTALL})
    list(APPEND CMAKE_PREFIX_PATH $ENV{AIDGE_INSTALL})
    message(WARNING "Env var AIDGE_INSTALL detected : $ENV{AIDGE_INSTALL}. Set CMAKE_INSTALL_PREFIX to AIDGE_INSTALL & added to CMAKE_PREFIX_PATH"
            "\n\tCMAKE_INSTALL_PREFIX = ${CMAKE_INSTALL_PREFIX}"
            "\n\tCMAKE_PREFIX_PATH = ${CMAKE_PREFIX_PATH}")
endif()

find_package(aidge_core REQUIRED)
if(TEST)
    find_package(aidge_backend_cpu REQUIRED)
endif()

##########
# CUDA
if(NOT $ENV{CIBUILDWHEEL} STREQUAL "")
    message(WARNING "Env var CIBUILDWHEEL detected : currently building for a release job."
            "\nSetting manually CUDACXX, PATH & LD_LIBRARY_PATH variables")
    list(APPEND ENV{LD_LIBRARY_PATH} /usr/local/cuda/lib64)
    list(APPEND ENV{PATH} /usr/local/cuda/bin)
    set(ENV{CUDACXX} /usr/local/cuda/bin/nvcc)
endif()
find_package(CUDAToolkit 12 REQUIRED)

if(NOT DEFINED CMAKE_CUDA_STANDARD)
    set(CMAKE_CUDA_STANDARD 14)
    set(CMAKE_CUDA_STANDARD_REQUIRED ON)
endif()
if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
    set(CMAKE_CUDA_ARCHITECTURES native)
endif()
message(STATUS "Cuda compiler version = ${CMAKE_CUDA_COMPILER_VERSION}")
# Define a preprocessor macro with the Cuda compiler version
add_definitions(-DCUDA_COMPILER_VERSION="${CMAKE_CUDA_COMPILER_VERSION}")

message(STATUS "CUDA STANDARD : ${CMAKE_CUDA_STANDARD}")
message(STATUS "CUDA ARCHITECTURE : ${CMAKE_CUDA_ARCHITECTURES}")

enable_language(CUDA)

##############################################
# Create target and set properties
file(GLOB_RECURSE src_files "src/*.cpp" "src/*.cu")
file(GLOB_RECURSE inc_files "include/*.hpp")

add_library(${module_name} ${src_files} ${inc_files})

# PYTHON BINDING
if (PYBIND)
    # Handles Python + pybind11 headers dependencies
    include(PybindModuleCreation)
    # creates a target of the same name as CMAKE_PROJECT_NAME
    generate_python_binding(${CMAKE_PROJECT_NAME} ${module_name}) # the python bindings module has the same name as the project.

    target_link_libraries(${module_name}
        PUBLIC
            pybind11::pybind11
        PRIVATE
            Python::Module
    )
endif()

message(STATUS "Creating ${CMAKE_CURRENT_SOURCE_DIR}/include/aidge/backend/cuda_version.h")
# Generate version.h file from config file version.h.in
configure_file(
    "${CMAKE_CURRENT_SOURCE_DIR}/include/aidge/backend/version.h.in"
    "${CMAKE_CURRENT_SOURCE_DIR}/include/aidge/backend/cuda_version.h"
)

target_link_libraries(${module_name}
    PUBLIC
        _aidge_core # _ is added because we link the target not the project
    PRIVATE
        CUDA::cublas
        cudnn
        CUDA::cudart
)

if(${ENABLE_ASAN})
    message("Building ${module_name} with ASAN.")
    set(SANITIZE_FLAGS -fsanitize=address -fno-omit-frame-pointer)
    target_link_libraries(${module_name}
        PUBLIC
            -fsanitize=address
    )
    target_compile_options(${module_name}
        PRIVATE
            ${SANITIZE_FLAGS}
    )
endif()

if(TEST)
    target_link_libraries(${module_name}
        PUBLIC
            _aidge_backend_cpu # _ is added because we link the target not the project
    )
endif()

#Set target properties
target_include_directories(${module_name}
    PUBLIC
@@ -62,27 +161,9 @@ target_include_directories(${module_name}
        ${CMAKE_CURRENT_SOURCE_DIR}/src
)

set_property(TARGET ${module_name} PROPERTY POSITION_INDEPENDENT_CODE ON)
set_target_properties(${module_name} PROPERTIES CUDA_SEPARABLE_COMPILATION ON)

target_compile_features(${module_name} PRIVATE cxx_std_14)

target_compile_options(${module_name} PRIVATE
@@ -101,11 +182,10 @@ endif()

##############################################
# Installation instructions
include(GNUInstallDirs)
set(INSTALL_CONFIGDIR ${CMAKE_INSTALL_LIBDIR}/cmake/${CMAKE_PROJECT_NAME})

install(TARGETS ${module_name} EXPORT ${CMAKE_PROJECT_NAME}-targets
    LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
    ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
    RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
@@ -116,42 +196,44 @@ install(DIRECTORY include/ DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})

#Export the targets to a script
install(EXPORT ${CMAKE_PROJECT_NAME}-targets
    FILE "${CMAKE_PROJECT_NAME}-targets.cmake"
    DESTINATION ${INSTALL_CONFIGDIR}
    # COMPONENT ${module_name}
)

#Create a ConfigVersion.cmake file
include(CMakePackageConfigHelpers)
write_basic_package_version_file(
    "${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_PROJECT_NAME}-config-version.cmake"
    VERSION ${version}
    COMPATIBILITY AnyNewerVersion
)

configure_package_config_file("${CMAKE_PROJECT_NAME}-config.cmake.in"
    "${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_PROJECT_NAME}-config.cmake"
    INSTALL_DESTINATION ${INSTALL_CONFIGDIR}
)

#Install the config, configversion and custom find modules
install(FILES
    "${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_PROJECT_NAME}-config.cmake"
    "${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_PROJECT_NAME}-config-version.cmake"
    DESTINATION ${INSTALL_CONFIGDIR}
)

##############################################
## Exporting from the build tree
export(EXPORT ${CMAKE_PROJECT_NAME}-targets
    FILE "${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_PROJECT_NAME}-targets.cmake")

##############################################
## Add test
if(TEST)
    if(PYBIND)
        message(FATAL_ERROR "PYBIND and TEST are both enabled. But cannot compile with catch_2.\nChoose between pybind and Catch2 for compilation.")
    endif()
    enable_testing()
    add_subdirectory(unit_tests)
endif()
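Given the options above, and the TEST/PYBIND exclusion enforced at the end, a typical out-of-source configure step might look like this sketch (the option values are illustrative):

```python
# Sketch: configure a C++ build with tests; PYBIND stays OFF because the
# CMakeLists above rejects PYBIND=ON together with TEST=ON.
import os
import subprocess

os.makedirs("build", exist_ok=True)
subprocess.run(["cmake", "-DTEST=ON", "-DPYBIND=OFF",
                "-DCMAKE_BUILD_TYPE=Release", ".."],
               cwd="build", check=True)
subprocess.run(["cmake", "--build", ".", "-j4"], cwd="build", check=True)
```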
include README.md LICENCE
recursive-include aidge_backend_cuda *.py
recursive-exclude aidge_backend_cuda/unit_tests *.py
recursive-include include *.hpp
recursive-include src *.cpp
recursive-include python_binding *.cpp
include CMakeLists.txt
@@ -3,15 +3,30 @@
# Aidge CUDA library

You can find in this folder the library that implements the CUDA operators.

[TOC]

## Installation

### Dependencies
- [CUDA ToolKit 12.4](https://developer.nvidia.com/cuda-12-4-0-download-archive)
- [cuDNN 9](https://developer.nvidia.com/cudnn-downloads): make sure to install the CUDA 12 compatible version
- `GCC`
- `Make`/`Ninja`
- `CMake`
- `Python` (optional, if you do not intend to use this library from Python through pybind)

#### Aidge dependencies
- `aidge_core`
- `aidge_backend_cpu`

### Pip installation

``` bash
pip install . -v
```

> **TIPS:** Use environment variables to change compilation options (a sketch follows below):
> - `AIDGE_INSTALL`: to set the installation folder. Defaults to `/usr/local/lib`. :warning: This path must be identical to the aidge_core install path.
> - `AIDGE_PYTHON_BUILD_TYPE`: to set the compilation mode to **Debug** or **Release**
> - `AIDGE_BUILD_GEN`: to set the build generator used by CMake (e.g. `Ninja`)
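For instance, a minimal sketch of a release-mode install driven by these variables (the values are illustrative):

```python
# Sketch: drive `pip install` with the environment variables documented above.
import os
import subprocess

os.environ["AIDGE_PYTHON_BUILD_TYPE"] = "Release"
os.environ["AIDGE_BUILD_GEN"] = "Ninja"
subprocess.run(["pip", "install", ".", "-v"], check=True)
```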
## Standard C++ Compilation
...
from aidge_backend_cuda.aidge_backend_cuda import *  # import so generated by PyBind
from . import benchmark
import time

import numpy as np

import aidge_core
import aidge_backend_cuda


def measure_inference_time(model: aidge_core.GraphView, input_data: list[tuple[str, np.ndarray]], nb_warmup: int = 10, nb_iterations: int = 50) -> list[float]:
    # update model and inputs backend
    model.set_backend("cuda")
    ordered_inputs = [aidge_core.Tensor(i[1]) for i in input_data]
    for ordered_input in ordered_inputs:
        ordered_input.set_backend("cuda")

    scheduler = aidge_core.SequentialScheduler(model)
    scheduler.generate_scheduling()

    timings = []
    # Warm-up runs.
    for i in range(nb_warmup + nb_iterations):
        if i < nb_warmup:
            scheduler.forward(forward_dims=False, data=ordered_inputs)
        else:
            start = time.process_time()
            scheduler.forward(forward_dims=False, data=ordered_inputs)
            end = time.process_time()
            timings.append(end - start)
    return timings


def compute_output(model: aidge_core.GraphView, input_data: list[tuple[str, np.ndarray]]) -> list[np.ndarray]:
    # update model and inputs backend
    model.set_backend("cuda", device=1)
    ordered_inputs = [aidge_core.Tensor(i[1]) for i in input_data]
    for ordered_input in ordered_inputs:
        ordered_input.set_backend("cuda", device=1)

    scheduler = aidge_core.SequentialScheduler(model)
    scheduler.generate_scheduling()
    scheduler.forward(forward_dims=False, data=ordered_inputs)

    outs = []
    for pair in model.get_ordered_outputs():
        t = pair[0].get_operator().get_output(pair[1])
        t.set_backend("cpu")
        outs.append(t)
    return [np.array(out) for out in outs]
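A hedged usage sketch for these helpers; `model` is assumed to be an already-built `aidge_core.GraphView` (for example, imported from ONNX), and the input name and shape are illustrative:

```python
# Sketch: benchmark a graph on the cuda backend with the helpers above.
import numpy as np
from aidge_backend_cuda import benchmark

x = np.random.rand(1, 3, 224, 224).astype(np.float32)
timings = benchmark.measure_inference_time(model, [("input", x)])
print(f"mean forward time: {1e3 * sum(timings) / len(timings):.2f} ms")
outputs = benchmark.compute_output(model, [("input", x)])
```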
@@ -6,15 +6,17 @@ import numpy as np

class test_tensor(unittest.TestCase):
    """Test tensor binding"""

    def setUp(self):
        pass

    def tearDown(self):
        pass

    def test_getavailable_backends(self):
        self.assertTrue("cuda" in aidge_core.Tensor.get_available_backends())

if __name__ == "__main__":
    unittest.main()
function(generate_python_binding pybind_module_name target_to_bind)
    add_definitions(-DPYBIND)
    Include(FetchContent)

    set(PYBIND_VERSION v2.13.6)
    set(PYBIND11_FINDPYTHON ON)
    message(STATUS "Retrieving pybind ${PYBIND_VERSION} from git")

    FetchContent_Declare(
        PyBind11
        GIT_REPOSITORY https://github.com/pybind/pybind11.git
        GIT_TAG ${PYBIND_VERSION} # or a later release
    )

    # Use the new FindPython mode, recommended. Requires CMake 3.15+
    find_package(Python COMPONENTS Interpreter Development.Module)
    FetchContent_MakeAvailable(PyBind11)

    message(STATUS "Creating binding for module ${pybind_module_name}")
    file(GLOB_RECURSE pybind_src_files "python_binding/*.cpp")

    pybind11_add_module(${pybind_module_name} MODULE ${pybind_src_files} "NO_EXTRAS") # NO_EXTRAS required for pip install
    target_include_directories(${pybind_module_name} PRIVATE "python_binding")
    target_link_libraries(${pybind_module_name}
        PRIVATE
            ${target_to_bind}
            CUDA::cublas
            cudnn
            CUDA::cudart
    )
    set_property(TARGET ${pybind_module_name} PROPERTY POSITION_INDEPENDENT_CODE ON)
    set_target_properties(${pybind_module_name} PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
    set_target_properties(${pybind_module_name} PROPERTIES
        CMAKE_SHARED_LINKER_FLAGS "-Wl,--exclude-libs,ALL"
    )
    set_target_properties(${pybind_module_name} PROPERTIES INSTALL_RPATH "")
endfunction()
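A quick sanity check (sketch) that the module produced by this function is importable and actually registers the backend, mirroring the unit test earlier in this diff:

```python
# Sketch: verify that the pybind module built by generate_python_binding
# loads and registers the "cuda" backend.
import aidge_core
import aidge_backend_cuda  # loads the generated native module

assert "cuda" in aidge_core.Tensor.get_available_backends()
```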
@@ -11,9 +11,42 @@
#ifndef AIDGE_BACKEND_CUDA_IMPORTS_H_
#define AIDGE_BACKEND_CUDA_IMPORTS_H_

#include "aidge/backend/cuda_version.h"

#include "aidge/backend/cuda/data/TensorImpl.hpp"
#include "aidge/backend/cuda/operator/OperatorImpl.hpp"
#include "aidge/backend/cuda/operator/AbsImpl.hpp"
#include "aidge/backend/cuda/operator/AddImpl.hpp"
#include "aidge/backend/cuda/operator/ArgMaxImpl.hpp"
#include "aidge/backend/cuda/operator/AvgPoolingImpl.hpp"
#include "aidge/backend/cuda/operator/BatchNormImpl.hpp"
#include "aidge/backend/cuda/operator/ConvImpl.hpp"
#include "aidge/backend/cuda/operator/ClipImpl.hpp"
#include "aidge/backend/cuda/operator/DivImpl.hpp"
#include "aidge/backend/cuda/operator/EqualImpl.hpp"
#include "aidge/backend/cuda/operator/ErfImpl.hpp"
#include "aidge/backend/cuda/operator/FCImpl.hpp"
#include "aidge/backend/cuda/operator/GlobalAveragePoolingImpl.hpp"
#include "aidge/backend/cuda/operator/ILayerNormImpl.hpp"
#include "aidge/backend/cuda/operator/LRNImpl.hpp"
#include "aidge/backend/cuda/operator/LnImpl.hpp"
#include "aidge/backend/cuda/operator/MaxPoolingImpl.hpp"
#include "aidge/backend/cuda/operator/MatMulImpl.hpp"
#include "aidge/backend/cuda/operator/MulImpl.hpp"
#include "aidge/backend/cuda/operator/PadImpl.hpp"
#include "aidge/backend/cuda/operator/PowImpl.hpp"
#include "aidge/backend/cuda/operator/ReduceMeanImpl.hpp"
#include "aidge/backend/cuda/operator/ReduceSumImpl.hpp"
#include "aidge/backend/cuda/operator/ReLUImpl.hpp"
#include "aidge/backend/cuda/operator/RoundImpl.hpp"
#include "aidge/backend/cuda/operator/ShiftGELUImpl.hpp"
#include "aidge/backend/cuda/operator/ShiftMaxImpl.hpp"
#include "aidge/backend/cuda/operator/SigmoidImpl.hpp"
#include "aidge/backend/cuda/operator/SoftmaxImpl.hpp"
#include "aidge/backend/cuda/operator/SqrtImpl.hpp"
#include "aidge/backend/cuda/operator/SubImpl.hpp"
#include "aidge/backend/cuda/operator/TanhImpl.hpp"

#endif /* AIDGE_BACKEND_CUDA_IMPORTS_H_ */
#ifndef AIDGE_BACKEND_CUDA_DATA_TENSORIMPL_H_
#define AIDGE_BACKEND_CUDA_DATA_TENSORIMPL_H_

#include <cstddef>  // std::size_t
#include <memory>
#include <string>
#include <vector>

#include <cuda.h>

#include <type_traits>  // std::enable_if, std::is_same

#include "aidge/backend/TensorImpl.hpp"
#include "aidge/data/Tensor.hpp"
#include "aidge/utils/Registrar.hpp"
@@ -13,30 +21,33 @@
namespace Aidge {

template <typename SRC_T, typename DST_T>
void thrust_copy(const SRC_T* srcData, DST_T* dstData, size_t size);
template <typename SRC_T, typename std::enable_if<!std::is_same<half_float::half, SRC_T>::value>::type* = nullptr>
void thrust_copy(const SRC_T* srcData, half_float::half* dstData, size_t size);
template <typename DST_T, typename std::enable_if<!std::is_same<half_float::half, DST_T>::value>::type* = nullptr>
void thrust_copy(const half_float::half* srcData, DST_T* dstData, size_t size);
template <>
void thrust_copy(const half_float::half* srcData, half_float::half* dstData, size_t size);

/**
 * @brief Abstract class for the TensorImpl_cuda class template.
 * @details Its purpose is to provide access to base methods that are specific
 * to the implementation (which are therefore not present in the TensorImpl
 * class), but whose data type does not need to be known.
 */
class TensorImpl_cuda_ {
protected:
    mutable cudnnTensorDescriptor_t mCudnnTensor = nullptr;

public:
    /**
     * @brief Return the CuDNN tensor descriptor of the tensor.
     * @details This method uses lazy initialization for the descriptor
     * (which is therefore mutable in the derived class).
     * @return cudnnTensorDescriptor_t CuDNN tensor descriptor.
     */
    virtual const cudnnTensorDescriptor_t& getCudnnTensorDesc(const Tensor& tensor) const = 0;

    virtual ~TensorImpl_cuda_() {
        if (mCudnnTensor != nullptr)
            cudnnDestroyTensorDescriptor(mCudnnTensor);
    }
};

template <class T>
@@ -54,151 +65,186 @@ private:
    }

private:
    /// Pointer to the data and its capacity
    future_std::span<T> mData;
    /// If this instance own the data, std::unique_ptr manages it
    std::unique_ptr<T, decltype(&cudaDelete)> mDataOwner;

public:
    static const std::string Backend;

    TensorImpl_cuda(DeviceIdx_t device, std::vector<DimSize_t> dims) : TensorImpl(Backend, device, dims), mDataOwner(nullptr, cudaDelete) {}

    bool operator==(const TensorImpl &otherImpl) const override final;

    static std::shared_ptr<TensorImpl_cuda> create(DeviceIdx_t device, std::vector<DimSize_t> dims) {
        return std::make_shared<TensorImpl_cuda<T>>(device, dims);
    }

    // native interface
    const future_std::span<T>& data() const { return mData; }

    inline std::size_t capacity() const noexcept override { return mData.size(); }

    std::size_t scalarSize() const noexcept override { return sizeof(T); }

    void zeros() override final {
        CHECK_CUDA_STATUS(cudaMemset(rawPtr(), T(0), mNbElts * sizeof(T)));
    }

    void copy(const void *src, NbElts_t length, NbElts_t offset = 0) override {
        AIDGE_ASSERT(offset + length <= mNbElts, "TensorImpl_cuda<{}>::copy(): copy offset ({}) + length ({}) is above capacity ({})", typeid(T).name(), offset, length, mNbElts);
        const T* srcT = static_cast<const T *>(src);
        T* dstT = static_cast<T *>(rawPtr(offset));
        AIDGE_ASSERT(dstT < srcT || dstT >= srcT + length, "TensorImpl_cuda<{}>::copy(): overlapping copy is not supported", typeid(T).name());
        CHECK_CUDA_STATUS(cudaMemcpy(dstT, srcT, length * sizeof(T), cudaMemcpyDeviceToDevice));
    }

    void copyCast(const void *src, const DataType srcDt, NbElts_t length, NbElts_t offset = 0) override {
        if (length == 0) {
            return;
        }

        AIDGE_ASSERT(offset + length <= mNbElts, "TensorImpl_cuda<{}>::copyCast(): copy offset ({}) + length ({}) is above capacity ({})", typeid(T).name(), offset, length, mNbElts);
        switch (srcDt) {
        case DataType::Float64:
            thrust_copy(static_cast<const double*>(src),
                        static_cast<T*>(rawPtr(offset)),
                        static_cast<size_t>(length));
            break;
        case DataType::Float32:
            thrust_copy(static_cast<const float*>(src),
                        static_cast<T*>(rawPtr(offset)),
                        static_cast<size_t>(length));
            break;
        case DataType::Float16:
            thrust_copy(static_cast<const half_float::half*>(src),
                        static_cast<T*>(rawPtr(offset)),
                        static_cast<size_t>(length));
            break;
        case DataType::Int64:
            thrust_copy(static_cast<const int64_t*>(src),
                        static_cast<T*>(rawPtr(offset)),
                        static_cast<size_t>(length));
            break;
        case DataType::UInt64:
            thrust_copy(static_cast<const uint64_t*>(src),
                        static_cast<T*>(rawPtr(offset)),
                        static_cast<size_t>(length));
            break;
        case DataType::Int32:
            thrust_copy(static_cast<const int32_t*>(src),
                        static_cast<T*>(rawPtr(offset)),
                        static_cast<size_t>(length));
            break;
        case DataType::UInt32:
            thrust_copy(static_cast<const uint32_t*>(src),
                        static_cast<T*>(rawPtr(offset)),
                        static_cast<size_t>(length));
            break;
        case DataType::Int16:
            thrust_copy(static_cast<const int16_t*>(src),
                        static_cast<T*>(rawPtr(offset)),
                        static_cast<size_t>(length));
            break;
        case DataType::UInt16:
            thrust_copy(static_cast<const uint16_t*>(src),
                        static_cast<T*>(rawPtr(offset)),
                        static_cast<size_t>(length));
            break;
        case DataType::Int8:
            thrust_copy(static_cast<const int8_t*>(src),
                        static_cast<T*>(rawPtr(offset)),
                        static_cast<size_t>(length));
            break;
        case DataType::UInt8:
            thrust_copy(static_cast<const uint8_t*>(src),
                        static_cast<T*>(rawPtr(offset)),
                        static_cast<size_t>(length));
            break;
        default:
            AIDGE_THROW_OR_ABORT(std::runtime_error, "TensorImpl_cuda<{}>::copyCast(): unsupported data type {}.", typeid(T).name(), srcDt);
            break;
        }
    }

    void copyFromDevice(const void *src, const std::pair<std::string, DeviceIdx_t>& device, NbElts_t length, NbElts_t offset = 0) override {
        AIDGE_ASSERT(offset + length <= mNbElts, "TensorImpl_cuda<{}>::copyFromDevice(): copy offset ({}) + length ({}) is above capacity ({})", typeid(T).name(), offset, length, mNbElts);
        CHECK_CUDA_STATUS(cudaMemcpy(rawPtr(offset), src, length * sizeof(T), cudaMemcpyDeviceToDevice));
    }

    void copyFromHost(const void *src, NbElts_t length, NbElts_t offset = 0) override {
        AIDGE_ASSERT(offset + length <= mNbElts, "TensorImpl_cuda<{}>::copyFromHost(): copy offset ({}) + length ({}) is above capacity ({})", typeid(T).name(), offset, length, mNbElts);
        CHECK_CUDA_STATUS(cudaMemcpy(rawPtr(offset), src, length * sizeof(T), cudaMemcpyHostToDevice));
    }

    void copyToHost(void *dst, NbElts_t length, NbElts_t offset = 0) const override {
        AIDGE_ASSERT(offset + length <= mNbElts, "TensorImpl_cuda<{}>::copyToHost(): copy offset ({}) + length ({}) is above capacity ({})", typeid(T).name(), offset, length, mNbElts);
        CHECK_CUDA_STATUS(cudaMemcpy(dst, rawPtr(offset), length * sizeof(T), cudaMemcpyDeviceToHost));
    }

    void *rawPtr(NbElts_t offset = 0) override {
        cudaSetDevice(mDevice);
        lazyInit();
        return (mData.data() + offset);
    };

    const void *rawPtr(NbElts_t offset = 0) const override {
        AIDGE_ASSERT(mData.size() >= mNbElts, "TensorImpl_cuda<{}>::rawPtr(): accessing uninitialized const rawPtr", typeid(T).name());
        return (mData.data() + offset);
    };

    const cudnnTensorDescriptor_t& getCudnnTensorDesc(const Tensor& tensor) const override {
        if (mCudnnTensor == nullptr) {
            CHECK_CUDNN_STATUS(cudnnCreateTensorDescriptor(&mCudnnTensor));

            if (tensor.size() > 0) {
                /**
                **  cuDNN tensors are restricted to having at least 4 dimensions:
                **  when working with lower dimensional data, unused dimensions are set to 1.
                **  Refer to the cudnnSetTensorNdDescriptor documentation from:
                **  https://docs.nvidia.com/deeplearning/sdk/cudnn-developer-guide/index.html
                **/
                std::vector<int> dims(tensor.dims().cbegin(), tensor.dims().cend());
                std::vector<int> strides(tensor.strides().cbegin(), tensor.strides().cend());

                if (dims.size() < 4) {
                    dims.resize(4, 1);
                    strides.resize(4, 1);
                }

                CHECK_CUDNN_STATUS(cudnnSetTensorNdDescriptor(mCudnnTensor,
                                                              CudaContext::data_type<T>::value,
                                                              dims.size(),
                                                              &dims[0],
                                                              &strides[0]));
            }
        }
        else {
            // Compare if the shape of the tensor has changed
            cudnnDataType_t currentDataType;
            int currentNbDims;
            // Since we don't know the nb dims of the current tensor, we init with CUDNN_DIM_MAX then remove the trailing zeros
            std::vector<int> currentDims(CUDNN_DIM_MAX);
            std::vector<int> currentStrides(CUDNN_DIM_MAX);
            CHECK_CUDNN_STATUS(cudnnGetTensorNdDescriptor(mCudnnTensor, CUDNN_DIM_MAX, &currentDataType, &currentNbDims, currentDims.data(), currentStrides.data()));
            // Remove the trailing zeros
            currentDims.erase(std::find_if(currentDims.rbegin(), currentDims.rend(), [](int x) { return x != 0; }).base(),
                              currentDims.end());

            std::vector<int> dims(tensor.dims().cbegin(), tensor.dims().cend());
            if (dims.size() < 4) {
                dims.resize(4, 1);
            }

            // Update descriptor if shape has changed
            if (dims != currentDims) {
                std::vector<int> strides(tensor.strides().cbegin(), tensor.strides().cend());
                if (strides.size() < 4) {
                    strides.resize(4, 1);
                }
                CHECK_CUDNN_STATUS(cudnnSetTensorNdDescriptor(mCudnnTensor,
@@ -208,42 +254,42 @@ public:
                                                              &strides[0]));
            }
        }

        return mCudnnTensor;
    }

    void setRawPtr(void *ptr, NbElts_t length) override final {
        AIDGE_ASSERT(length >= mNbElts, "TensorImpl_cuda<{}>::setRawPtr(): trying to set raw pointer (length: {}) of insufficient capacity (required: {})", typeid(T).name(), length, mNbElts);
        mData = future_std::span<T>(static_cast<T *>(ptr), length);
        mDataOwner.reset();
    };

    virtual ~TensorImpl_cuda() = default;

private:
    void lazyInit() {
        if (mData.size() < mNbElts) {
            // Need more data, a re-allocation will occur
            AIDGE_ASSERT(mData.empty() || mDataOwner != nullptr, "TensorImpl_cuda<{}>: trying to enlarge non-owned data", typeid(T).name());
            mDataOwner.reset(cudaAlloc(mNbElts));
            mData = future_std::span<T>(mDataOwner.get(), mNbElts);
        }
    }
};

template <typename T>
const std::string TensorImpl_cuda<T>::Backend = "cuda";

REGISTRAR(Tensor, {"cuda", DataType::Float64}, Aidge::TensorImpl_cuda<double>::create);
REGISTRAR(Tensor, {"cuda", DataType::Float32}, Aidge::TensorImpl_cuda<float>::create);
REGISTRAR(Tensor, {"cuda", DataType::Float16}, Aidge::TensorImpl_cuda<half_float::half>::create);
REGISTRAR(Tensor, {"cuda", DataType::Int64}, Aidge::TensorImpl_cuda<int64_t>::create);
REGISTRAR(Tensor, {"cuda", DataType::Int32}, Aidge::TensorImpl_cuda<int32_t>::create);
REGISTRAR(Tensor, {"cuda", DataType::Int16}, Aidge::TensorImpl_cuda<int16_t>::create);
REGISTRAR(Tensor, {"cuda", DataType::Int8}, Aidge::TensorImpl_cuda<int8_t>::create);
REGISTRAR(Tensor, {"cuda", DataType::UInt64}, Aidge::TensorImpl_cuda<uint64_t>::create);
REGISTRAR(Tensor, {"cuda", DataType::UInt32}, Aidge::TensorImpl_cuda<uint32_t>::create);
REGISTRAR(Tensor, {"cuda", DataType::UInt16}, Aidge::TensorImpl_cuda<uint16_t>::create);
REGISTRAR(Tensor, {"cuda", DataType::UInt8}, Aidge::TensorImpl_cuda<uint8_t>::create);
}  // namespace Aidge

#endif /* AIDGE_BACKEND_CUDA_DATA_TENSORIMPL_H_ */
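From Python, these REGISTRAR entries are what make the cuda backend selectable for each listed dtype. A small usage sketch, mirroring the benchmark helpers earlier in this diff:

```python
# Sketch: round-trip a tensor through the cuda backend registered above.
import numpy as np
import aidge_core
import aidge_backend_cuda  # importing registers the "cuda" TensorImpl entries

t = aidge_core.Tensor(np.arange(6, dtype=np.float32).reshape(2, 3))
t.set_backend("cuda")  # device copy via TensorImpl_cuda<float>'s copyFromHost
t.set_backend("cpu")   # back to host via copyToHost
print(np.array(t))
```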
/********************************************************************************
 * Copyright (c) 2024 CEA-List
 *
 * This program and the accompanying materials are made available under the
 * terms of the Eclipse Public License 2.0 which is available at
 * http://www.eclipse.org/legal/epl-2.0.
 *
 * SPDX-License-Identifier: EPL-2.0
 *
 ********************************************************************************/

#ifndef AIDGE_BACKEND_CUDA_OPERATOR_ABSIMPL_H_
#define AIDGE_BACKEND_CUDA_OPERATOR_ABSIMPL_H_

#include <array>
#include <memory>
#include <tuple>
#include <vector>

#include <cudnn.h>

#include "aidge/backend/OperatorImpl.hpp"
#include "aidge/operator/Abs.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"

#include "aidge/backend/cuda/utils/CudaUtils.hpp"

namespace Aidge {
// Operator implementation entry point for the backend
class AbsImpl_cuda : public OperatorImpl {
public:
    AbsImpl_cuda(const Abs_Op& op) : OperatorImpl(op, "cuda") {}

    static std::unique_ptr<AbsImpl_cuda> create(const Abs_Op& op) {
        return std::make_unique<AbsImpl_cuda>(op);
    }

    virtual std::vector<ImplSpec> getAvailableImplSpecs() const override {
        return {
            {DataType::Float64},
            {DataType::Float32},
            {DataType::Float16},
        };
    }

    void forward() override;
    void backward() override;

private:
    std::shared_ptr<Tensor> mInputFallback;
    std::shared_ptr<Tensor> mOutputGradFallback;

    template <class T> void forward_(const Tensor& input);
    template <class T> void backward_(const Tensor& input, const Tensor& outputGrad);
};

// Implementation entry point registration to Operator
REGISTRAR(Abs_Op, "cuda", Aidge::AbsImpl_cuda::create);
}  // namespace Aidge

#endif /* AIDGE_BACKEND_CUDA_OPERATOR_ABSIMPL_H_ */
/********************************************************************************
 * Copyright (c) 2024 CEA-List
 *
 * This program and the accompanying materials are made available under the
 * terms of the Eclipse Public License 2.0 which is available at
 * http://www.eclipse.org/legal/epl-2.0.
 *
 * SPDX-License-Identifier: EPL-2.0
 *
 ********************************************************************************/

#ifndef AIDGE_CUDA_OPERATOR_ABSIMPL_KERNELS_H_
#define AIDGE_CUDA_OPERATOR_ABSIMPL_KERNELS_H_

#include <stdexcept>
#include <cfloat>

#include <cuda.h>
#include <cuda_runtime_api.h>
#include <cuda_fp16.h>

#include "aidge/data/Data.hpp"
#include "aidge/backend/cuda/utils/CudaUtils.hpp"
#include "aidge/utils/Types.h"

namespace Aidge {

template <class T>
void absForward(const T* input, T* output, int size);

}

#endif /* AIDGE_CUDA_OPERATOR_ABSIMPL_KERNELS_H_ */