
Compare revisions

Changes are shown as if the source revision was being merged into the target revision.

Commits on Source (193)
Showing with 461 additions and 81 deletions
@@ -4,6 +4,7 @@
# C++ Build
build*/
install*/
include/aidge/backend/cuda_version.h
# VSCode
.vscode
@@ -12,23 +12,46 @@ stages:
- deploy
include:
- project: 'eclipse/aidge/gitlab_shared_files'
ref: 'main'
file:
# choose which jobs to run by including the corresponding files.
- '.gitlab/ci/ubuntu_cpp.gitlab-ci.yml'
- '.gitlab/ci/ubuntu_python.gitlab-ci.yml'
- '.gitlab/ci/release/cibuildwheel_ubuntu.gitlab-ci.yml'
# - '.gitlab/ci/windows_cpp.gitlab-ci.yml'
# - '.gitlab/ci/windows_python.gitlab-ci.yml'
# - '.gitlab/ci/release/cibuildwheel_windows.gitlab-ci.yml'
build:ubuntu_python:
# Use cudnn image instead of the cuda one
image: nvidia/cuda:12.4.1-cudnn-devel-ubuntu22.04
test:ubuntu_python:
# Use cudnn image instead of the cuda one
image: nvidia/cuda:12.4.1-cudnn-devel-ubuntu22.04
coverage:ubuntu_python:
# Use cudnn image instead of the cuda one
image: nvidia/cuda:12.4.1-cudnn-devel-ubuntu22.04
.build:ubuntu_cpp:template:
# Use cudnn image instead of the cuda one
image: nvidia/cuda:12.4.1-cudnn-devel-ubuntu22.04
test:ubuntu_cpp:
# Use cudnn image instead of the cuda one
image: nvidia/cuda:12.4.1-cudnn-devel-ubuntu22.04
coverage:ubuntu_cpp:
# Use cudnn image instead of the cuda one
image: nvidia/cuda:12.4.1-cudnn-devel-ubuntu22.04
release:pip:ubuntu:
tags:
- release:cuda
variables:
DOCKER_HOST: unix:///var/run/docker.sock
@@ -36,19 +59,22 @@ release:pip:ubuntu:
BUILD_WITH_CUDA=1
AIDGE_DEPENDENCIES='aidge_core aidge_backend_cpu'
AIDGE_INSTALL='/AIDGE_INSTALL_CIBUILDWHEEL'
CUDA_TOOLKIT_VERSION='11-8'
DOCKER_HOST='unix:///var/run/docker.sock'
ARCH='x86_64'
CUDNN_VERSION='9'
-CUDA_MAJOR_VERSION='11'
+CUDA_MAJOR_VERSION='12'
CUDA_MINOR_VERSION='8'
-SEARCH_PATH='/home/ubuntu/builds/$CI_RUNNER_SHORT_TOKEN/$CI_CONCURRENT_ID'
+SEARCH_PATH='/home/gitlab-runner/builds/$CI_RUNNER_SHORT_TOKEN/$CI_CONCURRENT_ID'
CIBW_REPAIR_WHEEL_COMMAND='auditwheel --verbose repair {wheel} -w {dest_dir} --exclude libcudart.so.12 --exclude libcudnn.so.9 --exclude libcublas.so.12 --exclude libcublasLt.so.12'
parallel:
matrix:
- CIBW_BUILD: "cp38-manylinux_x86_64"
- CIBW_BUILD: "cp39-manylinux_x86_64"
# - CIBW_BUILD: "cp38-manylinux_x86_64"
# - CIBW_BUILD: "cp39-manylinux_x86_64"
- CIBW_BUILD: "cp310-manylinux_x86_64"
- CIBW_BUILD: "cp311-manylinux_x86_64"
- CIBW_BUILD: "cp312-manylinux_x86_64"
- CIBW_BUILD: "cp313-manylinux_x86_64"
before_script:
# retrieve aidge dependencies
@@ -56,9 +82,11 @@ release:pip:ubuntu:
- !reference [.ubuntu:download:repositories, before_script] # located in common.gitlab-ci.yml
script:
-- /home/ubuntu/.local/bin/cibuildwheel --output-dir wheelhouse
+# - /home/ubuntu/.local/bin/cibuildwheel --output-dir wheelhouse
+- cibuildwheel --output-dir wheelhouse
-after_script:
-# Ensure all files are owned by the correct user at the end of the job
-- sudo chown -R $(whoami):$(whoami) .
+# after_script:
+# # Ensure all files are owned by the correct user at the end of the job
+# # Note: sudo requires the job to have privileged = true
+# - sudo chown -R $(whoami):$(whoami) .
#!/bin/bash
set -e
if [[ "$1" == "" ]]; then
if [[ "$1" == "" ]]; then
echo "build aidge deps in cibuildwheel container before building wheel."
echo "search path defines where the dependencies will be searched."
echo "Hint : In wheel containers, files are mounted on /host by default."
@@ -10,13 +10,14 @@ set -x
if [[ $AIDGE_DEPENDENCIES == "" ]]; then # case for aidge_core
mkdir -p build # create build/ if it is not already there to hold the C++ build
rm -rf build/* # build from scratch
else
for repo in $AIDGE_DEPENDENCIES ; do # case for other projects
search_path=$1
REPO_PATH=$(find $search_path ! -writable -prune -o -type d \
-name "$repo" \
-not -path "*/install/*" \
-not -path "*/.git/*" \
-not -path "*/.mypy_cache/*" \
-not -path "*/miniconda/*" \
-not -path "*/conda/*" \
-not -path "*/.local/*" \
@@ -24,7 +25,7 @@ else
-not -path "*/$repo/$repo/*" \
-not -path "*/proc/*" \
-print -quit)
if [[ -z "$REPO_PATH" ]]; then
echo "ERROR : dependency $repo not found in search_path \"$search_path\". ABORTING."
exit -1
fi
@@ -33,6 +34,10 @@ else
mkdir -p build # create build/ if it is not already there to hold the C++ build
rm -rf build/* # build from scratch
pip install . -v
# Give all rights on the generated build folder to avoid root-permission issues once out of the Docker container
chmod -R a+rwX build/
chmod -R a+rwX *.egg-info/
cd -
done
fi
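For context, a hypothetical invocation of this dependency-build helper inside a cibuildwheel container follows; the script name and mount point are illustrative assumptions, not taken from this diff:

# Hypothetical usage sketch: the first argument is the search path for dependency checkouts.
# Inside cibuildwheel containers the host filesystem is mounted under /host by default,
# so a runner build directory would be reachable as e.g. /host/home/gitlab-runner/builds.
bash build_deps_before_wheel.sh /host/home/gitlab-runner/builds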
# Version 0.5.0 (January 31, 2025)
# Version 0.4.0 (December 6, 2024)
# Version 0.1.0 (January 23, 2024)
Initial release
# CMake >= 3.18 is required for good support of FindCUDAToolkit
cmake_minimum_required(VERSION 3.18)
-set(CXX_STANDARD 14)
+set(CMAKE_CXX_STANDARD 14)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)
file(STRINGS "${CMAKE_SOURCE_DIR}/version.txt" version)
# Parse version.txt to retrieve Major, Minor and Patch
string(REGEX MATCH "([0-9]+)\\.([0-9]+)\\.([0-9]+)" _ MATCHES ${version})
set(PROJECT_VERSION_MAJOR ${CMAKE_MATCH_1})
set(PROJECT_VERSION_MINOR ${CMAKE_MATCH_2})
set(PROJECT_VERSION_PATCH ${CMAKE_MATCH_3})
project(aidge_backend_cuda
VERSION ${version}
DESCRIPTION "CUDA implementations of the operators of the Aidge framework."
@@ -21,18 +30,17 @@ execute_process(
)
message(STATUS "Latest git commit: ${GIT_COMMIT_HASH}")
# Define a preprocessor macro with the Git commit version
add_definitions(-DGIT_COMMIT_HASH="${GIT_COMMIT_HASH}")
# Note : project name is ${CMAKE_PROJECT_NAME} and python module name is also ${CMAKE_PROJECT_NAME}
set(module_name _${CMAKE_PROJECT_NAME}) # target name
##############################################
# Define options
-option(PYBIND "python binding" OFF)
-option(WERROR "Warning as error" OFF)
-option(TEST "Enable tests" ON)
-option(COVERAGE "Enable coverage" OFF)
-option(ENABLE_ASAN "Enable ASan (AddressSanitizer) for runtime analysis of memory use (over/underflow, memory leak, ...)" OFF)
+option(PYBIND "python binding (Default: OFF)" OFF)
+option(WERROR "Warning as error (Default: OFF)" OFF)
+option(TEST "Enable tests (Default: ON)" ON)
+option(COVERAGE "Enable coverage (Default: OFF)" OFF)
+option(ENABLE_ASAN "Enable ASan (AddressSanitizer) for runtime analysis of memory use (over/underflow, memory leak, ...) (Default: OFF)" OFF)
##############################################
# Import utils CMakeLists
@@ -60,14 +68,14 @@ endif()
##########
# CUDA
-if(NOT $ENV{AIDGE_INSTALL} STREQUAL "")
+if(NOT $ENV{CIBUILDWHEEL} STREQUAL "")
message(WARNING "Env var CIBUILDWHEEL detected : currently building for a release job."
"\nSetting manually CUDACXX, PATH & LD_LIBRARY_PATH Variables")
list(APPEND ENV{LD_LIBRARY_PATH} /usr/local/cuda/lib64)
list(APPEND ENV{PATH} /usr/local/cuda/bin)
set(ENV{CUDACXX} /usr/local/cuda/bin/nvcc)
endif()
-find_package(CUDAToolkit REQUIRED)
+find_package(CUDAToolkit 12 REQUIRED)
if(NOT DEFINED CMAKE_CUDA_STANDARD)
set(CMAKE_CUDA_STANDARD 14)
set(CMAKE_CUDA_STANDARD_REQUIRED ON)
@@ -97,7 +105,7 @@ if (PYBIND)
# Handles Python + pybind11 headers dependencies
include(PybindModuleCreation)
# creates a target of the same name as CMAKE_PROJECT_NAME
generate_python_binding(${CMAKE_PROJECT_NAME} ${module_name}) # the python bindings module has the same name as the project.
target_link_libraries(${module_name}
PUBLIC
@@ -107,12 +115,21 @@
)
endif()
message(STATUS "Creating ${CMAKE_CURRENT_SOURCE_DIR}/include/aidge/backend/cuda_version.h")
# Generate version.h file from config file version.h.in
configure_file(
"${CMAKE_CURRENT_SOURCE_DIR}/include/aidge/backend/version.h.in"
"${CMAKE_CURRENT_SOURCE_DIR}/include/aidge/backend/cuda_version.h"
)
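The version.h.in template itself is not part of this diff; a minimal, hypothetical sketch of such a template (macro names assumed, not confirmed by the source) shows how configure_file() substitutes the version values parsed from version.txt:

// Hypothetical sketch of include/aidge/backend/version.h.in.
// configure_file() replaces each @...@ placeholder with the corresponding CMake variable.
#ifndef AIDGE_BACKEND_CUDA_VERSION_H_
#define AIDGE_BACKEND_CUDA_VERSION_H_
#define AIDGE_BACKEND_CUDA_VERSION "@PROJECT_VERSION@"
#define AIDGE_BACKEND_CUDA_VERSION_MAJOR @PROJECT_VERSION_MAJOR@
#define AIDGE_BACKEND_CUDA_VERSION_MINOR @PROJECT_VERSION_MINOR@
#define AIDGE_BACKEND_CUDA_VERSION_PATCH @PROJECT_VERSION_PATCH@
#endif /* AIDGE_BACKEND_CUDA_VERSION_H_ */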
target_link_libraries(${module_name}
PUBLIC
_aidge_core # _ is added because we link the target not the project
CUDA::cudart
PRIVATE
CUDA::cublas
cudnn
CUDA::cudart
)
if( ${ENABLE_ASAN} )
@@ -8,6 +8,8 @@ You can find in this folder the library that implements the CUDA operators.
## Installation
### Dependencies
- [CUDA Toolkit 12.4](https://developer.nvidia.com/cuda-12-4-0-download-archive)
- [cuDNN 9](https://developer.nvidia.com/cudnn-downloads) (make sure to install the CUDA 12 compatible version)
- `GCC`
- `Make`/`Ninja`
- `CMake`
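With these dependencies installed, a standard out-of-source build could look like the following sketch (default options assumed, not taken from this changeset; adapt generator and prefix to your setup):

# Sketch of a typical CMake build, assuming CUDA 12.4 and cuDNN 9 are installed system-wide.
cmake -B build -DCMAKE_BUILD_TYPE=Release
cmake --build build -j
cmake --install build --prefix install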
from aidge_backend_cuda.aidge_backend_cuda import * # import the shared object generated by PyBind
from ._version import *
from . import benchmark
import time
import numpy as np
import aidge_core
import aidge_backend_cuda
def measure_inference_time(model: aidge_core.GraphView, input_data: list[tuple[str, np.ndarray]], nb_warmup: int = 10, nb_iterations: int = 50) -> list[float]:
# update model and inputs backend
model.set_backend("cuda")
ordered_inputs = [aidge_core.Tensor(i[1]) for i in input_data]
for ordered_input in ordered_inputs:
ordered_input.set_backend("cuda")
scheduler = aidge_core.SequentialScheduler(model)
scheduler.generate_scheduling()
timings = []
# Warm-up runs followed by timed iterations.
for i in range(nb_warmup + nb_iterations):
if i < nb_warmup:
scheduler.forward(forward_dims=False, data=ordered_inputs)
else:
start = time.process_time()
scheduler.forward(forward_dims=False, data=ordered_inputs)
end = time.process_time()
timings.append((end - start))
return timings
def compute_output(model: aidge_core.GraphView, input_data: list[tuple[str, np.ndarray]]) -> list[np.ndarray]:
# update model and inputs backend
model.set_backend("cuda", device = 1)
ordered_inputs = [aidge_core.Tensor(i[1]) for i in input_data]
for ordered_input in ordered_inputs:
ordered_input.set_backend("cuda", device = 1)
scheduler = aidge_core.SequentialScheduler(model)
scheduler.generate_scheduling()
scheduler.forward(forward_dims=False, data=ordered_inputs)
outs = []
for pair in model.get_ordered_outputs():
t = pair[0].get_operator().get_output(pair[1])
t.set_backend("cpu")
outs.append(t)
return [np.array(out) for out in outs]
\ No newline at end of file
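A hypothetical usage sketch for this benchmark module follows; the model construction is elided because it depends on how the GraphView is obtained (e.g. from an ONNX import), and the input name and shape are illustrative assumptions:

# Hypothetical usage sketch of the helpers defined above.
import numpy as np
from aidge_backend_cuda import benchmark

model = ...  # an aidge_core.GraphView obtained elsewhere, e.g. from an ONNX import
# Each entry pairs an input name with a NumPy array.
input_data = [("input0", np.random.rand(1, 3, 224, 224).astype(np.float32))]
timings = benchmark.measure_inference_time(model, input_data, nb_warmup=10, nb_iterations=50)
print(f"median inference time: {sorted(timings)[len(timings) // 2] * 1e3:.3f} ms")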
function(generate_python_binding pybind_module_name target_to_bind)
add_definitions(-DPYBIND)
Include(FetchContent)
-set(PYBIND_VERSION v2.10.4)
+set(PYBIND_VERSION v2.13.6)
set(PYBIND11_FINDPYTHON ON)
message(STATUS "Retrieving pybind ${PYBIND_VERSION} from git")
@@ -20,6 +20,18 @@ function(generate_python_binding pybind_module_name target_to_bind)
file(GLOB_RECURSE pybind_src_files "python_binding/*.cpp")
pybind11_add_module(${pybind_module_name} MODULE ${pybind_src_files} "NO_EXTRAS") # NO_EXTRAS required for pip install
-target_include_directories(${pybind_module_name} PUBLIC "python_binding")
-target_link_libraries(${pybind_module_name} PUBLIC ${target_to_bind})
+target_include_directories(${pybind_module_name} PRIVATE "python_binding")
+target_link_libraries(${pybind_module_name}
+    PRIVATE
+        ${target_to_bind}
+        CUDA::cublas
+        cudnn
+        CUDA::cudart
+)
set_property(TARGET ${pybind_module_name} PROPERTY POSITION_INDEPENDENT_CODE ON)
set_target_properties(${pybind_module_name} PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
set_target_properties(${pybind_module_name} PROPERTIES
    LINK_FLAGS "-Wl,--exclude-libs,ALL"
)
set_target_properties(${pybind_module_name} PROPERTIES INSTALL_RPATH "")
endfunction()
@@ -11,35 +11,43 @@
#ifndef AIDGE_BACKEND_CUDA_IMPORTS_H_
#define AIDGE_BACKEND_CUDA_IMPORTS_H_
#include "aidge/backend/cuda_version.h"
#include "aidge/backend/cuda/data/TensorImpl.hpp"
#include "aidge/backend/cuda/operator/OperatorImpl.hpp"
#include "aidge/backend/cuda/operator/AbsImpl.hpp"
#include "aidge/backend/cuda/operator/AddImpl.hpp"
#include "aidge/backend/cuda/operator/AndImpl.hpp"
#include "aidge/backend/cuda/operator/ArgMaxImpl.hpp"
#include "aidge/backend/cuda/operator/AvgPoolingImpl.hpp"
#include "aidge/backend/cuda/operator/BatchNormImpl.hpp"
#include "aidge/backend/cuda/operator/BitShiftImpl.hpp"
#include "aidge/backend/cuda/operator/ConvImpl.hpp"
#include "aidge/backend/cuda/operator/ClipImpl.hpp"
#include "aidge/backend/cuda/operator/DivImpl.hpp"
#include "aidge/backend/cuda/operator/EqualImpl.hpp"
#include "aidge/backend/cuda/operator/ErfImpl.hpp"
#include "aidge/backend/cuda/operator/FCImpl.hpp"
#include "aidge/backend/cuda/operator/GlobalAveragePoolingImpl.hpp"
#include "aidge/backend/cuda/operator/ILayerNormImpl.hpp"
#include "aidge/backend/cuda/operator/LRNImpl.hpp"
#include "aidge/backend/cuda/operator/LnImpl.hpp"
#include "aidge/backend/cuda/operator/MaxPoolingImpl.hpp"
#include "aidge/backend/cuda/operator/MatMulImpl.hpp"
#include "aidge/backend/cuda/operator/MulImpl.hpp"
#include "aidge/backend/cuda/operator/PadImpl.hpp"
#include "aidge/backend/cuda/operator/PowImpl.hpp"
#include "aidge/backend/cuda/operator/ReduceMeanImpl.hpp"
#include "aidge/backend/cuda/operator/ReduceSumImpl.hpp"
#include "aidge/backend/cuda/operator/ReLUImpl.hpp"
#include "aidge/backend/cuda/operator/ShiftMaxImpl.hpp"
#include "aidge/backend/cuda/operator/RoundImpl.hpp"
#include "aidge/backend/cuda/operator/ShiftGELUImpl.hpp"
#include "aidge/backend/cuda/operator/ReshapeImpl.hpp"
#include "aidge/backend/cuda/operator/ShiftMaxImpl.hpp"
#include "aidge/backend/cuda/operator/SigmoidImpl.hpp"
#include "aidge/backend/cuda/operator/SoftmaxImpl.hpp"
#include "aidge/backend/cuda/operator/SqrtImpl.hpp"
#include "aidge/backend/cuda/operator/SubImpl.hpp"
#include "aidge/backend/cuda/operator/TanhImpl.hpp"
#include "aidge/backend/cuda/operator/ShiftMaxImpl.hpp"
#include "aidge/backend/cuda/operator/ShiftGELUImpl.hpp"
#include "aidge/backend/cuda/operator/ILayerNormImpl.hpp"
#endif /* AIDGE_BACKEND_CUDA_IMPORTS_H_ */
@@ -4,6 +4,10 @@
#include <cstddef> // std::size_t
#include <memory>
#include <string>
#include <vector>
#include <cuda.h>
#include <type_traits> // std::enable_if, std::is_same
#include "aidge/backend/TensorImpl.hpp"
#include "aidge/data/Tensor.hpp"
@@ -17,14 +21,9 @@
namespace Aidge {
template <typename SRC_T, typename DST_T>
-void thrust_copy(const SRC_T* /*srcData*/, DST_T* /*dstData*/, size_t /*size*/);
-template <typename SRC_T, typename std::enable_if<!std::is_same<half_float::half, SRC_T>::value>::type* = nullptr>
-void thrust_copy(const SRC_T* srcData, half_float::half* dstData, size_t size);
-template <typename DST_T, typename std::enable_if<!std::is_same<half_float::half, DST_T>::value>::type* = nullptr>
-void thrust_copy(const half_float::half* srcData, DST_T* dstData, size_t size);
-template <>
-void thrust_copy(const half_float::half* srcData, half_float::half* dstData, size_t size);
+void thrust_copy(const SRC_T* srcData, DST_T* dstData, size_t size);
/**
* @brief Abstract class for the TensorImpl_cuda class template.
@@ -94,7 +93,7 @@ public:
}
void copy(const void *src, NbElts_t length, NbElts_t offset = 0) override {
-AIDGE_ASSERT(length <= mData.size() || length <= mNbElts, "TensorImpl_cuda<{}>::copy(): copy length ({}) is above capacity ({})", typeid(T).name(), length, mNbElts);
+AIDGE_ASSERT(offset + length <= mNbElts, "TensorImpl_cuda<{}>::copy(): copy offset ({}) + length ({}) is above capacity ({})", typeid(T).name(), offset, length, mNbElts);
const T* srcT = static_cast<const T *>(src);
T* dstT = static_cast<T *>(rawPtr(offset));
@@ -107,62 +106,62 @@ public:
return;
}
-AIDGE_ASSERT(length <= mData.size() || length <= mNbElts, "TensorImpl_cuda<{}>::copyCast(): copy length ({}) is above capacity ({})", typeid(T).name(), length, mNbElts);
+AIDGE_ASSERT(offset + length <= mNbElts, "TensorImpl_cuda<{}>::copyCast(): copy offset ({}) + length ({}) is above capacity ({})", typeid(T).name(), offset, length, mNbElts);
switch (srcDt) {
case DataType::Float64:
thrust_copy(static_cast<const double*>(src),
static_cast<T*>(rawPtr(offset)),
-length);
+static_cast<size_t>(length));
break;
case DataType::Float32:
thrust_copy(static_cast<const float*>(src),
static_cast<T*>(rawPtr(offset)),
-length);
+static_cast<size_t>(length));
break;
case DataType::Float16:
thrust_copy(static_cast<const half_float::half*>(src),
static_cast<T*>(rawPtr(offset)),
-length);
+static_cast<size_t>(length));
break;
case DataType::Int64:
thrust_copy(static_cast<const int64_t*>(src),
static_cast<T*>(rawPtr(offset)),
-length);
+static_cast<size_t>(length));
break;
case DataType::UInt64:
thrust_copy(static_cast<const uint64_t*>(src),
static_cast<T*>(rawPtr(offset)),
-length);
+static_cast<size_t>(length));
break;
case DataType::Int32:
thrust_copy(static_cast<const int32_t*>(src),
static_cast<T*>(rawPtr(offset)),
-length);
+static_cast<size_t>(length));
break;
case DataType::UInt32:
thrust_copy(static_cast<const uint32_t*>(src),
static_cast<T*>(rawPtr(offset)),
-length);
+static_cast<size_t>(length));
break;
case DataType::Int16:
thrust_copy(static_cast<const int16_t*>(src),
static_cast<T*>(rawPtr(offset)),
-length);
+static_cast<size_t>(length));
break;
case DataType::UInt16:
thrust_copy(static_cast<const uint16_t*>(src),
static_cast<T*>(rawPtr(offset)),
-length);
+static_cast<size_t>(length));
break;
case DataType::Int8:
thrust_copy(static_cast<const int8_t*>(src),
static_cast<T*>(rawPtr(offset)),
-length);
+static_cast<size_t>(length));
break;
case DataType::UInt8:
thrust_copy(static_cast<const uint8_t*>(src),
static_cast<T*>(rawPtr(offset)),
-length);
+static_cast<size_t>(length));
break;
default:
AIDGE_THROW_OR_ABORT(std::runtime_error, "TensorImpl_cuda<{}>::copyCast(): unsupported data type {}.", typeid(T).name(), srcDt);
@@ -171,21 +170,22 @@ public:
}
void copyFromDevice(const void *src, const std::pair<std::string, DeviceIdx_t>& device, NbElts_t length, NbElts_t offset = 0) override {
-AIDGE_ASSERT(length <= mData.size() || length <= mNbElts, "TensorImpl_cuda<{}>::copyFromDevice(): copy length ({}) is above capacity ({})", typeid(T).name(), length, mNbElts);
+AIDGE_ASSERT(offset + length <= mNbElts, "TensorImpl_cuda<{}>::copyFromDevice(): copy offset ({}) + length ({}) is above capacity ({})", typeid(T).name(), offset, length, mNbElts);
CHECK_CUDA_STATUS(cudaMemcpy(rawPtr(offset), src, length * sizeof(T), cudaMemcpyDeviceToDevice));
}
void copyFromHost(const void *src, NbElts_t length, NbElts_t offset = 0) override {
-AIDGE_ASSERT(length <= mData.size() || length <= mNbElts, "TensorImpl_cuda<{}>::copyFromHost(): copy length ({}) is above capacity ({})", typeid(T).name(), length, mNbElts);
+AIDGE_ASSERT(offset + length <= mNbElts, "TensorImpl_cuda<{}>::copyFromHost(): copy offset ({}) + length ({}) is above capacity ({})", typeid(T).name(), offset, length, mNbElts);
CHECK_CUDA_STATUS(cudaMemcpy(rawPtr(offset), src, length * sizeof(T), cudaMemcpyHostToDevice));
}
void copyToHost(void *dst, NbElts_t length, NbElts_t offset = 0) const override {
-AIDGE_ASSERT(length <= mData.size() || length <= mNbElts, "TensorImpl_cuda<{}>::copyToHost(): copy length ({}) is above capacity ({})", typeid(T).name(), length, mNbElts);
+AIDGE_ASSERT(offset + length <= mNbElts, "TensorImpl_cuda<{}>::copyToHost(): copy offset ({}) + length ({}) is above capacity ({})", typeid(T).name(), offset, length, mNbElts);
CHECK_CUDA_STATUS(cudaMemcpy(dst, rawPtr(offset), length * sizeof(T), cudaMemcpyDeviceToHost));
}
void *rawPtr(NbElts_t offset = 0) override {
cudaSetDevice(mDevice);
lazyInit();
return (mData.data() + offset);
};
@@ -279,16 +279,17 @@ private:
template <typename T>
const std::string TensorImpl_cuda<T>::Backend = "cuda";
-namespace {
-static Registrar<Tensor> registrarTensorImpl_cuda_Float64(
-    {"cuda", DataType::Float64}, Aidge::TensorImpl_cuda<double>::create);
-static Registrar<Tensor> registrarTensorImpl_cuda_Float32(
-    {"cuda", DataType::Float32}, Aidge::TensorImpl_cuda<float>::create);
-static Registrar<Tensor> registrarTensorImpl_cuda_Float16(
-    {"cuda", DataType::Float16}, Aidge::TensorImpl_cuda<half_float::half>::create);
-static Registrar<Tensor> registrarTensorImpl_cuda_Int32(
-    {"cuda", DataType::Int32}, Aidge::TensorImpl_cuda<int32_t>::create);
-} // namespace
+REGISTRAR(Tensor, {"cuda", DataType::Float64}, Aidge::TensorImpl_cuda<double>::create);
+REGISTRAR(Tensor, {"cuda", DataType::Float32}, Aidge::TensorImpl_cuda<float>::create);
+REGISTRAR(Tensor, {"cuda", DataType::Float16}, Aidge::TensorImpl_cuda<half_float::half>::create);
+REGISTRAR(Tensor, {"cuda", DataType::Int64}, Aidge::TensorImpl_cuda<int64_t>::create);
+REGISTRAR(Tensor, {"cuda", DataType::Int32}, Aidge::TensorImpl_cuda<int32_t>::create);
+REGISTRAR(Tensor, {"cuda", DataType::Int16}, Aidge::TensorImpl_cuda<int16_t>::create);
+REGISTRAR(Tensor, {"cuda", DataType::Int8}, Aidge::TensorImpl_cuda<int8_t>::create);
+REGISTRAR(Tensor, {"cuda", DataType::UInt64}, Aidge::TensorImpl_cuda<uint64_t>::create);
+REGISTRAR(Tensor, {"cuda", DataType::UInt32}, Aidge::TensorImpl_cuda<uint32_t>::create);
+REGISTRAR(Tensor, {"cuda", DataType::UInt16}, Aidge::TensorImpl_cuda<uint16_t>::create);
+REGISTRAR(Tensor, {"cuda", DataType::UInt8}, Aidge::TensorImpl_cuda<uint8_t>::create);
} // namespace Aidge
#endif /* AIDGE_BACKEND_CUDA_DATA_TENSORIMPL_H_ */
/********************************************************************************
* Copyright (c) 2024 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_BACKEND_CUDA_OPERATOR_ABSIMPL_H_
#define AIDGE_BACKEND_CUDA_OPERATOR_ABSIMPL_H_
#include <array>
#include <memory>
#include <tuple>
#include <vector>
#include <cudnn.h>
#include "aidge/backend/OperatorImpl.hpp"
#include "aidge/operator/Abs.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
#include "aidge/backend/cuda/utils/CudaUtils.hpp"
namespace Aidge {
// Operator implementation entry point for the backend
class AbsImpl_cuda : public OperatorImpl {
public:
AbsImpl_cuda(const Abs_Op& op) : OperatorImpl(op, "cuda") {}
static std::unique_ptr<AbsImpl_cuda> create(const Abs_Op& op) {
return std::make_unique<AbsImpl_cuda>(op);
}
virtual std::vector<ImplSpec> getAvailableImplSpecs() const override {
return {
{DataType::Float64},
{DataType::Float32},
{DataType::Float16},
};
}
void forward() override;
private:
std::shared_ptr<Tensor> mInputFallback;
std::shared_ptr<Tensor> mOutputGradFallback;
template <class T> void forward_(const Tensor& input);
};
// Implementation entry point registration to Operator
REGISTRAR(Abs_Op, "cuda", Aidge::AbsImpl_cuda::create);
} // namespace Aidge
#endif /* AIDGE_BACKEND_CUDA_OPERATOR_ABSIMPL_H_ */
/********************************************************************************
* Copyright (c) 2024 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CUDA_OPERATOR_ABSIMPL_KERNELS_H_
#define AIDGE_CUDA_OPERATOR_ABSIMPL_KERNELS_H_
#include <stdexcept>
#include <cfloat>
#include <cuda.h>
#include <cuda_runtime_api.h>
#include <cuda_fp16.h>
#include "aidge/data/Data.hpp"
#include "aidge/backend/cuda/utils/CudaUtils.hpp"
#include "aidge/utils/Types.h"
namespace Aidge {
template <class T>
void absForward(const T* input, T* output, int size);
}
#endif /* AIDGE_CUDA_OPERATOR_ABSIMPL_KERNELS_H_ */
@@ -36,7 +36,7 @@ public:
return std::make_unique<AddImpl_cuda>(op);
}
-virtual std::set<ImplSpec> getAvailableImplSpecs() const override {
+virtual std::vector<ImplSpec> getAvailableImplSpecs() const override {
return {
{DataType::Float64},
{DataType::Float32},
@@ -46,10 +46,18 @@ public:
void forward() override;
void backward() override;
~AddImpl_cuda();
private:
template <class T> void forward_(const std::vector<Tensor>& inputs, const std::vector<std::vector<int>>& inputsDims, const std::vector<std::vector<int>>& inputsStrides);
template <class T> void backward_(const Tensor& outGrad, const std::vector<std::vector<int>>& inputsDims, const std::vector<std::vector<int>>& inputsStrides);
std::vector<cudnnTensorDescriptor_t> mTensorDesc;
cudnnReduceTensorDescriptor_t mBwdReduceDesc = nullptr;
size_t mBwdWorkspaceSize = 0;
void* mBwdWorkspace = nullptr;
std::vector<std::shared_ptr<Tensor>> mInputFallbacks;
std::shared_ptr<Tensor> mOutputGradFallback;
template <class T> void forward_(const std::vector<std::reference_wrapper<Tensor>>& inputs);
template <class T> void backward_(const Tensor& outGrad);
};
// Implementation entry point registration to Operator
@@ -36,7 +36,7 @@ public:
return std::make_unique<ArgMaxImpl_cuda>(op);
}
-virtual std::set<ImplSpec> getAvailableImplSpecs() const override {
+virtual std::vector<ImplSpec> getAvailableImplSpecs() const override {
return {
{DataType::Float64},
{DataType::Float32},
@@ -37,7 +37,7 @@ public:
return std::make_unique<AvgPoolingImpl_cuda>(op);
}
-virtual std::set<ImplSpec> getAvailableImplSpecs() const override {
+virtual std::vector<ImplSpec> getAvailableImplSpecs() const override {
return {
{DataType::Float64},
{DataType::Float32},
@@ -37,7 +37,7 @@ public:
return std::make_unique<BatchNormImpl_cuda>(op);
}
-virtual std::set<ImplSpec> getAvailableImplSpecs() const override {
+virtual std::vector<ImplSpec> getAvailableImplSpecs() const override {
return {
{DataType::Float64},
{DataType::Float32},
/********************************************************************************
* Copyright (c) 2024 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_BACKEND_CUDA_OPERATOR_BITSHIFTIMPL_H_
#define AIDGE_BACKEND_CUDA_OPERATOR_BITSHIFTIMPL_H_
#include <array>
#include <memory>
#include <tuple>
#include <vector>
#include <cudnn.h>
#include "aidge/backend/OperatorImpl.hpp"
#include "aidge/operator/BitShift.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
#include "aidge/backend/cuda/utils/CudaUtils.hpp"
namespace Aidge {
// Operator implementation entry point for the backend
class BitShiftImpl_cuda : public OperatorImpl {
public:
BitShiftImpl_cuda(const BitShift_Op& op) : OperatorImpl(op, "cuda") {}
static std::unique_ptr<BitShiftImpl_cuda> create(const BitShift_Op& op) {
return std::make_unique<BitShiftImpl_cuda>(op);
}
virtual std::vector<ImplSpec> getAvailableImplSpecs() const override {
return {
{DataType::Int32},
{DataType::Int64}
};
}
void forward() override;
private:
template <class T> void forward_(const std::vector<Tensor>& inputs, const std::vector<std::vector<int>>& inputsDims, const std::vector<std::vector<int>>& inputsStrides);
};
// Implementation entry point registration to Operator
REGISTRAR(BitShift_Op, "cuda", Aidge::BitShiftImpl_cuda::create);
} // namespace Aidge
#endif /* AIDGE_BACKEND_CUDA_OPERATOR_BITSHIFTIMPL_H_ */
/********************************************************************************
* Copyright (c) 2024 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CUDA_OPERATOR_BITSHIFTIMPL_KERNELS_H_
#define AIDGE_CUDA_OPERATOR_BITSHIFTIMPL_KERNELS_H_
#include <stdexcept>
#include <cfloat>
#include <cuda.h>
#include <cuda_runtime_api.h>
#include <cuda_fp16.h>
#include "aidge/data/Data.hpp"
#include "aidge/operator/BitShift.hpp"
#include "aidge/backend/cuda/utils/CudaUtils.hpp"
#include "aidge/utils/Types.h"
namespace Aidge {
template <class T>
void bitShiftForward(const BitShift_Op::BitShiftDirection direction,
const bool rounding,
const T* input1, T* output, const T* input2,
const std::vector<int>& input1Dims, const std::vector<int>& input2Dims, const std::vector<int>& outputDims,
const std::vector<int>& input1Strides, const std::vector<int>& input2Strides, const std::vector<int>& outputStrides,
int outSize);
}
#endif /* AIDGE_CUDA_OPERATOR_BITSHIFTIMPL_KERNELS_H_ */
/********************************************************************************
* Copyright (c) 2024 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_BACKEND_CUDA_OPERATOR_CLIPIMPL_H_
#define AIDGE_BACKEND_CUDA_OPERATOR_CLIPIMPL_H_
#include <array>
#include <memory>
#include <tuple>
#include <vector>
#include <cudnn.h>
#include "aidge/backend/OperatorImpl.hpp"
#include "aidge/operator/Clip.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
#include "aidge/backend/cuda/utils/CudaUtils.hpp"
namespace Aidge {
// Operator implementation entry point for the backend
class ClipImpl_cuda : public OperatorImpl {
public:
ClipImpl_cuda(const Clip_Op& op) : OperatorImpl(op, "cuda") {}
static std::unique_ptr<ClipImpl_cuda> create(const Clip_Op& op) {
return std::make_unique<ClipImpl_cuda>(op);
}
virtual std::vector<ImplSpec> getAvailableImplSpecs() const override {
return {
{DataType::Float64},
{DataType::Float32},
{DataType::Float16},
};
}
void forward() override;
private:
template <class T> void forward_();
};
// Implementation entry point registration to Operator
REGISTRAR(Clip_Op, "cuda", Aidge::ClipImpl_cuda::create);
} // namespace Aidge
#endif /* AIDGE_BACKEND_CUDA_OPERATOR_CLIPIMPL_H_ */