From e3d5cc509d06518e233b227e56fb3fc20b36419f Mon Sep 17 00:00:00 2001
From: Olivier BICHLER <olivier.bichler@cea.fr>
Date: Wed, 13 Sep 2023 18:32:30 +0200
Subject: [PATCH] Fixed CUDA compilation

---
 CMakeLists.txt                    | 62 +++++++++++++------------------
 cmake/PybindModuleCreation.cmake  | 34 ++++++++---------
 include/aidge/data/TensorImpl.hpp | 14 +++++--
 include/aidge/utils/CudaUtils.hpp |  1 +
 unit_tests/CMakeLists.txt         |  7 +---
 unit_tests/Test_TensorImpl.cpp    | 48 ++++++++++++++++++++++++
 unit_tests/Test_cuda.cu           | 12 ++++++
 unit_tests/Test_cuda.hpp          |  6 +++
 8 files changed, 120 insertions(+), 64 deletions(-)
 create mode 100644 unit_tests/Test_cuda.cu
 create mode 100644 unit_tests/Test_cuda.hpp

diff --git a/CMakeLists.txt b/CMakeLists.txt
index a52f230..cf4866d 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,4 +1,5 @@
-cmake_minimum_required(VERSION 3.15)
+# CMake >= 3.18 is required for good support of FindCUDAToolkit
+cmake_minimum_required(VERSION 3.18)
 
 file(READ "${CMAKE_SOURCE_DIR}/version.txt" version)
 file(READ "${CMAKE_SOURCE_DIR}/project_name.txt" project)
@@ -12,8 +13,6 @@ set(module_name _${project}) # target name
 
 project(${project})
 
-enable_language(CUDA)
-
 ##############################################
 # Define options
 option(PYBIND "python binding" ON)
@@ -26,18 +25,18 @@ option(COVERAGE "Enable coverage" OFF)
 
 set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake")
 include(PybindModuleCreation)
 
-#if(CMAKE_COMPILER_IS_GNUCXX AND COVERAGE)
-#    Include(CodeCoverage)
-#endif()
+if(CMAKE_COMPILER_IS_GNUCXX AND COVERAGE)
+    Include(CodeCoverage)
+endif()
+
+enable_language(CUDA)
 
 ##############################################
 # Find system dependencies
-find_library(CUDART_LIBRARY cudart ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES})
-message(STATUS "CUDA Toolkit include DIRS: ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}")
+find_package(CUDAToolkit REQUIRED)
 find_package(aidge_core REQUIRED)
 find_package(aidge_backend_cpu REQUIRED)
-# TODO: add here additional Aidge dependencies if you need to.
 
 ##############################################
 # Create target and set properties
@@ -50,22 +49,26 @@ target_link_libraries(${module_name}
     PUBLIC
         _aidge_core # _ is added because we link the target not the project
         _aidge_backend_cpu # _ is added because we link the target not the project
-        # TODO: add here additional Aidge dependencies if you need to.
-        ${CUDART_LIBRARY}
+        CUDA::cudart
 )
 
 #Set target properties
-set_property(TARGET ${module_name} PROPERTY POSITION_INDEPENDENT_CODE ON)
-set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --std=c++14")
 target_include_directories(${module_name}
     PUBLIC
         $<INSTALL_INTERFACE:include>
         $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
     PRIVATE
         ${CMAKE_CURRENT_SOURCE_DIR}/src
-        ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}
 )
 
+if(NOT DEFINED CMAKE_CUDA_STANDARD)
+    set(CMAKE_CUDA_STANDARD 14)
+    set(CMAKE_CUDA_STANDARD_REQUIRED ON)
+endif()
+
+set_property(TARGET ${module_name} PROPERTY POSITION_INDEPENDENT_CODE ON)
+set_target_properties(${module_name} PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
+
 # PYTHON BINDING
 if (PYBIND)
     generate_python_binding(${project} ${module_name})
@@ -81,32 +84,19 @@ endif()
 
 target_compile_features(${module_name} PRIVATE cxx_std_14)
 
-
-if(WERROR)
-    target_compile_options(${module_name} PRIVATE
+target_compile_options(${module_name} PRIVATE
     $<$<OR:$<CXX_COMPILER_ID:Clang>,$<CXX_COMPILER_ID:AppleClang>,$<CXX_COMPILER_ID:GNU>>:
-    -Wall -Wextra -fPIC -Wold-style-cast -Winline -pedantic -Werror=narrowing -Wshadow -Werror>)
-    target_compile_options(${module_name} PRIVATE
+    -Wall -Wextra -Wold-style-cast -Winline -pedantic -Werror=narrowing -Wshadow $<$<BOOL:${WERROR}>:-Werror>>)
+target_compile_options(${module_name} PRIVATE
+    $<$<COMPILE_LANGUAGE:CUDA>:
+    -Wall>)
+target_compile_options(${module_name} PRIVATE
     $<$<CXX_COMPILER_ID:MSVC>:
     /W4>)
-else()
-    target_compile_options(${module_name} PRIVATE
-        $<$<OR:$<CXX_COMPILER_ID:Clang>,$<CXX_COMPILER_ID:AppleClang>,$<CXX_COMPILER_ID:GNU>>:
-        -Wall -Wextra -fPIC -Wold-style-cast -Winline -pedantic -Werror=narrowing -Wshadow -Wpedantic>)
-    target_compile_options(${module_name} PRIVATE
-        $<$<CXX_COMPILER_ID:MSVC>:
-        /W4>)
-endif()
-
-#if(CMAKE_COMPILER_IS_GNUCXX AND COVERAGE)
-#    append_coverage_compiler_flags()
-#endif()
-# We need to explicitly state that we need all CUDA files in the
-# ${module_name} library to be built with -dc as the member functions
-# could be called by other libraries and executables
-set_target_properties(${module_name}
-                      PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
+if(CMAKE_COMPILER_IS_GNUCXX AND COVERAGE)
+    append_coverage_compiler_flags()
+endif()
 
 ##############################################
 # Installation instructions
diff --git a/cmake/PybindModuleCreation.cmake b/cmake/PybindModuleCreation.cmake
index 18f4abc..8030c1a 100644
--- a/cmake/PybindModuleCreation.cmake
+++ b/cmake/PybindModuleCreation.cmake
@@ -1,23 +1,21 @@
-function(generate_python_binding name target_to_bind)
-    if (PYBIND)
-        add_definitions(-DPYBIND)
-        Include(FetchContent)
+function(generate_python_binding name target_to_bind)
+    add_definitions(-DPYBIND)
+    Include(FetchContent)
 
-        FetchContent_Declare(
-        PyBind11
-        GIT_REPOSITORY https://github.com/pybind/pybind11.git
-        GIT_TAG        v2.10.4 # or a later release
-        )
+    FetchContent_Declare(
+    PyBind11
+    GIT_REPOSITORY https://github.com/pybind/pybind11.git
+    GIT_TAG        v2.10.4 # or a later release
+    )
 
-        # Use the New FindPython mode, recommanded. Requires CMake 3.15+
-        find_package(Python COMPONENTS Interpreter Development)
-        FetchContent_MakeAvailable(PyBind11)
+    # Use the New FindPython mode, recommended. Requires CMake 3.15+
+    find_package(Python COMPONENTS Interpreter Development)
+    FetchContent_MakeAvailable(PyBind11)
 
-        message(STATUS "Creating binding for module ${name}")
-        file(GLOB_RECURSE pybind_src_files "python_binding/*.cpp")
+    message(STATUS "Creating binding for module ${name}")
+    file(GLOB_RECURSE pybind_src_files "python_binding/*.cpp")
 
-        pybind11_add_module(${name} MODULE ${pybind_src_files} "NO_EXTRAS") # NO EXTRA recquired for pip install
-        target_include_directories(${name} PUBLIC "python_binding")
-        target_link_libraries(${name} PUBLIC ${target_to_bind})
-    endif()
+    pybind11_add_module(${name} MODULE ${pybind_src_files} "NO_EXTRAS") # NO_EXTRAS required for pip install
+    target_include_directories(${name} PUBLIC "python_binding")
+    target_link_libraries(${name} PUBLIC ${target_to_bind})
 endfunction()
diff --git a/include/aidge/data/TensorImpl.hpp b/include/aidge/data/TensorImpl.hpp
index 47edb55..fa12344 100644
--- a/include/aidge/data/TensorImpl.hpp
+++ b/include/aidge/data/TensorImpl.hpp
@@ -1,6 +1,9 @@
 #ifndef AIDGE_BACKEND_CUDA_DATA_TENSORIMPL_H_
 #define AIDGE_BACKEND_CUDA_DATA_TENSORIMPL_H_
 
+#include <thrust/equal.h>
+#include <thrust/execution_policy.h>
+
 #include "aidge/backend/TensorImpl.hpp"
 #include "aidge/data/Tensor.hpp"
 #include "aidge/utils/Registrar.hpp"
@@ -22,8 +25,12 @@ class TensorImpl_cuda : public TensorImpl {
     TensorImpl_cuda(const Tensor &tensor) : TensorImpl(Backend), mTensor(tensor) {}
 
     bool operator==(const TensorImpl &otherImpl) const override final {
-        printf("Not implemented yet.");
-        return false;
+        const auto& otherImplCuda = static_cast<const TensorImpl_cuda<T>&>(otherImpl);
+
+        if (mTensor.size() != otherImplCuda.mTensor.size())
+            return false;
+
+        return thrust::equal(mData, mData + mTensor.size(), otherImplCuda.mData);
     }
 
     static std::unique_ptr<TensorImpl_cuda> create(const Tensor &tensor) {
@@ -49,9 +56,8 @@ class TensorImpl_cuda : public TensorImpl {
         cudaFree(mData);
     }
 
-    void setRawPtr(void *ptr) override final {
+    void setRawPtr(void* /*ptr*/) override final {
         printf("Not implemented yet.");
-        return false;
     };
 
   private:
diff --git a/include/aidge/utils/CudaUtils.hpp b/include/aidge/utils/CudaUtils.hpp
index df657eb..767025c 100644
--- a/include/aidge/utils/CudaUtils.hpp
+++ b/include/aidge/utils/CudaUtils.hpp
@@ -2,6 +2,7 @@
 #define CudaUtils_cuda_H_
 
 #include <string>
+#include <cassert>
 
 #include <cuda.h>
 
diff --git a/unit_tests/CMakeLists.txt b/unit_tests/CMakeLists.txt
index c57b370..ab65c92 100644
--- a/unit_tests/CMakeLists.txt
+++ b/unit_tests/CMakeLists.txt
@@ -8,15 +8,10 @@ FetchContent_Declare(
 
 FetchContent_MakeAvailable(Catch2)
 
-file(GLOB_RECURSE src_files "*.cpp")
+file(GLOB_RECURSE src_files "*.cpp" "*.cu")
 
 add_executable(tests${module_name} ${src_files})
 
-target_include_directories(tests${module_name}
-    PRIVATE
-        ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}
-)
-
 target_link_libraries(tests${module_name} PUBLIC ${module_name})
 
 target_link_libraries(tests${module_name} PRIVATE Catch2::Catch2WithMain)
diff --git a/unit_tests/Test_TensorImpl.cpp b/unit_tests/Test_TensorImpl.cpp
index 40e8aa8..56e3ee1 100644
--- a/unit_tests/Test_TensorImpl.cpp
+++ b/unit_tests/Test_TensorImpl.cpp
@@ -13,11 +13,59 @@
 
 #include <catch2/catch_test_macros.hpp>
 
+#include "Test_cuda.hpp"
+
 #include "aidge/data/Tensor.hpp"
 #include "aidge/data/TensorImpl.hpp"
 
 using namespace Aidge;
 
+TEST_CASE("CUDA test") {
+  const int N = 100;
+
+  // Allocate host memory
+  float* a = new float[N]();
+  float* b = new float[N]();
+  float* out = new float[N]();
+
+  // Initialize host arrays
+  for(int i = 0; i < N; i++){
+    a[i] = 1.0f;
+    b[i] = 2.0f;
+  }
+
+  // Allocate device memory
+  float *d_a, *d_b, *d_out;
+  cudaMalloc(reinterpret_cast<void**>(&d_a), sizeof(float) * N);
+  cudaMalloc(reinterpret_cast<void**>(&d_b), sizeof(float) * N);
+  cudaMalloc(reinterpret_cast<void**>(&d_out), sizeof(float) * N);
+
+  // Transfer data from host to device memory
+  cudaMemcpy(d_a, a, sizeof(float) * N, cudaMemcpyHostToDevice);
+  cudaMemcpy(d_b, b, sizeof(float) * N, cudaMemcpyHostToDevice);
+
+  // Executing kernel
+  vector_add(d_out, d_a, d_b, N);
+
+  // Transfer data back to host memory
+  cudaMemcpy(out, d_out, sizeof(float) * N, cudaMemcpyDeviceToHost);
+
+  // Verification
+  for(int i = 0; i < N; i++){
+    REQUIRE(fabs(out[i] - a[i] - b[i]) < 1e-6);
+  }
+
+  // Deallocate device memory
+  cudaFree(d_a);
+  cudaFree(d_b);
+  cudaFree(d_out);
+
+  // Deallocate host memory
+  delete[] a;
+  delete[] b;
+  delete[] out;
+}
+
 TEST_CASE("Tensor creation", "[Connector]") {
   SECTION("from const array") {
     Tensor x;
diff --git a/unit_tests/Test_cuda.cu b/unit_tests/Test_cuda.cu
new file mode 100644
index 0000000..c46aee3
--- /dev/null
+++ b/unit_tests/Test_cuda.cu
@@ -0,0 +1,12 @@
+#include "Test_cuda.hpp"
+
+__global__ void vector_add_kernel(float *out, float *a, float *b, int n) {
+    for(int i = 0; i < n; i ++){
+        out[i] = a[i] + b[i];
+    }
+}
+
+void vector_add(float *out, float *a, float *b, int n) {
+    vector_add_kernel<<<1, 2>>>(out, a, b, n);
+    CHECK_CUDA_STATUS(cudaPeekAtLastError());
+}
diff --git a/unit_tests/Test_cuda.hpp b/unit_tests/Test_cuda.hpp
new file mode 100644
index 0000000..1d319f0
--- /dev/null
+++ b/unit_tests/Test_cuda.hpp
@@ -0,0 +1,6 @@
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include "aidge/utils/CudaUtils.hpp"
+
+void vector_add(float *out, float *a, float *b, int n);
-- 
GitLab