diff --git a/CMakeLists.txt b/CMakeLists.txt
index a52f23013e84fc9a86f2d4f5c6bb77cf522743c7..cf4866d9ef0bd32e5f264b356e15133b7d2b56e1 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,4 +1,5 @@
-cmake_minimum_required(VERSION 3.15)
+# CMake >= 3.18 is required for good support of FindCUDAToolkit
+cmake_minimum_required(VERSION 3.18)
 
 file(READ "${CMAKE_SOURCE_DIR}/version.txt" version)
 file(READ "${CMAKE_SOURCE_DIR}/project_name.txt" project)
@@ -12,8 +13,6 @@ set(module_name _${project}) # target name
 
 project(${project})
 
-enable_language(CUDA)
-
 ##############################################
 # Define options
 option(PYBIND "python binding" ON)
@@ -26,18 +25,18 @@ option(COVERAGE "Enable coverage" OFF)
 
 set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake")
 include(PybindModuleCreation)
 
-#if(CMAKE_COMPILER_IS_GNUCXX AND COVERAGE)
-#    Include(CodeCoverage)
-#endif()
+if(CMAKE_COMPILER_IS_GNUCXX AND COVERAGE)
+    Include(CodeCoverage)
+endif()
+
+enable_language(CUDA)
 
 ##############################################
 # Find system dependencies
-find_library(CUDART_LIBRARY cudart ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES})
-message(STATUS "CUDA Toolkit include DIRS: ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}")
+find_package(CUDAToolkit REQUIRED)
 find_package(aidge_core REQUIRED)
 find_package(aidge_backend_cpu REQUIRED)
-# TODO: add here additional Aidge dependencies if you need to.
 
 ##############################################
 # Create target and set properties
@@ -50,22 +49,26 @@ target_link_libraries(${module_name}
     PUBLIC
         _aidge_core # _ is added because we link the target not the project
         _aidge_backend_cpu # _ is added because we link the target not the project
-        # TODO: add here additional Aidge dependencies if you need to.
-        ${CUDART_LIBRARY}
+        CUDA::cudart
 )
 
 #Set target properties
-set_property(TARGET ${module_name} PROPERTY POSITION_INDEPENDENT_CODE ON)
-set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --std=c++14")
 target_include_directories(${module_name}
     PUBLIC
         $<INSTALL_INTERFACE:include>
         $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
     PRIVATE
         ${CMAKE_CURRENT_SOURCE_DIR}/src
-        ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}
 )
 
+if(NOT DEFINED CMAKE_CUDA_STANDARD)
+    set(CMAKE_CUDA_STANDARD 14)
+    set(CMAKE_CUDA_STANDARD_REQUIRED ON)
+endif()
+
+set_property(TARGET ${module_name} PROPERTY POSITION_INDEPENDENT_CODE ON)
+set_target_properties(${module_name} PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
+
 # PYTHON BINDING
 if (PYBIND)
     generate_python_binding(${project} ${module_name})
@@ -81,32 +84,19 @@ endif()
 
 target_compile_features(${module_name} PRIVATE cxx_std_14)
 
-
-if(WERROR)
-    target_compile_options(${module_name} PRIVATE
+target_compile_options(${module_name} PRIVATE
     $<$<OR:$<CXX_COMPILER_ID:Clang>,$<CXX_COMPILER_ID:AppleClang>,$<CXX_COMPILER_ID:GNU>>:
-    -Wall -Wextra -fPIC -Wold-style-cast -Winline -pedantic -Werror=narrowing -Wshadow -Werror>)
-    target_compile_options(${module_name} PRIVATE
+    -Wall -Wextra -Wold-style-cast -Winline -pedantic -Werror=narrowing -Wshadow $<$<BOOL:${WERROR}>:-Werror>>)
+target_compile_options(${module_name} PRIVATE
+    $<$<COMPILE_LANGUAGE:CUDA>:
+    -Wall>)
+target_compile_options(${module_name} PRIVATE
     $<$<CXX_COMPILER_ID:MSVC>:
     /W4>)
 
-else()
-    target_compile_options(${module_name} PRIVATE
-    $<$<OR:$<CXX_COMPILER_ID:Clang>,$<CXX_COMPILER_ID:AppleClang>,$<CXX_COMPILER_ID:GNU>>:
-    -Wall -Wextra -fPIC -Wold-style-cast -Winline -pedantic -Werror=narrowing -Wshadow -Wpedantic>)
-    target_compile_options(${module_name} PRIVATE
-    $<$<CXX_COMPILER_ID:MSVC>:
-    /W4>)
-endif()
-
-#if(CMAKE_COMPILER_IS_GNUCXX AND COVERAGE)
-#    append_coverage_compiler_flags()
-#endif()
-# We need to explicitly state that we need all CUDA files in the
-# ${module_name} library to be built with -dc as the member functions
-# could be called by other libraries and executables
-set_target_properties(${module_name}
-                      PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
+if(CMAKE_COMPILER_IS_GNUCXX AND COVERAGE)
+    append_coverage_compiler_flags()
+endif()
 
 ##############################################
 # Installation instructions
diff --git a/cmake/PybindModuleCreation.cmake b/cmake/PybindModuleCreation.cmake
index 18f4abc38e2537c3f4d949f08772a57b90758cb0..8030c1a8639e4b7ae0c5fb865e928a4260c6ae7d 100644
--- a/cmake/PybindModuleCreation.cmake
+++ b/cmake/PybindModuleCreation.cmake
@@ -1,23 +1,21 @@
-function(generate_python_binding name target_to_bind)
-    if (PYBIND)
-        add_definitions(-DPYBIND)
-        Include(FetchContent)
+function(generate_python_binding name target_to_bind)
+    add_definitions(-DPYBIND)
+    Include(FetchContent)
 
-        FetchContent_Declare(
-        PyBind11
-        GIT_REPOSITORY https://github.com/pybind/pybind11.git
-        GIT_TAG        v2.10.4 # or a later release
-        )
+    FetchContent_Declare(
+    PyBind11
+    GIT_REPOSITORY https://github.com/pybind/pybind11.git
+    GIT_TAG        v2.10.4 # or a later release
+    )
 
-        # Use the New FindPython mode, recommanded. Requires CMake 3.15+
-        find_package(Python COMPONENTS Interpreter Development)
-        FetchContent_MakeAvailable(PyBind11)
+    # Use the new FindPython mode, recommended. Requires CMake 3.15+
+    find_package(Python COMPONENTS Interpreter Development)
+    FetchContent_MakeAvailable(PyBind11)
 
-        message(STATUS "Creating binding for module ${name}")
-        file(GLOB_RECURSE pybind_src_files "python_binding/*.cpp")
+    message(STATUS "Creating binding for module ${name}")
+    file(GLOB_RECURSE pybind_src_files "python_binding/*.cpp")
 
-        pybind11_add_module(${name} MODULE ${pybind_src_files} "NO_EXTRAS") # NO EXTRA recquired for pip install
-        target_include_directories(${name} PUBLIC "python_binding")
-        target_link_libraries(${name} PUBLIC ${target_to_bind})
-    endif()
+    pybind11_add_module(${name} MODULE ${pybind_src_files} "NO_EXTRAS") # NO_EXTRAS required for pip install
+    target_include_directories(${name} PUBLIC "python_binding")
+    target_link_libraries(${name} PUBLIC ${target_to_bind})
 endfunction()
diff --git a/include/aidge/data/TensorImpl.hpp b/include/aidge/data/TensorImpl.hpp
index 47edb5542b8e2d32be5a18848f1d336ca2dfb5f1..fa123443174cffe6b5b81340f56767c8ea7149bf 100644
--- a/include/aidge/data/TensorImpl.hpp
+++ b/include/aidge/data/TensorImpl.hpp
@@ -1,6 +1,9 @@
 #ifndef AIDGE_BACKEND_CUDA_DATA_TENSORIMPL_H_
 #define AIDGE_BACKEND_CUDA_DATA_TENSORIMPL_H_
 
+#include <thrust/equal.h>
+#include <thrust/execution_policy.h>
+
 #include "aidge/backend/TensorImpl.hpp"
 #include "aidge/data/Tensor.hpp"
 #include "aidge/utils/Registrar.hpp"
@@ -22,8 +25,12 @@ class TensorImpl_cuda : public TensorImpl {
     TensorImpl_cuda(const Tensor &tensor) : TensorImpl(Backend), mTensor(tensor) {}
 
     bool operator==(const TensorImpl &otherImpl) const override final {
-        printf("Not implemented yet.");
-        return false;
+        const auto& otherImplCuda = static_cast<const TensorImpl_cuda<T>&>(otherImpl);
+
+        if (mTensor.size() != otherImplCuda.mTensor.size())
+            return false;
+
+        return thrust::equal(thrust::device, mData, mData + mTensor.size(), otherImplCuda.mData);
     }
 
     static std::unique_ptr<TensorImpl_cuda> create(const Tensor &tensor) {
@@ -49,9 +56,8 @@ class TensorImpl_cuda : public TensorImpl {
         cudaFree(mData);
     }
 
-    void setRawPtr(void *ptr) override final {
+    void setRawPtr(void* /*ptr*/) override final {
         printf("Not implemented yet.");
-        return false;
     };
 
  private:
diff --git a/include/aidge/utils/CudaUtils.hpp b/include/aidge/utils/CudaUtils.hpp
index df657eb5bbe62dd82ad5096d9745b9779361298a..767025c2d3306565d7efd49483143db216304ad0 100644
--- a/include/aidge/utils/CudaUtils.hpp
+++ b/include/aidge/utils/CudaUtils.hpp
@@ -2,6 +2,7 @@
 #define CudaUtils_cuda_H_
 
 #include <string>
+#include <cassert>
 
 #include <cuda.h>
 
diff --git a/unit_tests/CMakeLists.txt b/unit_tests/CMakeLists.txt
index c57b370852e40c18b8590d9735211d08302562f0..ab65c924e4ac9abecc132e5d7cbc4dc91e172821 100644
--- a/unit_tests/CMakeLists.txt
+++ b/unit_tests/CMakeLists.txt
@@ -8,15 +8,10 @@ FetchContent_Declare(
 
 FetchContent_MakeAvailable(Catch2)
 
-file(GLOB_RECURSE src_files "*.cpp")
+file(GLOB_RECURSE src_files "*.cpp" "*.cu")
 
 add_executable(tests${module_name} ${src_files})
 
-target_include_directories(tests${module_name}
-    PRIVATE
-        ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}
-)
-
 target_link_libraries(tests${module_name} PUBLIC ${module_name})
 
 target_link_libraries(tests${module_name} PRIVATE Catch2::Catch2WithMain)
diff --git a/unit_tests/Test_TensorImpl.cpp b/unit_tests/Test_TensorImpl.cpp
index 40e8aa8f81b28b42af415103eb0cbc77a602c7f9..56e3ee1d306c06e83ce860043253558de74d567b 100644
--- a/unit_tests/Test_TensorImpl.cpp
+++ b/unit_tests/Test_TensorImpl.cpp
@@ -13,11 +13,59 @@
 
 #include <catch2/catch_test_macros.hpp>
 
+#include "Test_cuda.hpp"
+
 #include "aidge/data/Tensor.hpp"
 #include "aidge/data/TensorImpl.hpp"
 
 using namespace Aidge;
 
+TEST_CASE("CUDA test") {
+    const int N = 100;
+
+    // Allocate host memory
+    float* a = new float[N]();
+    float* b = new float[N]();
+    float* out = new float[N]();
+
+    // Initialize host arrays
+    for (int i = 0; i < N; i++) {
+        a[i] = 1.0f;
+        b[i] = 2.0f;
+    }
+
+    // Allocate device memory
+    float *d_a, *d_b, *d_out;
+    cudaMalloc(reinterpret_cast<void**>(&d_a), sizeof(float) * N);
+    cudaMalloc(reinterpret_cast<void**>(&d_b), sizeof(float) * N);
+    cudaMalloc(reinterpret_cast<void**>(&d_out), sizeof(float) * N);
+
+    // Transfer data from host to device memory
+    cudaMemcpy(d_a, a, sizeof(float) * N, cudaMemcpyHostToDevice);
+    cudaMemcpy(d_b, b, sizeof(float) * N, cudaMemcpyHostToDevice);
+
+    // Executing kernel
+    vector_add(d_out, d_a, d_b, N);
+
+    // Transfer data back to host memory
+    cudaMemcpy(out, d_out, sizeof(float) * N, cudaMemcpyDeviceToHost);
+
+    // Verification
+    for (int i = 0; i < N; i++) {
+        REQUIRE(fabs(out[i] - a[i] - b[i]) < 1e-6);
+    }
+
+    // Deallocate device memory
+    cudaFree(d_a);
+    cudaFree(d_b);
+    cudaFree(d_out);
+
+    // Deallocate host memory
+    delete[] a;
+    delete[] b;
+    delete[] out;
+}
+
 TEST_CASE("Tensor creation", "[Connector]") {
     SECTION("from const array") {
         Tensor x;
diff --git a/unit_tests/Test_cuda.cu b/unit_tests/Test_cuda.cu
new file mode 100644
index 0000000000000000000000000000000000000000..c46aee32414f4ef903821f87514762b934001c18
--- /dev/null
+++ b/unit_tests/Test_cuda.cu
@@ -0,0 +1,12 @@
+#include "Test_cuda.hpp"
+
+__global__ void vector_add_kernel(float *out, float *a, float *b, int n) {
+    for (int i = 0; i < n; i++) {
+        out[i] = a[i] + b[i];
+    }
+}
+
+void vector_add(float *out, float *a, float *b, int n) {
+    vector_add_kernel<<<1, 2>>>(out, a, b, n);
+    CHECK_CUDA_STATUS(cudaPeekAtLastError());
+}
diff --git a/unit_tests/Test_cuda.hpp b/unit_tests/Test_cuda.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..1d319f06562309038cd94cc65c5c7b2ffae9555f
--- /dev/null
+++ b/unit_tests/Test_cuda.hpp
@@ -0,0 +1,6 @@
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include "aidge/utils/CudaUtils.hpp"
+
+void vector_add(float *out, float *a, float *b, int n);
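
Reviewer note, not part of the patch: in Test_cuda.cu the kernel is launched as <<<1, 2>>> and each thread runs the full serial loop, so both threads redundantly write every element. A grid-stride sketch that keeps the vector_add() signature declared in Test_cuda.hpp and the CHECK_CUDA_STATUS macro from CudaUtils.hpp could look like the following; the block and grid sizes are illustrative choices, not taken from the patch.

#include "Test_cuda.hpp"  // cuda_runtime.h, CHECK_CUDA_STATUS and the vector_add declaration

__global__ void vector_add_kernel(float* out, const float* a, const float* b, int n) {
    // Each thread starts at its global index and advances by the total number of launched threads.
    const int stride = blockDim.x * gridDim.x;
    for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; i += stride) {
        out[i] = a[i] + b[i];
    }
}

void vector_add(float* out, float* a, float* b, int n) {
    const int threadsPerBlock = 256;  // illustrative block size
    const int blocks = (n + threadsPerBlock - 1) / threadsPerBlock;
    vector_add_kernel<<<blocks, threadsPerBlock>>>(out, a, b, n);
    CHECK_CUDA_STATUS(cudaPeekAtLastError());
}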