diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 7f4b675b68f9ea6b1111ee1ed833331661803746..4cc1b0b9d2cf30db7f532f10d8941a1a1719fac5 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -82,31 +82,6 @@ New Compiler Flags in that case. The option's behaviour mirrors GCC, the helpers are implemented both in compiler-rt and libgcc. -- -fpch-codegen and -fpch-debuginfo generate shared code and/or debuginfo - for contents of a precompiled header in a separate object file. This object - file needs to be linked in, but its contents do not need to be generated - for other objects using the precompiled header. This should usually save - compile time. If not using clang-cl, the separate object file needs to - be created explicitly from the precompiled header. - Example of use: - - .. code-block:: console - - $ clang++ -x c++-header header.h -o header.pch -fpch-codegen -fpch-debuginfo - $ clang++ -c header.pch -o shared.o - $ clang++ -c source.cpp -o source.o -include-pch header.pch - $ clang++ -o binary source.o shared.o - - - Using -fpch-instantiate-templates when generating the precompiled header - usually increases the amount of code/debuginfo that can be shared. - - In some cases, especially when building with optimizations enabled, using - -fpch-codegen may generate so much code in the shared object that compiling - it may be a net loss in build time. - - Since headers may bring in private symbols of other libraries, it may be - sometimes necessary to discard unused symbols (such as by adding - -Wl,--gc-sections on ELF platforms to the linking command, and possibly - adding -fdata-sections -ffunction-sections to the command generating - the shared object). - New option ``-fbinutils-version=`` specifies the targeted binutils version. For example, ``-fbinutils-version=2.35`` means compatibility with GNU as/ld before 2.35 is not needed: new features can be used and there is no need to @@ -139,6 +114,16 @@ Modified Compiler Flags This behavior matches newer GCC. (`D91760 <https://reviews.llvm.org/D91760>`_) (`D92054 <https://reviews.llvm.org/D92054>`_) +- Support has been added for the following processors (command-line identifiers + in parentheses): + + - Arm Cortex-A78C (cortex-a78c). + - Arm Cortex-R82 (cortex-r82). + - Arm Neoverse V1 (neoverse-v1). + - Arm Neoverse N2 (neoverse-n2). + - Fujitsu A64FX (a64fx). + For example, to select architecture support and tuning for Neoverse-V1 based + systems, use ``-mcpu=neoverse-v1``. Removed Compiler Flags ------------------------- @@ -183,6 +168,13 @@ Windows Support exception. To workaround (with reduced security), compile with /guard:cf,nolongjmp. +- Windows on Arm64: LLVM 12 adds official binary release hosted on + Windows on Arm64. The binary is built and tested by Linaro alongside + AArch64 and ARM 32-bit Linux binary releases. This first WoA release + includes Clang compiler, LLD Linker, and compiler-rt runtime libraries. + Work on LLDB, sanitizer support, OpenMP, and other features is in progress + and will be included in future Windows on Arm64 LLVM releases. + C Language Changes in Clang --------------------------- @@ -200,10 +192,38 @@ C++1z Feature Support Objective-C Language Changes in Clang ------------------------------------- -OpenCL C Language Changes in Clang ----------------------------------- - -... +OpenCL Kernel Language Changes in Clang +--------------------------------------- + +- Improved online documentation: :doc:`UsersManual` and :doc:`OpenCLSupport` + pages. +- Added ``-cl-std=CL3.0`` and predefined version macro for OpenCL 3.0. +- Added ``-cl-std=CL1.0`` and mapped to the existing OpenCL 1.0 functionality. +- Improved OpenCL extension handling per target. +- Added clang extension for function pointers ``__cl_clang_function_pointers`` + and variadic functions ``__cl_clang_variadic_functions``, more details can be + found in :doc:`LanguageExtensions`. +- Removed extensions without kernel language changes: + ``cl_khr_select_fprounding_mode``, ``cl_khr_gl_sharing``, ``cl_khr_icd``, + ``cl_khr_gl_event``, ``cl_khr_d3d10_sharing``, ``cl_khr_context_abort``, + ``cl_khr_d3d11_sharing``, ``cl_khr_dx9_media_sharing``, + ``cl_khr_image2d_from_buffer``, ``cl_khr_initialize_memory``, + ``cl_khr_gl_depth_images``, ``cl_khr_spir``, ``cl_khr_egl_event``, + ``cl_khr_egl_image``, ``cl_khr_terminate_context``. +- Improved diagnostics for unevaluated ``vec_step`` expression. +- Allow nested pointers (e.g. pointer-to-pointer) kernel arguments beyond OpenCL + 1.2. +- Added ``global_device`` and ``global_host`` address spaces for USM + allocations. + +Miscellaneous improvements in C++ for OpenCL support: + +- Added diagnostics for pointers to member functions and references to + functions. +- Added support of ``vec_step`` builtin. +- Fixed ICE on address spaces with forwarding references and templated copy + constructors. +- Removed warning for variadic macro use. ABI Changes in Clang -------------------- diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index 57cc2d60e2af044de9c975068d228ad5e9fd3742..83dfa0780547d203856829557a189c1f10135c24 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -9892,7 +9892,7 @@ void CGOpenMPRuntime::emitTargetNumIterationsCall( llvm::Value *Args[] = {RTLoc, DeviceID, NumIterations}; CGF.EmitRuntimeCall( OMPBuilder.getOrCreateRuntimeFunction( - CGM.getModule(), OMPRTL___kmpc_push_target_tripcount), + CGM.getModule(), OMPRTL___kmpc_push_target_tripcount_mapper), Args); } }; diff --git a/clang/lib/Driver/ToolChains/MSVC.cpp b/clang/lib/Driver/ToolChains/MSVC.cpp index f4b7a57e0bb709be7a2f1529fca96e7877966abc..13943b6c404a9b6fa4a0bd4ebd00905cc999149b 100644 --- a/clang/lib/Driver/ToolChains/MSVC.cpp +++ b/clang/lib/Driver/ToolChains/MSVC.cpp @@ -11,6 +11,7 @@ #include "Darwin.h" #include "clang/Basic/CharInfo.h" #include "clang/Basic/Version.h" +#include "clang/Config/config.h" #include "clang/Driver/Compilation.h" #include "clang/Driver/Driver.h" #include "clang/Driver/DriverDiagnostic.h" @@ -520,7 +521,10 @@ void visualstudio::Linker::ConstructJob(Compilation &C, const JobAction &JA, // translate 'lld' into 'lld-link', and in the case of the regular msvc // linker, we need to use a special search algorithm. llvm::SmallString<128> linkPath; - StringRef Linker = Args.getLastArgValue(options::OPT_fuse_ld_EQ, "link"); + StringRef Linker + = Args.getLastArgValue(options::OPT_fuse_ld_EQ, CLANG_DEFAULT_LINKER); + if (Linker.empty()) + Linker = "link"; if (Linker.equals_lower("lld")) Linker = "lld-link"; diff --git a/clang/test/Driver/Xlinker-args.c b/clang/test/Driver/Xlinker-args.c index a44957cd8aef11311351cc218d736ffa21b0100b..cb045a1d40ac172e3c7bf1de039d31c170d79812 100644 --- a/clang/test/Driver/Xlinker-args.c +++ b/clang/test/Driver/Xlinker-args.c @@ -17,7 +17,7 @@ // LINUX: "--no-demangle" "-e" "_start" "one" "two" "three" "four" "-z" "five" "-r" {{.*}} "-T" "a.lds" // Check that we forward '-Xlinker' and '-Wl,' on Windows. -// RUN: %clang -target i686-pc-win32 -### \ +// RUN: %clang -target i686-pc-win32 -fuse-ld=link -### \ // RUN: -Xlinker one -Wl,two %s 2>&1 | \ // RUN: FileCheck -check-prefix=WIN %s // WIN: link.exe diff --git a/clang/test/Driver/cl-inputs.c b/clang/test/Driver/cl-inputs.c index 59455a0aa5e5c7e06a4f1bb673c9ce6fd43d6f09..8eb44517ee167a8bfcc087c3121fea1744a3912c 100644 --- a/clang/test/Driver/cl-inputs.c +++ b/clang/test/Driver/cl-inputs.c @@ -50,16 +50,16 @@ // RUN: %clang_cl -### /Tc - 2>&1 | FileCheck -check-prefix=STDINTc %s // STDINTc: "-x" "c" -// RUN: env LIB=%S/Inputs/cl-libs %clang_cl -### -- %s cl-test.lib 2>&1 | FileCheck -check-prefix=LIBINPUT %s +// RUN: env LIB=%S/Inputs/cl-libs %clang_cl -fuse-ld=link -### -- %s cl-test.lib 2>&1 | FileCheck -check-prefix=LIBINPUT %s // LIBINPUT: link.exe" // LIBINPUT: "cl-test.lib" -// RUN: env LIB=%S/Inputs/cl-libs %clang_cl -### -- %s cl-test2.lib 2>&1 | FileCheck -check-prefix=LIBINPUT2 %s +// RUN: env LIB=%S/Inputs/cl-libs %clang_cl -fuse-ld=link -### -- %s cl-test2.lib 2>&1 | FileCheck -check-prefix=LIBINPUT2 %s // LIBINPUT2: error: no such file or directory: 'cl-test2.lib' // LIBINPUT2: link.exe" // LIBINPUT2-NOT: "cl-test2.lib" -// RUN: %clang_cl -### -- %s /nonexisting.lib 2>&1 | FileCheck -check-prefix=LIBINPUT3 %s +// RUN: %clang_cl -fuse-ld=link -### -- %s /nonexisting.lib 2>&1 | FileCheck -check-prefix=LIBINPUT3 %s // LIBINPUT3: error: no such file or directory: '/nonexisting.lib' // LIBINPUT3: link.exe" // LIBINPUT3-NOT: "/nonexisting.lib" diff --git a/clang/test/Driver/cl-link-at-file.c b/clang/test/Driver/cl-link-at-file.c index 50ae07fadf5bf0beb11c4eff09b5d6fc2fd65e7c..4e665f89b74e1d0e5d3ffeac8f73fbaf8f533f53 100644 --- a/clang/test/Driver/cl-link-at-file.c +++ b/clang/test/Driver/cl-link-at-file.c @@ -7,7 +7,7 @@ // RUN: echo /link bar.lib baz.lib > %t.args // RUN: touch %t.obj -// RUN: %clang_cl -### @%t.args -- %t.obj 2>&1 | FileCheck %s -check-prefix=ARGS +// RUN: %clang_cl -fuse-ld=link -### @%t.args -- %t.obj 2>&1 | FileCheck %s -check-prefix=ARGS // If the "/link" option captures all remaining args beyond its response file, // it will also capture "--" and our input argument. In this case, Clang will // be clueless and will emit "argument unused" warnings. If PR17239 is properly diff --git a/clang/test/Driver/cl-link.c b/clang/test/Driver/cl-link.c index 142725fed8eb2ee1b0265460495a4eb3fd632224..e2f5397e913393bd10fafbfba9b66c518b732be9 100644 --- a/clang/test/Driver/cl-link.c +++ b/clang/test/Driver/cl-link.c @@ -2,14 +2,14 @@ // be interpreted as a command-line option, e.g. on Mac where %s is commonly // under /Users. -// RUN: %clang_cl /Tc%s -### /link foo bar baz 2>&1 | FileCheck --check-prefix=LINK %s -// RUN: %clang_cl /Tc%s -### /linkfoo bar baz 2>&1 | FileCheck --check-prefix=LINK %s +// RUN: %clang_cl /Tc%s -fuse-ld=link -### /link foo bar baz 2>&1 | FileCheck --check-prefix=LINK %s +// RUN: %clang_cl /Tc%s -fuse-ld=link -### /linkfoo bar baz 2>&1 | FileCheck --check-prefix=LINK %s // LINK: link.exe // LINK: "foo" // LINK: "bar" // LINK: "baz" -// RUN: %clang_cl -m32 -arch:IA32 --target=i386-pc-win32 /Tc%s -### -fsanitize=address 2>&1 | FileCheck --check-prefix=ASAN %s +// RUN: %clang_cl -m32 -arch:IA32 --target=i386-pc-win32 /Tc%s -fuse-ld=link -### -fsanitize=address 2>&1 | FileCheck --check-prefix=ASAN %s // ASAN: link.exe // ASAN: "-debug" // ASAN: "-incremental:no" @@ -19,7 +19,7 @@ // ASAN: "-wholearchive:{{.*}}clang_rt.asan_cxx-i386.lib" // ASAN: "{{.*}}cl-link{{.*}}.obj" -// RUN: %clang_cl -m32 -arch:IA32 --target=i386-pc-win32 /MD /Tc%s -### -fsanitize=address 2>&1 | FileCheck --check-prefix=ASAN-MD %s +// RUN: %clang_cl -m32 -arch:IA32 --target=i386-pc-win32 /MD /Tc%s -fuse-ld=link -### -fsanitize=address 2>&1 | FileCheck --check-prefix=ASAN-MD %s // ASAN-MD: link.exe // ASAN-MD: "-debug" // ASAN-MD: "-incremental:no" @@ -29,13 +29,13 @@ // ASAN-MD: "-wholearchive:{{.*}}clang_rt.asan_dynamic_runtime_thunk-i386.lib" // ASAN-MD: "{{.*}}cl-link{{.*}}.obj" -// RUN: %clang_cl /LD -### /Tc%s 2>&1 | FileCheck --check-prefix=DLL %s -// RUN: %clang_cl /LDd -### /Tc%s 2>&1 | FileCheck --check-prefix=DLL %s +// RUN: %clang_cl /LD -fuse-ld=link -### /Tc%s 2>&1 | FileCheck --check-prefix=DLL %s +// RUN: %clang_cl /LDd -fuse-ld=link -### /Tc%s 2>&1 | FileCheck --check-prefix=DLL %s // DLL: link.exe // "-dll" -// RUN: %clang_cl -m32 -arch:IA32 --target=i386-pc-win32 /LD /Tc%s -### -fsanitize=address 2>&1 | FileCheck --check-prefix=ASAN-DLL %s -// RUN: %clang_cl -m32 -arch:IA32 --target=i386-pc-win32 /LDd /Tc%s -### -fsanitize=address 2>&1 | FileCheck --check-prefix=ASAN-DLL %s +// RUN: %clang_cl -m32 -arch:IA32 --target=i386-pc-win32 /LD /Tc%s -fuse-ld=link -### -fsanitize=address 2>&1 | FileCheck --check-prefix=ASAN-DLL %s +// RUN: %clang_cl -m32 -arch:IA32 --target=i386-pc-win32 /LDd /Tc%s -fuse-ld=link -### -fsanitize=address 2>&1 | FileCheck --check-prefix=ASAN-DLL %s // ASAN-DLL: link.exe // ASAN-DLL: "-dll" // ASAN-DLL: "-debug" @@ -43,13 +43,13 @@ // ASAN-DLL: "{{.*}}clang_rt.asan_dll_thunk-i386.lib" // ASAN-DLL: "{{.*}}cl-link{{.*}}.obj" -// RUN: %clang_cl /Zi /Tc%s -### 2>&1 | FileCheck --check-prefix=DEBUG %s +// RUN: %clang_cl /Zi /Tc%s -fuse-ld=link -### 2>&1 | FileCheck --check-prefix=DEBUG %s // DEBUG: link.exe // DEBUG: "-debug" // PR27234 -// RUN: %clang_cl /Tc%s nonexistent.obj -### /link /libpath:somepath 2>&1 | FileCheck --check-prefix=NONEXISTENT %s -// RUN: %clang_cl /Tc%s nonexistent.lib -### /link /libpath:somepath 2>&1 | FileCheck --check-prefix=NONEXISTENT %s +// RUN: %clang_cl /Tc%s nonexistent.obj -fuse-ld=link -### /link /libpath:somepath 2>&1 | FileCheck --check-prefix=NONEXISTENT %s +// RUN: %clang_cl /Tc%s nonexistent.lib -fuse-ld=link -### /link /libpath:somepath 2>&1 | FileCheck --check-prefix=NONEXISTENT %s // NONEXISTENT-NOT: no such file // NONEXISTENT: link.exe // NONEXISTENT: "/libpath:somepath" diff --git a/clang/test/Driver/msvc-link.c b/clang/test/Driver/msvc-link.c index 13dccd21bfd8d1c0ed7ec61c28670fe189d2f3ad..1ee17fc63c321359edade2c847b2c04282bb00d7 100644 --- a/clang/test/Driver/msvc-link.c +++ b/clang/test/Driver/msvc-link.c @@ -1,4 +1,4 @@ -// RUN: %clang -target i686-pc-windows-msvc -### %s 2>&1 | FileCheck --check-prefix=BASIC %s +// RUN: %clang -target i686-pc-windows-msvc -fuse-ld=link -### %s 2>&1 | FileCheck --check-prefix=BASIC %s // BASIC: link.exe" // BASIC: "-out:a.exe" // BASIC: "-defaultlib:libcmt" @@ -6,7 +6,7 @@ // BASIC: "-nologo" // BASIC-NOT: "-Brepro" -// RUN: %clang -target i686-pc-windows-msvc -shared -o a.dll -### %s 2>&1 | FileCheck --check-prefix=DLL %s +// RUN: %clang -target i686-pc-windows-msvc -shared -o a.dll -fuse-ld=link -### %s 2>&1 | FileCheck --check-prefix=DLL %s // DLL: link.exe" // DLL: "-out:a.dll" // DLL: "-defaultlib:libcmt" @@ -19,13 +19,13 @@ // LIBPATH: "-libpath:/usr/lib" // LIBPATH: "-nologo" -// RUN: %clang_cl /Brepro -### -- %s 2>&1 | FileCheck --check-prefix=REPRO %s +// RUN: %clang_cl /Brepro -fuse-ld=link -### -- %s 2>&1 | FileCheck --check-prefix=REPRO %s // REPRO: link.exe" // REPRO: "-out:msvc-link.exe" // REPRO: "-nologo" // REPRO: "-Brepro" -// RUN: %clang_cl /Brepro- -### -- %s 2>&1 | FileCheck --check-prefix=NOREPRO %s +// RUN: %clang_cl /Brepro- -fuse-ld=link -### -- %s 2>&1 | FileCheck --check-prefix=NOREPRO %s // NOREPRO: link.exe" // NOREPRO: "-out:msvc-link.exe" // NOREPRO: "-nologo" diff --git a/clang/test/OpenMP/linking.c b/clang/test/OpenMP/linking.c index 802553c1be7525de7aeb8c5253a125a7f9e85f1d..1c44396264705bb6638ed289cd83849b07f6bb01 100644 --- a/clang/test/OpenMP/linking.c +++ b/clang/test/OpenMP/linking.c @@ -81,7 +81,7 @@ // CHECK-LD-OVERRIDE-64: "-lgomp" "-lrt" // CHECK-LD-OVERRIDE-64: "-lpthread" "-lc" // -// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \ +// RUN: %clang -no-canonical-prefixes -fuse-ld=link %s -### -o %t.o 2>&1 \ // RUN: -fopenmp=libomp -target x86_64-msvc-win32 -rtlib=platform \ // RUN: | FileCheck --check-prefix=CHECK-MSVC-LINK-64 %s // CHECK-MSVC-LINK-64: link.exe @@ -95,7 +95,7 @@ // SIMD-ONLY11-NOT: libomp // SIMD-ONLY11-NOT: libgomp // -// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \ +// RUN: %clang -no-canonical-prefixes %s -fuse-ld=link -### -o %t.o 2>&1 \ // RUN: -fopenmp=libiomp5 -target x86_64-msvc-win32 -rtlib=platform \ // RUN: | FileCheck --check-prefix=CHECK-MSVC-ILINK-64 %s diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_codegen.cpp index 0229ace911f87b189cc89d8b420d710a9368b811..c0f53239aa13335f7a60cd3184ba3d20ef1f660a 100644 --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_codegen.cpp @@ -39,7 +39,7 @@ #ifdef CK1 -// HCK_NO_TGT-NOT: @__kmpc_push_target_tripcount +// HCK_NO_TGT-NOT: @__kmpc_push_target_tripcount_mapper // HCK1: define{{.*}} i32 @{{.+}}target_teams_fun{{.*}}( int target_teams_fun(int *g){ @@ -60,7 +60,7 @@ int target_teams_fun(int *g){ // HCK1: [[N_PAR:%.+]] = load{{.+}}, {{.+}} [[N_CAST]], // HCK1: [[TE_PAR:%.+]] = load{{.+}}, {{.+}} [[TE_CAST]], // HCK1: [[TH_PAR:%.+]] = load{{.+}}, {{.+}} [[TH_CAST]], - // HCK1: call void @__kmpc_push_target_tripcount(%struct.ident_t* @{{.+}}, i64 -1, i64 %{{.+}}) + // HCK1: call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @{{.+}}, i64 -1, i64 %{{.+}}) // HCK1: call i32 @__tgt_target_teams_mapper(%struct.ident_t* @{{.+}}, i64 -1, i8* @{{[^,]+}}, i32 4, i8** %{{[^,]+}}, i8** %{{[^,]+}}, // HCK1: call void @[[OFFL1:.+]](i{{32|64}} [[N_PAR]], {{.+}}, i{{32|64}} [[TE_PAR]], i{{32|64}} [[TH_PAR]]) diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_if_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_if_codegen.cpp index 6650e05575110efb1c10dcbd2fde4b2fac2b3aca..efe7df819fb6213353a4def8690c5c6858b445a2 100644 --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_if_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_if_codegen.cpp @@ -49,10 +49,10 @@ int Arg; // CHECK-LABEL: define {{.*}}void @{{.+}}gtid_test void gtid_test() { -// CHECK: call void @__kmpc_push_target_tripcount(%struct.ident_t* @{{.+}}, i64 -1, i64 100) +// CHECK: call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @{{.+}}, i64 -1, i64 100) // CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(%struct.ident_t* @{{.+}}, // CHECK: call void [[OFFLOADING_FUN_0:@.+]]( -// CHECK: call void @__kmpc_push_target_tripcount(%struct.ident_t* @{{.+}}, i64 -1, i64 100) +// CHECK: call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @{{.+}}, i64 -1, i64 100) // CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(%struct.ident_t* @{{.+}}, // CHECK: call void [[OFFLOADING_FUN_1:@.+]]( #pragma omp target teams distribute parallel for @@ -107,12 +107,12 @@ int tmain(T Arg) { // CHECK-LABEL: define {{.*}}i{{[0-9]+}} @main() int main() { -// CHECK: call void @__kmpc_push_target_tripcount(%struct.ident_t* @{{.+}}, i64 -1, i64 100) +// CHECK: call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @{{.+}}, i64 -1, i64 100) // CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(%struct.ident_t* @{{.+}}, // CHECK: call void [[OFFLOADING_FUN_0:@.+]]( -// CHECK-NOT: call void @__kmpc_push_target_tripcount(%struct.ident_t* @{{.+}}, i64 -1, i64 100) +// CHECK-NOT: call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @{{.+}}, i64 -1, i64 100) // CHECK: call void [[OFFLOADING_FUN_1:@.+]]( -// CHECK: call void @__kmpc_push_target_tripcount(%struct.ident_t* @{{.+}}, i64 -1, i64 100) +// CHECK: call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @{{.+}}, i64 -1, i64 100) // CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(%struct.ident_t* @{{.+}}, // CHECK: call void [[OFFLOADING_FUN_2:@.+]]( // CHECK: = call {{.*}}i{{.+}} @{{.+}}tmain diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_order_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_order_codegen.cpp index b2ab37f22ec33b79d2ae85d2b951381d417b5661..b99ba9d38a43024b6ea7ad8b324fb53f11c31a9d 100644 --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_order_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_order_codegen.cpp @@ -14,7 +14,7 @@ // CHECK-LABEL: define {{.*}}void @{{.+}}gtid_test void gtid_test() { -// CHECK: call void @__kmpc_push_target_tripcount(%struct.ident_t* @{{.+}}, i64 -1, i64 100) +// CHECK: call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @{{.+}}, i64 -1, i64 100) // CHECK: %0 = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @{{.+}}, i64 -1, i8* @{{.+}}, i32 0, i8** null, i8** null, i64* null, i64* null, i8** null, i8** null, i32 0, i32 0) // CHECK: call void [[TARGET_OUTLINE:@.+]]() // CHECK: ret void diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_codegen.cpp index e6049145702bcb7fbddef57f5563b04a04c8b385..39ccb87462c02b35b4107ebe1d57c6580de60c4c 100644 --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_codegen.cpp @@ -60,7 +60,7 @@ int target_teams_fun(int *g){ // HCK1: [[N_PAR:%.+]] = load{{.+}}, {{.+}} [[N_CAST]], // HCK1: [[TE_PAR:%.+]] = load{{.+}}, {{.+}} [[TE_CAST]], // HCK1: [[TH_PAR:%.+]] = load{{.+}}, {{.+}} [[TH_CAST]], -// HCK1: call void @__kmpc_push_target_tripcount(%struct.ident_t* @{{.+}}, i64 -1, i64 %{{.+}}) +// HCK1: call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @{{.+}}, i64 -1, i64 %{{.+}}) // HCK1: call i32 @__tgt_target_teams_mapper(%struct.ident_t* @{{.+}}, i64 -1, i8* @{{[^,]+}}, i32 5, i8** %{{[^,]+}}, i8** %{{[^,]+}}, // HCK1: call void @[[OFFL1:.+]](i{{32|64}} [[I_PAR]], i{{32|64}} [[N_PAR]], {{.+}}, i{{32|64}} [[TE_PAR]], i{{32|64}} [[TH_PAR]]) diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_if_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_if_codegen.cpp index 8b0eaba07f1cbd92c1f848fffd40769fb20d7b3b..19dc15b94f64b50267087d841532a7a60f37e2f5 100644 --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_if_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_if_codegen.cpp @@ -43,10 +43,10 @@ int Arg; // CHECK-LABEL: define {{.*}}void @{{.+}}gtid_test void gtid_test() { -// CHECK: call void @__kmpc_push_target_tripcount(%struct.ident_t* @{{.+}}, i64 -1, i64 100) +// CHECK: call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @{{.+}}, i64 -1, i64 100) // CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(%struct.ident_t* @{{.+}}, // CHECK: call void [[OFFLOADING_FUN_0:@.+]]( -// CHECK: call void @__kmpc_push_target_tripcount(%struct.ident_t* @{{.+}}, i64 -1, i64 100) +// CHECK: call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @{{.+}}, i64 -1, i64 100) // CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(%struct.ident_t* @{{.+}}, // CHECK: call void [[OFFLOADING_FUN_1:@.+]]( #ifdef OMP5 @@ -110,12 +110,12 @@ int tmain(T Arg) { // CHECK-LABEL: define {{.*}}i{{[0-9]+}} @main() int main() { -// CHECK: call void @__kmpc_push_target_tripcount(%struct.ident_t* @{{.+}}, i64 -1, i64 100) +// CHECK: call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @{{.+}}, i64 -1, i64 100) // CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(%struct.ident_t* @{{.+}}, // CHECK: call void [[OFFLOADING_FUN_0:@.+]]( -// CHECK-NOT: call void @__kmpc_push_target_tripcount(%struct.ident_t* @{{.+}}, i64 -1, i64 100) +// CHECK-NOT: call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @{{.+}}, i64 -1, i64 100) // CHECK: call void [[OFFLOADING_FUN_1:@.+]]( -// CHECK: call void @__kmpc_push_target_tripcount(%struct.ident_t* @{{.+}}, i64 -1, i64 100) +// CHECK: call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @{{.+}}, i64 -1, i64 100) // CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(%struct.ident_t* @{{.+}}, // CHECK: call void [[OFFLOADING_FUN_2:@.+]]( // CHECK: = call {{.*}}i{{.+}} @{{.+}}tmain diff --git a/clang/test/OpenMP/teams_distribute_codegen.cpp b/clang/test/OpenMP/teams_distribute_codegen.cpp index 5bbb100e669ec4e80f0792534b68324a02c63ca3..aab5cced4c70f910696d8fd143620806e5ac5dc3 100644 --- a/clang/test/OpenMP/teams_distribute_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_codegen.cpp @@ -33,7 +33,7 @@ int teams_argument_global(int n) { // CK1: [[TE_PAR:%.+]] = load{{.+}}, {{.+}} [[TE_CAST]], // CK1: [[TH_PAR:%.+]] = load{{.+}}, {{.+}} [[TH_CAST]], - // CK1: call void @__kmpc_push_target_tripcount(%struct.ident_t* @{{.+}}, i64 -1, i64 %{{.+}}) + // CK1: call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @{{.+}}, i64 -1, i64 %{{.+}}) // CK1: call i32 @__tgt_target_teams_mapper(%struct.ident_t* @{{.+}}, i64 -1, i8* @{{[^,]+}}, i32 4, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i8** null, i8** null, i32 {{.+}}, i32 {{.+}}) // CK1: call void @[[OFFL1:.+]](i{{32|64}} [[TE_PAR]], i{{32|64}} [[TH_PAR]], diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_codegen.cpp index b63e5aeddb7a2c0a746b4350ff1bfe8c39c8092b..8fa73e76009bcf1d99396cd662c7b75513448809 100644 --- a/clang/test/OpenMP/teams_distribute_parallel_for_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_codegen.cpp @@ -32,7 +32,7 @@ int teams_argument_global(int n){ // CK1: [[TH_CAST:%.+]] = alloca i{{32|64}}, // CK1: [[TE_PAR:%.+]] = load{{.+}}, {{.+}} [[TE_CAST]], // CK1: [[TH_PAR:%.+]] = load{{.+}}, {{.+}} [[TH_CAST]], - // CK1: call void @__kmpc_push_target_tripcount(%struct.ident_t* @{{.+}}, i64 -1, i64 %{{.+}}) + // CK1: call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @{{.+}}, i64 -1, i64 %{{.+}}) // CK1: call i32 @__tgt_target_teams_mapper(%struct.ident_t* @{{.+}}, i64 -1, i8* @{{[^,]+}}, i32 4, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i8** null, i8** null, i32 {{.+}}, i32 {{.+}}) // CK1: call void @[[OFFL1:.+]](i{{32|64}} [[TE_PAR]], i{{32|64}} [[TH_PAR]], diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_simd_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_simd_codegen.cpp index 3d479c4cd29dfaa4aa559f65652cbf35d1189f96..9b3855c61759ae88aaf0742264c2e7bbe4d683fd 100644 --- a/clang/test/OpenMP/teams_distribute_parallel_for_simd_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_simd_codegen.cpp @@ -33,7 +33,7 @@ int teams_argument_global(int n){ // CK1: [[TE_PAR:%.+]] = load{{.+}}, {{.+}} [[TE_CAST]], // CK1: [[TH_PAR:%.+]] = load{{.+}}, {{.+}} [[TH_CAST]], - // CK1: call void @__kmpc_push_target_tripcount(%struct.ident_t* @{{.+}}, i64 -1, i64 %{{.+}}) + // CK1: call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @{{.+}}, i64 -1, i64 %{{.+}}) // CK1: call i32 @__tgt_target_teams_mapper(%struct.ident_t* @{{.+}}, i64 -1, i8* @{{[^,]+}}, i32 4, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i8** null // CK1: call void @[[OFFL1:.+]](i{{32|64}} [[TE_PAR]], i{{32|64}} [[TH_PAR]], diff --git a/clang/test/OpenMP/teams_distribute_simd_codegen.cpp b/clang/test/OpenMP/teams_distribute_simd_codegen.cpp index fd1214d22ce9e04eec184cabbbd07a7cec615eaa..6e5d06b0c5689ea84a840735d3c7dbdba0913708 100644 --- a/clang/test/OpenMP/teams_distribute_simd_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_simd_codegen.cpp @@ -35,7 +35,7 @@ int teams_argument_global(int n) { // CK1: [[TE_PAR:%.+]] = load{{.+}}, {{.+}} [[TE_CAST]], // CK1: [[TH_PAR:%.+]] = load{{.+}}, {{.+}} [[TH_CAST]], - // CK1: call void @__kmpc_push_target_tripcount(%struct.ident_t* @{{.+}}, i64 -1, i64 %{{.+}}) + // CK1: call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @{{.+}}, i64 -1, i64 %{{.+}}) // CK1: call i32 @__tgt_target_teams_mapper(%struct.ident_t* @{{.+}}, i64 -1, i8* @{{[^,]+}}, i32 5, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i8** null, i8** null, i32 {{.+}}, i32 1) // CK1: call void @[[OFFL1:.+]](i{{32|64}} [[TE_PAR]], i{{32|64}} [[TH_PAR]], diff --git a/llvm/include/llvm-c/Orc.h b/llvm/include/llvm-c/Orc.h index 183107c148a6d2e128d65867425fb97a1461e783..9beef44c89dd5ed2b49fbd320df437e42b6a5f0b 100644 --- a/llvm/include/llvm-c/Orc.h +++ b/llvm/include/llvm-c/Orc.h @@ -339,8 +339,7 @@ LLVMErrorRef LLVMOrcResourceTrackerRemove(LLVMOrcResourceTrackerRef RT); * ownership has not been passed to a JITDylib (e.g. because some error * prevented the client from calling LLVMOrcJITDylibAddGenerator). */ -void LLVMOrcDisposeDefinitionGenerator( - LLVMOrcDefinitionGeneratorRef DG); +void LLVMOrcDisposeDefinitionGenerator(LLVMOrcDefinitionGeneratorRef DG); /** * Dispose of a MaterializationUnit. @@ -388,7 +387,9 @@ LLVMOrcExecutionSessionCreateJITDylib(LLVMOrcExecutionSessionRef ES, * Returns the JITDylib with the given name, or NULL if no such JITDylib * exists. */ -LLVMOrcJITDylibRef LLVMOrcExecutionSessionGetJITDylibByName(const char *Name); +LLVMOrcJITDylibRef +LLVMOrcExecutionSessionGetJITDylibByName(LLVMOrcExecutionSessionRef ES, + const char *Name); /** * Return a reference to a newly created resource tracker associated with JD. diff --git a/llvm/include/llvm/CodeGen/FastISel.h b/llvm/include/llvm/CodeGen/FastISel.h index 81c1d6aad49a1bdb2dcfb8b32dda5f06f04075be..26bf4ab2618ce698bda4a61d8963b98421985035 100644 --- a/llvm/include/llvm/CodeGen/FastISel.h +++ b/llvm/include/llvm/CodeGen/FastISel.h @@ -490,7 +490,10 @@ protected: /// - \c Add has a constant operand. bool canFoldAddIntoGEP(const User *GEP, const Value *Add); - /// Test whether the given value has exactly one use. + /// Test whether the register associated with this value has exactly one use, + /// in which case that single use is killing. Note that multiple IR values + /// may map onto the same register, in which case this is not the same as + /// checking that an IR value has one use. bool hasTrivialKill(const Value *V); /// Create a machine mem operand from the given instruction. diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def index 844046167975960f8902f625cdbaf123d32d71a3..75d360bf4237a4b728863de84acb34fac25cf109 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def +++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def @@ -375,7 +375,7 @@ __OMP_RTL(__kmpc_init_allocator, false, /* omp_allocator_handle_t */ VoidPtr, __OMP_RTL(__kmpc_destroy_allocator, false, Void, /* Int */ Int32, /* omp_allocator_handle_t */ VoidPtr) -__OMP_RTL(__kmpc_push_target_tripcount, false, Void, IdentPtr, Int64, Int64) +__OMP_RTL(__kmpc_push_target_tripcount_mapper, false, Void, IdentPtr, Int64, Int64) __OMP_RTL(__tgt_target_mapper, false, Int32, IdentPtr, Int64, VoidPtr, Int32, VoidPtrPtr, VoidPtrPtr, Int64Ptr, Int64Ptr, VoidPtrPtr, VoidPtrPtr) __OMP_RTL(__tgt_target_nowait_mapper, false, Int32, IdentPtr, Int64, VoidPtr, Int32, @@ -844,7 +844,7 @@ __OMP_RTL_ATTRS(__kmpc_free, AllocAttrs, AttributeSet(), {}) __OMP_RTL_ATTRS(__kmpc_init_allocator, DefaultAttrs, ReturnPtrAttrs, {}) __OMP_RTL_ATTRS(__kmpc_destroy_allocator, AllocAttrs, AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_push_target_tripcount, SetterAttrs, AttributeSet(), {}) +__OMP_RTL_ATTRS(__kmpc_push_target_tripcount_mapper, SetterAttrs, AttributeSet(), {}) __OMP_RTL_ATTRS(__tgt_target_mapper, ForkAttrs, AttributeSet(), {}) __OMP_RTL_ATTRS(__tgt_target_nowait_mapper, ForkAttrs, AttributeSet(), {}) __OMP_RTL_ATTRS(__tgt_target_teams_mapper, ForkAttrs, AttributeSet(), {}) diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp index b97c369b832daf7e6d1fe6c418d726b422acd789..b7883cbc3120fde88139afa32a2457df724ac446 100644 --- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -840,9 +840,8 @@ void IRTranslator::emitSwitchCase(SwitchCG::CaseBlock &CB, // For conditional branch lowering, we might try to do something silly like // emit an G_ICMP to compare an existing G_ICMP i1 result with true. If so, // just re-use the existing condition vreg. - if (CI && CI->getZExtValue() == 1 && - MRI->getType(CondLHS).getSizeInBits() == 1 && - CB.PredInfo.Pred == CmpInst::ICMP_EQ) { + if (MRI->getType(CondLHS).getSizeInBits() == 1 && CI && + CI->getZExtValue() == 1 && CB.PredInfo.Pred == CmpInst::ICMP_EQ) { Cond = CondLHS; } else { Register CondRHS = getOrCreateVReg(*CB.CmpRHS); diff --git a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp index 62f7f3d98ba6d663fea8f20dd19c8895186502ff..0ff77d4ba1abb012df737d0a6f58f6ddffc6ac81 100644 --- a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -261,12 +261,16 @@ bool FastISel::hasTrivialKill(const Value *V) { if (GEP->hasAllZeroIndices() && !hasTrivialKill(GEP->getOperand(0))) return false; + // Casts and extractvalues may be trivially coalesced by fast-isel. + if (I->getOpcode() == Instruction::BitCast || + I->getOpcode() == Instruction::PtrToInt || + I->getOpcode() == Instruction::IntToPtr || + I->getOpcode() == Instruction::ExtractValue) + return false; + // Only instructions with a single use in the same basic block are considered // to have trivial kills. return I->hasOneUse() && - !(I->getOpcode() == Instruction::BitCast || - I->getOpcode() == Instruction::PtrToInt || - I->getOpcode() == Instruction::IntToPtr) && cast<Instruction>(*I->user_begin())->getParent() == I->getParent(); } diff --git a/llvm/lib/ExecutionEngine/Orc/OrcV2CBindings.cpp b/llvm/lib/ExecutionEngine/Orc/OrcV2CBindings.cpp index dfdd2c6c669f980af9172a89f86efd6ebdc4679d..834d4cc8f51455eae814dd0f13871b76d04ee582 100644 --- a/llvm/lib/ExecutionEngine/Orc/OrcV2CBindings.cpp +++ b/llvm/lib/ExecutionEngine/Orc/OrcV2CBindings.cpp @@ -393,7 +393,7 @@ void LLVMOrcDisposeJITTargetMachineBuilder( delete unwrap(JTMB); } -void lLVMOrcDisposeObjectLayer(LLVMOrcObjectLayerRef ObjLayer) { +void LLVMOrcDisposeObjectLayer(LLVMOrcObjectLayerRef ObjLayer) { delete unwrap(ObjLayer); } diff --git a/llvm/lib/Support/Windows/Path.inc b/llvm/lib/Support/Windows/Path.inc index dc9bcf8683810a496b9eaa9d8a34994497e7537c..adcbd1b5f8f31a1d737dbacfd2f86eed3a40d1a9 100644 --- a/llvm/lib/Support/Windows/Path.inc +++ b/llvm/lib/Support/Windows/Path.inc @@ -402,8 +402,22 @@ std::error_code is_local(int FD, bool &Result) { } static std::error_code setDeleteDisposition(HANDLE Handle, bool Delete) { - // First, check if the file is on a network (non-local) drive. If so, don't - // set DeleteFile to true, since it prevents opening the file for writes. + // Clear the FILE_DISPOSITION_INFO flag first, before checking if it's a + // network file. On Windows 7 the function realPathFromHandle() below fails + // if the FILE_DISPOSITION_INFO flag was already set to 'DeleteFile = true' by + // a prior call. + FILE_DISPOSITION_INFO Disposition; + Disposition.DeleteFile = false; + if (!SetFileInformationByHandle(Handle, FileDispositionInfo, &Disposition, + sizeof(Disposition))) + return mapWindowsError(::GetLastError()); + if (!Delete) + return std::error_code(); + + // Check if the file is on a network (non-local) drive. If so, don't + // continue when DeleteFile is true, since it prevents opening the file for + // writes. Note -- this will leak temporary files on disk, but only when the + // target file is on a network drive. SmallVector<wchar_t, 128> FinalPath; if (std::error_code EC = realPathFromHandle(Handle, FinalPath)) return EC; @@ -415,9 +429,9 @@ static std::error_code setDeleteDisposition(HANDLE Handle, bool Delete) { if (!IsLocal) return std::error_code(); - // The file is on a local drive, set the DeleteFile to true. - FILE_DISPOSITION_INFO Disposition; - Disposition.DeleteFile = Delete; + // The file is on a local drive, we can safely set FILE_DISPOSITION_INFO's + // flag. + Disposition.DeleteFile = true; if (!SetFileInformationByHandle(Handle, FileDispositionInfo, &Disposition, sizeof(Disposition))) return mapWindowsError(::GetLastError()); diff --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index 693b0adaede437d4b85270a2594b37a4a962ccbd..2604218da160019aca8dd396c1cf6c4dfe986858 100644 --- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -5896,7 +5896,13 @@ bool PPCDAGToDAGISel::AllUsersSelectZero(SDNode *N) { User->getMachineOpcode() != PPC::SELECT_I8) return false; + SDNode *Op1 = User->getOperand(1).getNode(); SDNode *Op2 = User->getOperand(2).getNode(); + // If we have a degenerate select with two equal operands, swapping will + // not do anything, and we may run into an infinite loop. + if (Op1 == Op2) + return false; + if (!Op2->isMachineOpcode()) return false; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoV.td b/llvm/lib/Target/RISCV/RISCVInstrInfoV.td index 86fbc73d81d51933ba10a68624e51f91c9609d87..b3fc76aee1614ec3a21739d872f48f70227b8239 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoV.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoV.td @@ -504,19 +504,19 @@ def VSOXEI16_V : VIndexedStore<MOPSTIndexedOrder, LSWidth16, "vsoxei16.v">; def VSOXEI32_V : VIndexedStore<MOPSTIndexedOrder, LSWidth32, "vsoxei32.v">; def VSOXEI64_V : VIndexedStore<MOPSTIndexedOrder, LSWidth64, "vsoxei64.v">; -defm VL1R : VWholeLoad<1, "vl1r">; -defm VL2R : VWholeLoad<2, "vl2r">; -defm VL4R : VWholeLoad<4, "vl4r">; -defm VL8R : VWholeLoad<8, "vl8r">; +defm VL1R : VWholeLoad<0, "vl1r">; +defm VL2R : VWholeLoad<1, "vl2r">; +defm VL4R : VWholeLoad<3, "vl4r">; +defm VL8R : VWholeLoad<7, "vl8r">; def : InstAlias<"vl1r.v $vd, (${rs1})", (VL1RE8_V VR:$vd, GPR:$rs1)>; def : InstAlias<"vl2r.v $vd, (${rs1})", (VL2RE8_V VR:$vd, GPR:$rs1)>; def : InstAlias<"vl4r.v $vd, (${rs1})", (VL4RE8_V VR:$vd, GPR:$rs1)>; def : InstAlias<"vl8r.v $vd, (${rs1})", (VL8RE8_V VR:$vd, GPR:$rs1)>; -def VS1R_V : VWholeStore<1, "vs1r.v">; -def VS2R_V : VWholeStore<2, "vs2r.v">; -def VS4R_V : VWholeStore<4, "vs4r.v">; -def VS8R_V : VWholeStore<8, "vs8r.v">; +def VS1R_V : VWholeStore<0, "vs1r.v">; +def VS2R_V : VWholeStore<1, "vs2r.v">; +def VS4R_V : VWholeStore<3, "vs4r.v">; +def VS8R_V : VWholeStore<7, "vs8r.v">; // Vector Single-Width Integer Add and Subtract defm VADD_V : VALU_IV_V_X_I<"vadd", 0b000000>; diff --git a/llvm/lib/Target/X86/X86FastISel.cpp b/llvm/lib/Target/X86/X86FastISel.cpp index caf158102230bbb34bea5c9dde3f72c3bd9a6418..a1a16a19f5e5258f8049755a2a23f2e00ad8f867 100644 --- a/llvm/lib/Target/X86/X86FastISel.cpp +++ b/llvm/lib/Target/X86/X86FastISel.cpp @@ -284,6 +284,14 @@ bool X86FastISel::foldX86XALUIntrinsic(X86::CondCode &CC, const Instruction *I, return false; } + // Make sure no potentially eflags clobbering phi moves can be inserted in + // between. + auto HasPhis = [](const BasicBlock *Succ) { + return !llvm::empty(Succ->phis()); + }; + if (I->isTerminator() && llvm::any_of(successors(I), HasPhis)) + return false; + CC = TmpCC; return true; } diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp index 0b53007bb6dc811bca875ae4a43d232fe2185825..07e68c44416d9aa1e437427525aeba9dd3c70999 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -1270,6 +1270,7 @@ Instruction *InstCombinerImpl::visitZExt(ZExtInst &CI) { ICmpInst *LHS = dyn_cast<ICmpInst>(SrcI->getOperand(0)); ICmpInst *RHS = dyn_cast<ICmpInst>(SrcI->getOperand(1)); if (LHS && RHS && LHS->hasOneUse() && RHS->hasOneUse() && + LHS->getOperand(0)->getType() == RHS->getOperand(0)->getType() && (transformZExtICmp(LHS, CI, false) || transformZExtICmp(RHS, CI, false))) { // zext (or icmp, icmp) -> or (zext icmp), (zext icmp) diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index 7cfe17618cde1808e0f6b1f9411719e4f46f0e56..de9560df9785eaf10b7190a6027fa6e11024f3f5 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -1628,6 +1628,11 @@ static bool canSinkInstructions( I->getType()->isTokenTy()) return false; + // Do not try to sink an instruction in an infinite loop - it can cause + // this algorithm to infinite loop. + if (I->getParent()->getSingleSuccessor() == I->getParent()) + return false; + // Conservatively return false if I is an inline-asm instruction. Sinking // and merging inline-asm instructions can potentially create arguments // that cannot satisfy the inline-asm constraints. @@ -1714,13 +1719,13 @@ static bool canSinkInstructions( return true; } -// Assuming canSinkLastInstruction(Blocks) has returned true, sink the last +// Assuming canSinkInstructions(Blocks) has returned true, sink the last // instruction of every block in Blocks to their common successor, commoning // into one instruction. static bool sinkLastInstruction(ArrayRef<BasicBlock*> Blocks) { auto *BBEnd = Blocks[0]->getTerminator()->getSuccessor(0); - // canSinkLastInstruction returning true guarantees that every block has at + // canSinkInstructions returning true guarantees that every block has at // least one non-terminator instruction. SmallVector<Instruction*,4> Insts; for (auto *BB : Blocks) { @@ -1733,9 +1738,9 @@ static bool sinkLastInstruction(ArrayRef<BasicBlock*> Blocks) { } // The only checking we need to do now is that all users of all instructions - // are the same PHI node. canSinkLastInstruction should have checked this but - // it is slightly over-aggressive - it gets confused by commutative instructions - // so double-check it here. + // are the same PHI node. canSinkInstructions should have checked this but + // it is slightly over-aggressive - it gets confused by commutative + // instructions so double-check it here. Instruction *I0 = Insts.front(); if (!I0->user_empty()) { auto *PNUse = dyn_cast<PHINode>(*I0->user_begin()); @@ -1746,11 +1751,11 @@ static bool sinkLastInstruction(ArrayRef<BasicBlock*> Blocks) { return false; } - // We don't need to do any more checking here; canSinkLastInstruction should + // We don't need to do any more checking here; canSinkInstructions should // have done it all for us. SmallVector<Value*, 4> NewOperands; for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O) { - // This check is different to that in canSinkLastInstruction. There, we + // This check is different to that in canSinkInstructions. There, we // cared about the global view once simplifycfg (and instcombine) have // completed - it takes into account PHIs that become trivially // simplifiable. However here we need a more local view; if an operand diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index d36e078444bc291ffd83175f2cccbf6da5f9ae7c..b456a97aa4ec094f641da99691909fd6e6a1bdc0 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -372,19 +372,11 @@ static Type *getMemInstValueType(Value *I) { /// A helper function that returns true if the given type is irregular. The /// type is irregular if its allocated size doesn't equal the store size of an -/// element of the corresponding vector type at the given vectorization factor. -static bool hasIrregularType(Type *Ty, const DataLayout &DL, ElementCount VF) { - // Determine if an array of VF elements of type Ty is "bitcast compatible" - // with a <VF x Ty> vector. - if (VF.isVector()) { - auto *VectorTy = VectorType::get(Ty, VF); - return TypeSize::get(VF.getKnownMinValue() * - DL.getTypeAllocSize(Ty).getFixedValue(), - VF.isScalable()) != DL.getTypeStoreSize(VectorTy); - } - - // If the vectorization factor is one, we just check if an array of type Ty - // requires padding between elements. +/// element of the corresponding vector type. +static bool hasIrregularType(Type *Ty, const DataLayout &DL) { + // Determine if an array of N elements of type Ty is "bitcast compatible" + // with a <N x Ty> vector. + // This is only true if there is no padding between the array elements. return DL.getTypeAllocSizeInBits(Ty) != DL.getTypeSizeInBits(Ty); } @@ -5212,7 +5204,7 @@ bool LoopVectorizationCostModel::interleavedAccessCanBeWidened( // requires padding and will be scalarized. auto &DL = I->getModule()->getDataLayout(); auto *ScalarTy = getMemInstValueType(I); - if (hasIrregularType(ScalarTy, DL, VF)) + if (hasIrregularType(ScalarTy, DL)) return false; // Check if masking is required. @@ -5259,7 +5251,7 @@ bool LoopVectorizationCostModel::memoryInstructionCanBeWidened( // requires padding and will be scalarized. auto &DL = I->getModule()->getDataLayout(); auto *ScalarTy = LI ? LI->getType() : SI->getValueOperand()->getType(); - if (hasIrregularType(ScalarTy, DL, VF)) + if (hasIrregularType(ScalarTy, DL)) return false; return true; diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/huge-switch.ll b/llvm/test/CodeGen/AArch64/GlobalISel/huge-switch.ll new file mode 100644 index 0000000000000000000000000000000000000000..8742a848c4af1066f6b40d3073c75d1c2441eaea --- /dev/null +++ b/llvm/test/CodeGen/AArch64/GlobalISel/huge-switch.ll @@ -0,0 +1,22 @@ +; RUN: llc -mtriple=arm64-apple-ios %s -o - -O0 -global-isel=1 | FileCheck %s +define void @foo(i512 %in) { +; CHECK-LABEL: foo: +; CHECK: cbz + switch i512 %in, label %default [ + i512 3923188584616675477397368389504791510063972152790021570560, label %l1 + i512 3923188584616675477397368389504791510063972152790021570561, label %l2 + i512 3923188584616675477397368389504791510063972152790021570562, label %l3 + ] + +default: + ret void + +l1: + ret void + +l2: + ret void + +l3: + ret void +} diff --git a/llvm/test/CodeGen/PowerPC/pr49509.ll b/llvm/test/CodeGen/PowerPC/pr49509.ll new file mode 100644 index 0000000000000000000000000000000000000000..f13733c1804757978152a5ec5941360e56cd0147 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/pr49509.ll @@ -0,0 +1,81 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=powerpc-unknown-linux-gnu < %s | FileCheck %s + +target datalayout = "E-m:e-p:32:32-i64:64-n32" + +define void @test() { +; CHECK-LABEL: test: +; CHECK: # %bb.0: # %bb +; CHECK-NEXT: bc 12, 20, .LBB0_2 +; CHECK-NEXT: # %bb.1: # %bb2 +; CHECK-NEXT: li 3, 0 +; CHECK-NEXT: stw 3, 0(3) +; CHECK-NEXT: lis 3, 256 +; CHECK-NEXT: stw 3, 0(3) +; CHECK-NEXT: blr +; CHECK-NEXT: .LBB0_2: # %bb1 +; CHECK-NEXT: bclr 4, 20, 0 +; CHECK-NEXT: # %bb.3: # %bb66 +; CHECK-NEXT: lwz 4, 12(0) +; CHECK-NEXT: lwz 5, 8(0) +; CHECK-NEXT: lwz 6, 0(0) +; CHECK-NEXT: lwz 7, 4(0) +; CHECK-NEXT: lbz 3, 0(3) +; CHECK-NEXT: and 5, 5, 6 +; CHECK-NEXT: and 4, 4, 7 +; CHECK-NEXT: and 4, 4, 5 +; CHECK-NEXT: cmpwi 3, 0 +; CHECK-NEXT: lis 3, 256 +; CHECK-NEXT: lis 7, 512 +; CHECK-NEXT: bc 12, 2, .LBB0_4 +; CHECK-NEXT: b .LBB0_5 +; CHECK-NEXT: .LBB0_4: # %bb66 +; CHECK-NEXT: li 3, 0 +; CHECK-NEXT: .LBB0_5: # %bb66 +; CHECK-NEXT: cmpwi 1, 4, -1 +; CHECK-NEXT: cmpwi 5, 4, -1 +; CHECK-NEXT: li 6, 0 +; CHECK-NEXT: bc 12, 6, .LBB0_6 +; CHECK-NEXT: b .LBB0_7 +; CHECK-NEXT: .LBB0_6: # %bb66 +; CHECK-NEXT: addi 3, 7, 0 +; CHECK-NEXT: .LBB0_7: # %bb66 +; CHECK-NEXT: cror 20, 22, 2 +; CHECK-NEXT: stw 3, 0(3) +; CHECK-NEXT: bc 12, 20, .LBB0_9 +; CHECK-NEXT: # %bb.8: # %bb66 +; CHECK-NEXT: ori 3, 6, 0 +; CHECK-NEXT: b .LBB0_10 +; CHECK-NEXT: .LBB0_9: # %bb66 +; CHECK-NEXT: li 3, 0 +; CHECK-NEXT: .LBB0_10: # %bb66 +; CHECK-NEXT: stw 3, 0(3) +; CHECK-NEXT: blr +bb: + br i1 undef, label %bb2, label %bb1 + +bb2: ; preds = %bb + %i = select i1 undef, i64 0, i64 72057594037927936 + store i64 %i, i64* undef, align 8 + ret void + +bb1: ; preds = %bb + %i50 = load i8, i8* undef, align 8 + %i52 = load i128, i128* null, align 8 + %i62 = icmp eq i8 %i50, 0 + br i1 undef, label %bb66, label %bb64 + +bb64: ; preds = %bb63 + ret void + +bb66: ; preds = %bb63 + %i67 = lshr i128 -1, 0 + %i68 = xor i128 %i52, -1 + %i69 = add i128 0, %i68 + %i70 = and i128 %i67, %i69 + %i71 = icmp eq i128 %i70, 0 + %i74 = select i1 %i62, i64 0, i64 72057594037927936 + %i75 = select i1 %i71, i64 144115188075855872, i64 %i74 + store i64 %i75, i64* undef, align 8 + ret void +} diff --git a/llvm/test/CodeGen/X86/pr49467.ll b/llvm/test/CodeGen/X86/pr49467.ll new file mode 100644 index 0000000000000000000000000000000000000000..9b350255206661e948b920929adf446927fa101d --- /dev/null +++ b/llvm/test/CodeGen/X86/pr49467.ll @@ -0,0 +1,27 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -O0 -fast-isel -verify-machineinstrs -mtriple=x86_64 < %s | FileCheck %s + +declare { i8*, i64 } @get() + +declare void @use(i8*, i64) + +define void @test(i64* %p) nounwind { +; CHECK-LABEL: test: +; CHECK: # %bb.0: +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: movq %rdi, (%rsp) # 8-byte Spill +; CHECK-NEXT: callq get@PLT +; CHECK-NEXT: movq (%rsp), %rdi # 8-byte Reload +; CHECK-NEXT: movq %rdx, %rsi +; CHECK-NEXT: movq %rsi, (%rdi) +; CHECK-NEXT: # implicit-def: $rdi +; CHECK-NEXT: callq use@PLT +; CHECK-NEXT: popq %rax +; CHECK-NEXT: retq + %struct = call { i8*, i64 } @get() + %struct.1 = extractvalue { i8*, i64 } %struct, 1 + store i64 %struct.1, i64* %p, align 8 + %struct.2 = extractvalue { i8*, i64 } %struct, 1 + call void @use(i8* undef, i64 %struct.2) + ret void +} diff --git a/llvm/test/CodeGen/X86/pr49587.ll b/llvm/test/CodeGen/X86/pr49587.ll new file mode 100644 index 0000000000000000000000000000000000000000..7dc54a526608cf13125cede2470ee47c68607306 --- /dev/null +++ b/llvm/test/CodeGen/X86/pr49587.ll @@ -0,0 +1,33 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -O0 -fast-isel -mtriple=x86_64-- < %s | FileCheck %s + +define i32 @test(i64 %arg) nounwind { +; CHECK-LABEL: test: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: subq $1, %rdi +; CHECK-NEXT: setb %cl +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: testb $1, %cl +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: jne .LBB0_2 +; CHECK-NEXT: # %bb.1: # %no_overflow +; CHECK-NEXT: movl $1, %eax +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: jmp .LBB0_2 +; CHECK-NEXT: .LBB0_2: # %merge +; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; CHECK-NEXT: retq +entry: + %usubo = tail call { i64, i1 } @llvm.usub.with.overflow.i64(i64 %arg, i64 1) + %overflow = extractvalue { i64, i1 } %usubo, 1 + br i1 %overflow, label %merge, label %no_overflow + +no_overflow: + br label %merge + +merge: + %phi = phi i32 [ 1, %no_overflow ], [ 0, %entry ] + ret i32 %phi +} + +declare { i64, i1 } @llvm.usub.with.overflow.i64(i64, i64) diff --git a/llvm/test/MC/RISCV/rvv/aliases.s b/llvm/test/MC/RISCV/rvv/aliases.s index 2e5120c91e4515103945dca7bcfdaf0dc92daf6d..ebe9e79399a697ea205f421302bb3d7865e0b2e7 100644 --- a/llvm/test/MC/RISCV/rvv/aliases.s +++ b/llvm/test/MC/RISCV/rvv/aliases.s @@ -54,17 +54,17 @@ vmset.m v0 # ALIAS: vmnot.m v0, v1 # encoding: [0x57,0xa0,0x10,0x76] # NO-ALIAS: vmnand.mm v0, v1, v1 # encoding: [0x57,0xa0,0x10,0x76] vmnot.m v0, v1 -# ALIAS: vl1r.v v0, (a0) # encoding: [0x07,0x00,0x85,0x22] -# NO-ALIAS: vl1re8.v v0, (a0) # encoding: [0x07,0x00,0x85,0x22] +# ALIAS: vl1r.v v0, (a0) # encoding: [0x07,0x00,0x85,0x02] +# NO-ALIAS: vl1re8.v v0, (a0) # encoding: [0x07,0x00,0x85,0x02] vl1r.v v0, (a0) -# ALIAS: vl2r.v v0, (a0) # encoding: [0x07,0x00,0x85,0x42] -# NO-ALIAS: vl2re8.v v0, (a0) # encoding: [0x07,0x00,0x85,0x42] +# ALIAS: vl2r.v v0, (a0) # encoding: [0x07,0x00,0x85,0x22] +# NO-ALIAS: vl2re8.v v0, (a0) # encoding: [0x07,0x00,0x85,0x22] vl2r.v v0, (a0) -# ALIAS: vl4r.v v0, (a0) # encoding: [0x07,0x00,0x85,0x82] -# NO-ALIAS: vl4re8.v v0, (a0) # encoding: [0x07,0x00,0x85,0x82] +# ALIAS: vl4r.v v0, (a0) # encoding: [0x07,0x00,0x85,0x62] +# NO-ALIAS: vl4re8.v v0, (a0) # encoding: [0x07,0x00,0x85,0x62] vl4r.v v0, (a0) -# ALIAS: vl8r.v v0, (a0) # encoding: [0x07,0x00,0x85,0x02] -# NO-ALIAS: vl8re8.v v0, (a0) # encoding: [0x07,0x00,0x85,0x02] +# ALIAS: vl8r.v v0, (a0) # encoding: [0x07,0x00,0x85,0xe2] +# NO-ALIAS: vl8re8.v v0, (a0) # encoding: [0x07,0x00,0x85,0xe2] vl8r.v v0, (a0) # ALIAS: vneg.v v2, v1, v0.t # encoding: [0x57,0x41,0x10,0x0c] # NO-ALIAS: vrsub.vx v2, v1, zero, v0.t # encoding: [0x57,0x41,0x10,0x0c] diff --git a/llvm/test/MC/RISCV/rvv/load.s b/llvm/test/MC/RISCV/rvv/load.s index 3d0dbb15c36ec52f1c75763e883e78d4a59ade33..45a3881cb60d1b952372776c4e3c4b03a75a8f93 100644 --- a/llvm/test/MC/RISCV/rvv/load.s +++ b/llvm/test/MC/RISCV/rvv/load.s @@ -256,96 +256,96 @@ vloxei64.v v8, (a0), v4 vl1re8.v v8, (a0) # CHECK-INST: vl1re8.v v8, (a0) -# CHECK-ENCODING: [0x07,0x04,0x85,0x22] +# CHECK-ENCODING: [0x07,0x04,0x85,0x02] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) -# CHECK-UNKNOWN: 07 04 85 22 <unknown> +# CHECK-UNKNOWN: 07 04 85 02 <unknown> vl1re16.v v8, (a0) # CHECK-INST: vl1re16.v v8, (a0) -# CHECK-ENCODING: [0x07,0x54,0x85,0x22] +# CHECK-ENCODING: [0x07,0x54,0x85,0x02] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) -# CHECK-UNKNOWN: 07 54 85 22 <unknown> +# CHECK-UNKNOWN: 07 54 85 02 <unknown> vl1re32.v v8, (a0) # CHECK-INST: vl1re32.v v8, (a0) -# CHECK-ENCODING: [0x07,0x64,0x85,0x22] +# CHECK-ENCODING: [0x07,0x64,0x85,0x02] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) -# CHECK-UNKNOWN: 07 64 85 22 <unknown> +# CHECK-UNKNOWN: 07 64 85 02 <unknown> vl1re64.v v8, (a0) # CHECK-INST: vl1re64.v v8, (a0) -# CHECK-ENCODING: [0x07,0x74,0x85,0x22] +# CHECK-ENCODING: [0x07,0x74,0x85,0x02] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) -# CHECK-UNKNOWN: 07 74 85 22 <unknown> +# CHECK-UNKNOWN: 07 74 85 02 <unknown> vl2re8.v v8, (a0) # CHECK-INST: vl2re8.v v8, (a0) -# CHECK-ENCODING: [0x07,0x04,0x85,0x42] +# CHECK-ENCODING: [0x07,0x04,0x85,0x22] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) -# CHECK-UNKNOWN: 07 04 85 42 <unknown> +# CHECK-UNKNOWN: 07 04 85 22 <unknown> vl2re16.v v8, (a0) # CHECK-INST: vl2re16.v v8, (a0) -# CHECK-ENCODING: [0x07,0x54,0x85,0x42] +# CHECK-ENCODING: [0x07,0x54,0x85,0x22] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) -# CHECK-UNKNOWN: 07 54 85 42 <unknown> +# CHECK-UNKNOWN: 07 54 85 22 <unknown> vl2re32.v v8, (a0) # CHECK-INST: vl2re32.v v8, (a0) -# CHECK-ENCODING: [0x07,0x64,0x85,0x42] +# CHECK-ENCODING: [0x07,0x64,0x85,0x22] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) -# CHECK-UNKNOWN: 07 64 85 42 <unknown> +# CHECK-UNKNOWN: 07 64 85 22 <unknown> vl2re64.v v8, (a0) # CHECK-INST: vl2re64.v v8, (a0) -# CHECK-ENCODING: [0x07,0x74,0x85,0x42] +# CHECK-ENCODING: [0x07,0x74,0x85,0x22] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) -# CHECK-UNKNOWN: 07 74 85 42 <unknown> +# CHECK-UNKNOWN: 07 74 85 22 <unknown> vl4re8.v v8, (a0) # CHECK-INST: vl4re8.v v8, (a0) -# CHECK-ENCODING: [0x07,0x04,0x85,0x82] +# CHECK-ENCODING: [0x07,0x04,0x85,0x62] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) -# CHECK-UNKNOWN: 07 04 85 82 <unknown> +# CHECK-UNKNOWN: 07 04 85 62 <unknown> vl4re16.v v8, (a0) # CHECK-INST: vl4re16.v v8, (a0) -# CHECK-ENCODING: [0x07,0x54,0x85,0x82] +# CHECK-ENCODING: [0x07,0x54,0x85,0x62] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) -# CHECK-UNKNOWN: 07 54 85 82 <unknown> +# CHECK-UNKNOWN: 07 54 85 62 <unknown> vl4re32.v v8, (a0) # CHECK-INST: vl4re32.v v8, (a0) -# CHECK-ENCODING: [0x07,0x64,0x85,0x82] +# CHECK-ENCODING: [0x07,0x64,0x85,0x62] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) -# CHECK-UNKNOWN: 07 64 85 82 <unknown> +# CHECK-UNKNOWN: 07 64 85 62 <unknown> vl4re64.v v8, (a0) # CHECK-INST: vl4re64.v v8, (a0) -# CHECK-ENCODING: [0x07,0x74,0x85,0x82] +# CHECK-ENCODING: [0x07,0x74,0x85,0x62] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) -# CHECK-UNKNOWN: 07 74 85 82 <unknown> +# CHECK-UNKNOWN: 07 74 85 62 <unknown> vl8re8.v v8, (a0) # CHECK-INST: vl8re8.v v8, (a0) -# CHECK-ENCODING: [0x07,0x04,0x85,0x02] +# CHECK-ENCODING: [0x07,0x04,0x85,0xe2] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) -# CHECK-UNKNOWN: 07 04 85 02 <unknown> +# CHECK-UNKNOWN: 07 04 85 e2 <unknown> vl8re16.v v8, (a0) # CHECK-INST: vl8re16.v v8, (a0) -# CHECK-ENCODING: [0x07,0x54,0x85,0x02] +# CHECK-ENCODING: [0x07,0x54,0x85,0xe2] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) -# CHECK-UNKNOWN: 07 54 85 02 <unknown> +# CHECK-UNKNOWN: 07 54 85 e2 <unknown> vl8re32.v v8, (a0) # CHECK-INST: vl8re32.v v8, (a0) -# CHECK-ENCODING: [0x07,0x64,0x85,0x02] +# CHECK-ENCODING: [0x07,0x64,0x85,0xe2] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) -# CHECK-UNKNOWN: 07 64 85 02 <unknown> +# CHECK-UNKNOWN: 07 64 85 e2 <unknown> vl8re64.v v8, (a0) # CHECK-INST: vl8re64.v v8, (a0) -# CHECK-ENCODING: [0x07,0x74,0x85,0x02] +# CHECK-ENCODING: [0x07,0x74,0x85,0xe2] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) -# CHECK-UNKNOWN: 07 74 85 02 <unknown> +# CHECK-UNKNOWN: 07 74 85 e2 <unknown> diff --git a/llvm/test/MC/RISCV/rvv/store.s b/llvm/test/MC/RISCV/rvv/store.s index e4795aa1c2c9815fde060f9c6f56c058c31e2bcf..b5a75ac2d0087a888f2be2878bb7645dbedb0887 100644 --- a/llvm/test/MC/RISCV/rvv/store.s +++ b/llvm/test/MC/RISCV/rvv/store.s @@ -208,24 +208,24 @@ vsoxei64.v v24, (a0), v4 vs1r.v v24, (a0) # CHECK-INST: vs1r.v v24, (a0) -# CHECK-ENCODING: [0x27,0x0c,0x85,0x22] +# CHECK-ENCODING: [0x27,0x0c,0x85,0x02] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) -# CHECK-UNKNOWN: 27 0c 85 22 <unknown> +# CHECK-UNKNOWN: 27 0c 85 02 <unknown> vs2r.v v24, (a0) # CHECK-INST: vs2r.v v24, (a0) -# CHECK-ENCODING: [0x27,0x0c,0x85,0x42] +# CHECK-ENCODING: [0x27,0x0c,0x85,0x22] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) -# CHECK-UNKNOWN: 27 0c 85 42 <unknown> +# CHECK-UNKNOWN: 27 0c 85 22 <unknown> vs4r.v v24, (a0) # CHECK-INST: vs4r.v v24, (a0) -# CHECK-ENCODING: [0x27,0x0c,0x85,0x82] +# CHECK-ENCODING: [0x27,0x0c,0x85,0x62] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) -# CHECK-UNKNOWN: 27 0c 85 82 <unknown> +# CHECK-UNKNOWN: 27 0c 85 62 <unknown> vs8r.v v24, (a0) # CHECK-INST: vs8r.v v24, (a0) -# CHECK-ENCODING: [0x27,0x0c,0x85,0x02] +# CHECK-ENCODING: [0x27,0x0c,0x85,0xe2] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) -# CHECK-UNKNOWN: 27 0c 85 02 <unknown> +# CHECK-UNKNOWN: 27 0c 85 e2 <unknown> diff --git a/llvm/test/Transforms/InstCombine/zext-or-icmp.ll b/llvm/test/Transforms/InstCombine/zext-or-icmp.ll index a77aa7ac7ebd814a0872cdc06d3fd6169f66302d..5ae3d8ea0dba00761e7e43442f817edb37eee1de 100644 --- a/llvm/test/Transforms/InstCombine/zext-or-icmp.ll +++ b/llvm/test/Transforms/InstCombine/zext-or-icmp.ll @@ -106,3 +106,69 @@ block2: %conv2 = zext i1 %cmp1 to i32 ret i32 %conv2 } + +; This should not end with more instructions than it started from. + +define i32 @PR49475(i32 %x, i16 %y) { +; CHECK-LABEL: @PR49475( +; CHECK-NEXT: [[M:%.*]] = and i16 [[Y:%.*]], 1 +; CHECK-NEXT: [[B1:%.*]] = icmp eq i32 [[X:%.*]], 0 +; CHECK-NEXT: [[B2:%.*]] = icmp eq i16 [[M]], 0 +; CHECK-NEXT: [[T1:%.*]] = or i1 [[B1]], [[B2]] +; CHECK-NEXT: [[Z:%.*]] = zext i1 [[T1]] to i32 +; CHECK-NEXT: ret i32 [[Z]] +; + %m = and i16 %y, 1 + %b1 = icmp eq i32 %x, 0 + %b2 = icmp eq i16 %m, 0 + %t1 = or i1 %b1, %b2 + %z = zext i1 %t1 to i32 + ret i32 %z +} + +; This would infinite-loop. + +define i8 @PR49475_infloop(i32 %t0, i16 %insert, i64 %e, i8 %i162) { +; CHECK-LABEL: @PR49475_infloop( +; CHECK-NEXT: [[B:%.*]] = icmp eq i32 [[T0:%.*]], 0 +; CHECK-NEXT: [[B2:%.*]] = icmp eq i16 [[INSERT:%.*]], 0 +; CHECK-NEXT: [[T1:%.*]] = or i1 [[B]], [[B2]] +; CHECK-NEXT: [[EXT:%.*]] = zext i1 [[T1]] to i32 +; CHECK-NEXT: [[AND:%.*]] = and i32 [[EXT]], [[T0]] +; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[AND]], 140 +; CHECK-NEXT: [[XOR1:%.*]] = zext i32 [[TMP1]] to i64 +; CHECK-NEXT: [[CONV16:%.*]] = sext i8 [[I162:%.*]] to i64 +; CHECK-NEXT: [[SUB17:%.*]] = sub i64 [[CONV16]], [[E:%.*]] +; CHECK-NEXT: [[SEXT:%.*]] = shl i64 [[SUB17]], 32 +; CHECK-NEXT: [[CONV18:%.*]] = ashr exact i64 [[SEXT]], 32 +; CHECK-NEXT: [[CMP:%.*]] = icmp sle i64 [[CONV18]], [[XOR1]] +; CHECK-NEXT: [[CONV19:%.*]] = zext i1 [[CMP]] to i16 +; CHECK-NEXT: [[OR21:%.*]] = or i16 [[CONV19]], [[INSERT]] +; CHECK-NEXT: [[TRUNC44:%.*]] = trunc i16 [[OR21]] to i8 +; CHECK-NEXT: [[INC:%.*]] = or i8 [[TRUNC44]], [[I162]] +; CHECK-NEXT: [[TOBOOL23_NOT:%.*]] = icmp eq i16 [[OR21]], 0 +; CHECK-NEXT: call void @llvm.assume(i1 [[TOBOOL23_NOT]]) +; CHECK-NEXT: ret i8 [[INC]] +; + %b = icmp eq i32 %t0, 0 + %b2 = icmp eq i16 %insert, 0 + %t1 = or i1 %b, %b2 + %ext = zext i1 %t1 to i32 + %and = and i32 %t0, %ext + %conv13 = zext i32 %and to i64 + %xor = xor i64 %conv13, 140 + %conv16 = sext i8 %i162 to i64 + %sub17 = sub i64 %conv16, %e + %sext = shl i64 %sub17, 32 + %conv18 = ashr exact i64 %sext, 32 + %cmp = icmp sge i64 %xor, %conv18 + %conv19 = zext i1 %cmp to i16 + %or21 = or i16 %insert, %conv19 + %trunc44 = trunc i16 %or21 to i8 + %inc = add i8 %i162, %trunc44 + %tobool23.not = icmp eq i16 %or21, 0 + call void @llvm.assume(i1 %tobool23.not) + ret i8 %inc +} + +declare void @llvm.assume(i1 noundef) diff --git a/llvm/test/Transforms/LoopVectorize/irregular_type.ll b/llvm/test/Transforms/LoopVectorize/irregular_type.ll new file mode 100644 index 0000000000000000000000000000000000000000..167a1a101e6f96a2c25ea291b2a33443f3beec8b --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/irregular_type.ll @@ -0,0 +1,27 @@ +; RUN: opt %s -loop-vectorize -force-vector-width=4 -S | FileCheck %s + +; Ensure the array loads/stores are not optimized into vector operations when +; the element type has padding bits. + +; CHECK: foo +; CHECK: vector.body +; CHECK-NOT: load <4 x i7> +; CHECK-NOT: store <4 x i7> +; CHECK: for.body +define void @foo(i7* %a, i64 %n) { +entry: + br label %for.body + +for.body: + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %arrayidx = getelementptr inbounds i7, i7* %a, i64 %indvars.iv + %0 = load i7, i7* %arrayidx, align 1 + %sub = add nuw nsw i7 %0, 0 + store i7 %sub, i7* %arrayidx, align 1 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %cmp = icmp eq i64 %indvars.iv.next, %n + br i1 %cmp, label %for.exit, label %for.body + +for.exit: + ret void +} diff --git a/llvm/test/Transforms/OpenMP/add_attributes.ll b/llvm/test/Transforms/OpenMP/add_attributes.ll index b294542667bdb1dfd4e4dad7142cfde2f6737233..8476f42dd5297e222ebd9be92c40336eaca6ea76 100644 --- a/llvm/test/Transforms/OpenMP/add_attributes.ll +++ b/llvm/test/Transforms/OpenMP/add_attributes.ll @@ -627,7 +627,7 @@ declare i8* @__kmpc_init_allocator(i32, i8*, i32, i8*) declare void @__kmpc_destroy_allocator(i32, i8*) -declare void @__kmpc_push_target_tripcount(%struct.ident_t*, i64, i64) +declare void @__kmpc_push_target_tripcount_mapper(%struct.ident_t*, i64, i64) declare i32 @__kmpc_warp_active_thread_mask() @@ -1144,7 +1144,7 @@ declare void @__kmpc_proxy_task_completed_ooo(i8*) ; CHECK-NEXT: declare void @__kmpc_destroy_allocator(i32, i8*) ; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_push_target_tripcount(%struct.ident_t*, i64, i64) +; CHECK-NEXT: declare void @__kmpc_push_target_tripcount_mapper(%struct.ident_t*, i64, i64) ; CHECK: ; Function Attrs: convergent nounwind ; CHECK-NEXT: declare i32 @__kmpc_warp_active_thread_mask() @@ -1669,7 +1669,7 @@ declare void @__kmpc_proxy_task_completed_ooo(i8*) ; OPTIMISTIC-NEXT: declare void @__kmpc_destroy_allocator(i32, i8*) ; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind willreturn writeonly -; OPTIMISTIC-NEXT: declare void @__kmpc_push_target_tripcount(%struct.ident_t*, i64, i64) +; OPTIMISTIC-NEXT: declare void @__kmpc_push_target_tripcount_mapper(%struct.ident_t*, i64, i64) ; OPTIMISTIC: ; Function Attrs: convergent nounwind ; OPTIMISTIC-NEXT: declare i32 @__kmpc_warp_active_thread_mask() diff --git a/llvm/test/Transforms/SimplifyCFG/sink-inf-loop.ll b/llvm/test/Transforms/SimplifyCFG/sink-inf-loop.ll new file mode 100644 index 0000000000000000000000000000000000000000..37399367efce7685117d107aa036377df1af34fd --- /dev/null +++ b/llvm/test/Transforms/SimplifyCFG/sink-inf-loop.ll @@ -0,0 +1,49 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt %s -simplifycfg -simplifycfg-require-and-preserve-domtree=1 -keep-loops=false -sink-common-insts=true -S | FileCheck %s + +; This would infinite-loop because we allowed code sinking to examine an infinite-loop block (%j). + +define void @PR49541(i32* %t1, i32 %a, i1 %bool) { +; CHECK-LABEL: @PR49541( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[I:%.*]] +; CHECK: j: +; CHECK-NEXT: [[T3:%.*]] = phi i32 [ [[B:%.*]], [[J:%.*]] ], [ [[A:%.*]], [[COND_TRUE:%.*]] ], [ [[A]], [[COND_FALSE:%.*]] ] +; CHECK-NEXT: [[T2:%.*]] = phi i32 [ [[T2]], [[J]] ], [ [[PRE2:%.*]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ] +; CHECK-NEXT: [[B]] = load i32, i32* [[T1:%.*]], align 4 +; CHECK-NEXT: br label [[J]] +; CHECK: i: +; CHECK-NEXT: [[G_1:%.*]] = phi i16 [ undef, [[ENTRY:%.*]] ], [ [[G_1]], [[COND_FALSE]] ] +; CHECK-NEXT: br i1 [[BOOL:%.*]], label [[COND_FALSE]], label [[COND_TRUE]] +; CHECK: cond.true: +; CHECK-NEXT: [[TOBOOL9_NOT:%.*]] = icmp eq i16 [[G_1]], 0 +; CHECK-NEXT: [[PRE2]] = load i32, i32* [[T1]], align 4 +; CHECK-NEXT: br label [[J]] +; CHECK: cond.false: +; CHECK-NEXT: [[T5:%.*]] = load i32, i32* [[T1]], align 4 +; CHECK-NEXT: [[B2:%.*]] = icmp eq i32 [[T5]], 0 +; CHECK-NEXT: br i1 [[B2]], label [[J]], label [[I]] +; +entry: + br label %i + +j: + %t3 = phi i32 [ %b, %j ], [ %a, %cond.true ], [ %a, %cond.false ] + %t2 = phi i32 [ %t2, %j ], [ %pre2, %cond.true ], [ 0, %cond.false ] + %b = load i32, i32* %t1, align 4 + br label %j + +i: + %g.1 = phi i16 [ undef, %entry ], [ %g.1, %cond.false ] + br i1 %bool, label %cond.false, label %cond.true + +cond.true: + %tobool9.not = icmp eq i16 %g.1, 0 + %pre2 = load i32, i32* %t1, align 4 + br label %j + +cond.false: + %t5 = load i32, i32* %t1, align 4 + %b2 = icmp eq i32 %t5, 0 + br i1 %b2, label %j, label %i +} diff --git a/openmp/libomptarget/include/omptarget.h b/openmp/libomptarget/include/omptarget.h index 46bb8206efa1934bc5b70bd32d799a47d9dc410e..36c25c33798a66578e98d157c4a78083cb2bace2 100644 --- a/openmp/libomptarget/include/omptarget.h +++ b/openmp/libomptarget/include/omptarget.h @@ -283,8 +283,10 @@ int __tgt_target_teams_nowait_mapper( int32_t thread_limit, int32_t depNum, void *depList, int32_t noAliasDepNum, void *noAliasDepList); -void __kmpc_push_target_tripcount(ident_t *loc, int64_t device_id, - uint64_t loop_tripcount); +void __kmpc_push_target_tripcount(int64_t device_id, uint64_t loop_tripcount); + +void __kmpc_push_target_tripcount_mapper(ident_t *loc, int64_t device_id, + uint64_t loop_tripcount); #ifdef __cplusplus } diff --git a/openmp/libomptarget/src/exports b/openmp/libomptarget/src/exports index 5e09a088533da241b59370e6115d0e65393ffb4a..b7fc1c8c3c864d94b63c5a9be818ab3446224d25 100644 --- a/openmp/libomptarget/src/exports +++ b/openmp/libomptarget/src/exports @@ -25,6 +25,8 @@ VERS1.0 { __tgt_target_teams_nowait_mapper; __tgt_mapper_num_components; __tgt_push_mapper_component; + __kmpc_push_target_tripcount; + __kmpc_push_target_tripcount_mapper; omp_get_num_devices; omp_get_initial_device; omp_target_alloc; @@ -34,7 +36,6 @@ VERS1.0 { omp_target_memcpy_rect; omp_target_associate_ptr; omp_target_disassociate_ptr; - __kmpc_push_target_tripcount; local: *; }; diff --git a/openmp/libomptarget/src/interface.cpp b/openmp/libomptarget/src/interface.cpp index 01f3715d6bcc8e855f25625425f0ad6d6a953038..b97676a6981b5f2308a8be6e151eedaca6f483a4 100644 --- a/openmp/libomptarget/src/interface.cpp +++ b/openmp/libomptarget/src/interface.cpp @@ -514,8 +514,13 @@ EXTERN void __tgt_push_mapper_component(void *rt_mapper_handle, void *base, MapComponentInfoTy(base, begin, size, type, name)); } -EXTERN void __kmpc_push_target_tripcount(ident_t *loc, int64_t device_id, +EXTERN void __kmpc_push_target_tripcount(int64_t device_id, uint64_t loop_tripcount) { + __kmpc_push_target_tripcount_mapper(nullptr, device_id, loop_tripcount); +} + +EXTERN void __kmpc_push_target_tripcount_mapper(ident_t *loc, int64_t device_id, + uint64_t loop_tripcount) { TIMESCOPE_WITH_IDENT(loc); if (IsOffloadDisabled()) return; diff --git a/openmp/libomptarget/src/omptarget.cpp b/openmp/libomptarget/src/omptarget.cpp index 37150aae2fe61a745170cf1609a18f231dc921b9..af6f7d09a4a2d250f077910b3388d58522845afc 100644 --- a/openmp/libomptarget/src/omptarget.cpp +++ b/openmp/libomptarget/src/omptarget.cpp @@ -900,8 +900,8 @@ TableMap *getTableMap(void *HostPtr) { /// Get loop trip count /// FIXME: This function will not work right if calling -/// __kmpc_push_target_tripcount in one thread but doing offloading in another -/// thread, which might occur when we call task yield. +/// __kmpc_push_target_tripcount_mapper in one thread but doing offloading in +/// another thread, which might occur when we call task yield. uint64_t getLoopTripCount(int64_t DeviceId) { DeviceTy &Device = PM->Devices[DeviceId]; uint64_t LoopTripCount = 0; diff --git a/openmp/runtime/src/kmp_runtime.cpp b/openmp/runtime/src/kmp_runtime.cpp index a6e32bd008e1027e0736550701abfac04db7569e..b981f8740dbe2611d7698768f8317beb70a23311 100644 --- a/openmp/runtime/src/kmp_runtime.cpp +++ b/openmp/runtime/src/kmp_runtime.cpp @@ -920,6 +920,12 @@ static int __kmp_reserve_threads(kmp_root_t *root, kmp_team_t *parent_team, if (TCR_PTR(__kmp_threads[0]) == NULL) { --capacity; } + // If it is not for initializing the hidden helper team, we need to take + // __kmp_hidden_helper_threads_num out of the capacity because it is included + // in __kmp_threads_capacity. + if (__kmp_enable_hidden_helper && !TCR_4(__kmp_init_hidden_helper_threads)) { + capacity -= __kmp_hidden_helper_threads_num; + } if (__kmp_nth + new_nthreads - (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) > capacity) { @@ -3632,6 +3638,13 @@ int __kmp_register_root(int initial_thread) { --capacity; } + // If it is not for initializing the hidden helper team, we need to take + // __kmp_hidden_helper_threads_num out of the capacity because it is included + // in __kmp_threads_capacity. + if (__kmp_enable_hidden_helper && !TCR_4(__kmp_init_hidden_helper_threads)) { + capacity -= __kmp_hidden_helper_threads_num; + } + /* see if there are too many threads */ if (__kmp_all_nth >= capacity && !__kmp_expand_threads(1)) { if (__kmp_tp_cached) { @@ -3664,7 +3677,7 @@ int __kmp_register_root(int initial_thread) { /* find an available thread slot */ // Don't reassign the zero slot since we need that to only be used by // initial thread. Slots for hidden helper threads should also be skipped. - if (initial_thread && __kmp_threads[0] == NULL) { + if (initial_thread && TCR_PTR(__kmp_threads[0]) == NULL) { gtid = 0; } else { for (gtid = __kmp_hidden_helper_threads_num + 1; diff --git a/openmp/runtime/src/kmp_settings.cpp b/openmp/runtime/src/kmp_settings.cpp index b477edbbfb42bcff45762df300c0bb6792df853b..50f6a05faaf592b21e9f02fe63d8a4f24dccbe02 100644 --- a/openmp/runtime/src/kmp_settings.cpp +++ b/openmp/runtime/src/kmp_settings.cpp @@ -504,9 +504,10 @@ int __kmp_initial_threads_capacity(int req_nproc) { nth = (4 * __kmp_xproc); // If hidden helper task is enabled, we initialize the thread capacity with - // extra - // __kmp_hidden_helper_threads_num. - nth += __kmp_hidden_helper_threads_num; + // extra __kmp_hidden_helper_threads_num. + if (__kmp_enable_hidden_helper) { + nth += __kmp_hidden_helper_threads_num; + } if (nth > __kmp_max_nth) nth = __kmp_max_nth; diff --git a/openmp/runtime/test/tasking/hidden_helper_task/capacity_mix_threads.cpp b/openmp/runtime/test/tasking/hidden_helper_task/capacity_mix_threads.cpp new file mode 100644 index 0000000000000000000000000000000000000000..776aee9d8e2cac91c9df3582e64b0189163dd19f --- /dev/null +++ b/openmp/runtime/test/tasking/hidden_helper_task/capacity_mix_threads.cpp @@ -0,0 +1,45 @@ +// RUN: %libomp-cxx-compile-and-run + +#include <omp.h> + +#include <algorithm> +#include <cassert> +#include <chrono> +#include <thread> +#include <vector> + +void dummy_root() { + // omp_get_max_threads() will do middle initialization + int nthreads = omp_get_max_threads(); + std::this_thread::sleep_for(std::chrono::milliseconds(1000)); +} + +int main(int argc, char *argv[]) { + const int N = std::min(std::max(std::max(32, 4 * omp_get_max_threads()), + 4 * omp_get_num_procs()), + std::numeric_limits<int>::max()); + + std::vector<int> data(N); + + // Create a new thread to initialize the OpenMP RTL. The new thread will not + // be taken as the "initial thread". + std::thread root(dummy_root); + +#pragma omp parallel for num_threads(N) + for (unsigned i = 0; i < N; ++i) { + data[i] = i; + } + +#pragma omp parallel for num_threads(N + 1) + for (unsigned i = 0; i < N; ++i) { + data[i] += i; + } + + for (unsigned i = 0; i < N; ++i) { + assert(data[i] == 2 * i); + } + + root.join(); + + return 0; +} diff --git a/openmp/runtime/test/tasking/hidden_helper_task/capacity_nthreads.cpp b/openmp/runtime/test/tasking/hidden_helper_task/capacity_nthreads.cpp new file mode 100644 index 0000000000000000000000000000000000000000..a9d394f729e9c5171d6c22746d19a4a6653d377e --- /dev/null +++ b/openmp/runtime/test/tasking/hidden_helper_task/capacity_nthreads.cpp @@ -0,0 +1,31 @@ +// RUN: %libomp-cxx-compile-and-run + +#include <omp.h> + +#include <algorithm> +#include <cassert> +#include <vector> + +int main(int argc, char *argv[]) { + const int N = std::min(std::max(std::max(32, 4 * omp_get_max_threads()), + 4 * omp_get_num_procs()), + std::numeric_limits<int>::max()); + + std::vector<int> data(N); + +#pragma omp parallel for num_threads(N) + for (unsigned i = 0; i < N; ++i) { + data[i] = i; + } + +#pragma omp parallel for num_threads(N + 1) + for (unsigned i = 0; i < N; ++i) { + data[i] += i; + } + + for (unsigned i = 0; i < N; ++i) { + assert(data[i] == 2 * i); + } + + return 0; +}