diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 7f4b675b68f9ea6b1111ee1ed833331661803746..4cc1b0b9d2cf30db7f532f10d8941a1a1719fac5 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -82,31 +82,6 @@ New Compiler Flags
   in that case. The option's behaviour mirrors GCC, the helpers are implemented
   both in compiler-rt and libgcc.
 
-- -fpch-codegen and -fpch-debuginfo generate shared code and/or debuginfo
-  for contents of a precompiled header in a separate object file. This object
-  file needs to be linked in, but its contents do not need to be generated
-  for other objects using the precompiled header. This should usually save
-  compile time. If not using clang-cl, the separate object file needs to
-  be created explicitly from the precompiled header.
-  Example of use:
-
-  .. code-block:: console
-
-    $ clang++ -x c++-header header.h -o header.pch -fpch-codegen -fpch-debuginfo
-    $ clang++ -c header.pch -o shared.o
-    $ clang++ -c source.cpp -o source.o -include-pch header.pch
-    $ clang++ -o binary source.o shared.o
-
-  - Using -fpch-instantiate-templates when generating the precompiled header
-    usually increases the amount of code/debuginfo that can be shared.
-  - In some cases, especially when building with optimizations enabled, using
-    -fpch-codegen may generate so much code in the shared object that compiling
-    it may be a net loss in build time.
-  - Since headers may bring in private symbols of other libraries, it may be
-    sometimes necessary to discard unused symbols (such as by adding
-    -Wl,--gc-sections on ELF platforms to the linking command, and possibly
-    adding -fdata-sections -ffunction-sections to the command generating
-    the shared object).
 - New option ``-fbinutils-version=`` specifies the targeted binutils version.
   For example, ``-fbinutils-version=2.35`` means compatibility with GNU as/ld
   before 2.35 is not needed: new features can be used and there is no need to
@@ -139,6 +114,16 @@ Modified Compiler Flags
   This behavior matches newer GCC.
   (`D91760 <https://reviews.llvm.org/D91760>`_)
   (`D92054 <https://reviews.llvm.org/D92054>`_)
+- Support has been added for the following processors (command-line identifiers
+  in parentheses):
+
+  - Arm Cortex-A78C (cortex-a78c).
+  - Arm Cortex-R82 (cortex-r82).
+  - Arm Neoverse V1 (neoverse-v1).
+  - Arm Neoverse N2 (neoverse-n2).
+  - Fujitsu A64FX (a64fx).
+  For example, to select architecture support and tuning for Neoverse-V1 based
+  systems, use ``-mcpu=neoverse-v1``.
 
 Removed Compiler Flags
 -------------------------
@@ -183,6 +168,13 @@ Windows Support
   exception. To workaround (with reduced security), compile with
   /guard:cf,nolongjmp.
 
+- Windows on Arm64: LLVM 12 adds official binary release hosted on
+  Windows on Arm64.  The binary is built and tested by Linaro alongside
+  AArch64 and ARM 32-bit Linux binary releases.  This first WoA release
+  includes Clang compiler, LLD Linker, and compiler-rt runtime libraries.
+  Work on LLDB, sanitizer support, OpenMP, and other features is in progress
+  and will be included in future Windows on Arm64 LLVM releases.
+
 C Language Changes in Clang
 ---------------------------
 
@@ -200,10 +192,38 @@ C++1z Feature Support
 Objective-C Language Changes in Clang
 -------------------------------------
 
-OpenCL C Language Changes in Clang
-----------------------------------
-
-...
+OpenCL Kernel Language Changes in Clang
+---------------------------------------
+
+- Improved online documentation: :doc:`UsersManual` and :doc:`OpenCLSupport`
+  pages.
+- Added ``-cl-std=CL3.0`` and predefined version macro for OpenCL 3.0.
+- Added ``-cl-std=CL1.0`` and mapped to the existing OpenCL 1.0 functionality.
+- Improved OpenCL extension handling per target.
+- Added clang extension for function pointers ``__cl_clang_function_pointers``
+  and variadic functions ``__cl_clang_variadic_functions``, more details can be
+  found in :doc:`LanguageExtensions`.
+- Removed extensions without kernel language changes:
+  ``cl_khr_select_fprounding_mode``, ``cl_khr_gl_sharing``, ``cl_khr_icd``,
+  ``cl_khr_gl_event``, ``cl_khr_d3d10_sharing``, ``cl_khr_context_abort``,
+  ``cl_khr_d3d11_sharing``, ``cl_khr_dx9_media_sharing``,
+  ``cl_khr_image2d_from_buffer``, ``cl_khr_initialize_memory``,
+  ``cl_khr_gl_depth_images``, ``cl_khr_spir``, ``cl_khr_egl_event``,
+  ``cl_khr_egl_image``, ``cl_khr_terminate_context``.
+- Improved diagnostics for  unevaluated ``vec_step`` expression.
+- Allow nested pointers (e.g. pointer-to-pointer) kernel arguments beyond OpenCL
+  1.2.
+- Added ``global_device`` and ``global_host`` address spaces for USM
+  allocations.
+
+Miscellaneous improvements in C++ for OpenCL support:
+
+- Added diagnostics for pointers to member functions and references to
+  functions.
+- Added support of ``vec_step`` builtin.
+- Fixed ICE on address spaces with forwarding references and templated copy
+  constructors.
+- Removed warning for variadic macro use.
 
 ABI Changes in Clang
 --------------------
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index 57cc2d60e2af044de9c975068d228ad5e9fd3742..83dfa0780547d203856829557a189c1f10135c24 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -9892,7 +9892,7 @@ void CGOpenMPRuntime::emitTargetNumIterationsCall(
       llvm::Value *Args[] = {RTLoc, DeviceID, NumIterations};
       CGF.EmitRuntimeCall(
           OMPBuilder.getOrCreateRuntimeFunction(
-              CGM.getModule(), OMPRTL___kmpc_push_target_tripcount),
+              CGM.getModule(), OMPRTL___kmpc_push_target_tripcount_mapper),
           Args);
     }
   };
diff --git a/clang/lib/Driver/ToolChains/MSVC.cpp b/clang/lib/Driver/ToolChains/MSVC.cpp
index f4b7a57e0bb709be7a2f1529fca96e7877966abc..13943b6c404a9b6fa4a0bd4ebd00905cc999149b 100644
--- a/clang/lib/Driver/ToolChains/MSVC.cpp
+++ b/clang/lib/Driver/ToolChains/MSVC.cpp
@@ -11,6 +11,7 @@
 #include "Darwin.h"
 #include "clang/Basic/CharInfo.h"
 #include "clang/Basic/Version.h"
+#include "clang/Config/config.h"
 #include "clang/Driver/Compilation.h"
 #include "clang/Driver/Driver.h"
 #include "clang/Driver/DriverDiagnostic.h"
@@ -520,7 +521,10 @@ void visualstudio::Linker::ConstructJob(Compilation &C, const JobAction &JA,
   // translate 'lld' into 'lld-link', and in the case of the regular msvc
   // linker, we need to use a special search algorithm.
   llvm::SmallString<128> linkPath;
-  StringRef Linker = Args.getLastArgValue(options::OPT_fuse_ld_EQ, "link");
+  StringRef Linker
+    = Args.getLastArgValue(options::OPT_fuse_ld_EQ, CLANG_DEFAULT_LINKER);
+  if (Linker.empty())
+    Linker = "link";
   if (Linker.equals_lower("lld"))
     Linker = "lld-link";
 
diff --git a/clang/test/Driver/Xlinker-args.c b/clang/test/Driver/Xlinker-args.c
index a44957cd8aef11311351cc218d736ffa21b0100b..cb045a1d40ac172e3c7bf1de039d31c170d79812 100644
--- a/clang/test/Driver/Xlinker-args.c
+++ b/clang/test/Driver/Xlinker-args.c
@@ -17,7 +17,7 @@
 // LINUX: "--no-demangle" "-e" "_start" "one" "two" "three" "four" "-z" "five" "-r" {{.*}} "-T" "a.lds"
 
 // Check that we forward '-Xlinker' and '-Wl,' on Windows.
-// RUN: %clang -target i686-pc-win32 -### \
+// RUN: %clang -target i686-pc-win32 -fuse-ld=link -### \
 // RUN:   -Xlinker one -Wl,two %s 2>&1 | \
 // RUN:   FileCheck -check-prefix=WIN %s
 // WIN: link.exe
diff --git a/clang/test/Driver/cl-inputs.c b/clang/test/Driver/cl-inputs.c
index 59455a0aa5e5c7e06a4f1bb673c9ce6fd43d6f09..8eb44517ee167a8bfcc087c3121fea1744a3912c 100644
--- a/clang/test/Driver/cl-inputs.c
+++ b/clang/test/Driver/cl-inputs.c
@@ -50,16 +50,16 @@
 // RUN: %clang_cl -### /Tc - 2>&1 | FileCheck -check-prefix=STDINTc %s
 // STDINTc: "-x" "c"
 
-// RUN: env LIB=%S/Inputs/cl-libs %clang_cl -### -- %s cl-test.lib 2>&1 | FileCheck -check-prefix=LIBINPUT %s
+// RUN: env LIB=%S/Inputs/cl-libs %clang_cl -fuse-ld=link -### -- %s cl-test.lib 2>&1 | FileCheck -check-prefix=LIBINPUT %s
 // LIBINPUT: link.exe"
 // LIBINPUT: "cl-test.lib"
 
-// RUN: env LIB=%S/Inputs/cl-libs %clang_cl -### -- %s cl-test2.lib 2>&1 | FileCheck -check-prefix=LIBINPUT2 %s
+// RUN: env LIB=%S/Inputs/cl-libs %clang_cl -fuse-ld=link -### -- %s cl-test2.lib 2>&1 | FileCheck -check-prefix=LIBINPUT2 %s
 // LIBINPUT2: error: no such file or directory: 'cl-test2.lib'
 // LIBINPUT2: link.exe"
 // LIBINPUT2-NOT: "cl-test2.lib"
 
-// RUN: %clang_cl -### -- %s /nonexisting.lib 2>&1 | FileCheck -check-prefix=LIBINPUT3 %s
+// RUN: %clang_cl -fuse-ld=link -### -- %s /nonexisting.lib 2>&1 | FileCheck -check-prefix=LIBINPUT3 %s
 // LIBINPUT3: error: no such file or directory: '/nonexisting.lib'
 // LIBINPUT3: link.exe"
 // LIBINPUT3-NOT: "/nonexisting.lib"
diff --git a/clang/test/Driver/cl-link-at-file.c b/clang/test/Driver/cl-link-at-file.c
index 50ae07fadf5bf0beb11c4eff09b5d6fc2fd65e7c..4e665f89b74e1d0e5d3ffeac8f73fbaf8f533f53 100644
--- a/clang/test/Driver/cl-link-at-file.c
+++ b/clang/test/Driver/cl-link-at-file.c
@@ -7,7 +7,7 @@
 
 // RUN: echo /link bar.lib baz.lib > %t.args
 // RUN: touch %t.obj
-// RUN: %clang_cl -### @%t.args -- %t.obj 2>&1 | FileCheck %s -check-prefix=ARGS
+// RUN: %clang_cl -fuse-ld=link -### @%t.args -- %t.obj 2>&1 | FileCheck %s -check-prefix=ARGS
 // If the "/link" option captures all remaining args beyond its response file,
 // it will also capture "--" and our input argument. In this case, Clang will
 // be clueless and will emit "argument unused" warnings. If PR17239 is properly
diff --git a/clang/test/Driver/cl-link.c b/clang/test/Driver/cl-link.c
index 142725fed8eb2ee1b0265460495a4eb3fd632224..e2f5397e913393bd10fafbfba9b66c518b732be9 100644
--- a/clang/test/Driver/cl-link.c
+++ b/clang/test/Driver/cl-link.c
@@ -2,14 +2,14 @@
 // be interpreted as a command-line option, e.g. on Mac where %s is commonly
 // under /Users.
 
-// RUN: %clang_cl /Tc%s -### /link foo bar baz 2>&1 | FileCheck --check-prefix=LINK %s
-// RUN: %clang_cl /Tc%s -### /linkfoo bar baz 2>&1 | FileCheck --check-prefix=LINK %s
+// RUN: %clang_cl /Tc%s -fuse-ld=link -### /link foo bar baz 2>&1 | FileCheck --check-prefix=LINK %s
+// RUN: %clang_cl /Tc%s -fuse-ld=link -### /linkfoo bar baz 2>&1 | FileCheck --check-prefix=LINK %s
 // LINK: link.exe
 // LINK: "foo"
 // LINK: "bar"
 // LINK: "baz"
 
-// RUN: %clang_cl -m32 -arch:IA32 --target=i386-pc-win32 /Tc%s -### -fsanitize=address 2>&1 | FileCheck --check-prefix=ASAN %s
+// RUN: %clang_cl -m32 -arch:IA32 --target=i386-pc-win32 /Tc%s -fuse-ld=link -### -fsanitize=address 2>&1 | FileCheck --check-prefix=ASAN %s
 // ASAN: link.exe
 // ASAN: "-debug"
 // ASAN: "-incremental:no"
@@ -19,7 +19,7 @@
 // ASAN: "-wholearchive:{{.*}}clang_rt.asan_cxx-i386.lib"
 // ASAN: "{{.*}}cl-link{{.*}}.obj"
 
-// RUN: %clang_cl -m32 -arch:IA32 --target=i386-pc-win32 /MD /Tc%s -### -fsanitize=address 2>&1 | FileCheck --check-prefix=ASAN-MD %s
+// RUN: %clang_cl -m32 -arch:IA32 --target=i386-pc-win32 /MD /Tc%s -fuse-ld=link -### -fsanitize=address 2>&1 | FileCheck --check-prefix=ASAN-MD %s
 // ASAN-MD: link.exe
 // ASAN-MD: "-debug"
 // ASAN-MD: "-incremental:no"
@@ -29,13 +29,13 @@
 // ASAN-MD: "-wholearchive:{{.*}}clang_rt.asan_dynamic_runtime_thunk-i386.lib"
 // ASAN-MD: "{{.*}}cl-link{{.*}}.obj"
 
-// RUN: %clang_cl /LD -### /Tc%s 2>&1 | FileCheck --check-prefix=DLL %s
-// RUN: %clang_cl /LDd -### /Tc%s 2>&1 | FileCheck --check-prefix=DLL %s
+// RUN: %clang_cl /LD -fuse-ld=link -### /Tc%s 2>&1 | FileCheck --check-prefix=DLL %s
+// RUN: %clang_cl /LDd -fuse-ld=link -### /Tc%s 2>&1 | FileCheck --check-prefix=DLL %s
 // DLL: link.exe
 // "-dll"
 
-// RUN: %clang_cl -m32 -arch:IA32 --target=i386-pc-win32 /LD /Tc%s -### -fsanitize=address 2>&1 | FileCheck --check-prefix=ASAN-DLL %s
-// RUN: %clang_cl -m32 -arch:IA32 --target=i386-pc-win32 /LDd /Tc%s -### -fsanitize=address 2>&1 | FileCheck --check-prefix=ASAN-DLL %s
+// RUN: %clang_cl -m32 -arch:IA32 --target=i386-pc-win32 /LD /Tc%s -fuse-ld=link -### -fsanitize=address 2>&1 | FileCheck --check-prefix=ASAN-DLL %s
+// RUN: %clang_cl -m32 -arch:IA32 --target=i386-pc-win32 /LDd /Tc%s -fuse-ld=link -### -fsanitize=address 2>&1 | FileCheck --check-prefix=ASAN-DLL %s
 // ASAN-DLL: link.exe
 // ASAN-DLL: "-dll"
 // ASAN-DLL: "-debug"
@@ -43,13 +43,13 @@
 // ASAN-DLL: "{{.*}}clang_rt.asan_dll_thunk-i386.lib"
 // ASAN-DLL: "{{.*}}cl-link{{.*}}.obj"
 
-// RUN: %clang_cl /Zi /Tc%s -### 2>&1 | FileCheck --check-prefix=DEBUG %s
+// RUN: %clang_cl /Zi /Tc%s -fuse-ld=link -### 2>&1 | FileCheck --check-prefix=DEBUG %s
 // DEBUG: link.exe
 // DEBUG: "-debug"
 
 // PR27234
-// RUN: %clang_cl /Tc%s nonexistent.obj -### /link /libpath:somepath 2>&1 | FileCheck --check-prefix=NONEXISTENT %s
-// RUN: %clang_cl /Tc%s nonexistent.lib -### /link /libpath:somepath 2>&1 | FileCheck --check-prefix=NONEXISTENT %s
+// RUN: %clang_cl /Tc%s nonexistent.obj -fuse-ld=link -### /link /libpath:somepath 2>&1 | FileCheck --check-prefix=NONEXISTENT %s
+// RUN: %clang_cl /Tc%s nonexistent.lib -fuse-ld=link -### /link /libpath:somepath 2>&1 | FileCheck --check-prefix=NONEXISTENT %s
 // NONEXISTENT-NOT: no such file
 // NONEXISTENT: link.exe
 // NONEXISTENT: "/libpath:somepath"
diff --git a/clang/test/Driver/msvc-link.c b/clang/test/Driver/msvc-link.c
index 13dccd21bfd8d1c0ed7ec61c28670fe189d2f3ad..1ee17fc63c321359edade2c847b2c04282bb00d7 100644
--- a/clang/test/Driver/msvc-link.c
+++ b/clang/test/Driver/msvc-link.c
@@ -1,4 +1,4 @@
-// RUN: %clang -target i686-pc-windows-msvc -### %s 2>&1 | FileCheck --check-prefix=BASIC %s
+// RUN: %clang -target i686-pc-windows-msvc -fuse-ld=link -### %s 2>&1 | FileCheck --check-prefix=BASIC %s
 // BASIC: link.exe"
 // BASIC: "-out:a.exe"
 // BASIC: "-defaultlib:libcmt"
@@ -6,7 +6,7 @@
 // BASIC: "-nologo"
 // BASIC-NOT: "-Brepro"
 
-// RUN: %clang -target i686-pc-windows-msvc -shared -o a.dll -### %s 2>&1 | FileCheck --check-prefix=DLL %s
+// RUN: %clang -target i686-pc-windows-msvc -shared -o a.dll -fuse-ld=link -### %s 2>&1 | FileCheck --check-prefix=DLL %s
 // DLL: link.exe"
 // DLL: "-out:a.dll"
 // DLL: "-defaultlib:libcmt"
@@ -19,13 +19,13 @@
 // LIBPATH: "-libpath:/usr/lib"
 // LIBPATH: "-nologo"
 
-// RUN: %clang_cl /Brepro -### -- %s 2>&1 | FileCheck --check-prefix=REPRO %s
+// RUN: %clang_cl /Brepro -fuse-ld=link -### -- %s 2>&1 | FileCheck --check-prefix=REPRO %s
 // REPRO: link.exe"
 // REPRO: "-out:msvc-link.exe"
 // REPRO: "-nologo"
 // REPRO: "-Brepro"
 
-// RUN: %clang_cl /Brepro- -### -- %s 2>&1 | FileCheck --check-prefix=NOREPRO %s
+// RUN: %clang_cl /Brepro- -fuse-ld=link -### -- %s 2>&1 | FileCheck --check-prefix=NOREPRO %s
 // NOREPRO: link.exe"
 // NOREPRO: "-out:msvc-link.exe"
 // NOREPRO: "-nologo"
diff --git a/clang/test/OpenMP/linking.c b/clang/test/OpenMP/linking.c
index 802553c1be7525de7aeb8c5253a125a7f9e85f1d..1c44396264705bb6638ed289cd83849b07f6bb01 100644
--- a/clang/test/OpenMP/linking.c
+++ b/clang/test/OpenMP/linking.c
@@ -81,7 +81,7 @@
 // CHECK-LD-OVERRIDE-64: "-lgomp" "-lrt"
 // CHECK-LD-OVERRIDE-64: "-lpthread" "-lc"
 //
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -no-canonical-prefixes -fuse-ld=link %s -### -o %t.o 2>&1 \
 // RUN:     -fopenmp=libomp -target x86_64-msvc-win32 -rtlib=platform \
 // RUN:   | FileCheck --check-prefix=CHECK-MSVC-LINK-64 %s
 // CHECK-MSVC-LINK-64: link.exe
@@ -95,7 +95,7 @@
 // SIMD-ONLY11-NOT: libomp
 // SIMD-ONLY11-NOT: libgomp
 //
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -no-canonical-prefixes %s -fuse-ld=link -### -o %t.o 2>&1 \
 // RUN:     -fopenmp=libiomp5 -target x86_64-msvc-win32 -rtlib=platform \
 // RUN:   | FileCheck --check-prefix=CHECK-MSVC-ILINK-64 %s
 
diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_codegen.cpp
index 0229ace911f87b189cc89d8b420d710a9368b811..c0f53239aa13335f7a60cd3184ba3d20ef1f660a 100644
--- a/clang/test/OpenMP/target_teams_distribute_parallel_for_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_codegen.cpp
@@ -39,7 +39,7 @@
 
 #ifdef CK1
 
-// HCK_NO_TGT-NOT: @__kmpc_push_target_tripcount
+// HCK_NO_TGT-NOT: @__kmpc_push_target_tripcount_mapper
 
 // HCK1: define{{.*}} i32 @{{.+}}target_teams_fun{{.*}}(
 int target_teams_fun(int *g){
@@ -60,7 +60,7 @@ int target_teams_fun(int *g){
   // HCK1: [[N_PAR:%.+]] = load{{.+}}, {{.+}} [[N_CAST]],
   // HCK1: [[TE_PAR:%.+]] = load{{.+}}, {{.+}} [[TE_CAST]],
   // HCK1: [[TH_PAR:%.+]] = load{{.+}}, {{.+}} [[TH_CAST]],
-  // HCK1: call void @__kmpc_push_target_tripcount(%struct.ident_t* @{{.+}}, i64 -1, i64 %{{.+}})
+  // HCK1: call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @{{.+}}, i64 -1, i64 %{{.+}})
   // HCK1: call i32 @__tgt_target_teams_mapper(%struct.ident_t* @{{.+}}, i64 -1, i8* @{{[^,]+}}, i32 4, i8** %{{[^,]+}}, i8** %{{[^,]+}},
 
   // HCK1: call void @[[OFFL1:.+]](i{{32|64}} [[N_PAR]], {{.+}}, i{{32|64}} [[TE_PAR]], i{{32|64}} [[TH_PAR]])
diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_if_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_if_codegen.cpp
index 6650e05575110efb1c10dcbd2fde4b2fac2b3aca..efe7df819fb6213353a4def8690c5c6858b445a2 100644
--- a/clang/test/OpenMP/target_teams_distribute_parallel_for_if_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_if_codegen.cpp
@@ -49,10 +49,10 @@ int Arg;
 
 // CHECK-LABEL: define {{.*}}void @{{.+}}gtid_test
 void gtid_test() {
-// CHECK: call void @__kmpc_push_target_tripcount(%struct.ident_t* @{{.+}}, i64 -1, i64 100)
+// CHECK: call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @{{.+}}, i64 -1, i64 100)
 // CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(%struct.ident_t* @{{.+}},
 // CHECK: call void [[OFFLOADING_FUN_0:@.+]](
-// CHECK: call void @__kmpc_push_target_tripcount(%struct.ident_t* @{{.+}}, i64 -1, i64 100)
+// CHECK: call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @{{.+}}, i64 -1, i64 100)
 // CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(%struct.ident_t* @{{.+}},
 // CHECK: call void [[OFFLOADING_FUN_1:@.+]](
 #pragma omp target teams distribute parallel for
@@ -107,12 +107,12 @@ int tmain(T Arg) {
 
 // CHECK-LABEL: define {{.*}}i{{[0-9]+}} @main()
 int main() {
-// CHECK: call void @__kmpc_push_target_tripcount(%struct.ident_t* @{{.+}}, i64 -1, i64 100)
+// CHECK: call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @{{.+}}, i64 -1, i64 100)
 // CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(%struct.ident_t* @{{.+}},
 // CHECK: call void [[OFFLOADING_FUN_0:@.+]](
-// CHECK-NOT: call void @__kmpc_push_target_tripcount(%struct.ident_t* @{{.+}}, i64 -1, i64 100)
+// CHECK-NOT: call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @{{.+}}, i64 -1, i64 100)
 // CHECK: call void [[OFFLOADING_FUN_1:@.+]](
-// CHECK: call void @__kmpc_push_target_tripcount(%struct.ident_t* @{{.+}}, i64 -1, i64 100)
+// CHECK: call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @{{.+}}, i64 -1, i64 100)
 // CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(%struct.ident_t* @{{.+}},
 // CHECK: call void [[OFFLOADING_FUN_2:@.+]](
 // CHECK: = call {{.*}}i{{.+}} @{{.+}}tmain
diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_order_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_order_codegen.cpp
index b2ab37f22ec33b79d2ae85d2b951381d417b5661..b99ba9d38a43024b6ea7ad8b324fb53f11c31a9d 100644
--- a/clang/test/OpenMP/target_teams_distribute_parallel_for_order_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_order_codegen.cpp
@@ -14,7 +14,7 @@
 
 // CHECK-LABEL: define {{.*}}void @{{.+}}gtid_test
 void gtid_test() {
-// CHECK: call void @__kmpc_push_target_tripcount(%struct.ident_t* @{{.+}}, i64 -1, i64 100)
+// CHECK: call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @{{.+}}, i64 -1, i64 100)
 // CHECK: %0 = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @{{.+}}, i64 -1, i8* @{{.+}}, i32 0, i8** null, i8** null, i64* null, i64* null, i8** null, i8** null, i32 0, i32 0)
 // CHECK: call void [[TARGET_OUTLINE:@.+]]()
 // CHECK: ret void
diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_codegen.cpp
index e6049145702bcb7fbddef57f5563b04a04c8b385..39ccb87462c02b35b4107ebe1d57c6580de60c4c 100644
--- a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_codegen.cpp
@@ -60,7 +60,7 @@ int target_teams_fun(int *g){
 // HCK1: [[N_PAR:%.+]] = load{{.+}}, {{.+}} [[N_CAST]],
 // HCK1: [[TE_PAR:%.+]] = load{{.+}}, {{.+}} [[TE_CAST]],
 // HCK1: [[TH_PAR:%.+]] = load{{.+}}, {{.+}} [[TH_CAST]],
-// HCK1: call void @__kmpc_push_target_tripcount(%struct.ident_t* @{{.+}}, i64 -1, i64 %{{.+}})
+// HCK1: call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @{{.+}}, i64 -1, i64 %{{.+}})
 // HCK1: call i32 @__tgt_target_teams_mapper(%struct.ident_t* @{{.+}}, i64 -1, i8* @{{[^,]+}}, i32 5, i8** %{{[^,]+}}, i8** %{{[^,]+}},
 
 // HCK1: call void @[[OFFL1:.+]](i{{32|64}} [[I_PAR]], i{{32|64}} [[N_PAR]], {{.+}}, i{{32|64}} [[TE_PAR]], i{{32|64}} [[TH_PAR]])
diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_if_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_if_codegen.cpp
index 8b0eaba07f1cbd92c1f848fffd40769fb20d7b3b..19dc15b94f64b50267087d841532a7a60f37e2f5 100644
--- a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_if_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_if_codegen.cpp
@@ -43,10 +43,10 @@ int Arg;
 
 // CHECK-LABEL: define {{.*}}void @{{.+}}gtid_test
 void gtid_test() {
-// CHECK: call void @__kmpc_push_target_tripcount(%struct.ident_t* @{{.+}}, i64 -1, i64 100)
+// CHECK: call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @{{.+}}, i64 -1, i64 100)
 // CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(%struct.ident_t* @{{.+}},
 // CHECK: call void [[OFFLOADING_FUN_0:@.+]](
-// CHECK: call void @__kmpc_push_target_tripcount(%struct.ident_t* @{{.+}}, i64 -1, i64 100)
+// CHECK: call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @{{.+}}, i64 -1, i64 100)
 // CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(%struct.ident_t* @{{.+}},
 // CHECK: call void [[OFFLOADING_FUN_1:@.+]](
 #ifdef OMP5
@@ -110,12 +110,12 @@ int tmain(T Arg) {
 
 // CHECK-LABEL: define {{.*}}i{{[0-9]+}} @main()
 int main() {
-// CHECK: call void @__kmpc_push_target_tripcount(%struct.ident_t* @{{.+}}, i64 -1, i64 100)
+// CHECK: call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @{{.+}}, i64 -1, i64 100)
 // CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(%struct.ident_t* @{{.+}},
 // CHECK: call void [[OFFLOADING_FUN_0:@.+]](
-// CHECK-NOT: call void @__kmpc_push_target_tripcount(%struct.ident_t* @{{.+}}, i64 -1, i64 100)
+// CHECK-NOT: call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @{{.+}}, i64 -1, i64 100)
 // CHECK: call void [[OFFLOADING_FUN_1:@.+]](
-// CHECK: call void @__kmpc_push_target_tripcount(%struct.ident_t* @{{.+}}, i64 -1, i64 100)
+// CHECK: call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @{{.+}}, i64 -1, i64 100)
 // CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(%struct.ident_t* @{{.+}},
 // CHECK: call void [[OFFLOADING_FUN_2:@.+]](
 // CHECK: = call {{.*}}i{{.+}} @{{.+}}tmain
diff --git a/clang/test/OpenMP/teams_distribute_codegen.cpp b/clang/test/OpenMP/teams_distribute_codegen.cpp
index 5bbb100e669ec4e80f0792534b68324a02c63ca3..aab5cced4c70f910696d8fd143620806e5ac5dc3 100644
--- a/clang/test/OpenMP/teams_distribute_codegen.cpp
+++ b/clang/test/OpenMP/teams_distribute_codegen.cpp
@@ -33,7 +33,7 @@ int teams_argument_global(int n) {
   // CK1: [[TE_PAR:%.+]] = load{{.+}}, {{.+}} [[TE_CAST]],
   // CK1: [[TH_PAR:%.+]] = load{{.+}}, {{.+}} [[TH_CAST]],
 
-  // CK1: call void @__kmpc_push_target_tripcount(%struct.ident_t* @{{.+}}, i64 -1, i64 %{{.+}})
+  // CK1: call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @{{.+}}, i64 -1, i64 %{{.+}})
   // CK1: call i32 @__tgt_target_teams_mapper(%struct.ident_t* @{{.+}}, i64 -1, i8* @{{[^,]+}}, i32 4, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i8** null, i8** null, i32 {{.+}}, i32 {{.+}})
 
   // CK1: call void @[[OFFL1:.+]](i{{32|64}} [[TE_PAR]], i{{32|64}} [[TH_PAR]],
diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_codegen.cpp
index b63e5aeddb7a2c0a746b4350ff1bfe8c39c8092b..8fa73e76009bcf1d99396cd662c7b75513448809 100644
--- a/clang/test/OpenMP/teams_distribute_parallel_for_codegen.cpp
+++ b/clang/test/OpenMP/teams_distribute_parallel_for_codegen.cpp
@@ -32,7 +32,7 @@ int teams_argument_global(int n){
   // CK1: [[TH_CAST:%.+]] = alloca i{{32|64}},
   // CK1: [[TE_PAR:%.+]] = load{{.+}}, {{.+}} [[TE_CAST]],
   // CK1: [[TH_PAR:%.+]] = load{{.+}}, {{.+}} [[TH_CAST]],
-  // CK1: call void @__kmpc_push_target_tripcount(%struct.ident_t* @{{.+}}, i64 -1, i64 %{{.+}})
+  // CK1: call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @{{.+}}, i64 -1, i64 %{{.+}})
   // CK1: call i32 @__tgt_target_teams_mapper(%struct.ident_t* @{{.+}}, i64 -1, i8* @{{[^,]+}}, i32 4, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i8** null, i8** null, i32 {{.+}}, i32 {{.+}})
 
   // CK1: call void @[[OFFL1:.+]](i{{32|64}} [[TE_PAR]], i{{32|64}} [[TH_PAR]],
diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_simd_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_simd_codegen.cpp
index 3d479c4cd29dfaa4aa559f65652cbf35d1189f96..9b3855c61759ae88aaf0742264c2e7bbe4d683fd 100644
--- a/clang/test/OpenMP/teams_distribute_parallel_for_simd_codegen.cpp
+++ b/clang/test/OpenMP/teams_distribute_parallel_for_simd_codegen.cpp
@@ -33,7 +33,7 @@ int teams_argument_global(int n){
   // CK1: [[TE_PAR:%.+]] = load{{.+}}, {{.+}} [[TE_CAST]],
   // CK1: [[TH_PAR:%.+]] = load{{.+}}, {{.+}} [[TH_CAST]],
 
-  // CK1: call void @__kmpc_push_target_tripcount(%struct.ident_t* @{{.+}}, i64 -1, i64 %{{.+}})
+  // CK1: call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @{{.+}}, i64 -1, i64 %{{.+}})
   // CK1: call i32 @__tgt_target_teams_mapper(%struct.ident_t* @{{.+}}, i64 -1, i8* @{{[^,]+}}, i32 4, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i8** null
 
   // CK1: call void @[[OFFL1:.+]](i{{32|64}} [[TE_PAR]], i{{32|64}} [[TH_PAR]],
diff --git a/clang/test/OpenMP/teams_distribute_simd_codegen.cpp b/clang/test/OpenMP/teams_distribute_simd_codegen.cpp
index fd1214d22ce9e04eec184cabbbd07a7cec615eaa..6e5d06b0c5689ea84a840735d3c7dbdba0913708 100644
--- a/clang/test/OpenMP/teams_distribute_simd_codegen.cpp
+++ b/clang/test/OpenMP/teams_distribute_simd_codegen.cpp
@@ -35,7 +35,7 @@ int teams_argument_global(int n) {
   // CK1: [[TE_PAR:%.+]] = load{{.+}}, {{.+}} [[TE_CAST]],
   // CK1: [[TH_PAR:%.+]] = load{{.+}}, {{.+}} [[TH_CAST]],
 
-  // CK1: call void @__kmpc_push_target_tripcount(%struct.ident_t* @{{.+}}, i64 -1, i64 %{{.+}})
+  // CK1: call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @{{.+}}, i64 -1, i64 %{{.+}})
   // CK1: call i32 @__tgt_target_teams_mapper(%struct.ident_t* @{{.+}}, i64 -1, i8* @{{[^,]+}}, i32 5, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i8** null, i8** null, i32 {{.+}}, i32 1)
 
   // CK1: call void @[[OFFL1:.+]](i{{32|64}} [[TE_PAR]], i{{32|64}} [[TH_PAR]],
diff --git a/llvm/include/llvm-c/Orc.h b/llvm/include/llvm-c/Orc.h
index 183107c148a6d2e128d65867425fb97a1461e783..9beef44c89dd5ed2b49fbd320df437e42b6a5f0b 100644
--- a/llvm/include/llvm-c/Orc.h
+++ b/llvm/include/llvm-c/Orc.h
@@ -339,8 +339,7 @@ LLVMErrorRef LLVMOrcResourceTrackerRemove(LLVMOrcResourceTrackerRef RT);
  * ownership has not been passed to a JITDylib (e.g. because some error
  * prevented the client from calling LLVMOrcJITDylibAddGenerator).
  */
-void LLVMOrcDisposeDefinitionGenerator(
-    LLVMOrcDefinitionGeneratorRef DG);
+void LLVMOrcDisposeDefinitionGenerator(LLVMOrcDefinitionGeneratorRef DG);
 
 /**
  * Dispose of a MaterializationUnit.
@@ -388,7 +387,9 @@ LLVMOrcExecutionSessionCreateJITDylib(LLVMOrcExecutionSessionRef ES,
  * Returns the JITDylib with the given name, or NULL if no such JITDylib
  * exists.
  */
-LLVMOrcJITDylibRef LLVMOrcExecutionSessionGetJITDylibByName(const char *Name);
+LLVMOrcJITDylibRef
+LLVMOrcExecutionSessionGetJITDylibByName(LLVMOrcExecutionSessionRef ES,
+                                         const char *Name);
 
 /**
  * Return a reference to a newly created resource tracker associated with JD.
diff --git a/llvm/include/llvm/CodeGen/FastISel.h b/llvm/include/llvm/CodeGen/FastISel.h
index 81c1d6aad49a1bdb2dcfb8b32dda5f06f04075be..26bf4ab2618ce698bda4a61d8963b98421985035 100644
--- a/llvm/include/llvm/CodeGen/FastISel.h
+++ b/llvm/include/llvm/CodeGen/FastISel.h
@@ -490,7 +490,10 @@ protected:
   /// - \c Add has a constant operand.
   bool canFoldAddIntoGEP(const User *GEP, const Value *Add);
 
-  /// Test whether the given value has exactly one use.
+  /// Test whether the register associated with this value has exactly one use,
+  /// in which case that single use is killing. Note that multiple IR values
+  /// may map onto the same register, in which case this is not the same as
+  /// checking that an IR value has one use.
   bool hasTrivialKill(const Value *V);
 
   /// Create a machine mem operand from the given instruction.
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
index 844046167975960f8902f625cdbaf123d32d71a3..75d360bf4237a4b728863de84acb34fac25cf109 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
@@ -375,7 +375,7 @@ __OMP_RTL(__kmpc_init_allocator, false, /* omp_allocator_handle_t */ VoidPtr,
 __OMP_RTL(__kmpc_destroy_allocator, false, Void, /* Int */ Int32,
           /* omp_allocator_handle_t */ VoidPtr)
 
-__OMP_RTL(__kmpc_push_target_tripcount, false, Void, IdentPtr, Int64, Int64)
+__OMP_RTL(__kmpc_push_target_tripcount_mapper, false, Void, IdentPtr, Int64, Int64)
 __OMP_RTL(__tgt_target_mapper, false, Int32, IdentPtr, Int64, VoidPtr, Int32, VoidPtrPtr,
           VoidPtrPtr, Int64Ptr, Int64Ptr, VoidPtrPtr, VoidPtrPtr)
 __OMP_RTL(__tgt_target_nowait_mapper, false, Int32, IdentPtr, Int64, VoidPtr, Int32,
@@ -844,7 +844,7 @@ __OMP_RTL_ATTRS(__kmpc_free, AllocAttrs, AttributeSet(), {})
 __OMP_RTL_ATTRS(__kmpc_init_allocator, DefaultAttrs, ReturnPtrAttrs, {})
 __OMP_RTL_ATTRS(__kmpc_destroy_allocator, AllocAttrs, AttributeSet(), {})
 
-__OMP_RTL_ATTRS(__kmpc_push_target_tripcount, SetterAttrs, AttributeSet(), {})
+__OMP_RTL_ATTRS(__kmpc_push_target_tripcount_mapper, SetterAttrs, AttributeSet(), {})
 __OMP_RTL_ATTRS(__tgt_target_mapper, ForkAttrs, AttributeSet(), {})
 __OMP_RTL_ATTRS(__tgt_target_nowait_mapper, ForkAttrs, AttributeSet(), {})
 __OMP_RTL_ATTRS(__tgt_target_teams_mapper, ForkAttrs, AttributeSet(), {})
diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
index b97c369b832daf7e6d1fe6c418d726b422acd789..b7883cbc3120fde88139afa32a2457df724ac446 100644
--- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -840,9 +840,8 @@ void IRTranslator::emitSwitchCase(SwitchCG::CaseBlock &CB,
     // For conditional branch lowering, we might try to do something silly like
     // emit an G_ICMP to compare an existing G_ICMP i1 result with true. If so,
     // just re-use the existing condition vreg.
-    if (CI && CI->getZExtValue() == 1 &&
-        MRI->getType(CondLHS).getSizeInBits() == 1 &&
-        CB.PredInfo.Pred == CmpInst::ICMP_EQ) {
+    if (MRI->getType(CondLHS).getSizeInBits() == 1 && CI &&
+        CI->getZExtValue() == 1 && CB.PredInfo.Pred == CmpInst::ICMP_EQ) {
       Cond = CondLHS;
     } else {
       Register CondRHS = getOrCreateVReg(*CB.CmpRHS);
diff --git a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
index 62f7f3d98ba6d663fea8f20dd19c8895186502ff..0ff77d4ba1abb012df737d0a6f58f6ddffc6ac81 100644
--- a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -261,12 +261,16 @@ bool FastISel::hasTrivialKill(const Value *V) {
     if (GEP->hasAllZeroIndices() && !hasTrivialKill(GEP->getOperand(0)))
       return false;
 
+  // Casts and extractvalues may be trivially coalesced by fast-isel.
+  if (I->getOpcode() == Instruction::BitCast ||
+      I->getOpcode() == Instruction::PtrToInt ||
+      I->getOpcode() == Instruction::IntToPtr ||
+      I->getOpcode() == Instruction::ExtractValue)
+    return false;
+
   // Only instructions with a single use in the same basic block are considered
   // to have trivial kills.
   return I->hasOneUse() &&
-         !(I->getOpcode() == Instruction::BitCast ||
-           I->getOpcode() == Instruction::PtrToInt ||
-           I->getOpcode() == Instruction::IntToPtr) &&
          cast<Instruction>(*I->user_begin())->getParent() == I->getParent();
 }
 
diff --git a/llvm/lib/ExecutionEngine/Orc/OrcV2CBindings.cpp b/llvm/lib/ExecutionEngine/Orc/OrcV2CBindings.cpp
index dfdd2c6c669f980af9172a89f86efd6ebdc4679d..834d4cc8f51455eae814dd0f13871b76d04ee582 100644
--- a/llvm/lib/ExecutionEngine/Orc/OrcV2CBindings.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/OrcV2CBindings.cpp
@@ -393,7 +393,7 @@ void LLVMOrcDisposeJITTargetMachineBuilder(
   delete unwrap(JTMB);
 }
 
-void lLVMOrcDisposeObjectLayer(LLVMOrcObjectLayerRef ObjLayer) {
+void LLVMOrcDisposeObjectLayer(LLVMOrcObjectLayerRef ObjLayer) {
   delete unwrap(ObjLayer);
 }
 
diff --git a/llvm/lib/Support/Windows/Path.inc b/llvm/lib/Support/Windows/Path.inc
index dc9bcf8683810a496b9eaa9d8a34994497e7537c..adcbd1b5f8f31a1d737dbacfd2f86eed3a40d1a9 100644
--- a/llvm/lib/Support/Windows/Path.inc
+++ b/llvm/lib/Support/Windows/Path.inc
@@ -402,8 +402,22 @@ std::error_code is_local(int FD, bool &Result) {
 }
 
 static std::error_code setDeleteDisposition(HANDLE Handle, bool Delete) {
-  // First, check if the file is on a network (non-local) drive. If so, don't
-  // set DeleteFile to true, since it prevents opening the file for writes.
+  // Clear the FILE_DISPOSITION_INFO flag first, before checking if it's a
+  // network file. On Windows 7 the function realPathFromHandle() below fails
+  // if the FILE_DISPOSITION_INFO flag was already set to 'DeleteFile = true' by
+  // a prior call.
+  FILE_DISPOSITION_INFO Disposition;
+  Disposition.DeleteFile = false;
+  if (!SetFileInformationByHandle(Handle, FileDispositionInfo, &Disposition,
+                                  sizeof(Disposition)))
+    return mapWindowsError(::GetLastError());
+  if (!Delete)
+    return std::error_code();
+
+  // Check if the file is on a network (non-local) drive. If so, don't
+  // continue when DeleteFile is true, since it prevents opening the file for
+  // writes. Note -- this will leak temporary files on disk, but only when the
+  // target file is on a network drive.
   SmallVector<wchar_t, 128> FinalPath;
   if (std::error_code EC = realPathFromHandle(Handle, FinalPath))
     return EC;
@@ -415,9 +429,9 @@ static std::error_code setDeleteDisposition(HANDLE Handle, bool Delete) {
   if (!IsLocal)
     return std::error_code();
 
-  // The file is on a local drive, set the DeleteFile to true.
-  FILE_DISPOSITION_INFO Disposition;
-  Disposition.DeleteFile = Delete;
+  // The file is on a local drive, we can safely set FILE_DISPOSITION_INFO's
+  // flag.
+  Disposition.DeleteFile = true;
   if (!SetFileInformationByHandle(Handle, FileDispositionInfo, &Disposition,
                                   sizeof(Disposition)))
     return mapWindowsError(::GetLastError());
diff --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 693b0adaede437d4b85270a2594b37a4a962ccbd..2604218da160019aca8dd396c1cf6c4dfe986858 100644
--- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -5896,7 +5896,13 @@ bool PPCDAGToDAGISel::AllUsersSelectZero(SDNode *N) {
         User->getMachineOpcode() != PPC::SELECT_I8)
       return false;
 
+    SDNode *Op1 = User->getOperand(1).getNode();
     SDNode *Op2 = User->getOperand(2).getNode();
+    // If we have a degenerate select with two equal operands, swapping will
+    // not do anything, and we may run into an infinite loop.
+    if (Op1 == Op2)
+      return false;
+
     if (!Op2->isMachineOpcode())
       return false;
 
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoV.td b/llvm/lib/Target/RISCV/RISCVInstrInfoV.td
index 86fbc73d81d51933ba10a68624e51f91c9609d87..b3fc76aee1614ec3a21739d872f48f70227b8239 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoV.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoV.td
@@ -504,19 +504,19 @@ def VSOXEI16_V : VIndexedStore<MOPSTIndexedOrder, LSWidth16, "vsoxei16.v">;
 def VSOXEI32_V : VIndexedStore<MOPSTIndexedOrder, LSWidth32, "vsoxei32.v">;
 def VSOXEI64_V : VIndexedStore<MOPSTIndexedOrder, LSWidth64, "vsoxei64.v">;
 
-defm VL1R : VWholeLoad<1, "vl1r">;
-defm VL2R : VWholeLoad<2, "vl2r">;
-defm VL4R : VWholeLoad<4, "vl4r">;
-defm VL8R : VWholeLoad<8, "vl8r">;
+defm VL1R : VWholeLoad<0, "vl1r">;
+defm VL2R : VWholeLoad<1, "vl2r">;
+defm VL4R : VWholeLoad<3, "vl4r">;
+defm VL8R : VWholeLoad<7, "vl8r">;
 def : InstAlias<"vl1r.v $vd, (${rs1})", (VL1RE8_V VR:$vd, GPR:$rs1)>;
 def : InstAlias<"vl2r.v $vd, (${rs1})", (VL2RE8_V VR:$vd, GPR:$rs1)>;
 def : InstAlias<"vl4r.v $vd, (${rs1})", (VL4RE8_V VR:$vd, GPR:$rs1)>;
 def : InstAlias<"vl8r.v $vd, (${rs1})", (VL8RE8_V VR:$vd, GPR:$rs1)>;
 
-def VS1R_V : VWholeStore<1, "vs1r.v">;
-def VS2R_V : VWholeStore<2, "vs2r.v">;
-def VS4R_V : VWholeStore<4, "vs4r.v">;
-def VS8R_V : VWholeStore<8, "vs8r.v">;
+def VS1R_V : VWholeStore<0, "vs1r.v">;
+def VS2R_V : VWholeStore<1, "vs2r.v">;
+def VS4R_V : VWholeStore<3, "vs4r.v">;
+def VS8R_V : VWholeStore<7, "vs8r.v">;
 
 // Vector Single-Width Integer Add and Subtract
 defm VADD_V : VALU_IV_V_X_I<"vadd", 0b000000>;
diff --git a/llvm/lib/Target/X86/X86FastISel.cpp b/llvm/lib/Target/X86/X86FastISel.cpp
index caf158102230bbb34bea5c9dde3f72c3bd9a6418..a1a16a19f5e5258f8049755a2a23f2e00ad8f867 100644
--- a/llvm/lib/Target/X86/X86FastISel.cpp
+++ b/llvm/lib/Target/X86/X86FastISel.cpp
@@ -284,6 +284,14 @@ bool X86FastISel::foldX86XALUIntrinsic(X86::CondCode &CC, const Instruction *I,
       return false;
   }
 
+  // Make sure no potentially eflags clobbering phi moves can be inserted in
+  // between.
+  auto HasPhis = [](const BasicBlock *Succ) {
+    return !llvm::empty(Succ->phis());
+  };
+  if (I->isTerminator() && llvm::any_of(successors(I), HasPhis))
+    return false;
+
   CC = TmpCC;
   return true;
 }
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
index 0b53007bb6dc811bca875ae4a43d232fe2185825..07e68c44416d9aa1e437427525aeba9dd3c70999 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -1270,6 +1270,7 @@ Instruction *InstCombinerImpl::visitZExt(ZExtInst &CI) {
     ICmpInst *LHS = dyn_cast<ICmpInst>(SrcI->getOperand(0));
     ICmpInst *RHS = dyn_cast<ICmpInst>(SrcI->getOperand(1));
     if (LHS && RHS && LHS->hasOneUse() && RHS->hasOneUse() &&
+        LHS->getOperand(0)->getType() == RHS->getOperand(0)->getType() &&
         (transformZExtICmp(LHS, CI, false) ||
          transformZExtICmp(RHS, CI, false))) {
       // zext (or icmp, icmp) -> or (zext icmp), (zext icmp)
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 7cfe17618cde1808e0f6b1f9411719e4f46f0e56..de9560df9785eaf10b7190a6027fa6e11024f3f5 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -1628,6 +1628,11 @@ static bool canSinkInstructions(
         I->getType()->isTokenTy())
       return false;
 
+    // Do not try to sink an instruction in an infinite loop - it can cause
+    // this algorithm to infinite loop.
+    if (I->getParent()->getSingleSuccessor() == I->getParent())
+      return false;
+
     // Conservatively return false if I is an inline-asm instruction. Sinking
     // and merging inline-asm instructions can potentially create arguments
     // that cannot satisfy the inline-asm constraints.
@@ -1714,13 +1719,13 @@ static bool canSinkInstructions(
   return true;
 }
 
-// Assuming canSinkLastInstruction(Blocks) has returned true, sink the last
+// Assuming canSinkInstructions(Blocks) has returned true, sink the last
 // instruction of every block in Blocks to their common successor, commoning
 // into one instruction.
 static bool sinkLastInstruction(ArrayRef<BasicBlock*> Blocks) {
   auto *BBEnd = Blocks[0]->getTerminator()->getSuccessor(0);
 
-  // canSinkLastInstruction returning true guarantees that every block has at
+  // canSinkInstructions returning true guarantees that every block has at
   // least one non-terminator instruction.
   SmallVector<Instruction*,4> Insts;
   for (auto *BB : Blocks) {
@@ -1733,9 +1738,9 @@ static bool sinkLastInstruction(ArrayRef<BasicBlock*> Blocks) {
   }
 
   // The only checking we need to do now is that all users of all instructions
-  // are the same PHI node. canSinkLastInstruction should have checked this but
-  // it is slightly over-aggressive - it gets confused by commutative instructions
-  // so double-check it here.
+  // are the same PHI node. canSinkInstructions should have checked this but
+  // it is slightly over-aggressive - it gets confused by commutative
+  // instructions so double-check it here.
   Instruction *I0 = Insts.front();
   if (!I0->user_empty()) {
     auto *PNUse = dyn_cast<PHINode>(*I0->user_begin());
@@ -1746,11 +1751,11 @@ static bool sinkLastInstruction(ArrayRef<BasicBlock*> Blocks) {
       return false;
   }
 
-  // We don't need to do any more checking here; canSinkLastInstruction should
+  // We don't need to do any more checking here; canSinkInstructions should
   // have done it all for us.
   SmallVector<Value*, 4> NewOperands;
   for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O) {
-    // This check is different to that in canSinkLastInstruction. There, we
+    // This check is different to that in canSinkInstructions. There, we
     // cared about the global view once simplifycfg (and instcombine) have
     // completed - it takes into account PHIs that become trivially
     // simplifiable.  However here we need a more local view; if an operand
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index d36e078444bc291ffd83175f2cccbf6da5f9ae7c..b456a97aa4ec094f641da99691909fd6e6a1bdc0 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -372,19 +372,11 @@ static Type *getMemInstValueType(Value *I) {
 
 /// A helper function that returns true if the given type is irregular. The
 /// type is irregular if its allocated size doesn't equal the store size of an
-/// element of the corresponding vector type at the given vectorization factor.
-static bool hasIrregularType(Type *Ty, const DataLayout &DL, ElementCount VF) {
-  // Determine if an array of VF elements of type Ty is "bitcast compatible"
-  // with a <VF x Ty> vector.
-  if (VF.isVector()) {
-    auto *VectorTy = VectorType::get(Ty, VF);
-    return TypeSize::get(VF.getKnownMinValue() *
-                             DL.getTypeAllocSize(Ty).getFixedValue(),
-                         VF.isScalable()) != DL.getTypeStoreSize(VectorTy);
-  }
-
-  // If the vectorization factor is one, we just check if an array of type Ty
-  // requires padding between elements.
+/// element of the corresponding vector type.
+static bool hasIrregularType(Type *Ty, const DataLayout &DL) {
+  // Determine if an array of N elements of type Ty is "bitcast compatible"
+  // with a <N x Ty> vector.
+  // This is only true if there is no padding between the array elements.
   return DL.getTypeAllocSizeInBits(Ty) != DL.getTypeSizeInBits(Ty);
 }
 
@@ -5212,7 +5204,7 @@ bool LoopVectorizationCostModel::interleavedAccessCanBeWidened(
   // requires padding and will be scalarized.
   auto &DL = I->getModule()->getDataLayout();
   auto *ScalarTy = getMemInstValueType(I);
-  if (hasIrregularType(ScalarTy, DL, VF))
+  if (hasIrregularType(ScalarTy, DL))
     return false;
 
   // Check if masking is required.
@@ -5259,7 +5251,7 @@ bool LoopVectorizationCostModel::memoryInstructionCanBeWidened(
   // requires padding and will be scalarized.
   auto &DL = I->getModule()->getDataLayout();
   auto *ScalarTy = LI ? LI->getType() : SI->getValueOperand()->getType();
-  if (hasIrregularType(ScalarTy, DL, VF))
+  if (hasIrregularType(ScalarTy, DL))
     return false;
 
   return true;
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/huge-switch.ll b/llvm/test/CodeGen/AArch64/GlobalISel/huge-switch.ll
new file mode 100644
index 0000000000000000000000000000000000000000..8742a848c4af1066f6b40d3073c75d1c2441eaea
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/huge-switch.ll
@@ -0,0 +1,22 @@
+; RUN: llc -mtriple=arm64-apple-ios %s -o - -O0 -global-isel=1 | FileCheck %s
+define void @foo(i512 %in) {
+; CHECK-LABEL: foo:
+; CHECK: cbz
+  switch i512 %in, label %default [
+    i512 3923188584616675477397368389504791510063972152790021570560, label %l1
+    i512 3923188584616675477397368389504791510063972152790021570561, label %l2
+    i512 3923188584616675477397368389504791510063972152790021570562, label %l3
+  ]
+
+default:
+  ret void
+
+l1:
+  ret void
+
+l2:
+  ret void
+
+l3:
+  ret void
+}
diff --git a/llvm/test/CodeGen/PowerPC/pr49509.ll b/llvm/test/CodeGen/PowerPC/pr49509.ll
new file mode 100644
index 0000000000000000000000000000000000000000..f13733c1804757978152a5ec5941360e56cd0147
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/pr49509.ll
@@ -0,0 +1,81 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=powerpc-unknown-linux-gnu < %s | FileCheck %s
+
+target datalayout = "E-m:e-p:32:32-i64:64-n32"
+
+define void @test() {
+; CHECK-LABEL: test:
+; CHECK:       # %bb.0: # %bb
+; CHECK-NEXT:    bc 12, 20, .LBB0_2
+; CHECK-NEXT:  # %bb.1: # %bb2
+; CHECK-NEXT:    li 3, 0
+; CHECK-NEXT:    stw 3, 0(3)
+; CHECK-NEXT:    lis 3, 256
+; CHECK-NEXT:    stw 3, 0(3)
+; CHECK-NEXT:    blr
+; CHECK-NEXT:  .LBB0_2: # %bb1
+; CHECK-NEXT:    bclr 4, 20, 0
+; CHECK-NEXT:  # %bb.3: # %bb66
+; CHECK-NEXT:    lwz 4, 12(0)
+; CHECK-NEXT:    lwz 5, 8(0)
+; CHECK-NEXT:    lwz 6, 0(0)
+; CHECK-NEXT:    lwz 7, 4(0)
+; CHECK-NEXT:    lbz 3, 0(3)
+; CHECK-NEXT:    and 5, 5, 6
+; CHECK-NEXT:    and 4, 4, 7
+; CHECK-NEXT:    and 4, 4, 5
+; CHECK-NEXT:    cmpwi 3, 0
+; CHECK-NEXT:    lis 3, 256
+; CHECK-NEXT:    lis 7, 512
+; CHECK-NEXT:    bc 12, 2, .LBB0_4
+; CHECK-NEXT:    b .LBB0_5
+; CHECK-NEXT:  .LBB0_4: # %bb66
+; CHECK-NEXT:    li 3, 0
+; CHECK-NEXT:  .LBB0_5: # %bb66
+; CHECK-NEXT:    cmpwi 1, 4, -1
+; CHECK-NEXT:    cmpwi 5, 4, -1
+; CHECK-NEXT:    li 6, 0
+; CHECK-NEXT:    bc 12, 6, .LBB0_6
+; CHECK-NEXT:    b .LBB0_7
+; CHECK-NEXT:  .LBB0_6: # %bb66
+; CHECK-NEXT:    addi 3, 7, 0
+; CHECK-NEXT:  .LBB0_7: # %bb66
+; CHECK-NEXT:    cror 20, 22, 2
+; CHECK-NEXT:    stw 3, 0(3)
+; CHECK-NEXT:    bc 12, 20, .LBB0_9
+; CHECK-NEXT:  # %bb.8: # %bb66
+; CHECK-NEXT:    ori 3, 6, 0
+; CHECK-NEXT:    b .LBB0_10
+; CHECK-NEXT:  .LBB0_9: # %bb66
+; CHECK-NEXT:    li 3, 0
+; CHECK-NEXT:  .LBB0_10: # %bb66
+; CHECK-NEXT:    stw 3, 0(3)
+; CHECK-NEXT:    blr
+bb:
+  br i1 undef, label %bb2, label %bb1
+
+bb2:                                              ; preds = %bb
+  %i = select i1 undef, i64 0, i64 72057594037927936
+  store i64 %i, i64* undef, align 8
+  ret void
+
+bb1:                                              ; preds = %bb
+  %i50 = load i8, i8* undef, align 8
+  %i52 = load i128, i128* null, align 8
+  %i62 = icmp eq i8 %i50, 0
+  br i1 undef, label %bb66, label %bb64
+
+bb64:                                             ; preds = %bb63
+  ret void
+
+bb66:                                             ; preds = %bb63
+  %i67 = lshr i128 -1, 0
+  %i68 = xor i128 %i52, -1
+  %i69 = add i128 0, %i68
+  %i70 = and i128 %i67, %i69
+  %i71 = icmp eq i128 %i70, 0
+  %i74 = select i1 %i62, i64 0, i64 72057594037927936
+  %i75 = select i1 %i71, i64 144115188075855872, i64 %i74
+  store i64 %i75, i64* undef, align 8
+  ret void
+}
diff --git a/llvm/test/CodeGen/X86/pr49467.ll b/llvm/test/CodeGen/X86/pr49467.ll
new file mode 100644
index 0000000000000000000000000000000000000000..9b350255206661e948b920929adf446927fa101d
--- /dev/null
+++ b/llvm/test/CodeGen/X86/pr49467.ll
@@ -0,0 +1,27 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -O0 -fast-isel -verify-machineinstrs -mtriple=x86_64 < %s | FileCheck %s
+
+declare { i8*, i64 } @get()
+
+declare void @use(i8*, i64)
+
+define void @test(i64* %p) nounwind {
+; CHECK-LABEL: test:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    pushq %rax
+; CHECK-NEXT:    movq %rdi, (%rsp) # 8-byte Spill
+; CHECK-NEXT:    callq get@PLT
+; CHECK-NEXT:    movq (%rsp), %rdi # 8-byte Reload
+; CHECK-NEXT:    movq %rdx, %rsi
+; CHECK-NEXT:    movq %rsi, (%rdi)
+; CHECK-NEXT:    # implicit-def: $rdi
+; CHECK-NEXT:    callq use@PLT
+; CHECK-NEXT:    popq %rax
+; CHECK-NEXT:    retq
+  %struct = call { i8*, i64 } @get()
+  %struct.1 = extractvalue { i8*, i64 } %struct, 1
+  store i64 %struct.1, i64* %p, align 8
+  %struct.2 = extractvalue { i8*, i64 } %struct, 1
+  call void @use(i8* undef, i64 %struct.2)
+  ret void
+}
diff --git a/llvm/test/CodeGen/X86/pr49587.ll b/llvm/test/CodeGen/X86/pr49587.ll
new file mode 100644
index 0000000000000000000000000000000000000000..7dc54a526608cf13125cede2470ee47c68607306
--- /dev/null
+++ b/llvm/test/CodeGen/X86/pr49587.ll
@@ -0,0 +1,33 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -O0 -fast-isel -mtriple=x86_64-- < %s | FileCheck %s
+
+define i32 @test(i64 %arg) nounwind {
+; CHECK-LABEL: test:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    subq $1, %rdi
+; CHECK-NEXT:    setb %cl
+; CHECK-NEXT:    xorl %eax, %eax
+; CHECK-NEXT:    testb $1, %cl
+; CHECK-NEXT:    movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; CHECK-NEXT:    jne .LBB0_2
+; CHECK-NEXT:  # %bb.1: # %no_overflow
+; CHECK-NEXT:    movl $1, %eax
+; CHECK-NEXT:    movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; CHECK-NEXT:    jmp .LBB0_2
+; CHECK-NEXT:  .LBB0_2: # %merge
+; CHECK-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; CHECK-NEXT:    retq
+entry:
+  %usubo = tail call { i64, i1 } @llvm.usub.with.overflow.i64(i64 %arg, i64 1)
+  %overflow = extractvalue { i64, i1 } %usubo, 1
+  br i1 %overflow, label %merge, label %no_overflow
+
+no_overflow:
+  br label %merge
+
+merge:
+  %phi = phi i32 [ 1, %no_overflow ], [ 0, %entry ]
+  ret i32 %phi
+}
+
+declare { i64, i1 } @llvm.usub.with.overflow.i64(i64, i64)
diff --git a/llvm/test/MC/RISCV/rvv/aliases.s b/llvm/test/MC/RISCV/rvv/aliases.s
index 2e5120c91e4515103945dca7bcfdaf0dc92daf6d..ebe9e79399a697ea205f421302bb3d7865e0b2e7 100644
--- a/llvm/test/MC/RISCV/rvv/aliases.s
+++ b/llvm/test/MC/RISCV/rvv/aliases.s
@@ -54,17 +54,17 @@ vmset.m v0
 # ALIAS:    vmnot.m v0, v1                  # encoding: [0x57,0xa0,0x10,0x76]
 # NO-ALIAS: vmnand.mm       v0, v1, v1      # encoding: [0x57,0xa0,0x10,0x76]
 vmnot.m v0, v1
-# ALIAS:    vl1r.v          v0, (a0)        # encoding: [0x07,0x00,0x85,0x22]
-# NO-ALIAS: vl1re8.v        v0, (a0)        # encoding: [0x07,0x00,0x85,0x22]
+# ALIAS:    vl1r.v          v0, (a0)        # encoding: [0x07,0x00,0x85,0x02]
+# NO-ALIAS: vl1re8.v        v0, (a0)        # encoding: [0x07,0x00,0x85,0x02]
 vl1r.v v0, (a0) 
-# ALIAS:    vl2r.v          v0, (a0)        # encoding: [0x07,0x00,0x85,0x42]
-# NO-ALIAS: vl2re8.v        v0, (a0)        # encoding: [0x07,0x00,0x85,0x42]
+# ALIAS:    vl2r.v          v0, (a0)        # encoding: [0x07,0x00,0x85,0x22]
+# NO-ALIAS: vl2re8.v        v0, (a0)        # encoding: [0x07,0x00,0x85,0x22]
 vl2r.v v0, (a0) 
-# ALIAS:    vl4r.v          v0, (a0)        # encoding: [0x07,0x00,0x85,0x82]
-# NO-ALIAS: vl4re8.v        v0, (a0)        # encoding: [0x07,0x00,0x85,0x82]
+# ALIAS:    vl4r.v          v0, (a0)        # encoding: [0x07,0x00,0x85,0x62]
+# NO-ALIAS: vl4re8.v        v0, (a0)        # encoding: [0x07,0x00,0x85,0x62]
 vl4r.v v0, (a0) 
-# ALIAS:    vl8r.v          v0, (a0)        # encoding: [0x07,0x00,0x85,0x02]
-# NO-ALIAS: vl8re8.v        v0, (a0)        # encoding: [0x07,0x00,0x85,0x02]
+# ALIAS:    vl8r.v          v0, (a0)        # encoding: [0x07,0x00,0x85,0xe2]
+# NO-ALIAS: vl8re8.v        v0, (a0)        # encoding: [0x07,0x00,0x85,0xe2]
 vl8r.v v0, (a0) 
 # ALIAS:    vneg.v          v2, v1, v0.t    # encoding: [0x57,0x41,0x10,0x0c]
 # NO-ALIAS: vrsub.vx        v2, v1, zero, v0.t # encoding: [0x57,0x41,0x10,0x0c]
diff --git a/llvm/test/MC/RISCV/rvv/load.s b/llvm/test/MC/RISCV/rvv/load.s
index 3d0dbb15c36ec52f1c75763e883e78d4a59ade33..45a3881cb60d1b952372776c4e3c4b03a75a8f93 100644
--- a/llvm/test/MC/RISCV/rvv/load.s
+++ b/llvm/test/MC/RISCV/rvv/load.s
@@ -256,96 +256,96 @@ vloxei64.v v8, (a0), v4
 
 vl1re8.v v8, (a0)
 # CHECK-INST: vl1re8.v v8, (a0)
-# CHECK-ENCODING: [0x07,0x04,0x85,0x22]
+# CHECK-ENCODING: [0x07,0x04,0x85,0x02]
 # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions)
-# CHECK-UNKNOWN: 07 04 85 22 <unknown>
+# CHECK-UNKNOWN: 07 04 85 02 <unknown>
 
 vl1re16.v v8, (a0)
 # CHECK-INST: vl1re16.v v8, (a0)
-# CHECK-ENCODING: [0x07,0x54,0x85,0x22]
+# CHECK-ENCODING: [0x07,0x54,0x85,0x02]
 # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions)
-# CHECK-UNKNOWN: 07 54 85 22 <unknown>
+# CHECK-UNKNOWN: 07 54 85 02 <unknown>
 
 vl1re32.v v8, (a0)
 # CHECK-INST: vl1re32.v v8, (a0)
-# CHECK-ENCODING: [0x07,0x64,0x85,0x22]
+# CHECK-ENCODING: [0x07,0x64,0x85,0x02]
 # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions)
-# CHECK-UNKNOWN: 07 64 85 22 <unknown>
+# CHECK-UNKNOWN: 07 64 85 02 <unknown>
 
 vl1re64.v v8, (a0)
 # CHECK-INST: vl1re64.v v8, (a0)
-# CHECK-ENCODING: [0x07,0x74,0x85,0x22]
+# CHECK-ENCODING: [0x07,0x74,0x85,0x02]
 # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions)
-# CHECK-UNKNOWN: 07 74 85 22 <unknown>
+# CHECK-UNKNOWN: 07 74 85 02 <unknown>
 
 vl2re8.v v8, (a0)
 # CHECK-INST: vl2re8.v v8, (a0)
-# CHECK-ENCODING: [0x07,0x04,0x85,0x42]
+# CHECK-ENCODING: [0x07,0x04,0x85,0x22]
 # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions)
-# CHECK-UNKNOWN: 07 04 85 42 <unknown>
+# CHECK-UNKNOWN: 07 04 85 22 <unknown>
 
 vl2re16.v v8, (a0)
 # CHECK-INST: vl2re16.v v8, (a0)
-# CHECK-ENCODING: [0x07,0x54,0x85,0x42]
+# CHECK-ENCODING: [0x07,0x54,0x85,0x22]
 # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions)
-# CHECK-UNKNOWN: 07 54 85 42 <unknown>
+# CHECK-UNKNOWN: 07 54 85 22 <unknown>
 
 vl2re32.v v8, (a0)
 # CHECK-INST: vl2re32.v v8, (a0)
-# CHECK-ENCODING: [0x07,0x64,0x85,0x42]
+# CHECK-ENCODING: [0x07,0x64,0x85,0x22]
 # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions)
-# CHECK-UNKNOWN: 07 64 85 42 <unknown>
+# CHECK-UNKNOWN: 07 64 85 22 <unknown>
 
 vl2re64.v v8, (a0)
 # CHECK-INST: vl2re64.v v8, (a0)
-# CHECK-ENCODING: [0x07,0x74,0x85,0x42]
+# CHECK-ENCODING: [0x07,0x74,0x85,0x22]
 # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions)
-# CHECK-UNKNOWN: 07 74 85 42 <unknown>
+# CHECK-UNKNOWN: 07 74 85 22 <unknown>
 
 vl4re8.v v8, (a0)
 # CHECK-INST: vl4re8.v v8, (a0)
-# CHECK-ENCODING: [0x07,0x04,0x85,0x82]
+# CHECK-ENCODING: [0x07,0x04,0x85,0x62]
 # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions)
-# CHECK-UNKNOWN: 07 04 85 82 <unknown>
+# CHECK-UNKNOWN: 07 04 85 62 <unknown>
 
 vl4re16.v v8, (a0)
 # CHECK-INST: vl4re16.v v8, (a0)
-# CHECK-ENCODING: [0x07,0x54,0x85,0x82]
+# CHECK-ENCODING: [0x07,0x54,0x85,0x62]
 # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions)
-# CHECK-UNKNOWN: 07 54 85 82 <unknown>
+# CHECK-UNKNOWN: 07 54 85 62 <unknown>
 
 vl4re32.v v8, (a0)
 # CHECK-INST: vl4re32.v v8, (a0)
-# CHECK-ENCODING: [0x07,0x64,0x85,0x82]
+# CHECK-ENCODING: [0x07,0x64,0x85,0x62]
 # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions)
-# CHECK-UNKNOWN: 07 64 85 82 <unknown>
+# CHECK-UNKNOWN: 07 64 85 62 <unknown>
 
 vl4re64.v v8, (a0)
 # CHECK-INST: vl4re64.v v8, (a0)
-# CHECK-ENCODING: [0x07,0x74,0x85,0x82]
+# CHECK-ENCODING: [0x07,0x74,0x85,0x62]
 # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions)
-# CHECK-UNKNOWN: 07 74 85 82 <unknown>
+# CHECK-UNKNOWN: 07 74 85 62 <unknown>
 
 vl8re8.v v8, (a0)
 # CHECK-INST: vl8re8.v v8, (a0)
-# CHECK-ENCODING: [0x07,0x04,0x85,0x02]
+# CHECK-ENCODING: [0x07,0x04,0x85,0xe2]
 # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions)
-# CHECK-UNKNOWN: 07 04 85 02 <unknown>
+# CHECK-UNKNOWN: 07 04 85 e2 <unknown>
 
 vl8re16.v v8, (a0)
 # CHECK-INST: vl8re16.v v8, (a0)
-# CHECK-ENCODING: [0x07,0x54,0x85,0x02]
+# CHECK-ENCODING: [0x07,0x54,0x85,0xe2]
 # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions)
-# CHECK-UNKNOWN: 07 54 85 02 <unknown>
+# CHECK-UNKNOWN: 07 54 85 e2 <unknown>
 
 vl8re32.v v8, (a0)
 # CHECK-INST: vl8re32.v v8, (a0)
-# CHECK-ENCODING: [0x07,0x64,0x85,0x02]
+# CHECK-ENCODING: [0x07,0x64,0x85,0xe2]
 # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions)
-# CHECK-UNKNOWN: 07 64 85 02 <unknown>
+# CHECK-UNKNOWN: 07 64 85 e2 <unknown>
 
 vl8re64.v v8, (a0)
 # CHECK-INST: vl8re64.v v8, (a0)
-# CHECK-ENCODING: [0x07,0x74,0x85,0x02]
+# CHECK-ENCODING: [0x07,0x74,0x85,0xe2]
 # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions)
-# CHECK-UNKNOWN: 07 74 85 02 <unknown>
+# CHECK-UNKNOWN: 07 74 85 e2 <unknown>
diff --git a/llvm/test/MC/RISCV/rvv/store.s b/llvm/test/MC/RISCV/rvv/store.s
index e4795aa1c2c9815fde060f9c6f56c058c31e2bcf..b5a75ac2d0087a888f2be2878bb7645dbedb0887 100644
--- a/llvm/test/MC/RISCV/rvv/store.s
+++ b/llvm/test/MC/RISCV/rvv/store.s
@@ -208,24 +208,24 @@ vsoxei64.v v24, (a0), v4
 
 vs1r.v v24, (a0)
 # CHECK-INST: vs1r.v v24, (a0)
-# CHECK-ENCODING: [0x27,0x0c,0x85,0x22]
+# CHECK-ENCODING: [0x27,0x0c,0x85,0x02]
 # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions)
-# CHECK-UNKNOWN: 27 0c 85 22 <unknown>
+# CHECK-UNKNOWN: 27 0c 85 02 <unknown>
 
 vs2r.v v24, (a0)
 # CHECK-INST: vs2r.v v24, (a0)
-# CHECK-ENCODING: [0x27,0x0c,0x85,0x42]
+# CHECK-ENCODING: [0x27,0x0c,0x85,0x22]
 # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions)
-# CHECK-UNKNOWN: 27 0c 85 42 <unknown>
+# CHECK-UNKNOWN: 27 0c 85 22 <unknown>
 
 vs4r.v v24, (a0)
 # CHECK-INST: vs4r.v v24, (a0)
-# CHECK-ENCODING: [0x27,0x0c,0x85,0x82]
+# CHECK-ENCODING: [0x27,0x0c,0x85,0x62]
 # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions)
-# CHECK-UNKNOWN: 27 0c 85 82 <unknown>
+# CHECK-UNKNOWN: 27 0c 85 62 <unknown>
 
 vs8r.v v24, (a0)
 # CHECK-INST: vs8r.v v24, (a0)
-# CHECK-ENCODING: [0x27,0x0c,0x85,0x02]
+# CHECK-ENCODING: [0x27,0x0c,0x85,0xe2]
 # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions)
-# CHECK-UNKNOWN: 27 0c 85 02 <unknown>
+# CHECK-UNKNOWN: 27 0c 85 e2 <unknown>
diff --git a/llvm/test/Transforms/InstCombine/zext-or-icmp.ll b/llvm/test/Transforms/InstCombine/zext-or-icmp.ll
index a77aa7ac7ebd814a0872cdc06d3fd6169f66302d..5ae3d8ea0dba00761e7e43442f817edb37eee1de 100644
--- a/llvm/test/Transforms/InstCombine/zext-or-icmp.ll
+++ b/llvm/test/Transforms/InstCombine/zext-or-icmp.ll
@@ -106,3 +106,69 @@ block2:
   %conv2 = zext i1 %cmp1 to i32
   ret i32 %conv2
 }
+
+; This should not end with more instructions than it started from.
+
+define i32 @PR49475(i32 %x, i16 %y) {
+; CHECK-LABEL: @PR49475(
+; CHECK-NEXT:    [[M:%.*]] = and i16 [[Y:%.*]], 1
+; CHECK-NEXT:    [[B1:%.*]] = icmp eq i32 [[X:%.*]], 0
+; CHECK-NEXT:    [[B2:%.*]] = icmp eq i16 [[M]], 0
+; CHECK-NEXT:    [[T1:%.*]] = or i1 [[B1]], [[B2]]
+; CHECK-NEXT:    [[Z:%.*]] = zext i1 [[T1]] to i32
+; CHECK-NEXT:    ret i32 [[Z]]
+;
+  %m = and i16 %y, 1
+  %b1 = icmp eq i32 %x, 0
+  %b2 = icmp eq i16 %m, 0
+  %t1 = or i1 %b1, %b2
+  %z = zext i1 %t1 to i32
+  ret i32 %z
+}
+
+; This would infinite-loop.
+
+define i8 @PR49475_infloop(i32 %t0, i16 %insert, i64 %e, i8 %i162) {
+; CHECK-LABEL: @PR49475_infloop(
+; CHECK-NEXT:    [[B:%.*]] = icmp eq i32 [[T0:%.*]], 0
+; CHECK-NEXT:    [[B2:%.*]] = icmp eq i16 [[INSERT:%.*]], 0
+; CHECK-NEXT:    [[T1:%.*]] = or i1 [[B]], [[B2]]
+; CHECK-NEXT:    [[EXT:%.*]] = zext i1 [[T1]] to i32
+; CHECK-NEXT:    [[AND:%.*]] = and i32 [[EXT]], [[T0]]
+; CHECK-NEXT:    [[TMP1:%.*]] = or i32 [[AND]], 140
+; CHECK-NEXT:    [[XOR1:%.*]] = zext i32 [[TMP1]] to i64
+; CHECK-NEXT:    [[CONV16:%.*]] = sext i8 [[I162:%.*]] to i64
+; CHECK-NEXT:    [[SUB17:%.*]] = sub i64 [[CONV16]], [[E:%.*]]
+; CHECK-NEXT:    [[SEXT:%.*]] = shl i64 [[SUB17]], 32
+; CHECK-NEXT:    [[CONV18:%.*]] = ashr exact i64 [[SEXT]], 32
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sle i64 [[CONV18]], [[XOR1]]
+; CHECK-NEXT:    [[CONV19:%.*]] = zext i1 [[CMP]] to i16
+; CHECK-NEXT:    [[OR21:%.*]] = or i16 [[CONV19]], [[INSERT]]
+; CHECK-NEXT:    [[TRUNC44:%.*]] = trunc i16 [[OR21]] to i8
+; CHECK-NEXT:    [[INC:%.*]] = or i8 [[TRUNC44]], [[I162]]
+; CHECK-NEXT:    [[TOBOOL23_NOT:%.*]] = icmp eq i16 [[OR21]], 0
+; CHECK-NEXT:    call void @llvm.assume(i1 [[TOBOOL23_NOT]])
+; CHECK-NEXT:    ret i8 [[INC]]
+;
+  %b = icmp eq i32 %t0, 0
+  %b2 = icmp eq i16 %insert, 0
+  %t1 = or i1 %b, %b2
+  %ext = zext i1 %t1 to i32
+  %and = and i32 %t0, %ext
+  %conv13 = zext i32 %and to i64
+  %xor = xor i64 %conv13, 140
+  %conv16 = sext i8 %i162 to i64
+  %sub17 = sub i64 %conv16, %e
+  %sext = shl i64 %sub17, 32
+  %conv18 = ashr exact i64 %sext, 32
+  %cmp = icmp sge i64 %xor, %conv18
+  %conv19 = zext i1 %cmp to i16
+  %or21 = or i16 %insert, %conv19
+  %trunc44 = trunc i16 %or21 to i8
+  %inc = add i8 %i162, %trunc44
+  %tobool23.not = icmp eq i16 %or21, 0
+  call void @llvm.assume(i1 %tobool23.not)
+  ret i8 %inc
+}
+
+declare void @llvm.assume(i1 noundef)
diff --git a/llvm/test/Transforms/LoopVectorize/irregular_type.ll b/llvm/test/Transforms/LoopVectorize/irregular_type.ll
new file mode 100644
index 0000000000000000000000000000000000000000..167a1a101e6f96a2c25ea291b2a33443f3beec8b
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/irregular_type.ll
@@ -0,0 +1,27 @@
+; RUN: opt %s -loop-vectorize -force-vector-width=4 -S | FileCheck %s
+
+; Ensure the array loads/stores are not optimized into vector operations when
+; the element type has padding bits.
+
+; CHECK: foo
+; CHECK: vector.body
+; CHECK-NOT: load <4 x i7>
+; CHECK-NOT: store <4 x i7>
+; CHECK: for.body
+define void @foo(i7* %a, i64 %n) {
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds i7, i7* %a, i64 %indvars.iv
+  %0 = load i7, i7* %arrayidx, align 1
+  %sub = add nuw nsw i7 %0, 0
+  store i7 %sub, i7* %arrayidx, align 1
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %cmp = icmp eq i64 %indvars.iv.next, %n
+  br i1 %cmp, label %for.exit, label %for.body
+
+for.exit:
+  ret void
+}
diff --git a/llvm/test/Transforms/OpenMP/add_attributes.ll b/llvm/test/Transforms/OpenMP/add_attributes.ll
index b294542667bdb1dfd4e4dad7142cfde2f6737233..8476f42dd5297e222ebd9be92c40336eaca6ea76 100644
--- a/llvm/test/Transforms/OpenMP/add_attributes.ll
+++ b/llvm/test/Transforms/OpenMP/add_attributes.ll
@@ -627,7 +627,7 @@ declare i8* @__kmpc_init_allocator(i32, i8*, i32, i8*)
 
 declare void @__kmpc_destroy_allocator(i32, i8*)
 
-declare void @__kmpc_push_target_tripcount(%struct.ident_t*, i64, i64)
+declare void @__kmpc_push_target_tripcount_mapper(%struct.ident_t*, i64, i64)
 
 declare i32 @__kmpc_warp_active_thread_mask()
 
@@ -1144,7 +1144,7 @@ declare void @__kmpc_proxy_task_completed_ooo(i8*)
 ; CHECK-NEXT: declare void @__kmpc_destroy_allocator(i32, i8*)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__kmpc_push_target_tripcount(%struct.ident_t*, i64, i64)
+; CHECK-NEXT: declare void @__kmpc_push_target_tripcount_mapper(%struct.ident_t*, i64, i64)
 
 ; CHECK: ; Function Attrs: convergent nounwind
 ; CHECK-NEXT: declare i32 @__kmpc_warp_active_thread_mask()
@@ -1669,7 +1669,7 @@ declare void @__kmpc_proxy_task_completed_ooo(i8*)
 ; OPTIMISTIC-NEXT: declare void @__kmpc_destroy_allocator(i32, i8*)
 
 ; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind willreturn writeonly
-; OPTIMISTIC-NEXT: declare void @__kmpc_push_target_tripcount(%struct.ident_t*, i64, i64)
+; OPTIMISTIC-NEXT: declare void @__kmpc_push_target_tripcount_mapper(%struct.ident_t*, i64, i64)
 
 ; OPTIMISTIC: ; Function Attrs: convergent nounwind
 ; OPTIMISTIC-NEXT: declare i32 @__kmpc_warp_active_thread_mask()
diff --git a/llvm/test/Transforms/SimplifyCFG/sink-inf-loop.ll b/llvm/test/Transforms/SimplifyCFG/sink-inf-loop.ll
new file mode 100644
index 0000000000000000000000000000000000000000..37399367efce7685117d107aa036377df1af34fd
--- /dev/null
+++ b/llvm/test/Transforms/SimplifyCFG/sink-inf-loop.ll
@@ -0,0 +1,49 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt %s -simplifycfg -simplifycfg-require-and-preserve-domtree=1 -keep-loops=false -sink-common-insts=true -S | FileCheck %s
+
+; This would infinite-loop because we allowed code sinking to examine an infinite-loop block (%j).
+
+define void @PR49541(i32* %t1, i32 %a, i1 %bool) {
+; CHECK-LABEL: @PR49541(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[I:%.*]]
+; CHECK:       j:
+; CHECK-NEXT:    [[T3:%.*]] = phi i32 [ [[B:%.*]], [[J:%.*]] ], [ [[A:%.*]], [[COND_TRUE:%.*]] ], [ [[A]], [[COND_FALSE:%.*]] ]
+; CHECK-NEXT:    [[T2:%.*]] = phi i32 [ [[T2]], [[J]] ], [ [[PRE2:%.*]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ]
+; CHECK-NEXT:    [[B]] = load i32, i32* [[T1:%.*]], align 4
+; CHECK-NEXT:    br label [[J]]
+; CHECK:       i:
+; CHECK-NEXT:    [[G_1:%.*]] = phi i16 [ undef, [[ENTRY:%.*]] ], [ [[G_1]], [[COND_FALSE]] ]
+; CHECK-NEXT:    br i1 [[BOOL:%.*]], label [[COND_FALSE]], label [[COND_TRUE]]
+; CHECK:       cond.true:
+; CHECK-NEXT:    [[TOBOOL9_NOT:%.*]] = icmp eq i16 [[G_1]], 0
+; CHECK-NEXT:    [[PRE2]] = load i32, i32* [[T1]], align 4
+; CHECK-NEXT:    br label [[J]]
+; CHECK:       cond.false:
+; CHECK-NEXT:    [[T5:%.*]] = load i32, i32* [[T1]], align 4
+; CHECK-NEXT:    [[B2:%.*]] = icmp eq i32 [[T5]], 0
+; CHECK-NEXT:    br i1 [[B2]], label [[J]], label [[I]]
+;
+entry:
+  br label %i
+
+j:
+  %t3 = phi i32 [ %b, %j ], [ %a, %cond.true ], [ %a, %cond.false ]
+  %t2 = phi i32 [ %t2, %j ], [ %pre2, %cond.true ], [ 0, %cond.false ]
+  %b = load i32, i32* %t1, align 4
+  br label %j
+
+i:
+  %g.1 = phi i16 [ undef, %entry ], [ %g.1, %cond.false ]
+  br i1 %bool, label %cond.false, label %cond.true
+
+cond.true:
+  %tobool9.not = icmp eq i16 %g.1, 0
+  %pre2 = load i32, i32* %t1, align 4
+  br label %j
+
+cond.false:
+  %t5 = load i32, i32* %t1, align 4
+  %b2 = icmp eq i32 %t5, 0
+  br i1 %b2, label %j, label %i
+}
diff --git a/openmp/libomptarget/include/omptarget.h b/openmp/libomptarget/include/omptarget.h
index 46bb8206efa1934bc5b70bd32d799a47d9dc410e..36c25c33798a66578e98d157c4a78083cb2bace2 100644
--- a/openmp/libomptarget/include/omptarget.h
+++ b/openmp/libomptarget/include/omptarget.h
@@ -283,8 +283,10 @@ int __tgt_target_teams_nowait_mapper(
     int32_t thread_limit, int32_t depNum, void *depList, int32_t noAliasDepNum,
     void *noAliasDepList);
 
-void __kmpc_push_target_tripcount(ident_t *loc, int64_t device_id,
-                                  uint64_t loop_tripcount);
+void __kmpc_push_target_tripcount(int64_t device_id, uint64_t loop_tripcount);
+
+void __kmpc_push_target_tripcount_mapper(ident_t *loc, int64_t device_id,
+                                         uint64_t loop_tripcount);
 
 #ifdef __cplusplus
 }
diff --git a/openmp/libomptarget/src/exports b/openmp/libomptarget/src/exports
index 5e09a088533da241b59370e6115d0e65393ffb4a..b7fc1c8c3c864d94b63c5a9be818ab3446224d25 100644
--- a/openmp/libomptarget/src/exports
+++ b/openmp/libomptarget/src/exports
@@ -25,6 +25,8 @@ VERS1.0 {
     __tgt_target_teams_nowait_mapper;
     __tgt_mapper_num_components;
     __tgt_push_mapper_component;
+    __kmpc_push_target_tripcount;
+    __kmpc_push_target_tripcount_mapper;
     omp_get_num_devices;
     omp_get_initial_device;
     omp_target_alloc;
@@ -34,7 +36,6 @@ VERS1.0 {
     omp_target_memcpy_rect;
     omp_target_associate_ptr;
     omp_target_disassociate_ptr;
-    __kmpc_push_target_tripcount;
   local:
     *;
 };
diff --git a/openmp/libomptarget/src/interface.cpp b/openmp/libomptarget/src/interface.cpp
index 01f3715d6bcc8e855f25625425f0ad6d6a953038..b97676a6981b5f2308a8be6e151eedaca6f483a4 100644
--- a/openmp/libomptarget/src/interface.cpp
+++ b/openmp/libomptarget/src/interface.cpp
@@ -514,8 +514,13 @@ EXTERN void __tgt_push_mapper_component(void *rt_mapper_handle, void *base,
       MapComponentInfoTy(base, begin, size, type, name));
 }
 
-EXTERN void __kmpc_push_target_tripcount(ident_t *loc, int64_t device_id,
+EXTERN void __kmpc_push_target_tripcount(int64_t device_id,
                                          uint64_t loop_tripcount) {
+  __kmpc_push_target_tripcount_mapper(nullptr, device_id, loop_tripcount);
+}
+
+EXTERN void __kmpc_push_target_tripcount_mapper(ident_t *loc, int64_t device_id,
+                                                uint64_t loop_tripcount) {
   TIMESCOPE_WITH_IDENT(loc);
   if (IsOffloadDisabled())
     return;
diff --git a/openmp/libomptarget/src/omptarget.cpp b/openmp/libomptarget/src/omptarget.cpp
index 37150aae2fe61a745170cf1609a18f231dc921b9..af6f7d09a4a2d250f077910b3388d58522845afc 100644
--- a/openmp/libomptarget/src/omptarget.cpp
+++ b/openmp/libomptarget/src/omptarget.cpp
@@ -900,8 +900,8 @@ TableMap *getTableMap(void *HostPtr) {
 
 /// Get loop trip count
 /// FIXME: This function will not work right if calling
-/// __kmpc_push_target_tripcount in one thread but doing offloading in another
-/// thread, which might occur when we call task yield.
+/// __kmpc_push_target_tripcount_mapper in one thread but doing offloading in
+/// another thread, which might occur when we call task yield.
 uint64_t getLoopTripCount(int64_t DeviceId) {
   DeviceTy &Device = PM->Devices[DeviceId];
   uint64_t LoopTripCount = 0;
diff --git a/openmp/runtime/src/kmp_runtime.cpp b/openmp/runtime/src/kmp_runtime.cpp
index a6e32bd008e1027e0736550701abfac04db7569e..b981f8740dbe2611d7698768f8317beb70a23311 100644
--- a/openmp/runtime/src/kmp_runtime.cpp
+++ b/openmp/runtime/src/kmp_runtime.cpp
@@ -920,6 +920,12 @@ static int __kmp_reserve_threads(kmp_root_t *root, kmp_team_t *parent_team,
   if (TCR_PTR(__kmp_threads[0]) == NULL) {
     --capacity;
   }
+  // If it is not for initializing the hidden helper team, we need to take
+  // __kmp_hidden_helper_threads_num out of the capacity because it is included
+  // in __kmp_threads_capacity.
+  if (__kmp_enable_hidden_helper && !TCR_4(__kmp_init_hidden_helper_threads)) {
+    capacity -= __kmp_hidden_helper_threads_num;
+  }
   if (__kmp_nth + new_nthreads -
           (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
       capacity) {
@@ -3632,6 +3638,13 @@ int __kmp_register_root(int initial_thread) {
     --capacity;
   }
 
+  // If it is not for initializing the hidden helper team, we need to take
+  // __kmp_hidden_helper_threads_num out of the capacity because it is included
+  // in __kmp_threads_capacity.
+  if (__kmp_enable_hidden_helper && !TCR_4(__kmp_init_hidden_helper_threads)) {
+    capacity -= __kmp_hidden_helper_threads_num;
+  }
+
   /* see if there are too many threads */
   if (__kmp_all_nth >= capacity && !__kmp_expand_threads(1)) {
     if (__kmp_tp_cached) {
@@ -3664,7 +3677,7 @@ int __kmp_register_root(int initial_thread) {
     /* find an available thread slot */
     // Don't reassign the zero slot since we need that to only be used by
     // initial thread. Slots for hidden helper threads should also be skipped.
-    if (initial_thread && __kmp_threads[0] == NULL) {
+    if (initial_thread && TCR_PTR(__kmp_threads[0]) == NULL) {
       gtid = 0;
     } else {
       for (gtid = __kmp_hidden_helper_threads_num + 1;
diff --git a/openmp/runtime/src/kmp_settings.cpp b/openmp/runtime/src/kmp_settings.cpp
index b477edbbfb42bcff45762df300c0bb6792df853b..50f6a05faaf592b21e9f02fe63d8a4f24dccbe02 100644
--- a/openmp/runtime/src/kmp_settings.cpp
+++ b/openmp/runtime/src/kmp_settings.cpp
@@ -504,9 +504,10 @@ int __kmp_initial_threads_capacity(int req_nproc) {
     nth = (4 * __kmp_xproc);
 
   // If hidden helper task is enabled, we initialize the thread capacity with
-  // extra
-  // __kmp_hidden_helper_threads_num.
-  nth += __kmp_hidden_helper_threads_num;
+  // extra __kmp_hidden_helper_threads_num.
+  if (__kmp_enable_hidden_helper) {
+    nth += __kmp_hidden_helper_threads_num;
+  }
 
   if (nth > __kmp_max_nth)
     nth = __kmp_max_nth;
diff --git a/openmp/runtime/test/tasking/hidden_helper_task/capacity_mix_threads.cpp b/openmp/runtime/test/tasking/hidden_helper_task/capacity_mix_threads.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..776aee9d8e2cac91c9df3582e64b0189163dd19f
--- /dev/null
+++ b/openmp/runtime/test/tasking/hidden_helper_task/capacity_mix_threads.cpp
@@ -0,0 +1,45 @@
+// RUN: %libomp-cxx-compile-and-run
+
+#include <omp.h>
+
+#include <algorithm>
+#include <cassert>
+#include <chrono>
+#include <thread>
+#include <vector>
+
+void dummy_root() {
+  // omp_get_max_threads() will do middle initialization
+  int nthreads = omp_get_max_threads();
+  std::this_thread::sleep_for(std::chrono::milliseconds(1000));
+}
+
+int main(int argc, char *argv[]) {
+  const int N = std::min(std::max(std::max(32, 4 * omp_get_max_threads()),
+                                  4 * omp_get_num_procs()),
+                         std::numeric_limits<int>::max());
+
+  std::vector<int> data(N);
+
+  // Create a new thread to initialize the OpenMP RTL. The new thread will not
+  // be taken as the "initial thread".
+  std::thread root(dummy_root);
+
+#pragma omp parallel for num_threads(N)
+  for (unsigned i = 0; i < N; ++i) {
+    data[i] = i;
+  }
+
+#pragma omp parallel for num_threads(N + 1)
+  for (unsigned i = 0; i < N; ++i) {
+    data[i] += i;
+  }
+
+  for (unsigned i = 0; i < N; ++i) {
+    assert(data[i] == 2 * i);
+  }
+
+  root.join();
+
+  return 0;
+}
diff --git a/openmp/runtime/test/tasking/hidden_helper_task/capacity_nthreads.cpp b/openmp/runtime/test/tasking/hidden_helper_task/capacity_nthreads.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..a9d394f729e9c5171d6c22746d19a4a6653d377e
--- /dev/null
+++ b/openmp/runtime/test/tasking/hidden_helper_task/capacity_nthreads.cpp
@@ -0,0 +1,31 @@
+// RUN: %libomp-cxx-compile-and-run
+
+#include <omp.h>
+
+#include <algorithm>
+#include <cassert>
+#include <vector>
+
+int main(int argc, char *argv[]) {
+  const int N = std::min(std::max(std::max(32, 4 * omp_get_max_threads()),
+                                  4 * omp_get_num_procs()),
+                         std::numeric_limits<int>::max());
+
+  std::vector<int> data(N);
+
+#pragma omp parallel for num_threads(N)
+  for (unsigned i = 0; i < N; ++i) {
+    data[i] = i;
+  }
+
+#pragma omp parallel for num_threads(N + 1)
+  for (unsigned i = 0; i < N; ++i) {
+    data[i] += i;
+  }
+
+  for (unsigned i = 0; i < N; ++i) {
+    assert(data[i] == 2 * i);
+  }
+
+  return 0;
+}