27 files changed, 775 insertions, 182 deletions
diff --git a/clang/test/AST/ByteCode/builtin-functions.cpp b/clang/test/AST/ByteCode/builtin-functions.cpp
index e9093b2..a90f636 100644
--- a/clang/test/AST/ByteCode/builtin-functions.cpp
+++ b/clang/test/AST/ByteCode/builtin-functions.cpp
@@ -1856,7 +1856,8 @@ namespace InitParam {
 
 #endif
 
-namespace SAddOverflowInt {
+namespace NonBlockPointerStore {
   int a;
   void foo(void) { a *= __builtin_sadd_overflow(1, 2, 0); }
+  void foo2(void) { a *= __builtin_addc(1, 2, 0, 0); }
 }
diff --git a/clang/test/AST/ByteCode/cxx20.cpp b/clang/test/AST/ByteCode/cxx20.cpp
index 1888998..cb788fa 100644
--- a/clang/test/AST/ByteCode/cxx20.cpp
+++ b/clang/test/AST/ByteCode/cxx20.cpp
@@ -1183,3 +1183,21 @@ namespace VirtualFunctionCallThroughArrayElem {
   static_assert(a[2][3].foo()); // both-error {{not an integral constant expression}} \
                                 // both-note {{virtual function called on object 'a[2][3]' whose dynamic type is not constant}}
 }
+
+namespace NonPureVirtualCall {
+  struct A {
+    constexpr virtual void call(int) = 0;
+    constexpr void call2() { call(0); }
+  };
+
+  struct B : A {
+    constexpr void call(int) override {}
+  };
+
+  consteval void check() {
+    B b;
+    b.call2();
+  }
+
+  int main() { check(); }
+}
diff --git a/clang/test/CIR/CodeGen/try-catch.cpp b/clang/test/CIR/CodeGen/try-catch.cpp
index 1e4d2a6..27e3d8e 100644
--- a/clang/test/CIR/CodeGen/try-catch.cpp
+++ b/clang/test/CIR/CodeGen/try-catch.cpp
@@ -164,3 +164,33 @@ void try_catch_with_alloca() {
 // OGCG: %[[TMP_B:.*]] = load i32, ptr %[[B_ADDR]], align 4
 // OGCG: %[[RESULT:.*]] = add nsw i32 %[[TMP_A]], %[[TMP_B]]
 // OGCG: store i32 %[[RESULT]], ptr %[[C_ADDR]], align 4
+
+void function_with_noexcept() noexcept;
+
+void calling_noexcept_function_inside_try_block() {
+  try {
+    function_with_noexcept();
+  } catch (...) {
+  }
+}
+
+// CIR: cir.scope {
+// CIR:   cir.try {
+// CIR:     cir.call @_Z22function_with_noexceptv() nothrow : () -> ()
+// CIR:     cir.yield
+// CIR:   }
+// CIR: }
+
+// LLVM:   br label %[[LABEL_1:.*]]
+// LLVM: [[LABEL_1]]:
+// LLVM:   br label %[[LABEL_2:.*]]
+// LLVM: [[LABEL_2]]:
+// LLVM:   call void @_Z22function_with_noexceptv()
+// LLVM:   br label %[[LABEL_3:.*]]
+// LLVM: [[LABEL_3]]:
+// LLVM:   br label %[[LABEL_4:.*]]
+// LLVM: [[LABEL_4]]:
+// LLVM:   ret void
+
+// OGCG: call void @_Z22function_with_noexceptv()
+// OGCG: ret void
diff --git a/clang/test/CodeGen/AArch64/ext-vector-coercion.c b/clang/test/CodeGen/AArch64/ext-vector-coercion.c
new file mode 100644
index 0000000..354980a
--- /dev/null
+++ b/clang/test/CodeGen/AArch64/ext-vector-coercion.c
@@ -0,0 +1,42 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6
+// RUN: %clang_cc1 -fenable-matrix -triple arm64-apple-macosx %s -emit-llvm -disable-llvm-passes -o - | FileCheck %s
+
+typedef float float3 __attribute__((ext_vector_type(3)));
+struct Vec3 {
+  union {
+    struct {
+      float x;
+      float y;
+      float z;
+    };
+    float vec __attribute__((ext_vector_type(3)));
+  };
+};
+
+// CHECK-LABEL: define i128 @add(
+// CHECK-SAME: i128 [[A_COERCE:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[RETVAL:%.*]] = alloca [[STRUCT_VEC3:%.*]], align 16
+// CHECK-NEXT:    [[A:%.*]] = alloca [[STRUCT_VEC3]], align 16
+// CHECK-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_VEC3]], ptr [[A]], i32 0, i32 0
+// CHECK-NEXT:    store i128 [[A_COERCE]], ptr [[COERCE_DIVE]], align 16
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_VEC3]], ptr [[A]], i32 0, i32 0
+// CHECK-NEXT:    [[LOADVECN:%.*]] = load <4 x float>, ptr [[TMP0]], align 16
+// CHECK-NEXT:    [[EXTRACTVEC:%.*]] = shufflevector <4 x float> [[LOADVECN]], <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_VEC3]], ptr [[A]], i32 0, i32 0
+// CHECK-NEXT:    [[LOADVECN1:%.*]] = load <4 x float>, ptr [[TMP1]], align 16
+// CHECK-NEXT:    [[EXTRACTVEC2:%.*]] = shufflevector <4 x float> [[LOADVECN1]], <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
+// CHECK-NEXT:    [[ADD:%.*]] = fadd <3 x float> [[EXTRACTVEC]], [[EXTRACTVEC2]]
+// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_VEC3]], ptr [[RETVAL]], i32 0, i32 0
+// CHECK-NEXT:    [[EXTRACTVEC3:%.*]] = shufflevector <3 x float> [[ADD]], <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+// CHECK-NEXT:    store <4 x float> [[EXTRACTVEC3]], ptr [[TMP2]], align 16
+// CHECK-NEXT:    [[COERCE_DIVE4:%.*]] = getelementptr inbounds nuw [[STRUCT_VEC3]], ptr [[RETVAL]], i32 0, i32 0
+// CHECK-NEXT:    [[TMP3:%.*]] = load i128, ptr [[COERCE_DIVE4]], align 16
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+struct Vec3 add(struct Vec3 a) {
+  struct Vec3 res;
+  res.vec = a.vec + a.vec;
+  return res;
+}
+
diff --git a/clang/test/CodeGen/AArch64/neon-across.c b/clang/test/CodeGen/AArch64/neon-across.c
index d365975..aae5097 100644
--- a/clang/test/CodeGen/AArch64/neon-across.c
+++ b/clang/test/CodeGen/AArch64/neon-across.c
@@ -49,7 +49,7 @@ uint32_t test_vaddlv_u16(uint16x4_t a) {
 }
 
 // CHECK-LABEL: define {{[^@]+}}@test_vaddlvq_s8
-// CHECK-SAME: (<16 x i8> noundef [[A:%.*]]) #[[ATTR1:[0-9]+]] {
+// CHECK-SAME: (<16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[VADDLV_I:%.*]] = call i32 @llvm.aarch64.neon.saddlv.i32.v16i8(<16 x i8> [[A]])
 // CHECK-NEXT:    [[TMP0:%.*]] = trunc i32 [[VADDLV_I]] to i16
@@ -60,7 +60,7 @@ int16_t test_vaddlvq_s8(int8x16_t a) {
 }
 
 // CHECK-LABEL: define {{[^@]+}}@test_vaddlvq_s16
-// CHECK-SAME: (<8 x i16> noundef [[A:%.*]]) #[[ATTR1]] {
+// CHECK-SAME: (<8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[VADDLV_I:%.*]] = call i32 @llvm.aarch64.neon.saddlv.i32.v8i16(<8 x i16> [[A]])
 // CHECK-NEXT:    ret i32 [[VADDLV_I]]
@@ -70,7 +70,7 @@ int32_t test_vaddlvq_s16(int16x8_t a) {
 }
 
 // CHECK-LABEL: define {{[^@]+}}@test_vaddlvq_s32
-// CHECK-SAME: (<4 x i32> noundef [[A:%.*]]) #[[ATTR1]] {
+// CHECK-SAME: (<4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[VADDLVQ_S32_I:%.*]] = call i64 @llvm.aarch64.neon.saddlv.i64.v4i32(<4 x i32> [[A]])
 // CHECK-NEXT:    ret i64 [[VADDLVQ_S32_I]]
@@ -80,7 +80,7 @@ int64_t test_vaddlvq_s32(int32x4_t a) {
 }
 
 // CHECK-LABEL: define {{[^@]+}}@test_vaddlvq_u8
-// CHECK-SAME: (<16 x i8> noundef [[A:%.*]]) #[[ATTR1]] {
+// CHECK-SAME: (<16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[VADDLV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddlv.i32.v16i8(<16 x i8> [[A]])
 // CHECK-NEXT:    [[TMP0:%.*]] = trunc i32 [[VADDLV_I]] to i16
@@ -91,7 +91,7 @@ uint16_t test_vaddlvq_u8(uint8x16_t a) {
 }
 
 // CHECK-LABEL: define {{[^@]+}}@test_vaddlvq_u16
-// CHECK-SAME: (<8 x i16> noundef [[A:%.*]]) #[[ATTR1]] {
+// CHECK-SAME: (<8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[VADDLV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddlv.i32.v8i16(<8 x i16> [[A]])
 // CHECK-NEXT:    ret i32 [[VADDLV_I]]
@@ -101,7 +101,7 @@ uint32_t test_vaddlvq_u16(uint16x8_t a) {
 }
 
 // CHECK-LABEL: define {{[^@]+}}@test_vaddlvq_u32
-// CHECK-SAME: (<4 x i32> noundef [[A:%.*]]) #[[ATTR1]] {
+// CHECK-SAME: (<4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[VADDLVQ_U32_I:%.*]] = call i64 @llvm.aarch64.neon.uaddlv.i64.v4i32(<4 x i32> [[A]])
 // CHECK-NEXT:    ret i64 [[VADDLVQ_U32_I]]
@@ -113,9 +113,8 @@ uint64_t test_vaddlvq_u32(uint32x4_t a) {
 // CHECK-LABEL: define {{[^@]+}}@test_vmaxv_s8
 // CHECK-SAME: (<8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.smaxv.i32.v8i8(<8 x i8> [[A]])
-// CHECK-NEXT:    [[TMP0:%.*]] = trunc i32 [[VMAXV_I]] to i8
-// CHECK-NEXT:    ret i8 [[TMP0]]
+// CHECK-NEXT:    [[VMAXV_S8_I:%.*]] = call i8 @llvm.vector.reduce.smax.v8i8(<8 x i8> [[A]])
+// CHECK-NEXT:    ret i8 [[VMAXV_S8_I]]
 //
 int8_t test_vmaxv_s8(int8x8_t a) {
   return vmaxv_s8(a);
@@ -124,9 +123,8 @@ int8_t test_vmaxv_s8(int8x8_t a) {
 // CHECK-LABEL: define {{[^@]+}}@test_vmaxv_s16
 // CHECK-SAME: (<4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.smaxv.i32.v4i16(<4 x i16> [[A]])
-// CHECK-NEXT:    [[TMP0:%.*]] = trunc i32 [[VMAXV_I]] to i16
-// CHECK-NEXT:    ret i16 [[TMP0]]
+// CHECK-NEXT:    [[VMAXV_S16_I:%.*]] = call i16 @llvm.vector.reduce.smax.v4i16(<4 x i16> [[A]])
+// CHECK-NEXT:    ret i16 [[VMAXV_S16_I]]
 //
 int16_t test_vmaxv_s16(int16x4_t a) {
   return vmaxv_s16(a);
@@ -135,9 +133,8 @@ int16_t test_vmaxv_s16(int16x4_t a) {
 // CHECK-LABEL: define {{[^@]+}}@test_vmaxv_u8
 // CHECK-SAME: (<8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.umaxv.i32.v8i8(<8 x i8> [[A]])
-// CHECK-NEXT:    [[TMP0:%.*]] = trunc i32 [[VMAXV_I]] to i8
-// CHECK-NEXT:    ret i8 [[TMP0]]
+// CHECK-NEXT:    [[VMAXV_U8_I:%.*]] = call i8 @llvm.vector.reduce.umax.v8i8(<8 x i8> [[A]])
+// CHECK-NEXT:    ret i8 [[VMAXV_U8_I]]
 //
 uint8_t test_vmaxv_u8(uint8x8_t a) {
   return vmaxv_u8(a);
@@ -146,40 +143,37 @@ uint8_t test_vmaxv_u8(uint8x8_t a) {
 // CHECK-LABEL: define {{[^@]+}}@test_vmaxv_u16
 // CHECK-SAME: (<4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.umaxv.i32.v4i16(<4 x i16> [[A]])
-// CHECK-NEXT:    [[TMP0:%.*]] = trunc i32 [[VMAXV_I]] to i16
-// CHECK-NEXT:    ret i16 [[TMP0]]
+// CHECK-NEXT:    [[VMAXV_U16_I:%.*]] = call i16 @llvm.vector.reduce.umax.v4i16(<4 x i16> [[A]])
+// CHECK-NEXT:    ret i16 [[VMAXV_U16_I]]
 //
 uint16_t test_vmaxv_u16(uint16x4_t a) {
   return vmaxv_u16(a);
 }
 
 // CHECK-LABEL: define {{[^@]+}}@test_vmaxvq_s8
-// CHECK-SAME: (<16 x i8> noundef [[A:%.*]]) #[[ATTR1]] {
+// CHECK-SAME: (<16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.smaxv.i32.v16i8(<16 x i8> [[A]])
-// CHECK-NEXT:    [[TMP0:%.*]] = trunc i32 [[VMAXV_I]] to i8
-// CHECK-NEXT:    ret i8 [[TMP0]]
+// CHECK-NEXT:    [[VMAXVQ_S8_I:%.*]] = call i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> [[A]])
+// CHECK-NEXT:    ret i8 [[VMAXVQ_S8_I]]
 //
 int8_t test_vmaxvq_s8(int8x16_t a) {
   return vmaxvq_s8(a);
 }
 
 // CHECK-LABEL: define {{[^@]+}}@test_vmaxvq_s16
-// CHECK-SAME: (<8 x i16> noundef [[A:%.*]]) #[[ATTR1]] {
+// CHECK-SAME: (<8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.smaxv.i32.v8i16(<8 x i16> [[A]])
-// CHECK-NEXT:    [[TMP0:%.*]] = trunc i32 [[VMAXV_I]] to i16
-// CHECK-NEXT:    ret i16 [[TMP0]]
+// CHECK-NEXT:    [[VMAXVQ_S16_I:%.*]] = call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> [[A]])
+// CHECK-NEXT:    ret i16 [[VMAXVQ_S16_I]]
 //
 int16_t test_vmaxvq_s16(int16x8_t a) {
   return vmaxvq_s16(a);
 }
 
 // CHECK-LABEL: define {{[^@]+}}@test_vmaxvq_s32
-// CHECK-SAME: (<4 x i32> noundef [[A:%.*]]) #[[ATTR1]] {
+// CHECK-SAME: (<4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[VMAXVQ_S32_I:%.*]] = call i32 @llvm.aarch64.neon.smaxv.i32.v4i32(<4 x i32> [[A]])
+// CHECK-NEXT:    [[VMAXVQ_S32_I:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[A]])
 // CHECK-NEXT:    ret i32 [[VMAXVQ_S32_I]]
 //
 int32_t test_vmaxvq_s32(int32x4_t a) {
@@ -187,31 +181,29 @@ int32_t test_vmaxvq_s32(int32x4_t a) {
 }
 
 // CHECK-LABEL: define {{[^@]+}}@test_vmaxvq_u8
-// CHECK-SAME: (<16 x i8> noundef [[A:%.*]]) #[[ATTR1]] {
+// CHECK-SAME: (<16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.umaxv.i32.v16i8(<16 x i8> [[A]])
-// CHECK-NEXT:    [[TMP0:%.*]] = trunc i32 [[VMAXV_I]] to i8
-// CHECK-NEXT:    ret i8 [[TMP0]]
+// CHECK-NEXT:    [[VMAXVQ_U8_I:%.*]] = call i8 @llvm.vector.reduce.umax.v16i8(<16 x i8> [[A]])
+// CHECK-NEXT:    ret i8 [[VMAXVQ_U8_I]]
 //
 uint8_t test_vmaxvq_u8(uint8x16_t a) {
   return vmaxvq_u8(a);
 }
 
 // CHECK-LABEL: define {{[^@]+}}@test_vmaxvq_u16
-// CHECK-SAME: (<8 x i16> noundef [[A:%.*]]) #[[ATTR1]] {
+// CHECK-SAME: (<8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.umaxv.i32.v8i16(<8 x i16> [[A]])
-// CHECK-NEXT:    [[TMP0:%.*]] = trunc i32 [[VMAXV_I]] to i16
-// CHECK-NEXT:    ret i16 [[TMP0]]
+// CHECK-NEXT:    [[VMAXVQ_U16_I:%.*]] = call i16 @llvm.vector.reduce.umax.v8i16(<8 x i16> [[A]])
+// CHECK-NEXT:    ret i16 [[VMAXVQ_U16_I]]
 //
 uint16_t test_vmaxvq_u16(uint16x8_t a) {
   return vmaxvq_u16(a);
 }
 
 // CHECK-LABEL: define {{[^@]+}}@test_vmaxvq_u32
-// CHECK-SAME: (<4 x i32> noundef [[A:%.*]]) #[[ATTR1]] {
+// CHECK-SAME: (<4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[VMAXVQ_U32_I:%.*]] = call i32 @llvm.aarch64.neon.umaxv.i32.v4i32(<4 x i32> [[A]])
+// CHECK-NEXT:    [[VMAXVQ_U32_I:%.*]] = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> [[A]])
 // CHECK-NEXT:    ret i32 [[VMAXVQ_U32_I]]
 //
 uint32_t test_vmaxvq_u32(uint32x4_t a) {
@@ -221,9 +213,8 @@ uint32_t test_vmaxvq_u32(uint32x4_t a) {
 // CHECK-LABEL: define {{[^@]+}}@test_vminv_s8
 // CHECK-SAME: (<8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.sminv.i32.v8i8(<8 x i8> [[A]])
-// CHECK-NEXT:    [[TMP0:%.*]] = trunc i32 [[VMINV_I]] to i8
-// CHECK-NEXT:    ret i8 [[TMP0]]
+// CHECK-NEXT:    [[VMINV_S8_I:%.*]] = call i8 @llvm.vector.reduce.smin.v8i8(<8 x i8> [[A]])
+// CHECK-NEXT:    ret i8 [[VMINV_S8_I]]
 //
 int8_t test_vminv_s8(int8x8_t a) {
   return vminv_s8(a);
@@ -232,9 +223,8 @@ int8_t test_vminv_s8(int8x8_t a) {
 // CHECK-LABEL: define {{[^@]+}}@test_vminv_s16
 // CHECK-SAME: (<4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.sminv.i32.v4i16(<4 x i16> [[A]])
-// CHECK-NEXT:    [[TMP0:%.*]] = trunc i32 [[VMINV_I]] to i16
-// CHECK-NEXT:    ret i16 [[TMP0]]
+// CHECK-NEXT:    [[VMINV_S16_I:%.*]] = call i16 @llvm.vector.reduce.smin.v4i16(<4 x i16> [[A]])
+// CHECK-NEXT:    ret i16 [[VMINV_S16_I]]
 //
 int16_t test_vminv_s16(int16x4_t a) {
   return vminv_s16(a);
@@ -243,9 +233,8 @@ int16_t test_vminv_s16(int16x4_t a) {
 // CHECK-LABEL: define {{[^@]+}}@test_vminv_u8
 // CHECK-SAME: (<8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.uminv.i32.v8i8(<8 x i8> [[A]])
-// CHECK-NEXT:    [[TMP0:%.*]] = trunc i32 [[VMINV_I]] to i8
-// CHECK-NEXT:    ret i8 [[TMP0]]
+// CHECK-NEXT:    [[VMINV_U8_I:%.*]] = call i8 @llvm.vector.reduce.umin.v8i8(<8 x i8> [[A]])
+// CHECK-NEXT:    ret i8 [[VMINV_U8_I]]
 //
 uint8_t test_vminv_u8(uint8x8_t a) {
   return vminv_u8(a);
@@ -254,40 +243,37 @@ uint8_t test_vminv_u8(uint8x8_t a) {
 // CHECK-LABEL: define {{[^@]+}}@test_vminv_u16
 // CHECK-SAME: (<4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.uminv.i32.v4i16(<4 x i16> [[A]])
-// CHECK-NEXT:    [[TMP0:%.*]] = trunc i32 [[VMINV_I]] to i16
-// CHECK-NEXT:    ret i16 [[TMP0]]
+// CHECK-NEXT:    [[VMINV_U16_I:%.*]] = call i16 @llvm.vector.reduce.umin.v4i16(<4 x i16> [[A]])
+// CHECK-NEXT:    ret i16 [[VMINV_U16_I]]
 //
 uint16_t test_vminv_u16(uint16x4_t a) {
   return vminv_u16(a);
 }
 
 // CHECK-LABEL: define {{[^@]+}}@test_vminvq_s8
-// CHECK-SAME: (<16 x i8> noundef [[A:%.*]]) #[[ATTR1]] {
+// CHECK-SAME: (<16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.sminv.i32.v16i8(<16 x i8> [[A]])
-// CHECK-NEXT:    [[TMP0:%.*]] = trunc i32 [[VMINV_I]] to i8
-// CHECK-NEXT:    ret i8 [[TMP0]]
+// CHECK-NEXT:    [[VMINVQ_S8_I:%.*]] = call i8 @llvm.vector.reduce.smin.v16i8(<16 x i8> [[A]])
+// CHECK-NEXT:    ret i8 [[VMINVQ_S8_I]]
 //
 int8_t test_vminvq_s8(int8x16_t a) {
   return vminvq_s8(a);
 }
 
 // CHECK-LABEL: define {{[^@]+}}@test_vminvq_s16
-// CHECK-SAME: (<8 x i16> noundef [[A:%.*]]) #[[ATTR1]] {
+// CHECK-SAME: (<8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.sminv.i32.v8i16(<8 x i16> [[A]])
-// CHECK-NEXT:    [[TMP0:%.*]] = trunc i32 [[VMINV_I]] to i16
-// CHECK-NEXT:    ret i16 [[TMP0]]
+// CHECK-NEXT:    [[VMINVQ_S16_I:%.*]] = call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> [[A]])
+// CHECK-NEXT:    ret i16 [[VMINVQ_S16_I]]
 //
 int16_t test_vminvq_s16(int16x8_t a) {
   return vminvq_s16(a);
 }
 
 // CHECK-LABEL: define {{[^@]+}}@test_vminvq_s32
-// CHECK-SAME: (<4 x i32> noundef [[A:%.*]]) #[[ATTR1]] {
+// CHECK-SAME: (<4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[VMINVQ_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sminv.i32.v4i32(<4 x i32> [[A]])
+// CHECK-NEXT:    [[VMINVQ_S32_I:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[A]])
 // CHECK-NEXT:    ret i32 [[VMINVQ_S32_I]]
 //
 int32_t test_vminvq_s32(int32x4_t a) {
@@ -295,31 +281,29 @@ int32_t test_vminvq_s32(int32x4_t a) {
 }
 
 // CHECK-LABEL: define {{[^@]+}}@test_vminvq_u8
-// CHECK-SAME: (<16 x i8> noundef [[A:%.*]]) #[[ATTR1]] {
+// CHECK-SAME: (<16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.uminv.i32.v16i8(<16 x i8> [[A]])
-// CHECK-NEXT:    [[TMP0:%.*]] = trunc i32 [[VMINV_I]] to i8
-// CHECK-NEXT:    ret i8 [[TMP0]]
+// CHECK-NEXT:    [[VMINVQ_U8_I:%.*]] = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> [[A]])
+// CHECK-NEXT:    ret i8 [[VMINVQ_U8_I]]
 //
 uint8_t test_vminvq_u8(uint8x16_t a) {
   return vminvq_u8(a);
 }
 
 // CHECK-LABEL: define {{[^@]+}}@test_vminvq_u16
-// CHECK-SAME: (<8 x i16> noundef [[A:%.*]]) #[[ATTR1]] {
+// CHECK-SAME: (<8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.uminv.i32.v8i16(<8 x i16> [[A]])
-// CHECK-NEXT:    [[TMP0:%.*]] = trunc i32 [[VMINV_I]] to i16
-// CHECK-NEXT:    ret i16 [[TMP0]]
+// CHECK-NEXT:    [[VMINVQ_U16_I:%.*]] = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> [[A]])
+// CHECK-NEXT:    ret i16 [[VMINVQ_U16_I]]
 //
 uint16_t test_vminvq_u16(uint16x8_t a) {
   return vminvq_u16(a);
 }
 
 // CHECK-LABEL: define {{[^@]+}}@test_vminvq_u32
-// CHECK-SAME: (<4 x i32> noundef [[A:%.*]]) #[[ATTR1]] {
+// CHECK-SAME: (<4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[VMINVQ_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uminv.i32.v4i32(<4 x i32> [[A]])
+// CHECK-NEXT:    [[VMINVQ_U32_I:%.*]] = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> [[A]])
 // CHECK-NEXT:    ret i32 [[VMINVQ_U32_I]]
 //
 uint32_t test_vminvq_u32(uint32x4_t a) {
@@ -329,9 +313,8 @@ uint32_t test_vminvq_u32(uint32x4_t a) {
 // CHECK-LABEL: define {{[^@]+}}@test_vaddv_s8
 // CHECK-SAME: (<8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.saddv.i32.v8i8(<8 x i8> [[A]])
-// CHECK-NEXT:    [[TMP0:%.*]] = trunc i32 [[VADDV_I]] to i8
-// CHECK-NEXT:    ret i8 [[TMP0]]
+// CHECK-NEXT:    [[VADDV_S8_I:%.*]] = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> [[A]])
+// CHECK-NEXT:    ret i8 [[VADDV_S8_I]]
 //
 int8_t test_vaddv_s8(int8x8_t a) {
   return vaddv_s8(a);
@@ -340,9 +323,8 @@ int8_t test_vaddv_s8(int8x8_t a) {
 // CHECK-LABEL: define {{[^@]+}}@test_vaddv_s16
 // CHECK-SAME: (<4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.saddv.i32.v4i16(<4 x i16> [[A]])
-// CHECK-NEXT:    [[TMP0:%.*]] = trunc i32 [[VADDV_I]] to i16
-// CHECK-NEXT:    ret i16 [[TMP0]]
+// CHECK-NEXT:    [[VADDV_S16_I:%.*]] = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> [[A]])
+// CHECK-NEXT:    ret i16 [[VADDV_S16_I]]
 //
 int16_t test_vaddv_s16(int16x4_t a) {
   return vaddv_s16(a);
@@ -351,9 +333,8 @@ int16_t test_vaddv_s16(int16x4_t a) {
 // CHECK-LABEL: define {{[^@]+}}@test_vaddv_u8
 // CHECK-SAME: (<8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddv.i32.v8i8(<8 x i8> [[A]])
-// CHECK-NEXT:    [[TMP0:%.*]] = trunc i32 [[VADDV_I]] to i8
-// CHECK-NEXT:    ret i8 [[TMP0]]
+// CHECK-NEXT:    [[VADDV_U8_I:%.*]] = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> [[A]])
+// CHECK-NEXT:    ret i8 [[VADDV_U8_I]]
 //
 uint8_t test_vaddv_u8(uint8x8_t a) {
   return vaddv_u8(a);
@@ -362,40 +343,37 @@ uint8_t test_vaddv_u8(uint8x8_t a) {
 // CHECK-LABEL: define {{[^@]+}}@test_vaddv_u16
 // CHECK-SAME: (<4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddv.i32.v4i16(<4 x i16> [[A]])
-// CHECK-NEXT:    [[TMP0:%.*]] = trunc i32 [[VADDV_I]] to i16
-// CHECK-NEXT:    ret i16 [[TMP0]]
+// CHECK-NEXT:    [[VADDV_U16_I:%.*]] = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> [[A]])
+// CHECK-NEXT:    ret i16 [[VADDV_U16_I]]
 //
 uint16_t test_vaddv_u16(uint16x4_t a) {
   return vaddv_u16(a);
 }
 
 // CHECK-LABEL: define {{[^@]+}}@test_vaddvq_s8
-// CHECK-SAME: (<16 x i8> noundef [[A:%.*]]) #[[ATTR1]] {
+// CHECK-SAME: (<16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.saddv.i32.v16i8(<16 x i8> [[A]])
-// CHECK-NEXT:    [[TMP0:%.*]] = trunc i32 [[VADDV_I]] to i8
-// CHECK-NEXT:    ret i8 [[TMP0]]
+// CHECK-NEXT:    [[VADDVQ_S8_I:%.*]] = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> [[A]])
+// CHECK-NEXT:    ret i8 [[VADDVQ_S8_I]]
 //
 int8_t test_vaddvq_s8(int8x16_t a) {
   return vaddvq_s8(a);
 }
 
 // CHECK-LABEL: define {{[^@]+}}@test_vaddvq_s16
-// CHECK-SAME: (<8 x i16> noundef [[A:%.*]]) #[[ATTR1]] {
+// CHECK-SAME: (<8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.saddv.i32.v8i16(<8 x i16> [[A]])
-// CHECK-NEXT:    [[TMP0:%.*]] = trunc i32 [[VADDV_I]] to i16
-// CHECK-NEXT:    ret i16 [[TMP0]]
+// CHECK-NEXT:    [[VADDVQ_S16_I:%.*]] = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> [[A]])
+// CHECK-NEXT:    ret i16 [[VADDVQ_S16_I]]
 //
 int16_t test_vaddvq_s16(int16x8_t a) {
   return vaddvq_s16(a);
 }
 
 // CHECK-LABEL: define {{[^@]+}}@test_vaddvq_s32
-// CHECK-SAME: (<4 x i32> noundef [[A:%.*]]) #[[ATTR1]] {
+// CHECK-SAME: (<4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[VADDVQ_S32_I:%.*]] = call i32 @llvm.aarch64.neon.saddv.i32.v4i32(<4 x i32> [[A]])
+// CHECK-NEXT:    [[VADDVQ_S32_I:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[A]])
 // CHECK-NEXT:    ret i32 [[VADDVQ_S32_I]]
 //
 int32_t test_vaddvq_s32(int32x4_t a) {
@@ -403,31 +381,29 @@ int32_t test_vaddvq_s32(int32x4_t a) {
 }
 
 // CHECK-LABEL: define {{[^@]+}}@test_vaddvq_u8
-// CHECK-SAME: (<16 x i8> noundef [[A:%.*]]) #[[ATTR1]] {
+// CHECK-SAME: (<16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddv.i32.v16i8(<16 x i8> [[A]])
-// CHECK-NEXT:    [[TMP0:%.*]] = trunc i32 [[VADDV_I]] to i8
-// CHECK-NEXT:    ret i8 [[TMP0]]
+// CHECK-NEXT:    [[VADDVQ_U8_I:%.*]] = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> [[A]])
+// CHECK-NEXT:    ret i8 [[VADDVQ_U8_I]]
 //
 uint8_t test_vaddvq_u8(uint8x16_t a) {
   return vaddvq_u8(a);
 }
 
 // CHECK-LABEL: define {{[^@]+}}@test_vaddvq_u16
-// CHECK-SAME: (<8 x i16> noundef [[A:%.*]]) #[[ATTR1]] {
+// CHECK-SAME: (<8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddv.i32.v8i16(<8 x i16> [[A]])
-// CHECK-NEXT:    [[TMP0:%.*]] = trunc i32 [[VADDV_I]] to i16
-// CHECK-NEXT:    ret i16 [[TMP0]]
+// CHECK-NEXT:    [[VADDVQ_U16_I:%.*]] = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> [[A]])
+// CHECK-NEXT:    ret i16 [[VADDVQ_U16_I]]
 //
 uint16_t test_vaddvq_u16(uint16x8_t a) {
   return vaddvq_u16(a);
 }
 
 // CHECK-LABEL: define {{[^@]+}}@test_vaddvq_u32
-// CHECK-SAME: (<4 x i32> noundef [[A:%.*]]) #[[ATTR1]] {
+// CHECK-SAME: (<4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[VADDVQ_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uaddv.i32.v4i32(<4 x i32> [[A]])
+// CHECK-NEXT:    [[VADDVQ_U32_I:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[A]])
 // CHECK-NEXT:    ret i32 [[VADDVQ_U32_I]]
 //
 uint32_t test_vaddvq_u32(uint32x4_t a) {
@@ -435,7 +411,7 @@ uint32_t test_vaddvq_u32(uint32x4_t a) {
 }
 
 // CHECK-LABEL: define {{[^@]+}}@test_vmaxvq_f32
-// CHECK-SAME: (<4 x float> noundef [[A:%.*]]) #[[ATTR1]] {
+// CHECK-SAME: (<4 x float> noundef [[A:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[VMAXVQ_F32_I:%.*]] = call float @llvm.aarch64.neon.fmaxv.f32.v4f32(<4 x float> [[A]])
 // CHECK-NEXT:    ret float [[VMAXVQ_F32_I]]
@@ -445,7 +421,7 @@ float32_t test_vmaxvq_f32(float32x4_t a) {
 }
 
 // CHECK-LABEL: define {{[^@]+}}@test_vminvq_f32
-// CHECK-SAME: (<4 x float> noundef [[A:%.*]]) #[[ATTR1]] {
+// CHECK-SAME: (<4 x float> noundef [[A:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[VMINVQ_F32_I:%.*]] = call float @llvm.aarch64.neon.fminv.f32.v4f32(<4 x float> [[A]])
 // CHECK-NEXT:    ret float [[VMINVQ_F32_I]]
@@ -455,7 +431,7 @@ float32_t test_vminvq_f32(float32x4_t a) {
 }
 
 // CHECK-LABEL: define {{[^@]+}}@test_vmaxnmvq_f32
-// CHECK-SAME: (<4 x float> noundef [[A:%.*]]) #[[ATTR1]] {
+// CHECK-SAME: (<4 x float> noundef [[A:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[VMAXNMVQ_F32_I:%.*]] = call float @llvm.aarch64.neon.fmaxnmv.f32.v4f32(<4 x float> [[A]])
 // CHECK-NEXT:    ret float [[VMAXNMVQ_F32_I]]
@@ -465,7 +441,7 @@ float32_t test_vmaxnmvq_f32(float32x4_t a) {
 }
 
 // CHECK-LABEL: define {{[^@]+}}@test_vminnmvq_f32
-// CHECK-SAME: (<4 x float> noundef [[A:%.*]]) #[[ATTR1]] {
+// CHECK-SAME: (<4 x float> noundef [[A:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[VMINNMVQ_F32_I:%.*]] = call float @llvm.aarch64.neon.fminnmv.f32.v4f32(<4 x float> [[A]])
 // CHECK-NEXT:    ret float [[VMINNMVQ_F32_I]]
diff --git a/clang/test/CodeGen/AArch64/neon-intrinsics.c b/clang/test/CodeGen/AArch64/neon-intrinsics.c
index 035e1ca..1c628bb 100644
--- a/clang/test/CodeGen/AArch64/neon-intrinsics.c
+++ b/clang/test/CodeGen/AArch64/neon-intrinsics.c
@@ -12643,7 +12643,7 @@ uint64_t test_vqrshld_u64(uint64_t a, int64_t b) {
 // CHECK-LABEL: define dso_local i64 @test_vpaddd_s64(
 // CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[VPADDD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.uaddv.i64.v2i64(<2 x i64> [[A]])
+// CHECK-NEXT:    [[VPADDD_S64_I:%.*]] = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> [[A]])
 // CHECK-NEXT:    ret i64 [[VPADDD_S64_I]]
 //
 int64_t test_vpaddd_s64(int64x2_t a) {
@@ -23227,7 +23227,7 @@ uint64x2_t test_vpaddq_u64(uint64x2_t a, uint64x2_t b) {
 // CHECK-LABEL: define dso_local i64 @test_vpaddd_u64(
 // CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[VPADDD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.uaddv.i64.v2i64(<2 x i64> [[A]])
+// CHECK-NEXT:    [[VPADDD_U64_I:%.*]] = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> [[A]])
 // CHECK-NEXT:    ret i64 [[VPADDD_U64_I]]
 //
 uint64_t test_vpaddd_u64(uint64x2_t a) {
@@ -23237,7 +23237,7 @@ uint64_t test_vpaddd_u64(uint64x2_t a) {
 // CHECK-LABEL: define dso_local i64 @test_vaddvq_s64(
 // CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[VADDVQ_S64_I:%.*]] = call i64 @llvm.aarch64.neon.saddv.i64.v2i64(<2 x i64> [[A]])
+// CHECK-NEXT:    [[VADDVQ_S64_I:%.*]] = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> [[A]])
 // CHECK-NEXT:    ret i64 [[VADDVQ_S64_I]]
 //
 int64_t test_vaddvq_s64(int64x2_t a) {
@@ -23247,7 +23247,7 @@ int64_t test_vaddvq_s64(int64x2_t a) {
 // CHECK-LABEL: define dso_local i64 @test_vaddvq_u64(
 // CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[VADDVQ_U64_I:%.*]] = call i64 @llvm.aarch64.neon.uaddv.i64.v2i64(<2 x i64> [[A]])
+// CHECK-NEXT:    [[VADDVQ_U64_I:%.*]] = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> [[A]])
 // CHECK-NEXT:    ret i64 [[VADDVQ_U64_I]]
 //
 uint64_t test_vaddvq_u64(uint64x2_t a) {
@@ -23878,7 +23878,7 @@ float64x1_t test_vrsqrts_f64(float64x1_t a, float64x1_t b) {
 // CHECK-LABEL: define dso_local i32 @test_vminv_s32(
 // CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[VMINV_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sminv.i32.v2i32(<2 x i32> [[A]])
+// CHECK-NEXT:    [[VMINV_S32_I:%.*]] = call i32 @llvm.vector.reduce.smin.v2i32(<2 x i32> [[A]])
 // CHECK-NEXT:    ret i32 [[VMINV_S32_I]]
 //
 int32_t test_vminv_s32(int32x2_t a) {
@@ -23888,7 +23888,7 @@ int32_t test_vminv_s32(int32x2_t a) {
 // CHECK-LABEL: define dso_local i32 @test_vminv_u32(
 // CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[VMINV_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uminv.i32.v2i32(<2 x i32> [[A]])
+// CHECK-NEXT:    [[VMINV_U32_I:%.*]] = call i32 @llvm.vector.reduce.umin.v2i32(<2 x i32> [[A]])
 // CHECK-NEXT:    ret i32 [[VMINV_U32_I]]
 //
 uint32_t test_vminv_u32(uint32x2_t a) {
@@ -23898,7 +23898,7 @@ uint32_t test_vminv_u32(uint32x2_t a) {
 // CHECK-LABEL: define dso_local i32 @test_vmaxv_s32(
 // CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[VMAXV_S32_I:%.*]] = call i32 @llvm.aarch64.neon.smaxv.i32.v2i32(<2 x i32> [[A]])
+// CHECK-NEXT:    [[VMAXV_S32_I:%.*]] = call i32 @llvm.vector.reduce.smax.v2i32(<2 x i32> [[A]])
 // CHECK-NEXT:    ret i32 [[VMAXV_S32_I]]
 //
 int32_t test_vmaxv_s32(int32x2_t a) {
@@ -23908,7 +23908,7 @@ int32_t test_vmaxv_s32(int32x2_t a) {
 // CHECK-LABEL: define dso_local i32 @test_vmaxv_u32(
 // CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[VMAXV_U32_I:%.*]] = call i32 @llvm.aarch64.neon.umaxv.i32.v2i32(<2 x i32> [[A]])
+// CHECK-NEXT:    [[VMAXV_U32_I:%.*]] = call i32 @llvm.vector.reduce.umax.v2i32(<2 x i32> [[A]])
 // CHECK-NEXT:    ret i32 [[VMAXV_U32_I]]
 //
 uint32_t test_vmaxv_u32(uint32x2_t a) {
@@ -23918,7 +23918,7 @@ uint32_t test_vmaxv_u32(uint32x2_t a) {
 // CHECK-LABEL: define dso_local i32 @test_vaddv_s32(
 // CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[VADDV_S32_I:%.*]] = call i32 @llvm.aarch64.neon.saddv.i32.v2i32(<2 x i32> [[A]])
+// CHECK-NEXT:    [[VADDV_S32_I:%.*]] = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> [[A]])
 // CHECK-NEXT:    ret i32 [[VADDV_S32_I]]
 //
 int32_t test_vaddv_s32(int32x2_t a) {
@@ -23928,7 +23928,7 @@ int32_t test_vaddv_s32(int32x2_t a) {
 // CHECK-LABEL: define dso_local i32 @test_vaddv_u32(
 // CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[VADDV_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uaddv.i32.v2i32(<2 x i32> [[A]])
+// CHECK-NEXT:    [[VADDV_U32_I:%.*]] = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> [[A]])
 // CHECK-NEXT:    ret i32 [[VADDV_U32_I]]
 //
 uint32_t test_vaddv_u32(uint32x2_t a) {
diff --git a/clang/test/CodeGen/X86/avx512vlbw-builtins.c b/clang/test/CodeGen/X86/avx512vlbw-builtins.c
index 116d86f..febef46 100644
--- a/clang/test/CodeGen/X86/avx512vlbw-builtins.c
+++ b/clang/test/CodeGen/X86/avx512vlbw-builtins.c
@@ -645,6 +645,21 @@ __mmask16 test_mm_cmp_epi8_mask(__m128i __a, __m128i __b) {
   return (__mmask16)_mm_cmp_epi8_mask(__a, __b, 0);
 }
 
+TEST_CONSTEXPR(_mm_cmpeq_epi8_mask(
+    ((__m128i)(__v16qi){5, 3, 7, 2, 9, 3, 7, 1, 5, 4, 8, 2, 9, 6, 7, 5}),
+    ((__m128i)(__v16qi){5, 2, 7, 3, 9, 4, 6, 1, 5, 3, 8, 1, 9, 5, 7, 5})
+) == (__mmask16)0xd595);
+
+TEST_CONSTEXPR(_mm_cmplt_epi8_mask(
+    ((__m128i)(__v16qi){1, 5, 3, 7, 2, 8, 4, 6, 9, 5, 3, 11, 2, 6, 15, 8}),
+    ((__m128i)(__v16qi){2, 4, 6, 8, 3, 5, 7, 9, 4, 6, 8, 10, 5, 7, 9, 11})
+) == (__mmask16)0xb6dd);
+
+TEST_CONSTEXPR(_mm_cmple_epi8_mask(
+    ((__m128i)(__v16qi){1, 3, 5, 7, 2, 6, 6, 8, 1, 3, 9, 7, 2, 4, 6, 10}),
+    ((__m128i)(__v16qi){2, 3, 4, 7, 3, 4, 5, 8, 2, 3, 4, 7, 3, 4, 5, 8})
+) == (__mmask16)0x3b9b);
+
 __mmask16 test_mm_mask_cmp_epi8_mask(__mmask16 __u, __m128i __a, __m128i __b) {
   // CHECK-LABEL: test_mm_mask_cmp_epi8_mask
   // CHECK: icmp eq <16 x i8> %{{.*}}, %{{.*}}
@@ -2894,6 +2909,12 @@ __mmask16 test_mm_test_epi8_mask(__m128i __A, __m128i __B) {
   return _mm_test_epi8_mask(__A, __B); 
 }
 
+TEST_CONSTEXPR(_mm_test_epi8_mask(
+    (__m128i)(__v16qi){1, 2, 2, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16},
+    (__m128i)(__v16qi){1, 2, 4, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}
+)
+== (__mmask16)0xfffb);
+
 __mmask16 test_mm_mask_test_epi8_mask(__mmask16 __U, __m128i __A, __m128i __B) {
   // CHECK-LABEL: test_mm_mask_test_epi8_mask
   // CHECK: and <2 x i64> %{{.*}}, %{{.*}}
@@ -2901,6 +2922,12 @@ __mmask16 test_mm_mask_test_epi8_mask(__mmask16 __U, __m128i __A, __m128i __B) {
   // CHECK: and <16 x i1> %{{.*}}, %{{.*}}
   return _mm_mask_test_epi8_mask(__U, __A, __B); 
 }
+TEST_CONSTEXPR(_mm_mask_test_epi8_mask(
+    0xFFFF,
+    (__m128i)(__v16qi){1, 2, 2, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16},
+    (__m128i)(__v16qi){1, 2, 4, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}
+)
+== (__mmask16)0xfffb);
 
 __mmask32 test_mm256_test_epi8_mask(__m256i __A, __m256i __B) {
   // CHECK-LABEL: test_mm256_test_epi8_mask
@@ -2908,6 +2935,11 @@ __mmask32 test_mm256_test_epi8_mask(__m256i __A, __m256i __B) {
   // CHECK: icmp ne <32 x i8> %{{.*}}, %{{.*}}
   return _mm256_test_epi8_mask(__A, __B); 
 }
+TEST_CONSTEXPR(_mm256_test_epi8_mask(
+    (__m256i)(__v32qi){1, 2, 2, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 1, 2, 2, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16},
+    (__m256i)(__v32qi){1, 2, 4, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 1, 2, 4, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}
+)
+== (__mmask32)0xfffbfffb);
 
 __mmask32 test_mm256_mask_test_epi8_mask(__mmask32 __U, __m256i __A, __m256i __B) {
   // CHECK-LABEL: test_mm256_mask_test_epi8_mask
@@ -2954,6 +2986,12 @@ __mmask16 test_mm_testn_epi8_mask(__m128i __A, __m128i __B) {
   return _mm_testn_epi8_mask(__A, __B); 
 }
 
+TEST_CONSTEXPR(_mm_testn_epi8_mask(
+    (__m128i)(__v16qi){1, 2, 77, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 1, 16, 16},
+    (__m128i)(__v16qi){2, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 15}
+)
+== (__mmask16)0xe001);
+
 __mmask16 test_mm_mask_testn_epi8_mask(__mmask16 __U, __m128i __A, __m128i __B) {
   // CHECK-LABEL: test_mm_mask_testn_epi8_mask
   // CHECK: and <2 x i64> %{{.*}}, %{{.*}}
diff --git a/clang/test/CodeGen/lto-newpm-pipeline.c b/clang/test/CodeGen/lto-newpm-pipeline.c
index ea9784a..dceaaf1 100644
--- a/clang/test/CodeGen/lto-newpm-pipeline.c
+++ b/clang/test/CodeGen/lto-newpm-pipeline.c
@@ -32,10 +32,12 @@
 // CHECK-FULL-O0-NEXT: Running pass: AlwaysInlinerPass
 // CHECK-FULL-O0-NEXT: Running analysis: ProfileSummaryAnalysis
 // CHECK-FULL-O0-NEXT: Running pass: CoroConditionalWrapper
+// CHECK-FULL-O0-NEXT: Running pass: AllocTokenPass
+// CHECK-FULL-O0-NEXT: Running analysis: OptimizationRemarkEmitterAnalysis
+// CHECK-FULL-O0-NEXT: Running analysis: TargetLibraryAnalysis
 // CHECK-FULL-O0-NEXT: Running pass: CanonicalizeAliasesPass
 // CHECK-FULL-O0-NEXT: Running pass: NameAnonGlobalPass
 // CHECK-FULL-O0-NEXT: Running pass: AnnotationRemarksPass
-// CHECK-FULL-O0-NEXT: Running analysis: TargetLibraryAnalysis
 // CHECK-FULL-O0-NEXT: Running pass: VerifierPass
 // CHECK-FULL-O0-NEXT: Running pass: BitcodeWriterPass
 
@@ -46,10 +48,12 @@
 // CHECK-THIN-O0-NEXT: Running pass: AlwaysInlinerPass
 // CHECK-THIN-O0-NEXT: Running analysis: ProfileSummaryAnalysis
 // CHECK-THIN-O0-NEXT: Running pass: CoroConditionalWrapper
+// CHECK-THIN-O0-NEXT: Running pass: AllocTokenPass
+// CHECK-THIN-O0-NEXT: Running analysis: OptimizationRemarkEmitterAnalysis
+// CHECK-THIN-O0-NEXT: Running analysis: TargetLibraryAnalysis
 // CHECK-THIN-O0-NEXT: Running pass: CanonicalizeAliasesPass
 // CHECK-THIN-O0-NEXT: Running pass: NameAnonGlobalPass
 // CHECK-THIN-O0-NEXT: Running pass: AnnotationRemarksPass
-// CHECK-THIN-O0-NEXT: Running analysis: TargetLibraryAnalysis
 // CHECK-THIN-O0-NEXT: Running pass: VerifierPass
 // CHECK-THIN-O0-NEXT: Running pass: ThinLTOBitcodeWriterPass
 
diff --git a/clang/test/CodeGenCXX/alloc-token-builtin.cpp b/clang/test/CodeGenCXX/alloc-token-builtin.cpp
new file mode 100644
index 0000000..adadf7b
--- /dev/null
+++ b/clang/test/CodeGenCXX/alloc-token-builtin.cpp
@@ -0,0 +1,97 @@
+// To test IR generation of the builtin without evaluating the LLVM intrinsic,
+// we set the mode to a stateful mode, which prohibits constant evaluation.
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -Werror -std=c++20 -emit-llvm -falloc-token-mode=random -disable-llvm-passes %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-CODEGEN
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -Werror -std=c++20 -emit-llvm -falloc-token-max=2 %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-LOWER
+
+extern "C" void *my_malloc(unsigned long, unsigned long);
+
+struct NoPtr {
+  int x;
+  long y;
+};
+
+struct WithPtr {
+  int a;
+  char *buf;
+};
+
+int unevaluated_fn();
+
+// CHECK-LABEL: @_Z16test_builtin_intv(
+// CHECK-CODEGEN: call i64 @llvm.alloc.token.id.i64(metadata ![[META_INT:[0-9]+]])
+// CHECK-LOWER: ret i64 0
+unsigned long test_builtin_int() {
+  return __builtin_infer_alloc_token(sizeof(1));
+}
+
+// CHECK-LABEL: @_Z16test_builtin_ptrv(
+// CHECK-CODEGEN: call i64 @llvm.alloc.token.id.i64(metadata ![[META_PTR:[0-9]+]])
+// CHECK-LOWER: ret i64 1
+unsigned long test_builtin_ptr() {
+  return __builtin_infer_alloc_token(sizeof(int *));
+}
+
+// CHECK-LABEL: @_Z25test_builtin_struct_noptrv(
+// CHECK-CODEGEN: call i64 @llvm.alloc.token.id.i64(metadata ![[META_NOPTR:[0-9]+]])
+// CHECK-LOWER: ret i64 0
+unsigned long test_builtin_struct_noptr() {
+  return __builtin_infer_alloc_token(sizeof(NoPtr));
+}
+
+// CHECK-LABEL: @_Z25test_builtin_struct_w_ptrv(
+// CHECK-CODEGEN: call i64 @llvm.alloc.token.id.i64(metadata ![[META_WITHPTR:[0-9]+]])
+// CHECK-LOWER: ret i64 1
+unsigned long test_builtin_struct_w_ptr() {
+  return __builtin_infer_alloc_token(sizeof(WithPtr), 123);
+}
+
+// CHECK-LABEL: @_Z24test_builtin_unevaluatedv(
+// CHECK-NOT: call{{.*}}unevaluated_fn
+// CHECK-CODEGEN: call i64 @llvm.alloc.token.id.i64(metadata ![[META_INT:[0-9]+]])
+// CHECK-LOWER: ret i64 0
+unsigned long test_builtin_unevaluated() {
+	return __builtin_infer_alloc_token(sizeof(int) * unevaluated_fn());
+}
+
+// CHECK-LABEL: @_Z36test_builtin_unsequenced_unevaluatedi(
+// CHECK:     add nsw
+// CHECK-NOT: add nsw
+// CHECK-CODEGEN: %[[REG:[0-9]+]] = call i64 @llvm.alloc.token.id.i64(metadata ![[META_UNKNOWN:[0-9]+]])
+// CHECK-CODEGEN: call{{.*}}@my_malloc({{.*}}, i64 noundef %[[REG]])
+// CHECK-LOWER: call{{.*}}@my_malloc({{.*}}, i64 noundef 0)
+void test_builtin_unsequenced_unevaluated(int x) {
+  my_malloc(++x, __builtin_infer_alloc_token(++x));
+}
+
+// CHECK-LABEL: @_Z20test_builtin_unknownv(
+// CHECK-CODEGEN: call i64 @llvm.alloc.token.id.i64(metadata ![[META_UNKNOWN:[0-9]+]])
+// CHECK-LOWER: ret i64 0
+unsigned long test_builtin_unknown() {
+  return __builtin_infer_alloc_token(4096);
+}
+
+// Test template instantiation.
+template <typename T>
+constexpr unsigned long get_token() {
+  return __builtin_infer_alloc_token(sizeof(T));
+}
+
+// CHECK-LABEL: @_Z13get_token_intv()
+// CHECK-CODEGEN: call i64 @llvm.alloc.token.id.i64(metadata ![[META_INT]])
+// CHECK-LOWER: ret i64 0
+unsigned long get_token_int() {
+  return get_token<int>();
+}
+
+// CHECK-LABEL: @_Z13get_token_ptrv()
+// CHECK-CODEGEN: call i64 @llvm.alloc.token.id.i64(metadata ![[META_PTR]])
+// CHECK-LOWER: ret i64 1
+unsigned long get_token_ptr() {
+  return get_token<int *>();
+}
+
+// CHECK-CODEGEN: ![[META_INT]] = !{!"int", i1 false}
+// CHECK-CODEGEN: ![[META_PTR]] = !{!"int *", i1 true}
+// CHECK-CODEGEN: ![[META_NOPTR]] = !{!"NoPtr", i1 false}
+// CHECK-CODEGEN: ![[META_WITHPTR]] = !{!"WithPtr", i1 true}
+// CHECK-CODEGEN: ![[META_UNKNOWN]] = !{}
diff --git a/clang/test/CodeGenCXX/matrix-vector-bit-int.cpp b/clang/test/CodeGenCXX/matrix-vector-bit-int.cpp
index 2e7531b..4be1cb3 100644
--- a/clang/test/CodeGenCXX/matrix-vector-bit-int.cpp
+++ b/clang/test/CodeGenCXX/matrix-vector-bit-int.cpp
@@ -19,7 +19,7 @@ using i4x3x3 = _BitInt(4) __attribute__((matrix_type(3, 3)));
 // CHECK-NEXT:    store i32 [[A_COERCE]], ptr [[A]], align 4
 // CHECK-NEXT:    [[LOADVECN:%.*]] = load <4 x i8>, ptr [[A]], align 4
 // CHECK-NEXT:    [[A1:%.*]] = shufflevector <4 x i8> [[LOADVECN]], <4 x i8> poison, <3 x i32> <i32 0, i32 1, i32 2>
-// CHECK-NEXT:    [[EXTRACTVEC:%.*]] = shufflevector <3 x i8> [[A1]], <3 x i8> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
+// CHECK-NEXT:    [[EXTRACTVEC:%.*]] = shufflevector <3 x i8> [[A1]], <3 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 // CHECK-NEXT:    store <4 x i8> [[EXTRACTVEC]], ptr [[A_ADDR]], align 4
 // CHECK-NEXT:    [[LOADVECN2:%.*]] = load <4 x i8>, ptr [[A_ADDR]], align 4
 // CHECK-NEXT:    [[EXTRACTVEC3:%.*]] = shufflevector <4 x i8> [[LOADVECN2]], <4 x i8> poison, <3 x i32> <i32 0, i32 1, i32 2>
@@ -38,7 +38,7 @@ i8x3 v1(i8x3 a) {
 // CHECK-SAME: <3 x i32> noundef [[A:%.*]]) #[[ATTR1:[0-9]+]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[A_ADDR:%.*]] = alloca <3 x i32>, align 16
-// CHECK-NEXT:    [[EXTRACTVEC:%.*]] = shufflevector <3 x i32> [[A]], <3 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
+// CHECK-NEXT:    [[EXTRACTVEC:%.*]] = shufflevector <3 x i32> [[A]], <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 // CHECK-NEXT:    store <4 x i32> [[EXTRACTVEC]], ptr [[A_ADDR]], align 16
 // CHECK-NEXT:    [[LOADVECN:%.*]] = load <4 x i32>, ptr [[A_ADDR]], align 16
 // CHECK-NEXT:    [[EXTRACTVEC1:%.*]] = shufflevector <4 x i32> [[LOADVECN]], <4 x i32> poison, <3 x i32> <i32 0, i32 1, i32 2>
@@ -57,7 +57,7 @@ i32x3 v2(i32x3 a) {
 // CHECK-NEXT:    [[A_ADDR:%.*]] = alloca <3 x i512>, align 256
 // CHECK-NEXT:    [[LOADVECN:%.*]] = load <4 x i512>, ptr [[TMP0]], align 256
 // CHECK-NEXT:    [[A:%.*]] = shufflevector <4 x i512> [[LOADVECN]], <4 x i512> poison, <3 x i32> <i32 0, i32 1, i32 2>
-// CHECK-NEXT:    [[EXTRACTVEC:%.*]] = shufflevector <3 x i512> [[A]], <3 x i512> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
+// CHECK-NEXT:    [[EXTRACTVEC:%.*]] = shufflevector <3 x i512> [[A]], <3 x i512> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 // CHECK-NEXT:    store <4 x i512> [[EXTRACTVEC]], ptr [[A_ADDR]], align 256
 // CHECK-NEXT:    [[LOADVECN1:%.*]] = load <4 x i512>, ptr [[A_ADDR]], align 256
 // CHECK-NEXT:    [[EXTRACTVEC2:%.*]] = shufflevector <4 x i512> [[LOADVECN1]], <4 x i512> poison, <3 x i32> <i32 0, i32 1, i32 2>
@@ -80,7 +80,7 @@ i512x3 v3(i512x3 a) {
 // CHECK-NEXT:    store i32 [[A_COERCE]], ptr [[A]], align 4
 // CHECK-NEXT:    [[LOADVECN:%.*]] = load <4 x i4>, ptr [[A]], align 4
 // CHECK-NEXT:    [[A1:%.*]] = shufflevector <4 x i4> [[LOADVECN]], <4 x i4> poison, <3 x i32> <i32 0, i32 1, i32 2>
-// CHECK-NEXT:    [[EXTRACTVEC:%.*]] = shufflevector <3 x i4> [[A1]], <3 x i4> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
+// CHECK-NEXT:    [[EXTRACTVEC:%.*]] = shufflevector <3 x i4> [[A1]], <3 x i4> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 // CHECK-NEXT:    store <4 x i4> [[EXTRACTVEC]], ptr [[A_ADDR]], align 4
 // CHECK-NEXT:    [[LOADVECN2:%.*]] = load <4 x i4>, ptr [[A_ADDR]], align 4
 // CHECK-NEXT:    [[EXTRACTVEC3:%.*]] = shufflevector <4 x i4> [[LOADVECN2]], <4 x i4> poison, <3 x i32> <i32 0, i32 1, i32 2>
diff --git a/clang/test/CodeGenHLSL/builtins/WaveActiveMin.hlsl b/clang/test/CodeGenHLSL/builtins/WaveActiveMin.hlsl
new file mode 100644
index 0000000..1194f84
--- /dev/null
+++ b/clang/test/CodeGenHLSL/builtins/WaveActiveMin.hlsl
@@ -0,0 +1,46 @@
+// RUN: %clang_cc1 -std=hlsl2021 -finclude-default-header -triple \
+// RUN:   dxil-pc-shadermodel6.3-compute %s -emit-llvm -disable-llvm-passes -o - | \
+// RUN:   FileCheck %s --check-prefixes=CHECK,CHECK-DXIL
+// RUN: %clang_cc1 -std=hlsl2021 -finclude-default-header -triple \
+// RUN:   spirv-pc-vulkan-compute %s -emit-llvm -disable-llvm-passes -o - | \
+// RUN:   FileCheck %s --check-prefixes=CHECK,CHECK-SPIRV
+
+// Test basic lowering to runtime function call.
+
+// CHECK-LABEL: test_int
+int test_int(int expr) {
+  // CHECK-SPIRV:  %[[RET:.*]] = call spir_func [[TY:.*]] @llvm.spv.wave.reduce.min.i32([[TY]] %[[#]])
+  // CHECK-DXIL:  %[[RET:.*]] = call [[TY:.*]] @llvm.dx.wave.reduce.min.i32([[TY]] %[[#]])
+  // CHECK:  ret [[TY]] %[[RET]]
+  return WaveActiveMin(expr);
+}
+
+// CHECK-DXIL: declare [[TY]] @llvm.dx.wave.reduce.min.i32([[TY]]) #[[#attr:]]
+// CHECK-SPIRV: declare [[TY]] @llvm.spv.wave.reduce.min.i32([[TY]]) #[[#attr:]]
+
+// CHECK-LABEL: test_uint64_t
+uint64_t test_uint64_t(uint64_t expr) {
+  // CHECK-SPIRV:  %[[RET:.*]] = call spir_func [[TY:.*]] @llvm.spv.wave.reduce.umin.i64([[TY]] %[[#]])
+  // CHECK-DXIL:  %[[RET:.*]] = call [[TY:.*]] @llvm.dx.wave.reduce.umin.i64([[TY]] %[[#]])
+  // CHECK:  ret [[TY]] %[[RET]]
+  return WaveActiveMin(expr);
+}
+
+// CHECK-DXIL: declare [[TY]] @llvm.dx.wave.reduce.umin.i64([[TY]]) #[[#attr:]]
+// CHECK-SPIRV: declare [[TY]] @llvm.spv.wave.reduce.umin.i64([[TY]]) #[[#attr:]]
+
+// Test basic lowering to runtime function call with array and float value.
+
+// CHECK-LABEL: test_floatv4
+float4 test_floatv4(float4 expr) {
+  // CHECK-SPIRV:  %[[RET1:.*]] = call reassoc nnan ninf nsz arcp afn spir_func [[TY1:.*]] @llvm.spv.wave.reduce.min.v4f32([[TY1]] %[[#]]
+  // CHECK-DXIL:  %[[RET1:.*]] = call reassoc nnan ninf nsz arcp afn [[TY1:.*]] @llvm.dx.wave.reduce.min.v4f32([[TY1]] %[[#]])
+  // CHECK:  ret [[TY1]] %[[RET1]]
+  return WaveActiveMin(expr);
+}
+
+// CHECK-DXIL: declare [[TY1]] @llvm.dx.wave.reduce.min.v4f32([[TY1]]) #[[#attr]]
+// CHECK-SPIRV: declare [[TY1]] @llvm.spv.wave.reduce.min.v4f32([[TY1]]) #[[#attr]]
+
+// CHECK: attributes #[[#attr]] = {{{.*}} convergent {{.*}}}
+
diff --git a/clang/test/CodeGenOpenCL/preserve_vec3.cl b/clang/test/CodeGenOpenCL/preserve_vec3.cl
index e76aa81..0017169 100644
--- a/clang/test/CodeGenOpenCL/preserve_vec3.cl
+++ b/clang/test/CodeGenOpenCL/preserve_vec3.cl
@@ -12,7 +12,7 @@ typedef float float4 __attribute__((ext_vector_type(4)));
 // CHECK-SAME: ptr addrspace(1) noundef readonly align 16 captures(none) [[A:%.*]], ptr addrspace(1) noundef writeonly align 16 captures(none) initializes((0, 16)) [[B:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] !kernel_arg_addr_space [[META7:![0-9]+]] !kernel_arg_access_qual [[META8:![0-9]+]] !kernel_arg_type [[META9:![0-9]+]] !kernel_arg_base_type [[META10:![0-9]+]] !kernel_arg_type_qual [[META11:![0-9]+]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[TMP0:%.*]] = load <3 x float>, ptr addrspace(1) [[A]], align 16
-// CHECK-NEXT:    [[EXTRACTVEC1_I:%.*]] = shufflevector <3 x float> [[TMP0]], <3 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
+// CHECK-NEXT:    [[EXTRACTVEC1_I:%.*]] = shufflevector <3 x float> [[TMP0]], <3 x float> <float undef, float poison, float poison>, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 // CHECK-NEXT:    store <4 x float> [[EXTRACTVEC1_I]], ptr addrspace(1) [[B]], align 16, !tbaa [[CHAR_TBAA12:![0-9]+]]
 // CHECK-NEXT:    ret void
 //
@@ -24,7 +24,7 @@ void kernel foo(global float3 *a, global float3 *b) {
 // CHECK-SAME: ptr addrspace(1) noundef writeonly align 16 captures(none) initializes((0, 16)) [[A:%.*]], ptr addrspace(1) noundef readonly align 16 captures(none) [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !kernel_arg_addr_space [[META7]] !kernel_arg_access_qual [[META8]] !kernel_arg_type [[META13:![0-9]+]] !kernel_arg_base_type [[META14:![0-9]+]] !kernel_arg_type_qual [[META11]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[TMP0:%.*]] = load <3 x float>, ptr addrspace(1) [[B]], align 16, !tbaa [[CHAR_TBAA12]]
-// CHECK-NEXT:    [[EXTRACTVEC_I:%.*]] = shufflevector <3 x float> [[TMP0]], <3 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
+// CHECK-NEXT:    [[EXTRACTVEC_I:%.*]] = shufflevector <3 x float> [[TMP0]], <3 x float> <float undef, float poison, float poison>, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 // CHECK-NEXT:    store <4 x float> [[EXTRACTVEC_I]], ptr addrspace(1) [[A]], align 16, !tbaa [[CHAR_TBAA12]]
 // CHECK-NEXT:    ret void
 //
@@ -60,7 +60,7 @@ void kernel float3_to_double2(global float3 *a, global double2 *b) {
 // CHECK-SAME: ptr addrspace(1) noundef writeonly align 8 captures(none) initializes((0, 8)) [[A:%.*]], ptr addrspace(1) noundef readonly align 8 captures(none) [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !kernel_arg_addr_space [[META7]] !kernel_arg_access_qual [[META8]] !kernel_arg_type [[META17:![0-9]+]] !kernel_arg_base_type [[META18:![0-9]+]] !kernel_arg_type_qual [[META11]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[TMP0:%.*]] = load <3 x i16>, ptr addrspace(1) [[B]], align 8, !tbaa [[CHAR_TBAA12]]
-// CHECK-NEXT:    [[EXTRACTVEC_I:%.*]] = shufflevector <3 x i16> [[TMP0]], <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
+// CHECK-NEXT:    [[EXTRACTVEC_I:%.*]] = shufflevector <3 x i16> [[TMP0]], <3 x i16> <i16 undef, i16 poison, i16 poison>, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 // CHECK-NEXT:    store <4 x i16> [[EXTRACTVEC_I]], ptr addrspace(1) [[A]], align 8, !tbaa [[CHAR_TBAA12]]
 // CHECK-NEXT:    ret void
 //
@@ -71,8 +71,8 @@ void kernel char8_to_short3(global short3 *a, global char8 *b) {
 // CHECK-LABEL: define dso_local spir_func void @from_char3(
 // CHECK-SAME: <3 x i8> noundef [[A:%.*]], ptr addrspace(1) noundef writeonly captures(none) initializes((0, 4)) [[OUT:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[EXTRACTVEC:%.*]] = shufflevector <3 x i8> [[A]], <3 x i8> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
-// CHECK-NEXT:    store <4 x i8> [[EXTRACTVEC]], ptr addrspace(1) [[OUT]], align 4, !tbaa [[INT_TBAA3:![0-9]+]]
+// CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <3 x i8> [[A]], <3 x i8> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
+// CHECK-NEXT:    store <4 x i8> [[TMP0]], ptr addrspace(1) [[OUT]], align 4, !tbaa [[INT_TBAA3:![0-9]+]]
 // CHECK-NEXT:    ret void
 //
 void from_char3(char3 a, global int *out) {
@@ -82,8 +82,8 @@ void from_char3(char3 a, global int *out) {
 // CHECK-LABEL: define dso_local spir_func void @from_short3(
 // CHECK-SAME: <3 x i16> noundef [[A:%.*]], ptr addrspace(1) noundef writeonly captures(none) initializes((0, 8)) [[OUT:%.*]]) local_unnamed_addr #[[ATTR2]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[EXTRACTVEC:%.*]] = shufflevector <3 x i16> [[A]], <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
-// CHECK-NEXT:    store <4 x i16> [[EXTRACTVEC]], ptr addrspace(1) [[OUT]], align 8, !tbaa [[LONG_TBAA19:![0-9]+]]
+// CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <3 x i16> [[A]], <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
+// CHECK-NEXT:    store <4 x i16> [[TMP0]], ptr addrspace(1) [[OUT]], align 8, !tbaa [[LONG_TBAA19:![0-9]+]]
 // CHECK-NEXT:    ret void
 //
 void from_short3(short3 a, global long *out) {
@@ -94,7 +94,8 @@ void from_short3(short3 a, global long *out) {
 // CHECK-SAME: i32 noundef [[A:%.*]], ptr addrspace(1) noundef writeonly captures(none) initializes((0, 4)) [[OUT:%.*]]) local_unnamed_addr #[[ATTR2]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32 [[A]] to <4 x i8>
-// CHECK-NEXT:    [[EXTRACTVEC:%.*]] = shufflevector <4 x i8> [[TMP0]], <4 x i8> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
+// CHECK-NEXT:    [[ASTYPE:%.*]] = shufflevector <4 x i8> [[TMP0]], <4 x i8> poison, <3 x i32> <i32 0, i32 1, i32 2>
+// CHECK-NEXT:    [[EXTRACTVEC:%.*]] = shufflevector <3 x i8> [[ASTYPE]], <3 x i8> <i8 undef, i8 poison, i8 poison>, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 // CHECK-NEXT:    store <4 x i8> [[EXTRACTVEC]], ptr addrspace(1) [[OUT]], align 4, !tbaa [[CHAR_TBAA12]]
 // CHECK-NEXT:    ret void
 //
@@ -106,7 +107,8 @@ void scalar_to_char3(int a, global char3 *out) {
 // CHECK-SAME: i64 noundef [[A:%.*]], ptr addrspace(1) noundef writeonly captures(none) initializes((0, 8)) [[OUT:%.*]]) local_unnamed_addr #[[ATTR2]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i64 [[A]] to <4 x i16>
-// CHECK-NEXT:    [[EXTRACTVEC:%.*]] = shufflevector <4 x i16> [[TMP0]], <4 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
+// CHECK-NEXT:    [[ASTYPE:%.*]] = shufflevector <4 x i16> [[TMP0]], <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
+// CHECK-NEXT:    [[EXTRACTVEC:%.*]] = shufflevector <3 x i16> [[ASTYPE]], <3 x i16> <i16 undef, i16 poison, i16 poison>, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 // CHECK-NEXT:    store <4 x i16> [[EXTRACTVEC]], ptr addrspace(1) [[OUT]], align 8, !tbaa [[CHAR_TBAA12]]
 // CHECK-NEXT:    ret void
 //
diff --git a/clang/test/DebugInfo/Generic/bit-int.c b/clang/test/DebugInfo/Generic/bit-int.c
new file mode 100644
index 0000000..88ecc13
--- /dev/null
+++ b/clang/test/DebugInfo/Generic/bit-int.c
@@ -0,0 +1,8 @@
+// RUN: %clang_cc1 -x c++ %s -debug-info-kind=standalone -gno-column-info -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 -x c   %s -debug-info-kind=standalone -gno-column-info -emit-llvm -o - | FileCheck %s
+
+unsigned _BitInt(17) a;
+_BitInt(2) b;
+
+// CHECK: !DIBasicType(name: "_BitInt(2)", size: 8, dataSize: 2, encoding: DW_ATE_signed)
+// CHECK: !DIBasicType(name: "unsigned _BitInt(17)", size: 32,  dataSize: 17, encoding: DW_ATE_unsigned)
diff --git a/clang/test/Driver/aarch64-mlr-for-calls-only.c b/clang/test/Driver/aarch64-mlr-for-calls-only.c
new file mode 100644
index 0000000..e71a4cd
--- /dev/null
+++ b/clang/test/Driver/aarch64-mlr-for-calls-only.c
@@ -0,0 +1,3 @@
+// RUN: %clang --target=aarch64-none-gnu -mlr-for-calls-only -### %s 2> %t
+// RUN: FileCheck --check-prefix=CHECK < %t %s
+// CHECK: "-target-feature" "+reserve-lr-for-ra"
diff --git a/clang/test/Driver/aarch64-ptrauth.c b/clang/test/Driver/aarch64-ptrauth.c
index b080a77..a67e98f 100644
--- a/clang/test/Driver/aarch64-ptrauth.c
+++ b/clang/test/Driver/aarch64-ptrauth.c
@@ -4,7 +4,8 @@
 // NONE:     "-cc1"
 // NONE-NOT: "-fptrauth-
 
-// RUN: %clang -### -c --target=aarch64 \
+//// -fptauth-* driver flags on Linux are only supported with pauthtest ABI.
+// RUN: %clang -### -c --target=aarch64-linux -mabi=pauthtest \
 // RUN:   -fno-ptrauth-intrinsics -fptrauth-intrinsics \
 // RUN:   -fno-ptrauth-calls -fptrauth-calls \
 // RUN:   -fno-ptrauth-returns -fptrauth-returns \
@@ -15,9 +16,43 @@
 // RUN:   -fno-ptrauth-indirect-gotos -fptrauth-indirect-gotos \
 // RUN:   -fno-ptrauth-init-fini -fptrauth-init-fini \
 // RUN:   -fno-ptrauth-init-fini-address-discrimination -fptrauth-init-fini-address-discrimination \
+// RUN:   -fno-ptrauth-elf-got -fptrauth-elf-got \
 // RUN:   -fno-aarch64-jump-table-hardening -faarch64-jump-table-hardening \
-// RUN:   %s 2>&1 | FileCheck %s --check-prefix=ALL
-// ALL: "-cc1"{{.*}} "-fptrauth-intrinsics" "-fptrauth-calls" "-fptrauth-returns" "-fptrauth-auth-traps" "-fptrauth-vtable-pointer-address-discrimination" "-fptrauth-vtable-pointer-type-discrimination" "-fptrauth-type-info-vtable-pointer-discrimination" "-fptrauth-indirect-gotos" "-fptrauth-init-fini" "-fptrauth-init-fini-address-discrimination" "-faarch64-jump-table-hardening"
+// RUN:   %s 2>&1 | FileCheck %s --check-prefix=ALL-LINUX-PAUTHABI
+// RUN: %clang -### -c --target=aarch64-linux-pauthtest \
+// RUN:   -fno-ptrauth-intrinsics -fptrauth-intrinsics \
+// RUN:   -fno-ptrauth-calls -fptrauth-calls \
+// RUN:   -fno-ptrauth-returns -fptrauth-returns \
+// RUN:   -fno-ptrauth-auth-traps -fptrauth-auth-traps \
+// RUN:   -fno-ptrauth-vtable-pointer-address-discrimination -fptrauth-vtable-pointer-address-discrimination \
+// RUN:   -fno-ptrauth-vtable-pointer-type-discrimination -fptrauth-vtable-pointer-type-discrimination \
+// RUN:   -fno-ptrauth-type-info-vtable-pointer-discrimination -fptrauth-type-info-vtable-pointer-discrimination \
+// RUN:   -fno-ptrauth-indirect-gotos -fptrauth-indirect-gotos \
+// RUN:   -fno-ptrauth-init-fini -fptrauth-init-fini \
+// RUN:   -fno-ptrauth-init-fini-address-discrimination -fptrauth-init-fini-address-discrimination \
+// RUN:   -fno-ptrauth-elf-got -fptrauth-elf-got \
+// RUN:   -fno-aarch64-jump-table-hardening -faarch64-jump-table-hardening \
+// RUN:   %s 2>&1 | FileCheck %s --check-prefix=ALL-LINUX-PAUTHABI
+// ALL-LINUX-PAUTHABI: "-cc1"{{.*}} "-fptrauth-intrinsics" "-fptrauth-calls" "-fptrauth-returns" "-fptrauth-auth-traps" "-fptrauth-vtable-pointer-address-discrimination" "-fptrauth-vtable-pointer-type-discrimination" "-fptrauth-type-info-vtable-pointer-discrimination" "-fptrauth-indirect-gotos" "-fptrauth-init-fini" "-fptrauth-init-fini-address-discrimination" "-fptrauth-elf-got"{{.*}} "-faarch64-jump-table-hardening"
+
+// RUN: %clang -### -c --target=aarch64-linux \
+// RUN:   -fno-aarch64-jump-table-hardening -faarch64-jump-table-hardening \
+// RUN:   %s 2>&1 | FileCheck %s --check-prefix=ALL-LINUX
+// ALL-LINUX: "-cc1"{{.*}} "-faarch64-jump-table-hardening"
+
+//// Some -fptrauth-* flags are supported for ARM64 Darwin.
+// RUN: %clang -### -c --target=arm64-darwin \
+// RUN:   -fno-ptrauth-intrinsics -fptrauth-intrinsics \
+// RUN:   -fno-ptrauth-calls -fptrauth-calls \
+// RUN:   -fno-ptrauth-returns -fptrauth-returns \
+// RUN:   -fno-ptrauth-auth-traps -fptrauth-auth-traps \
+// RUN:   -fno-ptrauth-vtable-pointer-address-discrimination -fptrauth-vtable-pointer-address-discrimination \
+// RUN:   -fno-ptrauth-vtable-pointer-type-discrimination -fptrauth-vtable-pointer-type-discrimination \
+// RUN:   -fno-ptrauth-type-info-vtable-pointer-discrimination -fptrauth-type-info-vtable-pointer-discrimination \
+// RUN:   -fno-ptrauth-indirect-gotos -fptrauth-indirect-gotos \
+// RUN:   -fno-aarch64-jump-table-hardening -faarch64-jump-table-hardening \
+// RUN:   %s 2>&1 | FileCheck %s --check-prefix=ALL-DARWIN
+// ALL-DARWIN: "-cc1"{{.*}} "-fptrauth-intrinsics" "-fptrauth-calls" "-fptrauth-returns" "-fptrauth-auth-traps" "-fptrauth-vtable-pointer-address-discrimination" "-fptrauth-vtable-pointer-type-discrimination" "-fptrauth-type-info-vtable-pointer-discrimination" "-fptrauth-indirect-gotos"{{.*}} "-faarch64-jump-table-hardening"
 
 // RUN: %clang -### -c --target=aarch64-linux -mabi=pauthtest %s 2>&1 | FileCheck %s --check-prefix=PAUTHABI1
 // RUN: %clang -### -c --target=aarch64-linux-pauthtest %s 2>&1 | FileCheck %s --check-prefix=PAUTHABI1
@@ -40,7 +75,7 @@
 // RUN:   -fno-aarch64-jump-table-hardening %s 2>&1 | FileCheck %s --check-prefix=PAUTHABI2
 
 //// Non-linux OS: pauthtest ABI has no effect in terms of passing ptrauth cc1 flags.
-//// An error about unsupported ABI will be emitted later in pipeline (see ERR2 below)
+//// An error about unsupported ABI will be emitted later in pipeline (see ERR3 below)
 // RUN: %clang -### -c --target=aarch64 -mabi=pauthtest %s 2>&1 | FileCheck %s --check-prefix=PAUTHABI2
 
 // PAUTHABI2:      "-cc1"
@@ -55,10 +90,11 @@
 // PAUTHABI3-NOT:  "-fptrauth-
 // PAUTHABI3-NOT: "-faarch64-jump-table-hardening"
 
-// RUN: not %clang -### -c --target=x86_64 -fptrauth-intrinsics -fptrauth-calls -fptrauth-returns -fptrauth-auth-traps \
+//// Non-pauthtest ABI.
+// RUN: not %clang -### -c --target=aarch64-linux -fptrauth-intrinsics -fptrauth-calls -fptrauth-returns -fptrauth-auth-traps \
 // RUN:   -fptrauth-vtable-pointer-address-discrimination -fptrauth-vtable-pointer-type-discrimination \
 // RUN:   -fptrauth-type-info-vtable-pointer-discrimination -fptrauth-indirect-gotos -fptrauth-init-fini \
-// RUN:   -fptrauth-init-fini-address-discrimination -faarch64-jump-table-hardening %s 2>&1 | FileCheck %s --check-prefix=ERR1
+// RUN:   -fptrauth-init-fini-address-discrimination -fptrauth-elf-got %s 2>&1 | FileCheck %s --check-prefix=ERR1
 // ERR1:      error: unsupported option '-fptrauth-intrinsics' for target '{{.*}}'
 // ERR1-NEXT: error: unsupported option '-fptrauth-calls' for target '{{.*}}'
 // ERR1-NEXT: error: unsupported option '-fptrauth-returns' for target '{{.*}}'
@@ -69,59 +105,64 @@
 // ERR1-NEXT: error: unsupported option '-fptrauth-indirect-gotos' for target '{{.*}}'
 // ERR1-NEXT: error: unsupported option '-fptrauth-init-fini' for target '{{.*}}'
 // ERR1-NEXT: error: unsupported option '-fptrauth-init-fini-address-discrimination' for target '{{.*}}'
-// ERR1-NEXT: error: unsupported option '-faarch64-jump-table-hardening' for target '{{.*}}'
+// ERR1-NEXT: error: unsupported option '-fptrauth-elf-got' for target '{{.*}}'
 
+//// Non-AArch64.
+// RUN: not %clang -### -c --target=x86_64-linux -faarch64-jump-table-hardening %s 2>&1 | FileCheck %s --check-prefix=ERR2
+// ERR2: error: unsupported option '-faarch64-jump-table-hardening' for target '{{.*}}'
+
+//// Only support PAuth ABI for Linux as for now.
+// RUN: not %clang -c --target=aarch64 -mabi=pauthtest %s 2>&1 | FileCheck %s --check-prefix=ERR3
+// ERR3: error: unknown target ABI 'pauthtest'
 
-// RUN: not %clang -c --target=aarch64 -mabi=pauthtest %s 2>&1 | FileCheck %s --check-prefix=ERR2
 //// The ABI is not specified explicitly, and for non-Linux pauthtest environment does not correspond
 //// to pauthtest ABI (each OS target defines this behavior separately). Do not emit an error.
-// RUN:     %clang -c --target=aarch64-pauthtest       %s -o /dev/null
-// ERR2: error: unknown target ABI 'pauthtest'
+// RUN: %clang -c --target=aarch64-pauthtest %s -o /dev/null
 
 //// PAuth ABI is encoded as environment part of the triple, so don't allow to explicitly set other environments.
-// RUN: not %clang -### -c --target=aarch64-linux-gnu -mabi=pauthtest %s 2>&1 | FileCheck %s --check-prefix=ERR3
-// ERR3: error: unsupported option '-mabi=pauthtest' for target 'aarch64-unknown-linux-gnu'
+// RUN: not %clang -### -c --target=aarch64-linux-gnu -mabi=pauthtest %s 2>&1 | FileCheck %s --check-prefix=ERR4
+// ERR4: error: unsupported option '-mabi=pauthtest' for target 'aarch64-unknown-linux-gnu'
 // RUN: %clang -### -c --target=aarch64-linux-pauthtest -mabi=pauthtest %s
 
 //// The only branch protection option compatible with PAuthABI is BTI.
 // RUN: not %clang -### -c --target=aarch64-linux -mabi=pauthtest -mbranch-protection=pac-ret %s 2>&1 | \
-// RUN:   FileCheck %s --check-prefix=ERR4_1
+// RUN:   FileCheck %s --check-prefix=ERR5_1
 // RUN: not %clang -### -c --target=aarch64-linux-pauthtest       -mbranch-protection=pac-ret %s 2>&1 | \
-// RUN:   FileCheck %s --check-prefix=ERR4_1
+// RUN:   FileCheck %s --check-prefix=ERR5_1
 // RUN: not %clang -### -c --target=aarch64 -fptrauth-returns     -mbranch-protection=pac-ret %s 2>&1 | \
-// RUN:   FileCheck %s --check-prefix=ERR4_2
-// ERR4_1: error: unsupported option '-mbranch-protection=pac-ret' for target 'aarch64-unknown-linux-pauthtest'
-// ERR4_2: error: the combination of '-mbranch-protection=pac-ret' and '-fptrauth-returns' is incompatible
+// RUN:   FileCheck %s --check-prefix=ERR5_2
+// ERR5_1: error: unsupported option '-mbranch-protection=pac-ret' for target 'aarch64-unknown-linux-pauthtest'
+// ERR5_2: error: the combination of '-mbranch-protection=pac-ret' and '-fptrauth-returns' is incompatible
 
 // RUN: not %clang -### -c --target=aarch64-linux -mabi=pauthtest -mbranch-protection=gcs %s 2>&1 | \
-// RUN:   FileCheck %s --check-prefix=ERR5_1
+// RUN:   FileCheck %s --check-prefix=ERR6_1
 // RUN: not %clang -### -c --target=aarch64-linux-pauthtest       -mbranch-protection=gcs %s 2>&1 | \
-// RUN:   FileCheck %s --check-prefix=ERR5_1
+// RUN:   FileCheck %s --check-prefix=ERR6_1
 // RUN: not %clang -### -c --target=aarch64 -fptrauth-returns     -mbranch-protection=gcs %s 2>&1 | \
-// RUN:   FileCheck %s --check-prefix=ERR5_2
-// ERR5_1: error: unsupported option '-mbranch-protection=gcs' for target 'aarch64-unknown-linux-pauthtest'
-// ERR5_2: error: the combination of '-mbranch-protection=gcs' and '-fptrauth-returns' is incompatible
+// RUN:   FileCheck %s --check-prefix=ERR6_2
+// ERR6_1: error: unsupported option '-mbranch-protection=gcs' for target 'aarch64-unknown-linux-pauthtest'
+// ERR6_2: error: the combination of '-mbranch-protection=gcs' and '-fptrauth-returns' is incompatible
 
 // RUN: not %clang -### -c --target=aarch64-linux -mabi=pauthtest -mbranch-protection=standard %s 2>&1 | \
-// RUN:   FileCheck %s --check-prefix=ERR6_1
+// RUN:   FileCheck %s --check-prefix=ERR7_1
 // RUN: not %clang -### -c --target=aarch64-linux-pauthtest       -mbranch-protection=standard %s 2>&1 | \
-// RUN:   FileCheck %s --check-prefix=ERR6_1
+// RUN:   FileCheck %s --check-prefix=ERR7_1
 // RUN: not %clang -### -c --target=aarch64 -fptrauth-returns     -mbranch-protection=standard %s 2>&1 | \
-// RUN:   FileCheck %s --check-prefix=ERR6_2
-// ERR6_1: error: unsupported option '-mbranch-protection=standard' for target 'aarch64-unknown-linux-pauthtest'
-// ERR6_2: error: the combination of '-mbranch-protection=standard' and '-fptrauth-returns' is incompatible
+// RUN:   FileCheck %s --check-prefix=ERR7_2
+// ERR7_1: error: unsupported option '-mbranch-protection=standard' for target 'aarch64-unknown-linux-pauthtest'
+// ERR7_2: error: the combination of '-mbranch-protection=standard' and '-fptrauth-returns' is incompatible
 
 // RUN: not %clang -### -c --target=aarch64-linux -mabi=pauthtest -msign-return-address=all %s 2>&1 | \
-// RUN:   FileCheck %s --check-prefix=ERR7
+// RUN:   FileCheck %s --check-prefix=ERR8
 // RUN: not %clang -### -c --target=aarch64-linux-pauthtest       -msign-return-address=all %s 2>&1 | \
-// RUN:   FileCheck %s --check-prefix=ERR7
-// ERR7: error: unsupported option '-msign-return-address=all' for target 'aarch64-unknown-linux-pauthtest'
+// RUN:   FileCheck %s --check-prefix=ERR8
+// ERR8: error: unsupported option '-msign-return-address=all' for target 'aarch64-unknown-linux-pauthtest'
 
 // RUN: not %clang -### -c --target=aarch64-linux -mabi=pauthtest -msign-return-address=non-leaf %s 2>&1 | \
-// RUN:   FileCheck %s --check-prefix=ERR8
+// RUN:   FileCheck %s --check-prefix=ERR9
 // RUN: not %clang -### -c --target=aarch64-linux-pauthtest       -msign-return-address=non-leaf %s 2>&1 | \
-// RUN:   FileCheck %s --check-prefix=ERR8
-// ERR8: error: unsupported option '-msign-return-address=non-leaf' for target 'aarch64-unknown-linux-pauthtest'
+// RUN:   FileCheck %s --check-prefix=ERR9
+// ERR9: error: unsupported option '-msign-return-address=non-leaf' for target 'aarch64-unknown-linux-pauthtest'
 
 // RUN: %clang -### -c --target=aarch64-linux -mabi=pauthtest -msign-return-address=none %s
 // RUN: %clang -### -c --target=aarch64-linux-pauthtest       -msign-return-address=none %s
diff --git a/clang/test/Driver/fuchsia.c b/clang/test/Driver/fuchsia.c
index d0fec18..99e5018 100644
--- a/clang/test/Driver/fuchsia.c
+++ b/clang/test/Driver/fuchsia.c
@@ -130,6 +130,11 @@
 // RUN:     -resource-dir=%S/Inputs/resource_dir_with_per_target_subdir \
 // RUN:     -fuse-ld=ld \
 // RUN:     | FileCheck %s -check-prefix=CHECK-SAFESTACK
+// RUN: %clang -### %s --target=x86_64-unknown-fuchsia -m32 \
+// RUN:     -fsanitize=safe-stack 2>&1 \
+// RUN:     -resource-dir=%S/Inputs/resource_dir_with_per_target_subdir \
+// RUN:     -fuse-ld=ld \
+// RUN:     | FileCheck %s -check-prefix=CHECK-SAFESTACK
 // CHECK-SAFESTACK: "-resource-dir" "[[RESOURCE_DIR:[^"]+]]"
 // CHECK-SAFESTACK: "-fsanitize=safe-stack"
 // CHECK-SAFESTACK-NOT: "[[RESOURCE_DIR]]{{/|\\\\}}lib{{/|\\\\}}x86_64-unknown-fuchsia{{/|\\\\}}libclang_rt.safestack.a"
diff --git a/clang/test/Frontend/aarch64-ignore-branch-protection-attribute.c b/clang/test/Frontend/aarch64-ignore-branch-protection-attribute.c
index 32cc98d..e6605ce 100644
--- a/clang/test/Frontend/aarch64-ignore-branch-protection-attribute.c
+++ b/clang/test/Frontend/aarch64-ignore-branch-protection-attribute.c
@@ -1,7 +1,11 @@
 // REQUIRES: aarch64-registered-target
 
-// RUN: %clang -target aarch64-linux-pauthtest   %s -S -emit-llvm -o - 2>&1 | FileCheck --implicit-check-not=warning: %s
-// RUN: %clang -target aarch64 -fptrauth-returns %s -S -emit-llvm -o - 2>&1 | FileCheck --implicit-check-not=warning: %s
+// RUN:     %clang -target aarch64-linux-pauthtest   %s -S -emit-llvm -o - 2>&1 | FileCheck --implicit-check-not=warning: %s
+// RUN: not %clang -target aarch64 -fptrauth-returns %s -S -emit-llvm -o - 2>&1 | FileCheck --implicit-check-not=warning: --check-prefix=PTRAUTH-RETURNS %s
+
+// Clang fails early, no LLVM IR output produced.
+// PTRAUTH-RETURNS: clang: error: unsupported option '-fptrauth-returns' for target 'aarch64'
+// PTRAUTH-RETURNS-NOT: attributes
 
 /// Unsupported with pauthtest, warning emitted
 __attribute__((target("branch-protection=pac-ret"))) void f1() {}
diff --git a/clang/test/Frontend/diag-wrap-colors.cpp b/clang/test/Frontend/diag-wrap-colors.cpp
new file mode 100644
index 0000000..e3dccb1
--- /dev/null
+++ b/clang/test/Frontend/diag-wrap-colors.cpp
@@ -0,0 +1,6 @@
+// RUN: not %clang_cc1 %s -fmessage-length=50 -fcolor-diagnostics -fno-show-source-location -o - 2>&1 | FileCheck %s
+
+struct F {
+  float a : 10;
+};
+// CHECK: bit-field 'a' has non-integral type 'float'
diff --git a/clang/test/Interpreter/pretty-print.c b/clang/test/Interpreter/pretty-print.c
index d0712fb..9a7bf75 100644
--- a/clang/test/Interpreter/pretty-print.c
+++ b/clang/test/Interpreter/pretty-print.c
@@ -78,14 +78,16 @@ int * null_ptr = (int*)0; null_ptr
 union U { int I; float F; } u; u.I = 12; u.I
 // CHECK-NEXT: (int) 12
 
-// TODO: _Bool, _Complex, _Atomic, and _BitInt
-// struct S1{} s1; s1
-// TODO-CHECK-NEXT: (S1 &) @0x{{[0-9a-f]+}}
+struct S1{} s1; s1
+// CHECK-NEXT: (S1 &) @0x{{[0-9a-f]+}}
+
+struct S2 {int d;} E = {22}; E
+// CHECK-NEXT: (S2 &) @0x{{[0-9a-f]+}}
 
-// struct S2 {int d;} E = {22}; E
-// TODO-CHECK-NEXT: (struct S2 &) @0x{{[0-9a-f]+}}
-// E.d
-// TODO-CHECK-NEXT: (int) 22
+E.d
+// CHECK-NEXT: (int) 22
+
+// TODO: _Bool, _Complex, _Atomic, and _BitInt
 
 // -----------------------------------------------------------------------------
 // Tentative definition handling (C99 6.9.2)
diff --git a/clang/test/OpenMP/task_ast_print.cpp b/clang/test/OpenMP/task_ast_print.cpp
index 30fb7ab..b059f18 100644
--- a/clang/test/OpenMP/task_ast_print.cpp
+++ b/clang/test/OpenMP/task_ast_print.cpp
@@ -1,8 +1,10 @@
 // RUN: %clang_cc1 -verify -Wno-vla -fopenmp -ast-print %s | FileCheck %s
+// RUN: %clang_cc1 -verify -Wno-vla -fopenmp -fopenmp-version=60 -DOMP60 -ast-print %s | FileCheck %s --check-prefix=CHECK60
 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -std=c++11 -include-pch %t -verify -Wno-vla %s -ast-print | FileCheck %s
 
 // RUN: %clang_cc1 -verify -Wno-vla -fopenmp-simd -ast-print %s | FileCheck %s
+// RUN: %clang_cc1 -verify -Wno-vla -fopenmp-simd -fopenmp-version=60 -DOMP60 -ast-print %s | FileCheck %s --check-prefix=CHECK60
 // RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp-simd -std=c++11 -include-pch %t -verify -Wno-vla %s -ast-print | FileCheck %s
 // RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -ast-dump  %s | FileCheck %s --check-prefix=DUMP
@@ -101,8 +103,8 @@ T tmain(T argc, T *argv) {
   a = 2;
 #pragma omp task default(none), private(argc, b) firstprivate(argv) shared(d) if (argc > 0) final(S<T>::TS > 0) priority(argc) affinity(argc, argv[b:argc], arr[:], ([argc][sizeof(T)])argv)
   foo();
-#pragma omp taskgroup task_reduction(-: argc)
-#pragma omp task if (C) mergeable priority(C) in_reduction(-: argc)
+#pragma omp taskgroup task_reduction(+: argc)
+#pragma omp task if (C) mergeable priority(C) in_reduction(+: argc)
   foo();
   return 0;
 }
@@ -119,8 +121,8 @@ T tmain(T argc, T *argv) {
 // CHECK-NEXT: a = 2;
 // CHECK-NEXT: #pragma omp task default(none) private(argc,b) firstprivate(argv) shared(d) if(argc > 0) final(S<T>::TS > 0) priority(argc) affinity(argc,argv[b:argc],arr[:],([argc][sizeof(T)])argv)
 // CHECK-NEXT: foo()
-// CHECK-NEXT: #pragma omp taskgroup task_reduction(-: argc)
-// CHECK-NEXT: #pragma omp task if(C) mergeable priority(C) in_reduction(-: argc)
+// CHECK-NEXT: #pragma omp taskgroup task_reduction(+: argc)
+// CHECK-NEXT: #pragma omp task if(C) mergeable priority(C) in_reduction(+: argc)
 // CHECK-NEXT: foo()
 // CHECK: template<> int tmain<int, 5>(int argc, int *argv) {
 // CHECK-NEXT: int b = argc, c, d, e, f, g;
@@ -134,8 +136,8 @@ T tmain(T argc, T *argv) {
 // CHECK-NEXT: a = 2;
 // CHECK-NEXT: #pragma omp task default(none) private(argc,b) firstprivate(argv) shared(d) if(argc > 0) final(S<int>::TS > 0) priority(argc) affinity(argc,argv[b:argc],arr[:],([argc][sizeof(int)])argv)
 // CHECK-NEXT: foo()
-// CHECK-NEXT: #pragma omp taskgroup task_reduction(-: argc)
-// CHECK-NEXT: #pragma omp task if(5) mergeable priority(5) in_reduction(-: argc)
+// CHECK-NEXT: #pragma omp taskgroup task_reduction(+: argc)
+// CHECK-NEXT: #pragma omp task if(5) mergeable priority(5) in_reduction(+: argc)
 // CHECK-NEXT: foo()
 // CHECK: template<> long tmain<long, 1>(long argc, long *argv) {
 // CHECK-NEXT: long b = argc, c, d, e, f, g;
@@ -149,8 +151,8 @@ T tmain(T argc, T *argv) {
 // CHECK-NEXT: a = 2;
 // CHECK-NEXT: #pragma omp task default(none) private(argc,b) firstprivate(argv) shared(d) if(argc > 0) final(S<long>::TS > 0) priority(argc) affinity(argc,argv[b:argc],arr[:],([argc][sizeof(long)])argv)
 // CHECK-NEXT: foo()
-// CHECK-NEXT: #pragma omp taskgroup task_reduction(-: argc)
-// CHECK-NEXT: #pragma omp task if(1) mergeable priority(1) in_reduction(-: argc)
+// CHECK-NEXT: #pragma omp taskgroup task_reduction(+: argc)
+// CHECK-NEXT: #pragma omp task if(1) mergeable priority(1) in_reduction(+: argc)
 // CHECK-NEXT: foo()
 
 enum Enum {};
@@ -199,6 +201,14 @@ int main(int argc, char **argv) {
 #pragma omp task depend(inout: omp_all_memory)
   foo();
   // CHECK-NEXT: foo();
+#ifdef OMP60
+#pragma omp task threadset(omp_pool)
+#pragma omp task threadset(omp_team)
+  foo();
+#endif
+  // CHECK60: #pragma omp task threadset(omp_pool)
+  // CHECK60: #pragma omp task threadset(omp_team)
+  // CHECK60-NEXT: foo();
   return tmain<int, 5>(b, &b) + tmain<long, 1>(x, &x);
 }
 
diff --git a/clang/test/OpenMP/task_codegen.cpp b/clang/test/OpenMP/task_codegen.cpp
index c3e6d9e6b..ba8e694 100644
--- a/clang/test/OpenMP/task_codegen.cpp
+++ b/clang/test/OpenMP/task_codegen.cpp
@@ -41,6 +41,9 @@
 // RUN: -emit-llvm -o - -DOMP51 | FileCheck %s \
 // RUN: --implicit-check-not="{{__kmpc|__tgt}}"
 
+// RUN: %clang_cc1 -verify -Wno-vla -triple x86_64-apple-darwin10 -fopenmp -fopenmp-version=60 -DOMP60 -fopenmp-enable-irbuilder -x c++ -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK6
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=60 -DOMP60 -fopenmp-enable-irbuilder -x c++ -triple x86_64-apple-darwin10 -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=60 -DOMP60 -fopenmp-enable-irbuilder -x c++ -triple x86_64-apple-darwin10 -include-pch %t -verify -Wno-vla %s -emit-llvm -o - | FileCheck %s --check-prefix=CHECK6
 
 // expected-no-diagnostics
 #ifndef HEADER
@@ -65,6 +68,7 @@ struct S {
   S(const S &s) : a(s.a) {}
   ~S() {}
 };
+
 int a;
 int main() {
   char b;
@@ -147,6 +151,7 @@ int main() {
 
 
 
+
 // s1 = S();
 
 
@@ -215,6 +220,19 @@ void test_omp_all_memory()
   }
 }
 #endif // OMP51
+
+#ifdef OMP60
+void test_threadset()
+{
+#pragma omp task threadset(omp_team)
+  {
+  }
+#pragma omp task threadset(omp_pool)
+  {
+  }
+}
+#endif // OMP60
+
 #endif
 // CHECK1-LABEL: define {{[^@]+}}@main
 // CHECK1-SAME: () #[[ATTR0:[0-9]+]] {
@@ -10243,3 +10261,18 @@ void test_omp_all_memory()
 // CHECK4-51-NEXT:    call void @__cxx_global_var_init()
 // CHECK4-51-NEXT:    ret void
 //
+// CHECK6-LABEL: define void @_Z14test_threadsetv()
+// CHECK6-NEXT:  entry:
+// CHECK6-NEXT:       [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_23:%.*]], align 1
+// CHECK6-NEXT:       [[AGG_CAPTURED2:%.*]] = alloca [[STRUCT_ANON_25:%.*]], align 1
+// CHECK6-NEXT:       call i32 @__kmpc_global_thread_num(ptr @[[GLOB_PTR:[0-9]+]])
+// CHECK6-NEXT:       [[TMP0:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @1, i32 %omp_global_thread_num, i32 1, i64 40, i64 1, ptr @.omp_task_entry..[[ENTRY1:[0-9]+]])
+// CHECK6-NEXT:       getelementptr inbounds nuw %struct.kmp_task_t_with_privates{{.*}}, ptr %0, i32 0, i32 0
+// CHECK6-NEXT:       call i32 @__kmpc_global_thread_num(ptr @[[GLOB_PTR:[0-9]+]])
+// CHECK6-NEXT:       call i32 @__kmpc_omp_task(ptr @1, i32 %omp_global_thread_num1, ptr %0)
+// CHECK6-NEXT:       call i32 @__kmpc_global_thread_num(ptr @[[GLOB_PTR2:[0-9]+]])
+// CHECK6-NEXT:       [[TMP3:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @1, i32 %omp_global_thread_num3, i32 129, i64 40, i64 1, ptr @.omp_task_entry..[[ENTRY2:[0-9]+]])
+// CHECK6-NEXT:       getelementptr inbounds nuw %struct.kmp_task_t_with_privates{{.*}}, ptr %3, i32 0, i32 0
+// CHECK6-NEXT:       call i32 @__kmpc_global_thread_num(ptr @[[GLOB_PTR2:[0-9]+]])
+// CHECK6-NEXT:       call i32 @__kmpc_omp_task(ptr @1, i32 %omp_global_thread_num4, ptr %3)
+// CHECK6-NEXT:       ret void
diff --git a/clang/test/OpenMP/task_threadset_messages.cpp b/clang/test/OpenMP/task_threadset_messages.cpp
new file mode 100755
index 0000000..f553a2d
--- /dev/null
+++ b/clang/test/OpenMP/task_threadset_messages.cpp
@@ -0,0 +1,99 @@
+// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp -fopenmp-version=45 -std=c++11 -ferror-limit 200 -o - %s
+// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp -fopenmp-version=50 -std=c++11 -ferror-limit 200 -o - %s
+// RUN: %clang_cc1 -verify=expected,omp51 -fopenmp -fopenmp-version=51 -std=c++11 -ferror-limit 200 -o - %s
+// RUN: %clang_cc1 -verify=expected -DOMP60 -fopenmp -fopenmp-version=60 -std=c++11 -ferror-limit 200 -o - %s
+
+// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp-simd -fopenmp-version=45 -std=c++11 -ferror-limit 200 -o - %s
+// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp-simd -fopenmp-version=50 -std=c++11 -ferror-limit 200 -o - %s
+// RUN: %clang_cc1 -verify=expected,omp51 -fopenmp-simd -fopenmp-version=51 -std=c++11 -ferror-limit 200 -o - %s
+// RUN: %clang_cc1 -verify=expected -DOMP60 -fopenmp-simd -fopenmp-version=60 -std=c++11 -ferror-limit 200 -o - %s
+
+#ifdef OMP60
+struct ComplexStruct {
+  int data[10];
+  struct InnerStruct {
+    float value;
+  } inner;
+};
+
+// Template class with member functions using 'threadset'.
+template <typename T>
+class TemplateClass {
+public:
+  void foo() {
+    #pragma omp task threadset(omp_pool)
+    {
+      T temp;
+    }
+  }
+  void bar() {
+    #pragma omp taskloop threadset(omp_team)
+    for (int i = 0; i < 10; ++i) {}
+  }
+};
+
+// Valid uses of 'threadset' with 'omp_pool' and 'omp_team' in task directive.
+void test_task_threadset_valid() {
+  int a;
+  #pragma omp task threadset(omp_pool)
+  #pragma omp task threadset(omp_team)
+  #pragma omp task threadset(omp_pool) if(1)
+  #pragma omp task threadset(omp_team) priority(5)
+  #pragma omp task threadset(omp_pool) depend(out: a)
+  #pragma omp parallel
+  {
+    #pragma omp task threadset(omp_pool)
+    {
+      #pragma omp taskloop threadset(omp_team)
+      for (int i = 0; i < 5; ++i) {}
+    }
+  }
+
+  TemplateClass<int> obj;
+  obj.foo();
+  obj.bar();
+}
+
+// Invalid uses of 'threadset' with incorrect arguments in task directive.
+void test_task_threadset_invalid_args() {
+  #pragma omp task threadset(invalid_arg) // expected-error {{expected 'omp_pool' or 'omp_team' in OpenMP clause 'threadset'}}
+  #pragma omp task threadset(123) // expected-error {{expected 'omp_pool' or 'omp_team' in OpenMP clause 'threadset'}}
+  #pragma omp task threadset(omp_pool, omp_team) // expected-error {{expected ')'}} expected-note {{to match this '('}}
+  #pragma omp task threadset() // expected-error {{expected 'omp_pool' or 'omp_team' in OpenMP clause 'threadset'}}
+  {}
+}
+
+// Valid uses of 'threadset' with 'omp_pool' and 'omp_team' in taskloop directive.
+void test_taskloop_threadset_valid() {
+  #pragma omp taskloop threadset(omp_pool)
+  for (int i = 0; i < 10; ++i) {}
+  #pragma omp taskloop threadset(omp_team)
+  for (int i = 0; i < 10; ++i) {}
+  #pragma omp taskloop threadset(omp_pool) grainsize(5)
+  for (int i = 0; i < 10; ++i) {}
+  #pragma omp taskloop threadset(omp_team) num_tasks(2)
+  for (int i = 0; i < 10; ++i) {}
+}
+
+// Invalid uses of 'threadset' with incorrect arguments in taskloop directive.
+void test_taskloop_threadset_invalid_args() {
+  #pragma omp taskloop threadset(invalid_arg) // expected-error {{expected 'omp_pool' or 'omp_team' in OpenMP clause 'threadset'}}
+  for (int i = 0; i < 10; ++i) {}
+  #pragma omp taskloop threadset(123) // expected-error {{expected 'omp_pool' or 'omp_team' in OpenMP clause 'threadset'}}
+  for (int i = 0; i < 10; ++i) {}
+  #pragma omp taskloop threadset(omp_pool, omp_team) // expected-error {{expected ')'}} expected-note {{to match this '('}}
+  for (int i = 0; i < 10; ++i) {}
+  #pragma omp taskloop threadset() // expected-error {{expected 'omp_pool' or 'omp_team' in OpenMP clause 'threadset'}}
+  for (int i = 0; i < 10; ++i) {}
+}
+
+#else
+void test_threadset_not_supported() {
+  #pragma omp task threadset(omp_pool) // omp45-error {{unexpected OpenMP clause 'threadset' in directive '#pragma omp task'}} omp50-error {{unexpected OpenMP clause 'threadset' in directive '#pragma omp task'}} omp51-error {{unexpected OpenMP clause 'threadset' in directive '#pragma omp task'}}
+  #pragma omp task threadset(omp_team) // omp45-error {{unexpected OpenMP clause 'threadset' in directive '#pragma omp task'}} omp50-error {{unexpected OpenMP clause 'threadset' in directive '#pragma omp task'}} omp51-error {{unexpected OpenMP clause 'threadset' in directive '#pragma omp task'}}
+  #pragma omp taskloop threadset(omp_team) // omp45-error {{unexpected OpenMP clause 'threadset' in directive '#pragma omp taskloop'}} omp50-error {{unexpected OpenMP clause 'threadset' in directive '#pragma omp taskloop'}} omp51-error {{unexpected OpenMP clause 'threadset' in directive '#pragma omp taskloop'}}
+  for (int i = 0; i < 10; ++i) {}
+  #pragma omp taskloop threadset(omp_pool) // omp45-error {{unexpected OpenMP clause 'threadset' in directive '#pragma omp taskloop'}} omp50-error {{unexpected OpenMP clause 'threadset' in directive '#pragma omp taskloop'}} omp51-error {{unexpected OpenMP clause 'threadset' in directive '#pragma omp taskloop'}}
+  for (int i = 0; i < 10; ++i) {}
+}
+#endif
diff --git a/clang/test/OpenMP/taskloop_ast_print.cpp b/clang/test/OpenMP/taskloop_ast_print.cpp
index 1b6d724..e4bf20a 100644
--- a/clang/test/OpenMP/taskloop_ast_print.cpp
+++ b/clang/test/OpenMP/taskloop_ast_print.cpp
@@ -1,8 +1,10 @@
 // RUN: %clang_cc1 -verify -fopenmp -ast-print %s | FileCheck %s
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=60 -DOMP60 -ast-print %s | FileCheck %s --check-prefix=CHECK60
 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -std=c++11 -include-pch %t -verify %s -ast-print | FileCheck %s
 
 // RUN: %clang_cc1 -verify -fopenmp-simd -ast-print %s | FileCheck %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=60 -DOMP60 -ast-print %s | FileCheck %s --check-prefix=CHECK60
 // RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp-simd -std=c++11 -include-pch %t -verify %s -ast-print | FileCheck %s
 // expected-no-diagnostics
@@ -87,6 +89,20 @@ int main(int argc, char **argv) {
   // CHECK-NEXT: #pragma omp cancel taskgroup
   // CHECK-NEXT: #pragma omp cancellation point taskgroup
   // CHECK-NEXT: foo();
+#ifdef OMP60
+#pragma omp taskloop threadset(omp_team)
+  for (int i = 0; i < 10; ++i) {
+#pragma omp taskloop threadset(omp_pool)
+  for (int j = 0; j < 10; ++j) {
+    foo();
+  }
+}
+#endif
+ // CHECK60: #pragma omp taskloop threadset(omp_team)
+ // CHECK60-NEXT: for (int i = 0; i < 10; ++i) {
+ // CHECK60: #pragma omp taskloop threadset(omp_pool)
+ // CHECK60-NEXT: for (int j = 0; j < 10; ++j) {
+ // CHECK60-NEXT: foo();
   return (tmain<int, 5>(argc) + tmain<char, 1>(argv[0][0]));
 }
 
diff --git a/clang/test/OpenMP/taskloop_codegen.cpp b/clang/test/OpenMP/taskloop_codegen.cpp
index 69f8d3b..d119760 100644
--- a/clang/test/OpenMP/taskloop_codegen.cpp
+++ b/clang/test/OpenMP/taskloop_codegen.cpp
@@ -5,7 +5,12 @@
 // RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp-simd -x c++ -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
 // RUN: %clang_cc1 -fopenmp-simd -x c++ -triple x86_64-apple-darwin10 -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp-simd -x c++ -triple x86_64-apple-darwin10 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+
 // SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+
+// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp -fopenmp-version=60 -DOMP60 -x c++ -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK6
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=60 -DOMP60 -x c++ -triple x86_64-apple-darwin10 -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=60 -DOMP60 -x c++ -triple x86_64-apple-darwin10 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix=CHECK6
 // expected-no-diagnostics
 #ifndef HEADER
 #define HEADER
@@ -241,4 +246,52 @@ void taskloop_with_class() {
   }
 }
 
+#ifdef OMP60
+void test_threadset()
+{
+#pragma omp taskloop threadset(omp_team)
+  for (int i = 0; i < 10; ++i) {
+  }
+#pragma omp taskloop threadset(omp_pool)
+  for (int i = 0; i < 10; ++i) {
+  }
+}
+#endif // OMP60
+// CHECK6-LABEL: define void @_Z14test_threadsetv()
+// CHECK6-NEXT:  entry:
+// CHECK6-NEXT:       [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 1
+// CHECK6-NEXT:       %[[TMP:.*]] = alloca i32, align 4
+// CHECK6-NEXT:       [[AGG_CAPTURED1:%.*]] = alloca [[STRUCT_ANON_16:%.*]], align 1
+// CHECK6-NEXT:       %[[TMP2:.*]] = alloca i32, align 4
+// CHECK6-NEXT:       %[[TID0:.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB_PTR:[0-9]+]])
+// CHECK6-NEXT:       call void @__kmpc_taskgroup(ptr @1, i32 %[[TID0:.*]])
+// CHECK6-NEXT:       %[[TID1:.*]] = call ptr @__kmpc_omp_task_alloc(ptr @1, i32 %[[TID0:.*]], i32 1, i64 80, i64 1, ptr @.omp_task_entry..[[ENTRY1:[0-9]+]])
+// CHECK6-NEXT:       %[[TID2:.*]] = getelementptr inbounds nuw %struct.kmp_task_t_with_privates{{.*}}, ptr %[[TID1:.*]], i32 0, i32 0
+// CHECK6-NEXT:       %[[TID3:.*]] = getelementptr inbounds nuw %struct.kmp_task_t{{.*}}, ptr %[[TID2:.*]], i32 0, i32 5
+// CHECK6-NEXT:       store i64 0, ptr %[[TID3:.*]], align 8
+// CHECK6-NEXT:       %[[TID4:.*]] = getelementptr inbounds nuw %struct.kmp_task_t{{.*}}, ptr %[[TID2:.*]], i32 0, i32 6
+// CHECK6-NEXT:       store i64 9, ptr %[[TID4:.*]], align 8
+// CHECK6-NEXT:       %[[TID5:.*]] = getelementptr inbounds nuw %struct.kmp_task_t{{.*}}, ptr %[[TID2:.*]], i32 0, i32 7
+// CHECK6-NEXT:       store i64 1, ptr %[[TID5:.*]], align 8
+// CHECK6-NEXT:       %[[TID6:.*]] = getelementptr inbounds nuw %struct.kmp_task_t{{.*}}, ptr %[[TID2:.*]], i32 0, i32 9
+// CHECK6-NEXT:       call void @llvm.memset.p0.i64(ptr align 8 %[[TID6:.*]], i8 0, i64 8, i1 false)
+// CHECK6-NEXT:       %[[TID7:.*]] = load i64, ptr %[[TID5:.*]], align 8
+// CHECK6-NEXT:       call void @__kmpc_taskloop(ptr @1, i32 %[[TID0:.*]], ptr %[[TID1:.*]], i32 1, ptr %[[TID3:.*]], ptr %4, i64 %[[TID7:.*]], i32 1, i32 0, i64 0, ptr null)
+// CHECK6-NEXT:       call void @__kmpc_end_taskgroup(ptr @1, i32 %[[TID0:.*]])
+// CHECK6-NEXT:       call void @__kmpc_taskgroup(ptr @1, i32 %[[TID0:.*]])
+// CHECK6-NEXT:       %[[TID8:.*]] = call ptr @__kmpc_omp_task_alloc(ptr @1, i32 %[[TID0:.*]], i32 129, i64 80, i64 1, ptr @.omp_task_entry..[[ENTRY1:[0-9]+]])
+// CHECK6-NEXT:       %[[TID9:.*]] = getelementptr inbounds nuw %struct.kmp_task_t_with_privates{{.*}}, ptr %[[TID8:.*]], i32 0, i32 0
+// CHECK6-NEXT:       %[[TID10:.*]] = getelementptr inbounds nuw %struct.kmp_task_t{{.*}}, ptr %[[TID9:.*]], i32 0, i32 5
+// CHECK6-NEXT:       store i64 0, ptr %[[TID10:.*]], align 8
+// CHECK6-NEXT:       %[[TID11:.*]] = getelementptr inbounds nuw %struct.kmp_task_t{{.*}}, ptr %[[TID9:.*]], i32 0, i32 6
+// CHECK6-NEXT:       store i64 9, ptr %[[TID11:.*]], align 8
+// CHECK6-NEXT:       %[[TID12:.*]] = getelementptr inbounds nuw %struct.kmp_task_t{{.*}}, ptr %[[TID9:.*]], i32 0, i32 7
+// CHECK6-NEXT:       store i64 1, ptr %[[TID12:.*]], align 8
+// CHECK6-NEXT:       %[[TID13:.*]] = getelementptr inbounds nuw %struct.kmp_task_t{{.*}}, ptr %[[TID9:.*]], i32 0, i32 9
+// CHECK6-NEXT:       call void @llvm.memset.p0.i64(ptr align 8 [[TID13:.*]], i8 0, i64 8, i1 false)
+// CHECK6-NEXT:       %[[TID14:.*]] = load i64, ptr [[TID12:.*]], align 8
+// CHECK6-NEXT:       call void @__kmpc_taskloop(ptr @1, i32 %[[TID0:.*]], ptr %[[TID8:.*]], i32 1, ptr %[[TID10:.*]], ptr %[[TID11:.*]], i64 %[[TID14:.*]], i32 1, i32 0, i64 0, ptr null)
+// CHECK6-NEXT:       call void @__kmpc_end_taskgroup(ptr @1, i32 %[[TID0:.*]])
+// CHECK6-NEXT:       ret void
+
 #endif
diff --git a/clang/test/Preprocessor/bpf-predefined-macros.c b/clang/test/Preprocessor/bpf-predefined-macros.c
index cd8a2ec..a9ae8c5 100644
--- a/clang/test/Preprocessor/bpf-predefined-macros.c
+++ b/clang/test/Preprocessor/bpf-predefined-macros.c
@@ -70,6 +70,9 @@ int u;
 #ifdef __BPF_FEATURE_LOAD_ACQ_STORE_REL
 int v;
 #endif
+#ifdef __BPF_FEATURE_GOTOX
+int w;
+#endif
 
 // CHECK: int b;
 // CHECK: int c;
@@ -110,6 +113,7 @@ int v;
 // CPU_V4: int u;
 
 // CPU_V4: int v;
+// CPU_V4: int w;
 
 // CPU_GENERIC: int g;
 
diff --git a/clang/test/SemaHLSL/BuiltIns/WaveActiveMin.hlsl b/clang/test/SemaHLSL/BuiltIns/WaveActiveMin.hlsl
new file mode 100644
index 0000000..3b12faf
--- /dev/null
+++ b/clang/test/SemaHLSL/BuiltIns/WaveActiveMin.hlsl
@@ -0,0 +1,29 @@
+// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -emit-llvm-only -disable-llvm-passes -verify
+
+int test_too_few_arg() {
+  return __builtin_hlsl_wave_active_min();
+  // expected-error@-1 {{too few arguments to function call, expected 1, have 0}}
+}
+
+float2 test_too_many_arg(float2 p0) {
+  return __builtin_hlsl_wave_active_min(p0, p0);
+  // expected-error@-1 {{too many arguments to function call, expected 1, have 2}}
+}
+
+bool test_expr_bool_type_check(bool p0) {
+  return __builtin_hlsl_wave_active_min(p0);
+  // expected-error@-1 {{invalid operand of type 'bool'}}
+}
+
+bool2 test_expr_bool_vec_type_check(bool2 p0) {
+  return __builtin_hlsl_wave_active_min(p0);
+  // expected-error@-1 {{invalid operand of type 'bool2' (aka 'vector<bool, 2>')}}
+}
+
+struct S { float f; };
+
+S test_expr_struct_type_check(S p0) {
+  return __builtin_hlsl_wave_active_min(p0);
+  // expected-error@-1 {{invalid operand of type 'S' where a scalar or vector is required}}
+}
+
diff --git a/clang/test/SemaTemplate/concepts.cpp b/clang/test/SemaTemplate/concepts.cpp
index becf546..c90af41 100644
--- a/clang/test/SemaTemplate/concepts.cpp
+++ b/clang/test/SemaTemplate/concepts.cpp
@@ -1632,3 +1632,29 @@ void fn3() {
 }
 
 }
+
+namespace GH165238 {
+
+namespace std {
+template <typename, typename _Tp>
+concept output_iterator = requires(_Tp __t) { __t; };
+template <typename _Out> struct basic_format_context {
+  static_assert(output_iterator<_Out, int>);
+  using char_type = _Out;
+};
+template <typename> class basic_format_parse_context;
+template <typename, typename _Context, typename _Formatter,
+          typename = basic_format_parse_context<typename _Context::char_type>>
+concept __parsable_with = requires(_Formatter __f) { __f; };
+template <typename _Tp, typename _CharT,
+          typename _Context = basic_format_context<_CharT>>
+concept __formattable_impl = __parsable_with<_Tp, _Context, _Context>;
+template <typename _Tp, typename _CharT>
+concept formattable = __formattable_impl<_Tp, _CharT>;
+} // namespace std
+struct {
+  void operator()(std::formattable<char> auto);
+} call;
+void foo() { call(""); }
+
+}