aboutsummaryrefslogtreecommitdiff
path: root/clang/test
diff options
context:
space:
mode:
Diffstat (limited to 'clang/test')
-rw-r--r--clang/test/AST/ByteCode/cxx11.cpp30
-rw-r--r--clang/test/AST/ByteCode/cxx2a.cpp14
-rw-r--r--clang/test/CIR/CodeGen/complex-compound-assignment.cpp208
-rw-r--r--clang/test/CIR/CodeGen/module-asm.c6
-rw-r--r--clang/test/CIR/IR/invalid-vtable.cir9
-rw-r--r--clang/test/CIR/IR/vtable-addrpt.cir23
-rw-r--r--clang/test/CIR/Lowering/module-asm.cir11
-rw-r--r--clang/test/CodeGen/WebAssembly/builtins-test-fp-sig.c70
-rw-r--r--clang/test/CodeGen/X86/avx-builtins.c12
-rw-r--r--clang/test/CodeGen/X86/avx512-reduceIntrin.c108
-rw-r--r--clang/test/CodeGen/X86/avx512-reduceMinMaxIntrin.c111
-rw-r--r--clang/test/CodeGen/X86/avx512dq-builtins.c457
-rw-r--r--clang/test/CodeGen/X86/avx512f-builtins.c10
-rw-r--r--clang/test/CodeGen/X86/avx512vpopcntdq-builtins.c19
-rw-r--r--clang/test/CodeGen/X86/avx512vpopcntdqvl-builtins.c29
-rw-r--r--clang/test/CodeGen/builtins-wasm.c21
-rw-r--r--clang/test/Headers/__cpuidex_conflict.c1
-rw-r--r--clang/test/Sema/builtins-wasm.c17
18 files changed, 785 insertions, 371 deletions
diff --git a/clang/test/AST/ByteCode/cxx11.cpp b/clang/test/AST/ByteCode/cxx11.cpp
index 7aecf23b..08caca0 100644
--- a/clang/test/AST/ByteCode/cxx11.cpp
+++ b/clang/test/AST/ByteCode/cxx11.cpp
@@ -330,3 +330,33 @@ namespace ReadMutableInCopyCtor {
// both-note {{read of mutable member 'u'}} \
// both-note {{in call to 'G(g1)'}}
}
+
+namespace GH150709 {
+ struct C { };
+ struct D : C {
+ constexpr int f() const { return 1; };
+ };
+ struct E : C { };
+ struct F : D { };
+ struct G : E { };
+
+ constexpr C c1, c2[2];
+ constexpr D d1, d2[2];
+ constexpr E e1, e2[2];
+ constexpr F f;
+ constexpr G g;
+
+ constexpr auto mp = static_cast<int (C::*)() const>(&D::f);
+
+ // sanity checks for fix of GH150709 (unchanged behavior)
+ static_assert((c1.*mp)() == 1, ""); // both-error {{constant expression}}
+ static_assert((d1.*mp)() == 1, "");
+ static_assert((f.*mp)() == 1, "");
+ static_assert((c2[0].*mp)() == 1, ""); // ref-error {{constant expression}}
+ static_assert((d2[0].*mp)() == 1, "");
+
+ // incorrectly undiagnosed before fix of GH150709
+ static_assert((e1.*mp)() == 1, ""); // ref-error {{constant expression}}
+ static_assert((e2[0].*mp)() == 1, ""); // ref-error {{constant expression}}
+ static_assert((g.*mp)() == 1, ""); // ref-error {{constant expression}}
+}
diff --git a/clang/test/AST/ByteCode/cxx2a.cpp b/clang/test/AST/ByteCode/cxx2a.cpp
index ac2f988..744c99e 100644
--- a/clang/test/AST/ByteCode/cxx2a.cpp
+++ b/clang/test/AST/ByteCode/cxx2a.cpp
@@ -225,3 +225,17 @@ namespace Dtor {
static_assert(pseudo(true, false)); // both-error {{constant expression}} both-note {{in call}}
static_assert(pseudo(false, true));
}
+
+namespace GH150705 {
+ struct A { };
+ struct B : A { };
+ struct C : A {
+ constexpr virtual int foo() const { return 0; }
+ };
+
+ constexpr auto p = &C::foo;
+ constexpr auto q = static_cast<int (A::*)() const>(p);
+ constexpr B b;
+ constexpr const A& a = b;
+ constexpr auto x = (a.*q)(); // both-error {{constant expression}}
+}
diff --git a/clang/test/CIR/CodeGen/complex-compound-assignment.cpp b/clang/test/CIR/CodeGen/complex-compound-assignment.cpp
index 35a8aa6..8245025 100644
--- a/clang/test/CIR/CodeGen/complex-compound-assignment.cpp
+++ b/clang/test/CIR/CodeGen/complex-compound-assignment.cpp
@@ -286,3 +286,211 @@ void foo4() {
// CXX_OGCG: %[[C_IMAG_PTR:.*]] = getelementptr inbounds nuw { i32, i32 }, ptr %[[C_ADDR]], i32 0, i32 1
// CXX_OGCG: store i32 %[[B_REAL]], ptr %[[C_REAL_PTR]], align 4
// CXX_OGCG: store i32 %[[B_IMAG]], ptr %[[C_IMAG_PTR]], align 4
+
+void foo5() {
+ float _Complex a;
+ float b;
+ a += b;
+}
+
+// CIR: %[[A_ADDR:.*]] = cir.alloca !cir.complex<!cir.float>, !cir.ptr<!cir.complex<!cir.float>>, ["a"]
+// CIR: %[[B_ADDR:.*]] = cir.alloca !cir.float, !cir.ptr<!cir.float>, ["b"]
+// CIR: %[[TMP_B:.*]] = cir.load{{.*}} %[[B_ADDR]] : !cir.ptr<!cir.float>, !cir.float
+// CIR: %[[CONST_ZERO:.*]] = cir.const #cir.fp<0.000000e+00> : !cir.float
+// CIR: %[[COMPLEX_B:.*]] = cir.complex.create %[[TMP_B]], %[[CONST_ZERO]] : !cir.float -> !cir.complex<!cir.float>
+// CIR: %[[TMP_A:.*]] = cir.load{{.*}} %[[A_ADDR]] : !cir.ptr<!cir.complex<!cir.float>>, !cir.complex<!cir.float>
+// CIR: %[[RESULT:.*]] = cir.complex.add %[[TMP_A]], %[[COMPLEX_B]] : !cir.complex<!cir.float>
+// CIR: cir.store{{.*}} %[[RESULT]], %[[A_ADDR]] : !cir.complex<!cir.float>, !cir.ptr<!cir.complex<!cir.float>>
+
+// LLVM: %[[A_ADDR:.*]] = alloca { float, float }, i64 1, align 4
+// LLVM: %[[B_ADDR:.*]] = alloca float, i64 1, align 4
+// LLVM: %[[TMP_B:.*]] = load float, ptr %[[B_ADDR]], align 4
+// LLVM: %[[TMP_COMPLEX_B:.*]] = insertvalue { float, float } {{.*}}, float %[[TMP_B]], 0
+// LLVM: %[[COMPLEX_B:.*]] = insertvalue { float, float } %[[TMP_COMPLEX_B]], float 0.000000e+00, 1
+// LLVM: %[[TMP_A:.*]] = load { float, float }, ptr %[[A_ADDR]], align 4
+// LLVM: %[[A_REAL:.*]] = extractvalue { float, float } %[[TMP_A]], 0
+// LLVM: %[[A_IMAG:.*]] = extractvalue { float, float } %[[TMP_A]], 1
+// LLVM: %[[B_REAL:.*]] = extractvalue { float, float } %[[COMPLEX_B]], 0
+// LLVM: %[[B_IMAG:.*]] = extractvalue { float, float } %[[COMPLEX_B]], 1
+// LLVM: %[[ADD_REAL:.*]] = fadd float %[[A_REAL]], %[[B_REAL]]
+// LLVM: %[[ADD_IMAG:.*]] = fadd float %[[A_IMAG]], %[[B_IMAG]]
+// LLVM: %[[TMP_RESULT:.*]] = insertvalue { float, float } poison, float %[[ADD_REAL]], 0
+// LLVM: %[[RESULT:.*]] = insertvalue { float, float } %[[TMP_RESULT]], float %[[ADD_IMAG]], 1
+// LLVM: store { float, float } %[[RESULT]], ptr %[[A_ADDR]], align 4
+
+// OGCG: %[[A_ADDR:.*]] = alloca { float, float }, align 4
+// OGCG: %[[B_ADDR:.*]] = alloca float, align 4
+// OGCG: %[[TMP_B:.*]] = load float, ptr %[[B_ADDR]], align 4
+// OGCG: %[[A_REAL_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[A_ADDR]], i32 0, i32 0
+// OGCG: %[[A_REAL:.*]] = load float, ptr %[[A_REAL_PTR]], align 4
+// OGCG: %[[A_IMAG_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[A_ADDR]], i32 0, i32 1
+// OGCG: %[[A_IMAG:.*]] = load float, ptr %[[A_IMAG_PTR]], align 4
+// OGCG: %[[ADD_REAL:.*]] = fadd float %[[A_REAL]], %[[TMP_B]]
+// OGCG: %[[A_REAL_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[A_ADDR]], i32 0, i32 0
+// OGCG: %[[A_IMAG_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[A_ADDR]], i32 0, i32 1
+// OGCG: store float %[[ADD_REAL]], ptr %[[A_REAL_PTR]], align 4
+// OGCG: store float %[[A_IMAG]], ptr %[[A_IMAG_PTR]], align 4
+
+void foo6() {
+ int _Complex a;
+ int _Complex b;
+ b *= a;
+}
+
+// CIR: %[[A_ADDR:.*]] = cir.alloca !cir.complex<!s32i>, !cir.ptr<!cir.complex<!s32i>>, ["a"]
+// CIR: %[[B_ADDR:.*]] = cir.alloca !cir.complex<!s32i>, !cir.ptr<!cir.complex<!s32i>>, ["b"]
+// CIR: %[[TMP_A:.*]] = cir.load{{.*}} %[[A_ADDR]] : !cir.ptr<!cir.complex<!s32i>>, !cir.complex<!s32i>
+// CIR: %[[TMP_B:.*]] = cir.load{{.*}} %[[B_ADDR]] : !cir.ptr<!cir.complex<!s32i>>, !cir.complex<!s32i>
+// CIR: %[[B_REAL:.*]] = cir.complex.real %[[TMP_B]] : !cir.complex<!s32i> -> !s32i
+// CIR: %[[B_IMAG:.*]] = cir.complex.imag %[[TMP_B]] : !cir.complex<!s32i> -> !s32i
+// CIR: %[[A_REAL:.*]] = cir.complex.real %[[TMP_A]] : !cir.complex<!s32i> -> !s32i
+// CIR: %[[A_IMAG:.*]] = cir.complex.imag %[[TMP_A]] : !cir.complex<!s32i> -> !s32i
+// CIR: %[[MUL_BR_AR:.*]] = cir.binop(mul, %[[B_REAL]], %[[A_REAL]]) : !s32i
+// CIR: %[[MUL_BI_AI:.*]] = cir.binop(mul, %[[B_IMAG]], %[[A_IMAG]]) : !s32i
+// CIR: %[[MUL_BR_AI:.*]] = cir.binop(mul, %[[B_REAL]], %[[A_IMAG]]) : !s32i
+// CIR: %[[MUL_BI_AR:.*]] = cir.binop(mul, %[[B_IMAG]], %[[A_REAL]]) : !s32i
+// CIR: %[[RESULT_REAL:.*]] = cir.binop(sub, %[[MUL_BR_AR]], %[[MUL_BI_AI]]) : !s32i
+// CIR: %[[RESULT_IMAG:.*]] = cir.binop(add, %[[MUL_BR_AI]], %[[MUL_BI_AR]]) : !s32i
+// CIR: %[[RESULT:.*]] = cir.complex.create %[[RESULT_REAL]], %[[RESULT_IMAG]] : !s32i -> !cir.complex<!s32i>
+// CIR: cir.store{{.*}} %[[RESULT]], %[[B_ADDR]] : !cir.complex<!s32i>, !cir.ptr<!cir.complex<!s32i>>
+
+// LLVM: %[[A_ADDR:.*]] = alloca { i32, i32 }, i64 1, align 4
+// LLVM: %[[B_ADDR:.*]] = alloca { i32, i32 }, i64 1, align 4
+// LLVM: %[[TMP_A:.*]] = load { i32, i32 }, ptr %[[A_ADDR]], align 4
+// LLVM: %[[TMP_B:.*]] = load { i32, i32 }, ptr %[[B_ADDR]], align 4
+// LLVM: %[[B_REAL:.*]] = extractvalue { i32, i32 } %[[TMP_B]], 0
+// LLVM: %[[B_IMAG:.*]] = extractvalue { i32, i32 } %[[TMP_B]], 1
+// LLVM: %[[A_REAL:.*]] = extractvalue { i32, i32 } %[[TMP_A]], 0
+// LLVM: %[[A_IMAG:.*]] = extractvalue { i32, i32 } %[[TMP_A]], 1
+// LLVM: %[[MUL_BR_AR:.*]] = mul i32 %[[B_REAL]], %[[A_REAL]]
+// LLVM: %[[MUL_BI_AI:.*]] = mul i32 %[[B_IMAG]], %[[A_IMAG]]
+// LLVM: %[[MUL_BR_AI:.*]] = mul i32 %[[B_REAL]], %[[A_IMAG]]
+// LLVM: %[[MUL_BI_AR:.*]] = mul i32 %[[B_IMAG]], %[[A_REAL]]
+// LLVM: %[[RESULT_REAL:.*]] = sub i32 %[[MUL_BR_AR]], %[[MUL_BI_AI]]
+// LLVM: %[[RESULT_IMAG:.*]] = add i32 %[[MUL_BR_AI]], %[[MUL_BI_AR]]
+// LLVM: %[[MUL_A_B:.*]] = insertvalue { i32, i32 } {{.*}}, i32 %[[RESULT_REAL]], 0
+// LLVM: %[[RESULT:.*]] = insertvalue { i32, i32 } %[[MUL_A_B]], i32 %[[RESULT_IMAG]], 1
+// LLVM: store { i32, i32 } %[[RESULT]], ptr %[[B_ADDR]], align 4
+
+// OGCG: %[[A_ADDR:.*]] = alloca { i32, i32 }, align 4
+// OGCG: %[[B_ADDR:.*]] = alloca { i32, i32 }, align 4
+// OGCG: %[[A_REAL_PTR:.*]] = getelementptr inbounds nuw { i32, i32 }, ptr %[[A_ADDR]], i32 0, i32 0
+// OGCG: %[[A_REAL:.*]] = load i32, ptr %[[A_REAL_PTR]], align 4
+// OGCG: %[[A_IMAG_PTR:.*]] = getelementptr inbounds nuw { i32, i32 }, ptr %[[A_ADDR]], i32 0, i32 1
+// OGCG: %[[A_IMAG:.*]] = load i32, ptr %[[A_IMAG_PTR]], align 4
+// OGCG: %[[B_REAL_PTR:.*]] = getelementptr inbounds nuw { i32, i32 }, ptr %[[B_ADDR]], i32 0, i32 0
+// OGCG: %[[B_REAL:.*]] = load i32, ptr %[[B_REAL_PTR]], align 4
+// OGCG: %[[B_IMAG_PTR:.*]] = getelementptr inbounds nuw { i32, i32 }, ptr %[[B_ADDR]], i32 0, i32 1
+// OGCG: %[[B_IMAG:.*]] = load i32, ptr %[[B_IMAG_PTR]], align 4
+// OGCG: %[[MUL_BR_AR:.*]] = mul i32 %[[B_REAL]], %[[A_REAL]]
+// OGCG: %[[MUL_BI_AI:.*]] = mul i32 %[[B_IMAG]], %[[A_IMAG]]
+// OGCG: %[[RESULT_REAL:.*]] = sub i32 %[[MUL_BR_AR]], %[[MUL_BI_AI]]
+// OGCG: %[[MUL_BI_AR:.*]] = mul i32 %[[B_IMAG]], %[[A_REAL]]
+// OGCG: %[[MUL_BR_AI:.*]] = mul i32 %[[B_REAL]], %[[A_IMAG]]
+// OGCG: %[[RESULT_IMAG:.*]] = add i32 %[[MUL_BI_AR]], %[[MUL_BR_AI]]
+// OGCG: %[[B_REAL_PTR:.*]] = getelementptr inbounds nuw { i32, i32 }, ptr %[[B_ADDR]], i32 0, i32 0
+// OGCG: %[[B_IMAG_PTR:.*]] = getelementptr inbounds nuw { i32, i32 }, ptr %[[B_ADDR]], i32 0, i32 1
+// OGCG: store i32 %[[RESULT_REAL]], ptr %[[B_REAL_PTR]], align 4
+// OGCG: store i32 %[[RESULT_IMAG]], ptr %[[B_IMAG_PTR]], align 4
+
+void foo7() {
+ float _Complex a;
+ float _Complex b;
+ b *= a;
+}
+
+// CIR: %[[A_ADDR:.*]] = cir.alloca !cir.complex<!cir.float>, !cir.ptr<!cir.complex<!cir.float>>, ["a"]
+// CIR: %[[B_ADDR:.*]] = cir.alloca !cir.complex<!cir.float>, !cir.ptr<!cir.complex<!cir.float>>, ["b"]
+// CIR: %[[TMP_A:.*]] = cir.load{{.*}} %[[A_ADDR]] : !cir.ptr<!cir.complex<!cir.float>>, !cir.complex<!cir.float>
+// CIR: %[[TMP_B:.*]] = cir.load{{.*}} %[[B_ADDR]] : !cir.ptr<!cir.complex<!cir.float>>, !cir.complex<!cir.float>
+// CIR: %[[B_REAL:.*]] = cir.complex.real %[[TMP_B]] : !cir.complex<!cir.float> -> !cir.float
+// CIR: %[[B_IMAG:.*]] = cir.complex.imag %[[TMP_B]] : !cir.complex<!cir.float> -> !cir.float
+// CIR: %[[A_REAL:.*]] = cir.complex.real %[[TMP_A]] : !cir.complex<!cir.float> -> !cir.float
+// CIR: %[[A_IMAG:.*]] = cir.complex.imag %[[TMP_A]] : !cir.complex<!cir.float> -> !cir.float
+// CIR: %[[MUL_BR_AR:.*]] = cir.binop(mul, %[[B_REAL]], %[[A_REAL]]) : !cir.float
+// CIR: %[[MUL_BI_AI:.*]] = cir.binop(mul, %[[B_IMAG]], %[[A_IMAG]]) : !cir.float
+// CIR: %[[MUL_BR_AI:.*]] = cir.binop(mul, %[[B_REAL]], %[[A_IMAG]]) : !cir.float
+// CIR: %[[MUL_BI_AR:.*]] = cir.binop(mul, %[[B_IMAG]], %[[A_REAL]]) : !cir.float
+// CIR: %[[C_REAL:.*]] = cir.binop(sub, %[[MUL_BR_AR]], %[[MUL_BI_AI]]) : !cir.float
+// CIR: %[[C_IMAG:.*]] = cir.binop(add, %[[MUL_BR_AI]], %[[MUL_BI_AR]]) : !cir.float
+// CIR: %[[COMPLEX:.*]] = cir.complex.create %[[C_REAL]], %[[C_IMAG]] : !cir.float -> !cir.complex<!cir.float>
+// CIR: %[[IS_C_REAL_NAN:.*]] = cir.cmp(ne, %[[C_REAL]], %[[C_REAL]]) : !cir.float, !cir.bool
+// CIR: %[[IS_C_IMAG_NAN:.*]] = cir.cmp(ne, %[[C_IMAG]], %[[C_IMAG]]) : !cir.float, !cir.bool
+// CIR: %[[CONST_FALSE:.*]] = cir.const #false
+// CIR: %[[SELECT_CONDITION:.*]] = cir.select if %[[IS_C_REAL_NAN]] then %[[IS_C_IMAG_NAN]] else %[[CONST_FALSE]] : (!cir.bool, !cir.bool, !cir.bool) -> !cir.bool
+// CIR: %[[RESULT:.*]] = cir.ternary(%[[SELECT_CONDITION]], true {
+// CIR: %[[LIBC_COMPLEX:.*]] = cir.call @__mulsc3(%[[B_REAL]], %[[B_IMAG]], %[[A_REAL]], %[[A_IMAG]]) : (!cir.float, !cir.float, !cir.float, !cir.float) -> !cir.complex<!cir.float>
+// CIR: cir.yield %[[LIBC_COMPLEX]] : !cir.complex<!cir.float>
+// CIR: }, false {
+// CIR: cir.yield %[[COMPLEX]] : !cir.complex<!cir.float>
+// CIR: }) : (!cir.bool) -> !cir.complex<!cir.float>
+// CIR: cir.store{{.*}} %[[RESULT]], %[[B_ADDR]] : !cir.complex<!cir.float>, !cir.ptr<!cir.complex<!cir.float>>
+
+// LLVM: %[[A_ADDR:.*]] = alloca { float, float }, i64 1, align 4
+// LLVM: %[[B_ADDR:.*]] = alloca { float, float }, i64 1, align 4
+// LLVM: %[[TMP_A:.*]] = load { float, float }, ptr %[[A_ADDR]], align 4
+// LLVM: %[[TMP_B:.*]] = load { float, float }, ptr %[[B_ADDR]], align 4
+// LLVM: %[[B_REAL:.*]] = extractvalue { float, float } %[[TMP_B]], 0
+// LLVM: %[[B_IMAG:.*]] = extractvalue { float, float } %[[TMP_B]], 1
+// LLVM: %[[A_REAL:.*]] = extractvalue { float, float } %[[TMP_A]], 0
+// LLVM: %[[A_IMAG:.*]] = extractvalue { float, float } %[[TMP_A]], 1
+// LLVM: %[[MUL_BR_AR:.*]] = fmul float %[[B_REAL]], %[[A_REAL]]
+// LLVM: %[[MUL_BI_AI:.*]] = fmul float %[[B_IMAG]], %[[A_IMAG]]
+// LLVM: %[[MUL_BR_AI:.*]] = fmul float %[[B_REAL]], %[[A_IMAG]]
+// LLVM: %[[MUL_BI_AR:.*]] = fmul float %[[B_IMAG]], %[[A_REAL]]
+// LLVM: %[[C_REAL:.*]] = fsub float %[[MUL_BR_AR]], %[[MUL_BI_AI]]
+// LLVM: %[[C_IMAG:.*]] = fadd float %[[MUL_BR_AI]], %[[MUL_BI_AR]]
+// LLVM: %[[MUL_A_B:.*]] = insertvalue { float, float } {{.*}}, float %[[C_REAL]], 0
+// LLVM: %[[COMPLEX:.*]] = insertvalue { float, float } %[[MUL_A_B]], float %[[C_IMAG]], 1
+// LLVM: %[[IS_C_REAL_NAN:.*]] = fcmp une float %[[C_REAL]], %[[C_REAL]]
+// LLVM: %[[IS_C_IMAG_NAN:.*]] = fcmp une float %[[C_IMAG]], %[[C_IMAG]]
+// LLVM: %[[SELECT_CONDITION:.*]] = and i1 %[[IS_C_REAL_NAN]], %[[IS_C_IMAG_NAN]]
+// LLVM: br i1 %[[SELECT_CONDITION]], label %[[THEN_LABEL:.*]], label %[[ELSE_LABEL:.*]]
+// LLVM: [[THEN_LABEL]]:
+// LLVM: %[[LIBC_COMPLEX:.*]] = call { float, float } @__mulsc3(float %[[B_REAL]], float %[[B_IMAG]], float %[[A_REAL]], float %[[A_IMAG]])
+// LLVM: br label %[[PHI_BRANCH:.*]]
+// LLVM: [[ELSE_LABEL]]:
+// LLVM: br label %[[PHI_BRANCH:]]
+// LLVM: [[PHI_BRANCH:]]:
+// LLVM: %[[RESULT:.*]] = phi { float, float } [ %[[COMPLEX]], %[[ELSE_LABEL]] ], [ %[[LIBC_COMPLEX]], %[[THEN_LABEL]] ]
+// LLVM: br label %[[END_LABEL:.*]]
+// LLVM: [[END_LABEL]]:
+// LLVM: store { float, float } %[[RESULT]], ptr %[[B_ADDR]], align 4
+
+// OGCG: %[[A_ADDR:.*]] = alloca { float, float }, align 4
+// OGCG: %[[B_ADDR:.*]] = alloca { float, float }, align 4
+// OGCG: %[[COMPLEX_CALL_ADDR:.*]] = alloca { float, float }, align 4
+// OGCG: %[[A_REAL_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[A_ADDR]], i32 0, i32 0
+// OGCG: %[[A_REAL:.*]] = load float, ptr %[[A_REAL_PTR]], align 4
+// OGCG: %[[A_IMAG_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[A_ADDR]], i32 0, i32 1
+// OGCG: %[[A_IMAG:.*]] = load float, ptr %[[A_IMAG_PTR]], align 4
+// OGCG: %[[B_REAL_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[B_ADDR]], i32 0, i32 0
+// OGCG: %[[B_REAL:.*]] = load float, ptr %[[B_REAL_PTR]], align 4
+// OGCG: %[[B_IMAG_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[B_ADDR]], i32 0, i32 1
+// OGCG: %[[B_IMAG:.*]] = load float, ptr %[[B_IMAG_PTR]], align 4
+// OGCG: %[[MUL_BR_AR:.*]] = fmul float %[[B_REAL]], %[[A_REAL]]
+// OGCG: %[[MUL_BI_AI:.*]] = fmul float %[[B_IMAG]], %[[A_IMAG]]
+// OGCG: %[[MUL_BR_AI:.*]] = fmul float %[[B_REAL]], %[[A_IMAG]]
+// OGCG: %[[MUL_BI_AR:.*]] = fmul float %[[B_IMAG]], %[[A_REAL]]
+// OGCG: %[[C_REAL:.*]] = fsub float %[[MUL_BR_AR]], %[[MUL_BI_AI]]
+// OGCG: %[[C_IMAG:.*]] = fadd float %[[MUL_BR_AI]], %[[MUL_BI_AR]]
+// OGCG: %[[IS_C_REAL_NAN:.*]] = fcmp uno float %[[C_REAL]], %[[C_REAL]]
+// OGCG: br i1 %[[IS_C_REAL_NAN]], label %[[COMPLEX_IS_IMAG_NAN:.*]], label %[[END_LABEL:.*]], !prof !2
+// OGCG: [[COMPLEX_IS_IMAG_NAN]]:
+// OGCG: %[[IS_C_IMAG_NAN:.*]] = fcmp uno float %[[C_IMAG]], %[[C_IMAG]]
+// OGCG: br i1 %[[IS_C_IMAG_NAN]], label %[[COMPLEX_LIB_CALL:.*]], label %[[END_LABEL]], !prof !2
+// OGCG: [[COMPLEX_LIB_CALL]]:
+// OGCG: %[[CALL_RESULT:.*]] = call{{.*}} <2 x float> @__mulsc3(float noundef %[[B_REAL]], float noundef %[[B_IMAG]], float noundef %[[A_REAL]], float noundef %[[A_IMAG]])
+// OGCG: store <2 x float> %[[CALL_RESULT]], ptr %[[COMPLEX_CALL_ADDR]], align 4
+// OGCG: %[[COMPLEX_CALL_REAL_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[COMPLEX_CALL_ADDR]], i32 0, i32 0
+// OGCG: %[[COMPLEX_CALL_REAL:.*]] = load float, ptr %[[COMPLEX_CALL_REAL_PTR]], align 4
+// OGCG: %[[COMPLEX_CALL_IMAG_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[COMPLEX_CALL_ADDR]], i32 0, i32 1
+// OGCG: %[[COMPLEX_CALL_IMAG:.*]] = load float, ptr %[[COMPLEX_CALL_IMAG_PTR]], align 4
+// OGCG: br label %[[END_LABEL]]
+// OGCG: [[END_LABEL]]:
+// OGCG: %[[FINAL_REAL:.*]] = phi float [ %[[C_REAL]], %[[ENTRY:.*]] ], [ %[[C_REAL]], %[[COMPLEX_IS_IMAG_NAN]] ], [ %[[COMPLEX_CALL_REAL]], %[[COMPLEX_LIB_CALL]] ]
+// OGCG: %[[FINAL_IMAG:.*]] = phi float [ %[[C_IMAG]], %[[ENTRY]] ], [ %[[C_IMAG]], %[[COMPLEX_IS_IMAG_NAN]] ], [ %[[COMPLEX_CALL_IMAG]], %[[COMPLEX_LIB_CALL]] ]
+// OGCG: %[[C_REAL_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[B_ADDR]], i32 0, i32 0
+// OGCG: %[[C_IMAG_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[B_ADDR]], i32 0, i32 1
+// OGCG: store float %[[FINAL_REAL]], ptr %[[C_REAL_PTR]], align 4
+// OGCG: store float %[[FINAL_IMAG]], ptr %[[C_IMAG_PTR]], align 4
diff --git a/clang/test/CIR/CodeGen/module-asm.c b/clang/test/CIR/CodeGen/module-asm.c
new file mode 100644
index 0000000..e6cec5e
--- /dev/null
+++ b/clang/test/CIR/CodeGen/module-asm.c
@@ -0,0 +1,6 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s
+
+// CHECK: cir.module_asm = [".globl bar", ".globl foo"]
+__asm (".globl bar");
+__asm (".globl foo");
diff --git a/clang/test/CIR/IR/invalid-vtable.cir b/clang/test/CIR/IR/invalid-vtable.cir
new file mode 100644
index 0000000..b3afb581
--- /dev/null
+++ b/clang/test/CIR/IR/invalid-vtable.cir
@@ -0,0 +1,9 @@
+// RUN: cir-opt %s -verify-diagnostics
+
+!s8i = !cir.int<s, 8>
+!u32i = !cir.int<u, 32>
+cir.func @reference_unknown_vtable() {
+ // expected-error @below {{'cir.vtable.address_point' op 'some_vtable' does not reference a valid cir.global}}
+ %0 = cir.vtable.address_point(@some_vtable, address_point = <index = 0, offset = 2>) : !cir.vptr
+ cir.return
+}
diff --git a/clang/test/CIR/IR/vtable-addrpt.cir b/clang/test/CIR/IR/vtable-addrpt.cir
new file mode 100644
index 0000000..0b809cc
--- /dev/null
+++ b/clang/test/CIR/IR/vtable-addrpt.cir
@@ -0,0 +1,23 @@
+// RUN: cir-opt %s | FileCheck %s
+
+// Test the parsing and printing of a constructor that uses a vtable address_point op.
+
+!u32i = !cir.int<u, 32>
+!u8i = !cir.int<u, 8>
+!rec_anon_struct = !cir.record<struct {!cir.array<!cir.ptr<!u8i> x 4>}>
+!rec_S = !cir.record<struct "S" {!cir.vptr}>
+
+module {
+ cir.global "private" external @_ZTV1S : !rec_anon_struct {alignment = 8 : i64}
+ cir.func @_ZN1SC2Ev(%arg0: !cir.ptr<!rec_S>) {
+ %0 = cir.alloca !cir.ptr<!rec_S>, !cir.ptr<!cir.ptr<!rec_S>>, ["this", init] {alignment = 8 : i64}
+ cir.store %arg0, %0 : !cir.ptr<!rec_S>, !cir.ptr<!cir.ptr<!rec_S>>
+ %1 = cir.load %0 : !cir.ptr<!cir.ptr<!rec_S>>, !cir.ptr<!rec_S>
+ %2 = cir.vtable.address_point(@_ZTV1S, address_point = <index = 0, offset = 2>) : !cir.vptr
+ %3 = cir.cast(bitcast, %1 : !cir.ptr<!rec_S>), !cir.ptr<!cir.vptr>
+ cir.store align(8) %2, %3 : !cir.vptr, !cir.ptr<!cir.vptr>
+ cir.return
+ }
+}
+
+// CHECK: cir.vtable.address_point(@_ZTV1S, address_point = <index = 0, offset = 2>) : !cir.vptr
diff --git a/clang/test/CIR/Lowering/module-asm.cir b/clang/test/CIR/Lowering/module-asm.cir
new file mode 100644
index 0000000..b802cda
--- /dev/null
+++ b/clang/test/CIR/Lowering/module-asm.cir
@@ -0,0 +1,11 @@
+// RUN: cir-opt %s -cir-to-llvm -o %t.cir
+// RUN: FileCheck %s --input-file=%t.cir
+
+// RUN: cir-translate -cir-to-llvmir --disable-cc-lowering -o %t.ll %s
+// RUN: FileCheck -check-prefix=LLVM --input-file=%t.ll %s
+
+// CHECK: llvm.module_asm = [".globl bar", ".globl foo"]
+// LLVM: module asm ".globl bar"
+// LLVM: module asm ".globl foo"
+module attributes {cir.module_asm = [".globl bar", ".globl foo"]} {
+}
diff --git a/clang/test/CodeGen/WebAssembly/builtins-test-fp-sig.c b/clang/test/CodeGen/WebAssembly/builtins-test-fp-sig.c
new file mode 100644
index 0000000..88447f7
--- /dev/null
+++ b/clang/test/CodeGen/WebAssembly/builtins-test-fp-sig.c
@@ -0,0 +1,70 @@
+// RUN: %clang_cc1 -triple wasm32-unknown-unknown -target-feature +gc -O3 -emit-llvm -DSINGLE_VALUE -o - %s | FileCheck %s -check-prefixes WEBASSEMBLY,WEBASSEMBLY-SV
+// RUN: %clang_cc1 -triple wasm64-unknown-unknown -target-feature +gc -O3 -emit-llvm -DSINGLE_VALUE -o - %s | FileCheck %s -check-prefixes WEBASSEMBLY,WEBASSEMBLY-SV
+// RUN: %clang_cc1 -triple wasm64-unknown-unknown -target-feature +gc -target-abi experimental-mv -O3 -emit-llvm -o - %s 2>&1 | FileCheck %s -check-prefixes WEBASSEMBLY
+// RUN: not %clang_cc1 -triple wasm64-unknown-unknown -O3 -emit-llvm -o - %s 2>&1 | FileCheck %s -check-prefixes MISSING-GC
+
+void use(int);
+
+typedef void (*Fvoid)(void);
+void test_function_pointer_signature_void(Fvoid func) {
+ // MISSING-GC: error: '__builtin_wasm_test_function_pointer_signature' needs target feature gc
+ // WEBASSEMBLY: %0 = tail call i32 (ptr, ...) @llvm.wasm.ref.test.func(ptr %func, token poison)
+ use(__builtin_wasm_test_function_pointer_signature(func));
+}
+
+typedef float (*Ffloats)(float, double, int);
+void test_function_pointer_signature_floats(Ffloats func) {
+ // WEBASSEMBLY: %0 = tail call i32 (ptr, ...) @llvm.wasm.ref.test.func(ptr %func, float poison, token poison, float poison, double poison, i32 poison)
+ use(__builtin_wasm_test_function_pointer_signature(func));
+}
+
+typedef void (*Fpointers)(Fvoid, Ffloats, void*, int*, int***, char[5]);
+void test_function_pointer_signature_pointers(Fpointers func) {
+ // WEBASSEMBLY: %0 = tail call i32 (ptr, ...) @llvm.wasm.ref.test.func(ptr %func, token poison, ptr poison, ptr poison, ptr poison, ptr poison, ptr poison, ptr poison)
+ use(__builtin_wasm_test_function_pointer_signature(func));
+}
+
+typedef void (*FVarArgs)(int, ...);
+void test_function_pointer_signature_varargs(FVarArgs func) {
+ // WEBASSEMBLY: %0 = tail call i32 (ptr, ...) @llvm.wasm.ref.test.func(ptr %func, token poison, i32 poison, ptr poison)
+ use(__builtin_wasm_test_function_pointer_signature(func));
+}
+
+typedef __externref_t (*FExternRef)(__externref_t, __externref_t);
+void test_function_pointer_externref(FExternRef func) {
+ // WEBASSEMBLY: %0 = tail call i32 (ptr, ...) @llvm.wasm.ref.test.func(ptr %func, ptr addrspace(10) poison, token poison, ptr addrspace(10) poison, ptr addrspace(10) poison)
+ use(__builtin_wasm_test_function_pointer_signature(func));
+}
+
+typedef __funcref Fpointers (*FFuncRef)(__funcref Fvoid, __funcref Ffloats);
+void test_function_pointer_funcref(FFuncRef func) {
+ // WEBASSEMBLY: %0 = tail call i32 (ptr, ...) @llvm.wasm.ref.test.func(ptr %func, ptr addrspace(20) poison, token poison, ptr addrspace(20) poison, ptr addrspace(20) poison)
+ use(__builtin_wasm_test_function_pointer_signature(func));
+}
+
+#ifdef SINGLE_VALUE
+// Some tests that we get struct ABIs correct. There is no special code in
+// __builtin_wasm_test_function_pointer_signature for this, it gets handled by
+// the normal type lowering code.
+// Single element structs are unboxed, multi element structs are passed on
+// stack.
+typedef struct {double x;} (*Fstructs1)(struct {double x;}, struct {float x;}, struct {double x; float y;});
+void test_function_pointer_structs1(Fstructs1 func) {
+ // WEBASSEMBLY-SV: %0 = tail call i32 (ptr, ...) @llvm.wasm.ref.test.func(ptr %func, double poison, token poison, double poison, float poison, ptr poison)
+ use(__builtin_wasm_test_function_pointer_signature(func));
+}
+
+// Two element return struct ==> return ptr on stack
+typedef struct {double x; double y;} (*Fstructs2)(void);
+void test_function_pointer_structs2(Fstructs2 func) {
+ // WEBASSEMBLY-SV: %0 = tail call i32 (ptr, ...) @llvm.wasm.ref.test.func(ptr %func, token poison, ptr poison)
+ use(__builtin_wasm_test_function_pointer_signature(func));
+}
+
+// Return union ==> return ptr on stack, one element union => unboxed
+typedef union {double x; float y;} (*FUnions)(union {double x; float y;}, union {double x;});
+void test_function_pointer_unions(FUnions func) {
+ // WEBASSEMBLY-SV: %0 = tail call i32 (ptr, ...) @llvm.wasm.ref.test.func(ptr %func, token poison, ptr poison, ptr poison, double poison)
+ use(__builtin_wasm_test_function_pointer_signature(func));
+}
+#endif
diff --git a/clang/test/CodeGen/X86/avx-builtins.c b/clang/test/CodeGen/X86/avx-builtins.c
index 2d43764..e2c9f96 100644
--- a/clang/test/CodeGen/X86/avx-builtins.c
+++ b/clang/test/CodeGen/X86/avx-builtins.c
@@ -20,12 +20,14 @@ __m256d test_mm256_add_pd(__m256d A, __m256d B) {
// CHECK: fadd <4 x double>
return _mm256_add_pd(A, B);
}
+TEST_CONSTEXPR(match_m256d( _mm256_add_pd((__m256d){-4.0, -5.0, +6.0, +7.0}, (__m256d){-4.0, -5.0, +6.0, +7.0}), -8.0, -10.0, +12.0, +14.0));
__m256 test_mm256_add_ps(__m256 A, __m256 B) {
// CHECK-LABEL: test_mm256_add_ps
// CHECK: fadd <8 x float>
return _mm256_add_ps(A, B);
}
+TEST_CONSTEXPR(match_m256(_mm256_add_ps((__m256){-4.0f, -5.0f, +6.0f, +7.0f, +7.0f, +6.0f, -5.0f, -4.0f}, (__m256){-4.0f, -5.0f, +6.0f, +7.0f, +7.0f, +6.0f, -5.0f, -4.0f}), -8.0f, -10.0f, +12.0f, +14.0f, +14.0f, +12.0f, -10.0f, -8.0f));
__m256d test_mm256_addsub_pd(__m256d A, __m256d B) {
// CHECK-LABEL: test_mm256_addsub_pd
@@ -977,12 +979,14 @@ __m256d test_mm256_div_pd(__m256d A, __m256d B) {
// CHECK: fdiv <4 x double>
return _mm256_div_pd(A, B);
}
+TEST_CONSTEXPR(match_m256d( _mm256_div_pd((__m256d){-4.0, -5.0, +6.0, +7.0}, (__m256d){-1.0, +1.0, -1.0, +1.0}), +4.0, -5.0, -6.0, +7.0));
__m256 test_mm256_div_ps(__m256 A, __m256 B) {
// CHECK-LABEL: test_mm256_div_ps
// CHECK: fdiv <8 x float>
return _mm256_div_ps(A, B);
}
+TEST_CONSTEXPR(match_m256( _mm256_div_ps((__m256){-4.0f, -5.0f, +6.0f, +7.0f, +7.0f, +6.0f, -5.0f, -4.0f}, (__m256){-1.0f, +1.0f, -1.0f, +1.0f, +1.0f, -1.0f, +1.0f, -1.0f}), +4.0f, -5.0f, -6.0f, +7.0f, +7.0f, -6.0f, -5.0f, +4.0f));
__m256 test_mm256_dp_ps(__m256 A, __m256 B) {
// CHECK-LABEL: test_mm256_dp_ps
@@ -1295,12 +1299,14 @@ __m256d test_mm256_mul_pd(__m256d A, __m256d B) {
// CHECK: fmul <4 x double>
return _mm256_mul_pd(A, B);
}
+TEST_CONSTEXPR(match_m256d( _mm256_mul_pd((__m256d){-4.0, -5.0, +6.0, +7.0}, (__m256d){-4.0, -5.0, +6.0, +7.0}), +16.0, +25.0, +36.0, +49.0));
__m256 test_mm256_mul_ps(__m256 A, __m256 B) {
// CHECK-LABEL: test_mm256_mul_ps
// CHECK: fmul <8 x float>
return _mm256_mul_ps(A, B);
}
+TEST_CONSTEXPR(match_m256( _mm256_mul_ps((__m256){-4.0f, -5.0f, +6.0f, +7.0f, +7.0f, +6.0f, -5.0f, -4.0f}, (__m256){-4.0f, -5.0f, +6.0f, +7.0f, +7.0f, +6.0f, -5.0f, -4.0f}), +16.0f, +25.0f, +36.0f, +49.0f, +49.0f, +36.0f, +25.0f, +16.0f));
__m256d test_mm256_or_pd(__m256d A, __m256d B) {
// CHECK-LABEL: test_mm256_or_pd
@@ -1933,12 +1939,14 @@ __m256d test_mm256_sub_pd(__m256d A, __m256d B) {
// CHECK: fsub <4 x double>
return _mm256_sub_pd(A, B);
}
+TEST_CONSTEXPR(match_m256d( _mm256_sub_pd((__m256d){-4.0, -5.0, +6.0, +7.0}, (__m256d){-0.0, +0.0, +2.0, -1.0}), -4.0, -5.0, 4.0, 8.0));
__m256 test_mm256_sub_ps(__m256 A, __m256 B) {
// CHECK-LABEL: test_mm256_sub_ps
// CHECK: fsub <8 x float>
return _mm256_sub_ps(A, B);
}
+TEST_CONSTEXPR(match_m256( _mm256_sub_ps((__m256){-4.0f, -5.0f, +6.0f, +7.0f, +7.0f, +6.0f, -5.0f, -4.0f}, (__m256){-0.0f, +0.0f, +2.0f, -1.0f, -1.0f, +2.0f, +0.0f, -0.0f}), -4.0f, -5.0f, 4.0f, 8.0f, 8.0f, 4.0f, -5.0f, -4.0f));
int test_mm_testc_pd(__m128d A, __m128d B) {
// CHECK-LABEL: test_mm_testc_pd
@@ -2062,24 +2070,28 @@ __m256d test_mm256_unpackhi_pd(__m256d A, __m256d B) {
// CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
return _mm256_unpackhi_pd(A, B);
}
+TEST_CONSTEXPR(match_m256d(_mm256_unpackhi_pd((__m256d){+1.0, +2.0, +3.0, +4.0}, (__m256d){+5.0, +6.0, +7.0, +8.0}), +2.0, +6.0, +4.0, +8.0));
__m256 test_mm256_unpackhi_ps(__m256 A, __m256 B) {
// CHECK-LABEL: test_mm256_unpackhi_ps
// CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
return _mm256_unpackhi_ps(A, B);
}
+TEST_CONSTEXPR(match_m256(_mm256_unpackhi_ps((__m256){+0.0f, +1.0f, +2.0f, +3.0f, +4.0f, +5.0f, +6.0f, +7.0f}, (__m256){+10.0f, +11.0f, +12.0f, +13.0f, +14.0f, +15.0f, +16.0f, +17.0f}), +2.0f, +12.0f, +3.0f, +13.0f, +6.0f, +16.0f, +7.0f, +17.0f));
__m256d test_mm256_unpacklo_pd(__m256d A, __m256d B) {
// CHECK-LABEL: test_mm256_unpacklo_pd
// CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
return _mm256_unpacklo_pd(A, B);
}
+TEST_CONSTEXPR(match_m256d(_mm256_unpacklo_pd((__m256d){+1.0, +2.0, +3.0, +4.0}, (__m256d){+5.0, +6.0, +7.0, +8.0}), +1.0, +5.0, +3.0, +7.0));
__m256 test_mm256_unpacklo_ps(__m256 A, __m256 B) {
// CHECK-LABEL: test_mm256_unpacklo_ps
// CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
return _mm256_unpacklo_ps(A, B);
}
+TEST_CONSTEXPR(match_m256(_mm256_unpacklo_ps((__m256){+0.0f, +1.0f, +2.0f, +3.0f, +4.0f, +5.0f, +6.0f, +7.0f}, (__m256){+10.0f, +11.0f, +12.0f, +13.0f, +14.0f, +15.0f, +16.0f, +17.0f}), +0.0f, +10.0f, +1.0f, +11.0f, +4.0f, +14.0f, +5.0f, +15.0f));
__m256d test_mm256_xor_pd(__m256d A, __m256d B) {
// CHECK-LABEL: test_mm256_xor_pd
diff --git a/clang/test/CodeGen/X86/avx512-reduceIntrin.c b/clang/test/CodeGen/X86/avx512-reduceIntrin.c
index 2ceac3a..4069b46 100644
--- a/clang/test/CodeGen/X86/avx512-reduceIntrin.c
+++ b/clang/test/CodeGen/X86/avx512-reduceIntrin.c
@@ -1,162 +1,174 @@
-// RUN: %clang_cc1 -ffreestanding %s -O0 -triple=x86_64-apple-darwin -target-cpu skylake-avx512 -emit-llvm -o - -Wall -Werror | FileCheck %s
+// RUN: %clang_cc1 -x c -ffreestanding %s -O0 -triple=x86_64-apple-darwin -target-cpu skylake-avx512 -emit-llvm -o - -Wall -Werror | FileCheck %s
+// RUN: %clang_cc1 -x c -ffreestanding %s -O0 -triple=i386-apple-darwin -target-cpu skylake-avx512 -emit-llvm -o - -Wall -Werror | FileCheck %s
+// RUN: %clang_cc1 -x c++ -ffreestanding %s -O0 -triple=x86_64-apple-darwin -target-cpu skylake-avx512 -emit-llvm -o - -Wall -Werror | FileCheck %s
+// RUN: %clang_cc1 -x c++ -ffreestanding %s -O0 -triple=i386-apple-darwin -target-cpu skylake-avx512 -emit-llvm -o - -Wall -Werror | FileCheck %s
#include <immintrin.h>
+#include "builtin_test_helpers.h"
long long test_mm512_reduce_add_epi64(__m512i __W){
-// CHECK-LABEL: @test_mm512_reduce_add_epi64(
-// CHECK: call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %{{.*}})
+// CHECK-LABEL: test_mm512_reduce_add_epi64
+// CHECK: call {{.*}}i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %{{.*}})
return _mm512_reduce_add_epi64(__W);
}
+TEST_CONSTEXPR(_mm512_reduce_add_epi64((__m512i)(__v8di){-4, -3, -2, -1, 0, 1, 2, 3}) == -4);
long long test_mm512_reduce_mul_epi64(__m512i __W){
-// CHECK-LABEL: @test_mm512_reduce_mul_epi64(
-// CHECK: call i64 @llvm.vector.reduce.mul.v8i64(<8 x i64> %{{.*}})
+// CHECK-LABEL: test_mm512_reduce_mul_epi64
+// CHECK: call {{.*}}i64 @llvm.vector.reduce.mul.v8i64(<8 x i64> %{{.*}})
return _mm512_reduce_mul_epi64(__W);
}
+TEST_CONSTEXPR(_mm512_reduce_mul_epi64((__m512i)(__v8di){1, 2, 3, 4, 5, 6, 7, 8}) == 40320);
long long test_mm512_reduce_or_epi64(__m512i __W){
-// CHECK-LABEL: @test_mm512_reduce_or_epi64(
-// CHECK: call i64 @llvm.vector.reduce.or.v8i64(<8 x i64> %{{.*}})
+// CHECK-LABEL: test_mm512_reduce_or_epi64
+// CHECK: call {{.*}}i64 @llvm.vector.reduce.or.v8i64(<8 x i64> %{{.*}})
return _mm512_reduce_or_epi64(__W);
}
+TEST_CONSTEXPR(_mm512_reduce_or_epi64((__m512i)(__v8di){0x100, 0x200, 0x400, 0x800, 0, 0, 0, 0}) == 0xF00);
long long test_mm512_reduce_and_epi64(__m512i __W){
-// CHECK-LABEL: @test_mm512_reduce_and_epi64(
-// CHECK: call i64 @llvm.vector.reduce.and.v8i64(<8 x i64> %{{.*}})
+// CHECK-LABEL: test_mm512_reduce_and_epi64
+// CHECK: call {{.*}}i64 @llvm.vector.reduce.and.v8i64(<8 x i64> %{{.*}})
return _mm512_reduce_and_epi64(__W);
}
+TEST_CONSTEXPR(_mm512_reduce_and_epi64((__m512i)(__v8di){0xFFFF, 0xFF00, 0x00FF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFF00, 0x00FF}) == 0x0000);
long long test_mm512_mask_reduce_add_epi64(__mmask8 __M, __m512i __W){
-// CHECK-LABEL: @test_mm512_mask_reduce_add_epi64(
+// CHECK-LABEL: test_mm512_mask_reduce_add_epi64
// CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
-// CHECK: call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %{{.*}})
+// CHECK: call {{.*}}i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %{{.*}})
return _mm512_mask_reduce_add_epi64(__M, __W);
}
long long test_mm512_mask_reduce_mul_epi64(__mmask8 __M, __m512i __W){
-// CHECK-LABEL: @test_mm512_mask_reduce_mul_epi64(
+// CHECK-LABEL: test_mm512_mask_reduce_mul_epi64
// CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
-// CHECK: call i64 @llvm.vector.reduce.mul.v8i64(<8 x i64> %{{.*}})
+// CHECK: call {{.*}}i64 @llvm.vector.reduce.mul.v8i64(<8 x i64> %{{.*}})
return _mm512_mask_reduce_mul_epi64(__M, __W);
}
long long test_mm512_mask_reduce_and_epi64(__mmask8 __M, __m512i __W){
-// CHECK-LABEL: @test_mm512_mask_reduce_and_epi64(
+// CHECK-LABEL: test_mm512_mask_reduce_and_epi64
// CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
-// CHECK: call i64 @llvm.vector.reduce.and.v8i64(<8 x i64> %{{.*}})
+// CHECK: call {{.*}}i64 @llvm.vector.reduce.and.v8i64(<8 x i64> %{{.*}})
return _mm512_mask_reduce_and_epi64(__M, __W);
}
long long test_mm512_mask_reduce_or_epi64(__mmask8 __M, __m512i __W){
-// CHECK-LABEL: @test_mm512_mask_reduce_or_epi64(
+// CHECK-LABEL: test_mm512_mask_reduce_or_epi64
// CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
-// CHECK: call i64 @llvm.vector.reduce.or.v8i64(<8 x i64> %{{.*}})
+// CHECK: call {{.*}}i64 @llvm.vector.reduce.or.v8i64(<8 x i64> %{{.*}})
return _mm512_mask_reduce_or_epi64(__M, __W);
}
int test_mm512_reduce_add_epi32(__m512i __W){
-// CHECK-LABEL: @test_mm512_reduce_add_epi32(
-// CHECK: call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %{{.*}})
+// CHECK-LABEL: test_mm512_reduce_add_epi32
+// CHECK: call {{.*}}i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %{{.*}})
return _mm512_reduce_add_epi32(__W);
}
+TEST_CONSTEXPR(_mm512_reduce_add_epi32((__m512i)(__v16si){-8, -7, -6, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 6, 7}) == -8);
int test_mm512_reduce_mul_epi32(__m512i __W){
-// CHECK-LABEL: @test_mm512_reduce_mul_epi32(
-// CHECK: call i32 @llvm.vector.reduce.mul.v16i32(<16 x i32> %{{.*}})
+// CHECK-LABEL: test_mm512_reduce_mul_epi32
+// CHECK: call {{.*}}i32 @llvm.vector.reduce.mul.v16i32(<16 x i32> %{{.*}})
return _mm512_reduce_mul_epi32(__W);
}
+TEST_CONSTEXPR(_mm512_reduce_mul_epi32((__m512i)(__v16si){1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 3, 1, 1, -3, 1, 1}) == -36);
int test_mm512_reduce_or_epi32(__m512i __W){
-// CHECK: call i32 @llvm.vector.reduce.or.v16i32(<16 x i32> %{{.*}})
+// CHECK: call {{.*}}i32 @llvm.vector.reduce.or.v16i32(<16 x i32> %{{.*}})
return _mm512_reduce_or_epi32(__W);
}
+TEST_CONSTEXPR(_mm512_reduce_or_epi32((__m512i)(__v16si){0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0, 0, 0, 0, 0, 0, 0, 0}) == 0xFF);
int test_mm512_reduce_and_epi32(__m512i __W){
-// CHECK-LABEL: @test_mm512_reduce_and_epi32(
-// CHECK: call i32 @llvm.vector.reduce.and.v16i32(<16 x i32> %{{.*}})
+// CHECK-LABEL: test_mm512_reduce_and_epi32
+// CHECK: call {{.*}}i32 @llvm.vector.reduce.and.v16i32(<16 x i32> %{{.*}})
return _mm512_reduce_and_epi32(__W);
}
+TEST_CONSTEXPR(_mm512_reduce_and_epi32((__m512i)(__v16si){0xFF, 0xF0, 0x0F, 0xFF, 0xFF, 0xFF, 0xF0, 0x0F, 0xFF, 0xFF, 0xFF, 0xFF, 0xF0, 0xF0, 0x0F, 0x0F}) == 0x00);
int test_mm512_mask_reduce_add_epi32(__mmask16 __M, __m512i __W){
-// CHECK-LABEL: @test_mm512_mask_reduce_add_epi32(
+// CHECK-LABEL: test_mm512_mask_reduce_add_epi32
// CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
-// CHECK: call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %{{.*}})
+// CHECK: call {{.*}}i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %{{.*}})
return _mm512_mask_reduce_add_epi32(__M, __W);
}
int test_mm512_mask_reduce_mul_epi32(__mmask16 __M, __m512i __W){
-// CHECK-LABEL: @test_mm512_mask_reduce_mul_epi32(
+// CHECK-LABEL: test_mm512_mask_reduce_mul_epi32
// CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
-// CHECK: call i32 @llvm.vector.reduce.mul.v16i32(<16 x i32> %{{.*}})
+// CHECK: call {{.*}}i32 @llvm.vector.reduce.mul.v16i32(<16 x i32> %{{.*}})
return _mm512_mask_reduce_mul_epi32(__M, __W);
}
int test_mm512_mask_reduce_and_epi32(__mmask16 __M, __m512i __W){
-// CHECK-LABEL: @test_mm512_mask_reduce_and_epi32(
+// CHECK-LABEL: test_mm512_mask_reduce_and_epi32
// CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
-// CHECK: call i32 @llvm.vector.reduce.and.v16i32(<16 x i32> %{{.*}})
+// CHECK: call {{.*}}i32 @llvm.vector.reduce.and.v16i32(<16 x i32> %{{.*}})
return _mm512_mask_reduce_and_epi32(__M, __W);
}
int test_mm512_mask_reduce_or_epi32(__mmask16 __M, __m512i __W){
-// CHECK-LABEL: @test_mm512_mask_reduce_or_epi32(
+// CHECK-LABEL: test_mm512_mask_reduce_or_epi32
// CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
-// CHECK: call i32 @llvm.vector.reduce.or.v16i32(<16 x i32> %{{.*}})
+// CHECK: call {{.*}}i32 @llvm.vector.reduce.or.v16i32(<16 x i32> %{{.*}})
return _mm512_mask_reduce_or_epi32(__M, __W);
}
double test_mm512_reduce_add_pd(__m512d __W, double ExtraAddOp){
-// CHECK-LABEL: @test_mm512_reduce_add_pd(
+// CHECK-LABEL: test_mm512_reduce_add_pd
// CHECK-NOT: reassoc
-// CHECK: call reassoc double @llvm.vector.reduce.fadd.v8f64(double -0.000000e+00, <8 x double> %{{.*}})
+// CHECK: call reassoc {{.*}}double @llvm.vector.reduce.fadd.v8f64(double -0.000000e+00, <8 x double> %{{.*}})
// CHECK-NOT: reassoc
return _mm512_reduce_add_pd(__W) + ExtraAddOp;
}
double test_mm512_reduce_mul_pd(__m512d __W, double ExtraMulOp){
-// CHECK-LABEL: @test_mm512_reduce_mul_pd(
+// CHECK-LABEL: test_mm512_reduce_mul_pd
// CHECK-NOT: reassoc
-// CHECK: call reassoc double @llvm.vector.reduce.fmul.v8f64(double 1.000000e+00, <8 x double> %{{.*}})
+// CHECK: call reassoc {{.*}}double @llvm.vector.reduce.fmul.v8f64(double 1.000000e+00, <8 x double> %{{.*}})
// CHECK-NOT: reassoc
return _mm512_reduce_mul_pd(__W) * ExtraMulOp;
}
float test_mm512_reduce_add_ps(__m512 __W){
-// CHECK-LABEL: @test_mm512_reduce_add_ps(
-// CHECK: call reassoc float @llvm.vector.reduce.fadd.v16f32(float -0.000000e+00, <16 x float> %{{.*}})
+// CHECK-LABEL: test_mm512_reduce_add_ps
+// CHECK: call reassoc {{.*}}float @llvm.vector.reduce.fadd.v16f32(float -0.000000e+00, <16 x float> %{{.*}})
return _mm512_reduce_add_ps(__W);
}
float test_mm512_reduce_mul_ps(__m512 __W){
-// CHECK-LABEL: @test_mm512_reduce_mul_ps(
-// CHECK: call reassoc float @llvm.vector.reduce.fmul.v16f32(float 1.000000e+00, <16 x float> %{{.*}})
+// CHECK-LABEL: test_mm512_reduce_mul_ps
+// CHECK: call reassoc {{.*}}float @llvm.vector.reduce.fmul.v16f32(float 1.000000e+00, <16 x float> %{{.*}})
return _mm512_reduce_mul_ps(__W);
}
double test_mm512_mask_reduce_add_pd(__mmask8 __M, __m512d __W){
-// CHECK-LABEL: @test_mm512_mask_reduce_add_pd(
+// CHECK-LABEL: test_mm512_mask_reduce_add_pd
// CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
-// CHECK: call reassoc double @llvm.vector.reduce.fadd.v8f64(double -0.000000e+00, <8 x double> %{{.*}})
+// CHECK: call reassoc {{.*}}double @llvm.vector.reduce.fadd.v8f64(double -0.000000e+00, <8 x double> %{{.*}})
return _mm512_mask_reduce_add_pd(__M, __W);
}
double test_mm512_mask_reduce_mul_pd(__mmask8 __M, __m512d __W){
-// CHECK-LABEL: @test_mm512_mask_reduce_mul_pd(
+// CHECK-LABEL: test_mm512_mask_reduce_mul_pd
// CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
-// CHECK: call reassoc double @llvm.vector.reduce.fmul.v8f64(double 1.000000e+00, <8 x double> %{{.*}})
+// CHECK: call reassoc {{.*}}double @llvm.vector.reduce.fmul.v8f64(double 1.000000e+00, <8 x double> %{{.*}})
return _mm512_mask_reduce_mul_pd(__M, __W);
}
float test_mm512_mask_reduce_add_ps(__mmask16 __M, __m512 __W){
-// CHECK-LABEL: @test_mm512_mask_reduce_add_ps(
+// CHECK-LABEL: test_mm512_mask_reduce_add_ps
// CHECK: select <16 x i1> %{{.*}}, <16 x float> {{.*}}, <16 x float> {{.*}}
-// CHECK: call reassoc float @llvm.vector.reduce.fadd.v16f32(float -0.000000e+00, <16 x float> %{{.*}})
+// CHECK: call reassoc {{.*}}float @llvm.vector.reduce.fadd.v16f32(float -0.000000e+00, <16 x float> %{{.*}})
return _mm512_mask_reduce_add_ps(__M, __W);
}
float test_mm512_mask_reduce_mul_ps(__mmask16 __M, __m512 __W){
-// CHECK-LABEL: @test_mm512_mask_reduce_mul_ps(
+// CHECK-LABEL: test_mm512_mask_reduce_mul_ps
// CHECK: select <16 x i1> %{{.*}}, <16 x float> {{.*}}, <16 x float> %{{.*}}
-// CHECK: call reassoc float @llvm.vector.reduce.fmul.v16f32(float 1.000000e+00, <16 x float> %{{.*}})
+// CHECK: call reassoc {{.*}}float @llvm.vector.reduce.fmul.v16f32(float 1.000000e+00, <16 x float> %{{.*}})
return _mm512_mask_reduce_mul_ps(__M, __W);
}
diff --git a/clang/test/CodeGen/X86/avx512-reduceMinMaxIntrin.c b/clang/test/CodeGen/X86/avx512-reduceMinMaxIntrin.c
index 3e33ec5..0110079 100644
--- a/clang/test/CodeGen/X86/avx512-reduceMinMaxIntrin.c
+++ b/clang/test/CodeGen/X86/avx512-reduceMinMaxIntrin.c
@@ -1,164 +1,175 @@
-// RUN: %clang_cc1 -ffreestanding %s -O0 -triple=x86_64-apple-darwin -target-cpu skylake-avx512 -emit-llvm -o - -Wall -Werror | FileCheck %s
+// RUN: %clang_cc1 -x c -ffreestanding %s -O0 -triple=x86_64-apple-darwin -target-cpu skylake-avx512 -emit-llvm -o - -Wall -Werror | FileCheck %s
+// RUN: %clang_cc1 -x c -ffreestanding %s -O0 -triple=i386-apple-darwin -target-cpu skylake-avx512 -emit-llvm -o - -Wall -Werror | FileCheck %s
+// RUN: %clang_cc1 -x c++ -ffreestanding %s -O0 -triple=x86_64-apple-darwin -target-cpu skylake-avx512 -emit-llvm -o - -Wall -Werror | FileCheck %s
+// RUN: %clang_cc1 -x c++ -ffreestanding %s -O0 -triple=i386-apple-darwin -target-cpu skylake-avx512 -emit-llvm -o - -Wall -Werror | FileCheck %s
#include <immintrin.h>
+#include "builtin_test_helpers.h"
long long test_mm512_reduce_max_epi64(__m512i __W){
-// CHECK-LABEL: @test_mm512_reduce_max_epi64(
-// CHECK: call i64 @llvm.vector.reduce.smax.v8i64(<8 x i64> %{{.*}})
+// CHECK-LABEL: test_mm512_reduce_max_epi64
+// CHECK: call {{.*}}i64 @llvm.vector.reduce.smax.v8i64(<8 x i64> %{{.*}})
return _mm512_reduce_max_epi64(__W);
}
+TEST_CONSTEXPR(_mm512_reduce_max_epi64((__m512i)(__v8di){-4, -3, -2, -1, 0, 1, 2, 3}) == 3);
unsigned long long test_mm512_reduce_max_epu64(__m512i __W){
-// CHECK-LABEL: @test_mm512_reduce_max_epu64(
-// CHECK: call i64 @llvm.vector.reduce.umax.v8i64(<8 x i64> %{{.*}})
+// CHECK-LABEL: test_mm512_reduce_max_epu64
+// CHECK: call {{.*}}i64 @llvm.vector.reduce.umax.v8i64(<8 x i64> %{{.*}})
return _mm512_reduce_max_epu64(__W);
}
+TEST_CONSTEXPR(_mm512_reduce_max_epu64((__m512i)(__v8du){0, 1, 2, 3, 4, 5, 6, 7}) == 7);
double test_mm512_reduce_max_pd(__m512d __W, double ExtraAddOp){
-// CHECK-LABEL: @test_mm512_reduce_max_pd(
+// CHECK-LABEL: test_mm512_reduce_max_pd
// CHECK-NOT: nnan
-// CHECK: call nnan double @llvm.vector.reduce.fmax.v8f64(<8 x double> %{{.*}})
+// CHECK: call nnan {{.*}}double @llvm.vector.reduce.fmax.v8f64(<8 x double> %{{.*}})
// CHECK-NOT: nnan
return _mm512_reduce_max_pd(__W) + ExtraAddOp;
}
long long test_mm512_reduce_min_epi64(__m512i __W){
-// CHECK-LABEL: @test_mm512_reduce_min_epi64(
-// CHECK: call i64 @llvm.vector.reduce.smin.v8i64(<8 x i64> %{{.*}})
+// CHECK-LABEL: test_mm512_reduce_min_epi64
+// CHECK: call {{.*}}i64 @llvm.vector.reduce.smin.v8i64(<8 x i64> %{{.*}})
return _mm512_reduce_min_epi64(__W);
}
+TEST_CONSTEXPR(_mm512_reduce_min_epi64((__m512i)(__v8di){-4, -3, -2, -1, 0, 1, 2, 3}) == -4);
unsigned long long test_mm512_reduce_min_epu64(__m512i __W){
-// CHECK-LABEL: @test_mm512_reduce_min_epu64(
-// CHECK: call i64 @llvm.vector.reduce.umin.v8i64(<8 x i64> %{{.*}})
+// CHECK-LABEL: test_mm512_reduce_min_epu64
+// CHECK: call {{.*}}i64 @llvm.vector.reduce.umin.v8i64(<8 x i64> %{{.*}})
return _mm512_reduce_min_epu64(__W);
}
+TEST_CONSTEXPR(_mm512_reduce_min_epu64((__m512i)(__v8du){0, 1, 2, 3, 4, 5, 6, 7}) == 0);
double test_mm512_reduce_min_pd(__m512d __W, double ExtraMulOp){
-// CHECK-LABEL: @test_mm512_reduce_min_pd(
+// CHECK-LABEL: test_mm512_reduce_min_pd
// CHECK-NOT: nnan
-// CHECK: call nnan double @llvm.vector.reduce.fmin.v8f64(<8 x double> %{{.*}})
+// CHECK: call nnan {{.*}}double @llvm.vector.reduce.fmin.v8f64(<8 x double> %{{.*}})
// CHECK-NOT: nnan
return _mm512_reduce_min_pd(__W) * ExtraMulOp;
}
long long test_mm512_mask_reduce_max_epi64(__mmask8 __M, __m512i __W){
-// CHECK-LABEL: @test_mm512_mask_reduce_max_epi64(
+// CHECK-LABEL: test_mm512_mask_reduce_max_epi64
// CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
-// CHECK: call i64 @llvm.vector.reduce.smax.v8i64(<8 x i64> %{{.*}})
+// CHECK: call {{.*}}i64 @llvm.vector.reduce.smax.v8i64(<8 x i64> %{{.*}})
return _mm512_mask_reduce_max_epi64(__M, __W);
}
unsigned long test_mm512_mask_reduce_max_epu64(__mmask8 __M, __m512i __W){
-// CHECK-LABEL: @test_mm512_mask_reduce_max_epu64(
+// CHECK-LABEL: test_mm512_mask_reduce_max_epu64
// CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
-// CHECK: call i64 @llvm.vector.reduce.umax.v8i64(<8 x i64> %{{.*}})
+// CHECK: call {{.*}}i64 @llvm.vector.reduce.umax.v8i64(<8 x i64> %{{.*}})
return _mm512_mask_reduce_max_epu64(__M, __W);
}
double test_mm512_mask_reduce_max_pd(__mmask8 __M, __m512d __W){
-// CHECK-LABEL: @test_mm512_mask_reduce_max_pd(
+// CHECK-LABEL: test_mm512_mask_reduce_max_pd
// CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
-// CHECK: call nnan double @llvm.vector.reduce.fmax.v8f64(<8 x double> %{{.*}})
+// CHECK: call nnan {{.*}}double @llvm.vector.reduce.fmax.v8f64(<8 x double> %{{.*}})
return _mm512_mask_reduce_max_pd(__M, __W);
}
long long test_mm512_mask_reduce_min_epi64(__mmask8 __M, __m512i __W){
-// CHECK-LABEL: @test_mm512_mask_reduce_min_epi64(
+// CHECK-LABEL: test_mm512_mask_reduce_min_epi64
// CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
-// CHECK: call i64 @llvm.vector.reduce.smin.v8i64(<8 x i64> %{{.*}})
+// CHECK: call {{.*}}i64 @llvm.vector.reduce.smin.v8i64(<8 x i64> %{{.*}})
return _mm512_mask_reduce_min_epi64(__M, __W);
}
unsigned long long test_mm512_mask_reduce_min_epu64(__mmask8 __M, __m512i __W){
-// CHECK-LABEL: @test_mm512_mask_reduce_min_epu64(
+// CHECK-LABEL: test_mm512_mask_reduce_min_epu64
// CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
-// CHECK: call i64 @llvm.vector.reduce.umin.v8i64(<8 x i64> %{{.*}})
+// CHECK: call {{.*}}i64 @llvm.vector.reduce.umin.v8i64(<8 x i64> %{{.*}})
return _mm512_mask_reduce_min_epu64(__M, __W);
}
double test_mm512_mask_reduce_min_pd(__mmask8 __M, __m512d __W){
-// CHECK-LABEL: @test_mm512_mask_reduce_min_pd(
+// CHECK-LABEL: test_mm512_mask_reduce_min_pd
// CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
-// CHECK: call nnan double @llvm.vector.reduce.fmin.v8f64(<8 x double> %{{.*}})
+// CHECK: call nnan {{.*}}double @llvm.vector.reduce.fmin.v8f64(<8 x double> %{{.*}})
return _mm512_mask_reduce_min_pd(__M, __W);
}
int test_mm512_reduce_max_epi32(__m512i __W){
-// CHECK-LABEL: @test_mm512_reduce_max_epi32(
-// CHECK: call i32 @llvm.vector.reduce.smax.v16i32(<16 x i32> %{{.*}})
+// CHECK-LABEL: test_mm512_reduce_max_epi32
+// CHECK: call {{.*}}i32 @llvm.vector.reduce.smax.v16i32(<16 x i32> %{{.*}})
return _mm512_reduce_max_epi32(__W);
}
+TEST_CONSTEXPR(_mm512_reduce_max_epi32((__m512i)(__v16si){-8, -7, -6, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 6, 7}) == 7);
unsigned int test_mm512_reduce_max_epu32(__m512i __W){
-// CHECK-LABEL: @test_mm512_reduce_max_epu32(
-// CHECK: call i32 @llvm.vector.reduce.umax.v16i32(<16 x i32> %{{.*}})
+// CHECK-LABEL: test_mm512_reduce_max_epu32
+// CHECK: call {{.*}}i32 @llvm.vector.reduce.umax.v16i32(<16 x i32> %{{.*}})
return _mm512_reduce_max_epu32(__W);
}
+TEST_CONSTEXPR(_mm512_reduce_max_epu32((__m512i)(__v16su){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}) == 15);
float test_mm512_reduce_max_ps(__m512 __W){
-// CHECK-LABEL: @test_mm512_reduce_max_ps(
-// CHECK: call nnan float @llvm.vector.reduce.fmax.v16f32(<16 x float> %{{.*}})
+// CHECK-LABEL: test_mm512_reduce_max_ps
+// CHECK: call nnan {{.*}}float @llvm.vector.reduce.fmax.v16f32(<16 x float> %{{.*}})
return _mm512_reduce_max_ps(__W);
}
int test_mm512_reduce_min_epi32(__m512i __W){
-// CHECK-LABEL: @test_mm512_reduce_min_epi32(
-// CHECK: call i32 @llvm.vector.reduce.smin.v16i32(<16 x i32> %{{.*}})
+// CHECK-LABEL: test_mm512_reduce_min_epi32
+// CHECK: call {{.*}}i32 @llvm.vector.reduce.smin.v16i32(<16 x i32> %{{.*}})
return _mm512_reduce_min_epi32(__W);
}
+TEST_CONSTEXPR(_mm512_reduce_min_epi32((__m512i)(__v16si){-8, -7, -6, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 6, 7}) == -8);
unsigned int test_mm512_reduce_min_epu32(__m512i __W){
-// CHECK-LABEL: @test_mm512_reduce_min_epu32(
-// CHECK: call i32 @llvm.vector.reduce.umin.v16i32(<16 x i32> %{{.*}})
+// CHECK-LABEL: test_mm512_reduce_min_epu32
+// CHECK: call {{.*}}i32 @llvm.vector.reduce.umin.v16i32(<16 x i32> %{{.*}})
return _mm512_reduce_min_epu32(__W);
}
+TEST_CONSTEXPR(_mm512_reduce_min_epu32((__m512i)(__v16su){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}) == 0);
float test_mm512_reduce_min_ps(__m512 __W){
-// CHECK-LABEL: @test_mm512_reduce_min_ps(
-// CHECK: call nnan float @llvm.vector.reduce.fmin.v16f32(<16 x float> %{{.*}})
+// CHECK-LABEL: test_mm512_reduce_min_ps
+// CHECK: call nnan {{.*}}float @llvm.vector.reduce.fmin.v16f32(<16 x float> %{{.*}})
return _mm512_reduce_min_ps(__W);
}
int test_mm512_mask_reduce_max_epi32(__mmask16 __M, __m512i __W){
-// CHECK-LABEL: @test_mm512_mask_reduce_max_epi32(
+// CHECK-LABEL: test_mm512_mask_reduce_max_epi32
// CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
-// CHECK: call i32 @llvm.vector.reduce.smax.v16i32(<16 x i32> %{{.*}})
+// CHECK: call {{.*}}i32 @llvm.vector.reduce.smax.v16i32(<16 x i32> %{{.*}})
return _mm512_mask_reduce_max_epi32(__M, __W);
}
unsigned int test_mm512_mask_reduce_max_epu32(__mmask16 __M, __m512i __W){
-// CHECK-LABEL: @test_mm512_mask_reduce_max_epu32(
+// CHECK-LABEL: test_mm512_mask_reduce_max_epu32
// CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
-// CHECK: call i32 @llvm.vector.reduce.umax.v16i32(<16 x i32> %{{.*}})
+// CHECK: call {{.*}}i32 @llvm.vector.reduce.umax.v16i32(<16 x i32> %{{.*}})
return _mm512_mask_reduce_max_epu32(__M, __W);
}
float test_mm512_mask_reduce_max_ps(__mmask16 __M, __m512 __W){
-// CHECK-LABEL: @test_mm512_mask_reduce_max_ps(
+// CHECK-LABEL: test_mm512_mask_reduce_max_ps
// CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
-// CHECK: call nnan float @llvm.vector.reduce.fmax.v16f32(<16 x float> %{{.*}})
+// CHECK: call nnan {{.*}}float @llvm.vector.reduce.fmax.v16f32(<16 x float> %{{.*}})
return _mm512_mask_reduce_max_ps(__M, __W);
}
int test_mm512_mask_reduce_min_epi32(__mmask16 __M, __m512i __W){
-// CHECK-LABEL: @test_mm512_mask_reduce_min_epi32(
+// CHECK-LABEL: test_mm512_mask_reduce_min_epi32
// CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
-// CHECK: call i32 @llvm.vector.reduce.smin.v16i32(<16 x i32> %{{.*}})
+// CHECK: call {{.*}}i32 @llvm.vector.reduce.smin.v16i32(<16 x i32> %{{.*}})
return _mm512_mask_reduce_min_epi32(__M, __W);
}
unsigned int test_mm512_mask_reduce_min_epu32(__mmask16 __M, __m512i __W){
-// CHECK-LABEL: @test_mm512_mask_reduce_min_epu32(
+// CHECK-LABEL: test_mm512_mask_reduce_min_epu32
// CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
-// CHECK: call i32 @llvm.vector.reduce.umin.v16i32(<16 x i32> %{{.*}})
+// CHECK: call {{.*}}i32 @llvm.vector.reduce.umin.v16i32(<16 x i32> %{{.*}})
return _mm512_mask_reduce_min_epu32(__M, __W);
}
float test_mm512_mask_reduce_min_ps(__mmask16 __M, __m512 __W){
-// CHECK-LABEL: @test_mm512_mask_reduce_min_ps(
+// CHECK-LABEL: test_mm512_mask_reduce_min_ps
// CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
-// CHECK: call nnan float @llvm.vector.reduce.fmin.v16f32(<16 x float> %{{.*}})
+// CHECK: call nnan {{.*}}float @llvm.vector.reduce.fmin.v16f32(<16 x float> %{{.*}})
return _mm512_mask_reduce_min_ps(__M, __W);
}
-
diff --git a/clang/test/CodeGen/X86/avx512dq-builtins.c b/clang/test/CodeGen/X86/avx512dq-builtins.c
index 39fb92d..1c01695 100644
--- a/clang/test/CodeGen/X86/avx512dq-builtins.c
+++ b/clang/test/CodeGen/X86/avx512dq-builtins.c
@@ -1,18 +1,21 @@
-// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512dq -emit-llvm -o - -Wall -Werror | FileCheck %s
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512dq -emit-llvm -o - -Wall -Werror | FileCheck %s
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +avx512dq -emit-llvm -o - -Wall -Werror | FileCheck %s
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512dq -emit-llvm -o - -Wall -Werror | FileCheck %s
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +avx512dq -emit-llvm -o - -Wall -Werror | FileCheck %s
#include <immintrin.h>
#include "builtin_test_helpers.h"
__mmask8 test_knot_mask8(__mmask8 a) {
- // CHECK-LABEL: @test_knot_mask8
+ // CHECK-LABEL: test_knot_mask8
// CHECK: [[IN:%.*]] = bitcast i8 %{{.*}} to <8 x i1>
// CHECK: [[NOT:%.*]] = xor <8 x i1> [[IN]], splat (i1 true)
return _knot_mask8(a);
}
__mmask8 test_kand_mask8(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) {
- // CHECK-LABEL: @test_kand_mask8
+ // CHECK-LABEL: test_kand_mask8
// CHECK: [[LHS:%.*]] = bitcast i8 %{{.*}} to <8 x i1>
// CHECK: [[RHS:%.*]] = bitcast i8 %{{.*}} to <8 x i1>
// CHECK: [[RES:%.*]] = and <8 x i1> [[LHS]], [[RHS]]
@@ -22,7 +25,7 @@ __mmask8 test_kand_mask8(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m
}
__mmask8 test_kandn_mask8(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) {
- // CHECK-LABEL: @test_kandn_mask8
+ // CHECK-LABEL: test_kandn_mask8
// CHECK: [[LHS:%.*]] = bitcast i8 %{{.*}} to <8 x i1>
// CHECK: [[RHS:%.*]] = bitcast i8 %{{.*}} to <8 x i1>
// CHECK: [[NOT:%.*]] = xor <8 x i1> [[LHS]], splat (i1 true)
@@ -33,7 +36,7 @@ __mmask8 test_kandn_mask8(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __
}
__mmask8 test_kor_mask8(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) {
- // CHECK-LABEL: @test_kor_mask8
+ // CHECK-LABEL: test_kor_mask8
// CHECK: [[LHS:%.*]] = bitcast i8 %{{.*}} to <8 x i1>
// CHECK: [[RHS:%.*]] = bitcast i8 %{{.*}} to <8 x i1>
// CHECK: [[RES:%.*]] = or <8 x i1> [[LHS]], [[RHS]]
@@ -43,7 +46,7 @@ __mmask8 test_kor_mask8(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m5
}
__mmask8 test_kxnor_mask8(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) {
- // CHECK-LABEL: @test_kxnor_mask8
+ // CHECK-LABEL: test_kxnor_mask8
// CHECK: [[LHS:%.*]] = bitcast i8 %{{.*}} to <8 x i1>
// CHECK: [[RHS:%.*]] = bitcast i8 %{{.*}} to <8 x i1>
// CHECK: [[NOT:%.*]] = xor <8 x i1> [[LHS]], splat (i1 true)
@@ -54,7 +57,7 @@ __mmask8 test_kxnor_mask8(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __
}
__mmask8 test_kxor_mask8(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) {
- // CHECK-LABEL: @test_kxor_mask8
+ // CHECK-LABEL: test_kxor_mask8
// CHECK: [[LHS:%.*]] = bitcast i8 %{{.*}} to <8 x i1>
// CHECK: [[RHS:%.*]] = bitcast i8 %{{.*}} to <8 x i1>
// CHECK: [[RES:%.*]] = xor <8 x i1> [[LHS]], [[RHS]]
@@ -64,7 +67,7 @@ __mmask8 test_kxor_mask8(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m
}
unsigned char test_kortestz_mask8_u8(__m512i __A, __m512i __B, __m512i __C, __m512i __D) {
- // CHECK-LABEL: @test_kortestz_mask8_u8
+ // CHECK-LABEL: test_kortestz_mask8_u8
// CHECK: [[LHS:%.*]] = bitcast i8 %{{.*}} to <8 x i1>
// CHECK: [[RHS:%.*]] = bitcast i8 %{{.*}} to <8 x i1>
// CHECK: [[OR:%.*]] = or <8 x i1> [[LHS]], [[RHS]]
@@ -77,7 +80,7 @@ unsigned char test_kortestz_mask8_u8(__m512i __A, __m512i __B, __m512i __C, __m5
}
unsigned char test_kortestc_mask8_u8(__m512i __A, __m512i __B, __m512i __C, __m512i __D) {
- // CHECK-LABEL: @test_kortestc_mask8_u8
+ // CHECK-LABEL: test_kortestc_mask8_u8
// CHECK: [[LHS:%.*]] = bitcast i8 %{{.*}} to <8 x i1>
// CHECK: [[RHS:%.*]] = bitcast i8 %{{.*}} to <8 x i1>
// CHECK: [[OR:%.*]] = or <8 x i1> [[LHS]], [[RHS]]
@@ -90,7 +93,7 @@ unsigned char test_kortestc_mask8_u8(__m512i __A, __m512i __B, __m512i __C, __m5
}
unsigned char test_kortest_mask8_u8(__m512i __A, __m512i __B, __m512i __C, __m512i __D, unsigned char *CF) {
- // CHECK-LABEL: @test_kortest_mask8_u8
+ // CHECK-LABEL: test_kortest_mask8_u8
// CHECK: [[LHS:%.*]] = bitcast i8 %{{.*}} to <8 x i1>
// CHECK: [[RHS:%.*]] = bitcast i8 %{{.*}} to <8 x i1>
// CHECK: [[OR:%.*]] = or <8 x i1> [[LHS]], [[RHS]]
@@ -110,7 +113,7 @@ unsigned char test_kortest_mask8_u8(__m512i __A, __m512i __B, __m512i __C, __m51
}
unsigned char test_ktestz_mask8_u8(__m512i __A, __m512i __B, __m512i __C, __m512i __D) {
- // CHECK-LABEL: @test_ktestz_mask8_u8
+ // CHECK-LABEL: test_ktestz_mask8_u8
// CHECK: [[LHS:%.*]] = bitcast i8 %{{.*}} to <8 x i1>
// CHECK: [[RHS:%.*]] = bitcast i8 %{{.*}} to <8 x i1>
// CHECK: [[RES:%.*]] = call i32 @llvm.x86.avx512.ktestz.b(<8 x i1> [[LHS]], <8 x i1> [[RHS]])
@@ -120,7 +123,7 @@ unsigned char test_ktestz_mask8_u8(__m512i __A, __m512i __B, __m512i __C, __m512
}
unsigned char test_ktestc_mask8_u8(__m512i __A, __m512i __B, __m512i __C, __m512i __D) {
- // CHECK-LABEL: @test_ktestc_mask8_u8
+ // CHECK-LABEL: test_ktestc_mask8_u8
// CHECK: [[LHS:%.*]] = bitcast i8 %{{.*}} to <8 x i1>
// CHECK: [[RHS:%.*]] = bitcast i8 %{{.*}} to <8 x i1>
// CHECK: [[RES:%.*]] = call i32 @llvm.x86.avx512.ktestc.b(<8 x i1> [[LHS]], <8 x i1> [[RHS]])
@@ -130,7 +133,7 @@ unsigned char test_ktestc_mask8_u8(__m512i __A, __m512i __B, __m512i __C, __m512
}
unsigned char test_ktest_mask8_u8(__m512i __A, __m512i __B, __m512i __C, __m512i __D, unsigned char *CF) {
- // CHECK-LABEL: @test_ktest_mask8_u8
+ // CHECK-LABEL: test_ktest_mask8_u8
// CHECK: [[LHS:%.*]] = bitcast i8 %{{.*}} to <8 x i1>
// CHECK: [[RHS:%.*]] = bitcast i8 %{{.*}} to <8 x i1>
// CHECK: [[RES:%.*]] = call i32 @llvm.x86.avx512.ktestc.b(<8 x i1> [[LHS]], <8 x i1> [[RHS]])
@@ -144,7 +147,7 @@ unsigned char test_ktest_mask8_u8(__m512i __A, __m512i __B, __m512i __C, __m512i
}
unsigned char test_ktestz_mask16_u8(__m512i __A, __m512i __B, __m512i __C, __m512i __D) {
- // CHECK-LABEL: @test_ktestz_mask16_u8
+ // CHECK-LABEL: test_ktestz_mask16_u8
// CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
// CHECK: [[RHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
// CHECK: [[RES:%.*]] = call i32 @llvm.x86.avx512.ktestz.w(<16 x i1> [[LHS]], <16 x i1> [[RHS]])
@@ -154,7 +157,7 @@ unsigned char test_ktestz_mask16_u8(__m512i __A, __m512i __B, __m512i __C, __m51
}
unsigned char test_ktestc_mask16_u8(__m512i __A, __m512i __B, __m512i __C, __m512i __D) {
- // CHECK-LABEL: @test_ktestc_mask16_u8
+ // CHECK-LABEL: test_ktestc_mask16_u8
// CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
// CHECK: [[RHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
// CHECK: [[RES:%.*]] = call i32 @llvm.x86.avx512.ktestc.w(<16 x i1> [[LHS]], <16 x i1> [[RHS]])
@@ -164,7 +167,7 @@ unsigned char test_ktestc_mask16_u8(__m512i __A, __m512i __B, __m512i __C, __m51
}
unsigned char test_ktest_mask16_u8(__m512i __A, __m512i __B, __m512i __C, __m512i __D, unsigned char *CF) {
- // CHECK-LABEL: @test_ktest_mask16_u8
+ // CHECK-LABEL: test_ktest_mask16_u8
// CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
// CHECK: [[RHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
// CHECK: [[RES:%.*]] = call i32 @llvm.x86.avx512.ktestc.w(<16 x i1> [[LHS]], <16 x i1> [[RHS]])
@@ -178,7 +181,7 @@ unsigned char test_ktest_mask16_u8(__m512i __A, __m512i __B, __m512i __C, __m512
}
__mmask8 test_kadd_mask8(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) {
- // CHECK-LABEL: @test_kadd_mask8
+ // CHECK-LABEL: test_kadd_mask8
// CHECK: [[LHS:%.*]] = bitcast i8 %{{.*}} to <8 x i1>
// CHECK: [[RHS:%.*]] = bitcast i8 %{{.*}} to <8 x i1>
// CHECK: [[RES:%.*]] = call <8 x i1> @llvm.x86.avx512.kadd.b(<8 x i1> [[LHS]], <8 x i1> [[RHS]])
@@ -188,7 +191,7 @@ __mmask8 test_kadd_mask8(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m
}
__mmask16 test_kadd_mask16(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) {
- // CHECK-LABEL: @test_kadd_mask16
+ // CHECK-LABEL: test_kadd_mask16
// CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
// CHECK: [[RHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
// CHECK: [[RES:%.*]] = call <16 x i1> @llvm.x86.avx512.kadd.w(<16 x i1> [[LHS]], <16 x i1> [[RHS]])
@@ -198,71 +201,71 @@ __mmask16 test_kadd_mask16(__m512i __A, __m512i __B, __m512i __C, __m512i __D, _
}
__mmask8 test_kshiftli_mask8(__m512i A, __m512i B, __m512i C, __m512i D) {
- // CHECK-LABEL: @test_kshiftli_mask8
+ // CHECK-LABEL: test_kshiftli_mask8
// CHECK: [[VAL:%.*]] = bitcast i8 %{{.*}} to <8 x i1>
// CHECK: [[RES:%.*]] = shufflevector <8 x i1> zeroinitializer, <8 x i1> [[VAL]], <8 x i32> <i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13>
return _mm512_mask_cmpneq_epu64_mask(_kshiftli_mask8(_mm512_cmpneq_epu64_mask(A, B), 2), C, D);
}
__mmask8 test_kshiftri_mask8(__m512i A, __m512i B, __m512i C, __m512i D) {
- // CHECK-LABEL: @test_kshiftri_mask8
+ // CHECK-LABEL: test_kshiftri_mask8
// CHECK: [[VAL:%.*]] = bitcast i8 %{{.*}} to <8 x i1>
// CHECK: [[RES:%.*]] = shufflevector <8 x i1> [[VAL]], <8 x i1> zeroinitializer, <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9>
return _mm512_mask_cmpneq_epu64_mask(_kshiftri_mask8(_mm512_cmpneq_epu64_mask(A, B), 2), C, D);
}
unsigned int test_cvtmask8_u32(__m512i A, __m512i B) {
- // CHECK-LABEL: @test_cvtmask8_u32
+ // CHECK-LABEL: test_cvtmask8_u32
// CHECK: zext i8 %{{.*}} to i32
return _cvtmask8_u32(_mm512_cmpneq_epu64_mask(A, B));
}
__mmask8 test_cvtu32_mask8(__m512i A, __m512i B, unsigned int C) {
- // CHECK-LABEL: @test_cvtu32_mask8
+ // CHECK-LABEL: test_cvtu32_mask8
// CHECK: trunc i32 %{{.*}} to i8
return _mm512_mask_cmpneq_epu64_mask(_cvtu32_mask8(C), A, B);
}
__mmask8 test_load_mask8(__mmask8 *A, __m512i B, __m512i C) {
- // CHECK-LABEL: @test_load_mask8
+ // CHECK-LABEL: test_load_mask8
// CHECK: [[LOAD:%.*]] = load i8, ptr %{{.*}}
return _mm512_mask_cmpneq_epu64_mask(_load_mask8(A), B, C);
}
void test_store_mask8(__mmask8 *A, __m512i B, __m512i C) {
- // CHECK-LABEL: @test_store_mask8
+ // CHECK-LABEL: test_store_mask8
// CHECK: store i8 %{{.*}}, ptr %{{.*}}
_store_mask8(A, _mm512_cmpneq_epu64_mask(B, C));
}
__m512i test_mm512_mullo_epi64 (__m512i __A, __m512i __B) {
- // CHECK-LABEL: @test_mm512_mullo_epi64
+ // CHECK-LABEL: test_mm512_mullo_epi64
// CHECK: mul <8 x i64>
return (__m512i) _mm512_mullo_epi64(__A, __B);
}
__m512i test_mm512_mask_mullo_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) {
- // CHECK-LABEL: @test_mm512_mask_mullo_epi64
+ // CHECK-LABEL: test_mm512_mask_mullo_epi64
// CHECK: mul <8 x i64> %{{.*}}, %{{.*}}
// CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
return (__m512i) _mm512_mask_mullo_epi64(__W, __U, __A, __B);
}
__m512i test_mm512_maskz_mullo_epi64 (__mmask8 __U, __m512i __A, __m512i __B) {
- // CHECK-LABEL: @test_mm512_maskz_mullo_epi64
+ // CHECK-LABEL: test_mm512_maskz_mullo_epi64
// CHECK: mul <8 x i64> %{{.*}}, %{{.*}}
// CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
return (__m512i) _mm512_maskz_mullo_epi64(__U, __A, __B);
}
__m512d test_mm512_xor_pd (__m512d __A, __m512d __B) {
- // CHECK-LABEL: @test_mm512_xor_pd
+ // CHECK-LABEL: test_mm512_xor_pd
// CHECK: xor <8 x i64>
return (__m512d) _mm512_xor_pd(__A, __B);
}
__m512d test_mm512_mask_xor_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
- // CHECK-LABEL: @test_mm512_mask_xor_pd
+ // CHECK-LABEL: test_mm512_mask_xor_pd
// CHECK: xor <8 x i64>
// CHECK: %[[MASK:.*]] = bitcast i8 %{{.*}} to <8 x i1>
// CHECK: select <8 x i1> %[[MASK]], <8 x double> %{{.*}}, <8 x double> %{{.*}}
@@ -271,7 +274,7 @@ __m512d test_mm512_mask_xor_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d
TEST_CONSTEXPR(match_m512d(_mm512_xor_pd((__m512d){-4.0, -5.0, +6.0, +7.0, +7.0, +6.0, -5.0, -4.0}, (__m512d){+0.0, -0.0, -0.0, +7.0, +7.0, -0.0, -0.0, +0.0}), -4.0, +5.0, -6.0, +0.0, +0.0, -6.0, +5.0, -4.0));
__m512d test_mm512_maskz_xor_pd (__mmask8 __U, __m512d __A, __m512d __B) {
- // CHECK-LABEL: @test_mm512_maskz_xor_pd
+ // CHECK-LABEL: test_mm512_maskz_xor_pd
// CHECK: xor <8 x i64>
// CHECK: %[[MASK:.*]] = bitcast i8 %{{.*}} to <8 x i1>
// CHECK: select <8 x i1> %[[MASK]], <8 x double> %{{.*}}, <8 x double> %{{.*}}
@@ -279,14 +282,14 @@ __m512d test_mm512_maskz_xor_pd (__mmask8 __U, __m512d __A, __m512d __B) {
}
__m512 test_mm512_xor_ps (__m512 __A, __m512 __B) {
- // CHECK-LABEL: @test_mm512_xor_ps
+ // CHECK-LABEL: test_mm512_xor_ps
// CHECK: xor <16 x i32>
return (__m512) _mm512_xor_ps(__A, __B);
}
-TEST_CONSTEXPR(match_m512(_mm512_xor_ps((__m256){-4.0f, -5.0f, +6.0f, +7.0f, +7.0f, +6.0f, -5.0f, -4.0f, -4.0f, -5.0f, +6.0f, +7.0f, +7.0f, +6.0f, -5.0f, -4.0f}, (__m256){+0.0f, -0.0f, -0.0f, +7.0f, +7.0f, -0.0f, -0.0f, +0.0f, +0.0f, -0.0f, -0.0f, +7.0f, +7.0f, -0.0f, -0.0f, +0.0f}), -4.0f, +5.0f, -6.0f, +0.0f, +0.0f, -6.0f, +5.0f, -4.0f, -4.0f, +5.0f, -6.0f, +0.0f, +0.0f, -6.0f, +5.0f, -4.0f));
+TEST_CONSTEXPR(match_m512(_mm512_xor_ps((__m512){-4.0f, -5.0f, +6.0f, +7.0f, +7.0f, +6.0f, -5.0f, -4.0f, -4.0f, -5.0f, +6.0f, +7.0f, +7.0f, +6.0f, -5.0f, -4.0f}, (__m512){+0.0f, -0.0f, -0.0f, +7.0f, +7.0f, -0.0f, -0.0f, +0.0f, +0.0f, -0.0f, -0.0f, +7.0f, +7.0f, -0.0f, -0.0f, +0.0f}), -4.0f, +5.0f, -6.0f, +0.0f, +0.0f, -6.0f, +5.0f, -4.0f, -4.0f, +5.0f, -6.0f, +0.0f, +0.0f, -6.0f, +5.0f, -4.0f));
__m512 test_mm512_mask_xor_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
- // CHECK-LABEL: @test_mm512_mask_xor_ps
+ // CHECK-LABEL: test_mm512_mask_xor_ps
// CHECK: xor <16 x i32>
// CHECK: %[[MASK:.*]] = bitcast i16 %{{.*}} to <16 x i1>
// CHECK: select <16 x i1> %[[MASK]], <16 x float> %{{.*}}, <16 x float> %{{.*}}
@@ -294,7 +297,7 @@ __m512 test_mm512_mask_xor_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B
}
__m512 test_mm512_maskz_xor_ps (__mmask16 __U, __m512 __A, __m512 __B) {
- // CHECK-LABEL: @test_mm512_maskz_xor_ps
+ // CHECK-LABEL: test_mm512_maskz_xor_ps
// CHECK: xor <16 x i32>
// CHECK: %[[MASK:.*]] = bitcast i16 %{{.*}} to <16 x i1>
// CHECK: select <16 x i1> %[[MASK]], <16 x float> %{{.*}}, <16 x float> %{{.*}}
@@ -302,14 +305,14 @@ __m512 test_mm512_maskz_xor_ps (__mmask16 __U, __m512 __A, __m512 __B) {
}
__m512d test_mm512_or_pd (__m512d __A, __m512d __B) {
- // CHECK-LABEL: @test_mm512_or_pd
+ // CHECK-LABEL: test_mm512_or_pd
// CHECK: or <8 x i64>
return (__m512d) _mm512_or_pd(__A, __B);
}
TEST_CONSTEXPR(match_m512d(_mm512_or_pd((__m512d){-4.0, -5.0, +6.0, +7.0, +7.0, +6.0, -5.0, -4.0}, (__m512d){+0.0, -0.0, -0.0, +7.0, +7.0, -0.0, -0.0, +0.0}), -4.0, -5.0, -6.0, +7.0, +7.0, -6.0, -5.0, -4.0));
__m512d test_mm512_mask_or_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
- // CHECK-LABEL: @test_mm512_mask_or_pd
+ // CHECK-LABEL: test_mm512_mask_or_pd
// CHECK: or <8 x i64>
// CHECK: %[[MASK:.*]] = bitcast i8 %{{.*}} to <8 x i1>
// CHECK: select <8 x i1> %[[MASK]], <8 x double> %{{.*}}, <8 x double> %{{.*}}
@@ -317,7 +320,7 @@ __m512d test_mm512_mask_or_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d _
}
__m512d test_mm512_maskz_or_pd (__mmask8 __U, __m512d __A, __m512d __B) {
- // CHECK-LABEL: @test_mm512_maskz_or_pd
+ // CHECK-LABEL: test_mm512_maskz_or_pd
// CHECK: or <8 x i64>
// CHECK: %[[MASK:.*]] = bitcast i8 %{{.*}} to <8 x i1>
// CHECK: select <8 x i1> %[[MASK]], <8 x double> %{{.*}}, <8 x double> %{{.*}}
@@ -325,14 +328,14 @@ __m512d test_mm512_maskz_or_pd (__mmask8 __U, __m512d __A, __m512d __B) {
}
__m512 test_mm512_or_ps (__m512 __A, __m512 __B) {
- // CHECK-LABEL: @test_mm512_or_ps
+ // CHECK-LABEL: test_mm512_or_ps
// CHECK: or <16 x i32>
return (__m512) _mm512_or_ps(__A, __B);
}
-TEST_CONSTEXPR(match_m512(_mm512_or_ps((__m256){-4.0f, -5.0f, +6.0f, +7.0f, +7.0f, +6.0f, -5.0f, -4.0f, -4.0f, -5.0f, +6.0f, +7.0f, +7.0f, +6.0f, -5.0f, -4.0f}, (__m256){+0.0f, -0.0f, -0.0f, +7.0f, +7.0f, -0.0f, -0.0f, +0.0f, +0.0f, -0.0f, -0.0f, +7.0f, +7.0f, -0.0f, -0.0f, +0.0f}), -4.0f, -5.0f, -6.0f, +7.0f, +7.0f, -6.0f, -5.0f, -4.0f, -4.0f, -5.0f, -6.0f, +7.0f, +7.0f, -6.0f, -5.0f, -4.0f));
+TEST_CONSTEXPR(match_m512(_mm512_or_ps((__m512){-4.0f, -5.0f, +6.0f, +7.0f, +7.0f, +6.0f, -5.0f, -4.0f, -4.0f, -5.0f, +6.0f, +7.0f, +7.0f, +6.0f, -5.0f, -4.0f}, (__m512){+0.0f, -0.0f, -0.0f, +7.0f, +7.0f, -0.0f, -0.0f, +0.0f, +0.0f, -0.0f, -0.0f, +7.0f, +7.0f, -0.0f, -0.0f, +0.0f}), -4.0f, -5.0f, -6.0f, +7.0f, +7.0f, -6.0f, -5.0f, -4.0f, -4.0f, -5.0f, -6.0f, +7.0f, +7.0f, -6.0f, -5.0f, -4.0f));
__m512 test_mm512_mask_or_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
- // CHECK-LABEL: @test_mm512_mask_or_ps
+ // CHECK-LABEL: test_mm512_mask_or_ps
// CHECK: or <16 x i32>
// CHECK: %[[MASK:.*]] = bitcast i16 %{{.*}} to <16 x i1>
// CHECK: select <16 x i1> %[[MASK]], <16 x float> %{{.*}}, <16 x float> %{{.*}}
@@ -340,7 +343,7 @@ __m512 test_mm512_mask_or_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
}
__m512 test_mm512_maskz_or_ps (__mmask16 __U, __m512 __A, __m512 __B) {
- // CHECK-LABEL: @test_mm512_maskz_or_ps
+ // CHECK-LABEL: test_mm512_maskz_or_ps
// CHECK: or <16 x i32>
// CHECK: %[[MASK:.*]] = bitcast i16 %{{.*}} to <16 x i1>
// CHECK: select <16 x i1> %[[MASK]], <16 x float> %{{.*}}, <16 x float> %{{.*}}
@@ -348,14 +351,14 @@ __m512 test_mm512_maskz_or_ps (__mmask16 __U, __m512 __A, __m512 __B) {
}
__m512d test_mm512_and_pd (__m512d __A, __m512d __B) {
- // CHECK-LABEL: @test_mm512_and_pd
+ // CHECK-LABEL: test_mm512_and_pd
// CHECK: and <8 x i64>
return (__m512d) _mm512_and_pd(__A, __B);
}
TEST_CONSTEXPR(match_m512d(_mm512_and_pd((__m512d){-4.0, -5.0, +6.0, +7.0, +7.0, +6.0, -5.0, -4.0}, (__m512d){+0.0, -0.0, -0.0, +7.0, +7.0, -0.0, -0.0, +0.0}), -0.0, -0.0, +0.0, +7.0, +7.0, +0.0, -0.0, -0.0));
__m512d test_mm512_mask_and_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
- // CHECK-LABEL: @test_mm512_mask_and_pd
+ // CHECK-LABEL: test_mm512_mask_and_pd
// CHECK: and <8 x i64>
// CHECK: %[[MASK:.*]] = bitcast i8 %{{.*}} to <8 x i1>
// CHECK: select <8 x i1> %[[MASK]], <8 x double> %{{.*}}, <8 x double> %{{.*}}
@@ -363,7 +366,7 @@ __m512d test_mm512_mask_and_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d
}
__m512d test_mm512_maskz_and_pd (__mmask8 __U, __m512d __A, __m512d __B) {
- // CHECK-LABEL: @test_mm512_maskz_and_pd
+ // CHECK-LABEL: test_mm512_maskz_and_pd
// CHECK: and <8 x i64>
// CHECK: %[[MASK:.*]] = bitcast i8 %{{.*}} to <8 x i1>
// CHECK: select <8 x i1> %[[MASK]], <8 x double> %{{.*}}, <8 x double> %{{.*}}
@@ -371,14 +374,14 @@ __m512d test_mm512_maskz_and_pd (__mmask8 __U, __m512d __A, __m512d __B) {
}
__m512 test_mm512_and_ps (__m512 __A, __m512 __B) {
- // CHECK-LABEL: @test_mm512_and_ps
+ // CHECK-LABEL: test_mm512_and_ps
// CHECK: and <16 x i32>
return (__m512) _mm512_and_ps(__A, __B);
}
-TEST_CONSTEXPR(match_m512(_mm512_and_ps((__m256){-4.0f, -5.0f, +6.0f, +7.0f, +7.0f, +6.0f, -5.0f, -4.0f, -4.0f, -5.0f, +6.0f, +7.0f, +7.0f, +6.0f, -5.0f, -4.0f}, (__m256){+0.0f, -0.0f, -0.0f, +7.0f, +7.0f, -0.0f, -0.0f, +0.0f, +0.0f, -0.0f, -0.0f, +7.0f, +7.0f, -0.0f, -0.0f, +0.0f}), -0.0f, -0.0f, +0.0f, +7.0f, +7.0f, +0.0f, -0.0f, -0.0f, -0.0f, -0.0f, +0.0f, +7.0f, +7.0f, +0.0f, -0.0f, -0.0f));
+TEST_CONSTEXPR(match_m512(_mm512_and_ps((__m512){-4.0f, -5.0f, +6.0f, +7.0f, +7.0f, +6.0f, -5.0f, -4.0f, -4.0f, -5.0f, +6.0f, +7.0f, +7.0f, +6.0f, -5.0f, -4.0f}, (__m512){+0.0f, -0.0f, -0.0f, +7.0f, +7.0f, -0.0f, -0.0f, +0.0f, +0.0f, -0.0f, -0.0f, +7.0f, +7.0f, -0.0f, -0.0f, +0.0f}), -0.0f, -0.0f, +0.0f, +7.0f, +7.0f, +0.0f, -0.0f, -0.0f, -0.0f, -0.0f, +0.0f, +7.0f, +7.0f, +0.0f, -0.0f, -0.0f));
__m512 test_mm512_mask_and_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
- // CHECK-LABEL: @test_mm512_mask_and_ps
+ // CHECK-LABEL: test_mm512_mask_and_ps
// CHECK: and <16 x i32>
// CHECK: %[[MASK:.*]] = bitcast i16 %{{.*}} to <16 x i1>
// CHECK: select <16 x i1> %[[MASK]], <16 x float> %{{.*}}, <16 x float> %{{.*}}
@@ -386,7 +389,7 @@ __m512 test_mm512_mask_and_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B
}
__m512 test_mm512_maskz_and_ps (__mmask16 __U, __m512 __A, __m512 __B) {
- // CHECK-LABEL: @test_mm512_maskz_and_ps
+ // CHECK-LABEL: test_mm512_maskz_and_ps
// CHECK: and <16 x i32>
// CHECK: %[[MASK:.*]] = bitcast i16 %{{.*}} to <16 x i1>
// CHECK: select <16 x i1> %[[MASK]], <16 x float> %{{.*}}, <16 x float> %{{.*}}
@@ -394,7 +397,7 @@ __m512 test_mm512_maskz_and_ps (__mmask16 __U, __m512 __A, __m512 __B) {
}
__m512d test_mm512_andnot_pd (__m512d __A, __m512d __B) {
- // CHECK-LABEL: @test_mm512_andnot_pd
+ // CHECK-LABEL: test_mm512_andnot_pd
// CHECK: xor <8 x i64> %{{.*}}, splat (i64 -1)
// CHECK: and <8 x i64>
return (__m512d) _mm512_andnot_pd(__A, __B);
@@ -402,7 +405,7 @@ __m512d test_mm512_andnot_pd (__m512d __A, __m512d __B) {
TEST_CONSTEXPR(match_m512d(_mm512_andnot_pd((__m512d){-4.0, -5.0, +6.0, +7.0, +7.0, +6.0, -5.0, -4.0}, (__m512d){+0.0, -0.0, -0.0, +7.0, +7.0, -0.0, -0.0, +0.0}), +0.0, +0.0, +0.0, +0.0, +0.0, +0.0, +0.0, +0.0));
__m512d test_mm512_mask_andnot_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
- // CHECK-LABEL: @test_mm512_mask_andnot_pd
+ // CHECK-LABEL: test_mm512_mask_andnot_pd
// CHECK: xor <8 x i64> %{{.*}}, splat (i64 -1)
// CHECK: and <8 x i64> %{{.*}}, %{{.*}}
// CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
@@ -410,7 +413,7 @@ __m512d test_mm512_mask_andnot_pd (__m512d __W, __mmask8 __U, __m512d __A, __m51
}
__m512d test_mm512_maskz_andnot_pd (__mmask8 __U, __m512d __A, __m512d __B) {
- // CHECK-LABEL: @test_mm512_maskz_andnot_pd
+ // CHECK-LABEL: test_mm512_maskz_andnot_pd
// CHECK: xor <8 x i64> %{{.*}}, splat (i64 -1)
// CHECK: and <8 x i64> %{{.*}}, %{{.*}}
// CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
@@ -418,15 +421,15 @@ __m512d test_mm512_maskz_andnot_pd (__mmask8 __U, __m512d __A, __m512d __B) {
}
__m512 test_mm512_andnot_ps (__m512 __A, __m512 __B) {
- // CHECK-LABEL: @test_mm512_andnot_ps
+ // CHECK-LABEL: test_mm512_andnot_ps
// CHECK: xor <16 x i32> %{{.*}}, splat (i32 -1)
// CHECK: and <16 x i32>
return (__m512) _mm512_andnot_ps(__A, __B);
}
-TEST_CONSTEXPR(match_m512(_mm512_andnot_ps((__m256){-4.0f, -5.0f, +6.0f, +7.0f, +7.0f, +6.0f, -5.0f, -4.0f, -4.0f, -5.0f, +6.0f, +7.0f, +7.0f, +6.0f, -5.0f, -4.0f}, (__m256){+0.0f, -0.0f, -0.0f, +7.0f, +7.0f, -0.0f, -0.0f, +0.0f, +0.0f, -0.0f, -0.0f, +7.0f, +7.0f, -0.0f, -0.0f, +0.0f}), +0.0f, +0.0f, +0.0f, +0.0f, +0.0f, +0.0f, +0.0f, +0.0f, +0.0f, +0.0f, +0.0f, +0.0f, +0.0f, +0.0f, +0.0f, +0.0f));
+TEST_CONSTEXPR(match_m512(_mm512_andnot_ps((__m512){-4.0f, -5.0f, +6.0f, +7.0f, +7.0f, +6.0f, -5.0f, -4.0f, -4.0f, -5.0f, +6.0f, +7.0f, +7.0f, +6.0f, -5.0f, -4.0f}, (__m512){+0.0f, -0.0f, -0.0f, +7.0f, +7.0f, -0.0f, -0.0f, +0.0f, +0.0f, -0.0f, -0.0f, +7.0f, +7.0f, -0.0f, -0.0f, +0.0f}), +0.0f, +0.0f, +0.0f, +0.0f, +0.0f, +0.0f, +0.0f, +0.0f, +0.0f, +0.0f, +0.0f, +0.0f, +0.0f, +0.0f, +0.0f, +0.0f));
__m512 test_mm512_mask_andnot_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
- // CHECK-LABEL: @test_mm512_mask_andnot_ps
+ // CHECK-LABEL: test_mm512_mask_andnot_ps
// CHECK: xor <16 x i32> %{{.*}}, splat (i32 -1)
// CHECK: and <16 x i32> %{{.*}}, %{{.*}}
// CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
@@ -434,7 +437,7 @@ __m512 test_mm512_mask_andnot_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512
}
__m512 test_mm512_maskz_andnot_ps (__mmask16 __U, __m512 __A, __m512 __B) {
- // CHECK-LABEL: @test_mm512_maskz_andnot_ps
+ // CHECK-LABEL: test_mm512_maskz_andnot_ps
// CHECK: xor <16 x i32> %{{.*}}, splat (i32 -1)
// CHECK: and <16 x i32> %{{.*}}, %{{.*}}
// CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
@@ -442,491 +445,491 @@ __m512 test_mm512_maskz_andnot_ps (__mmask16 __U, __m512 __A, __m512 __B) {
}
__m512i test_mm512_cvtpd_epi64(__m512d __A) {
- // CHECK-LABEL: @test_mm512_cvtpd_epi64
+ // CHECK-LABEL: test_mm512_cvtpd_epi64
// CHECK: @llvm.x86.avx512.mask.cvtpd2qq.512
return _mm512_cvtpd_epi64(__A);
}
__m512i test_mm512_mask_cvtpd_epi64(__m512i __W, __mmask8 __U, __m512d __A) {
- // CHECK-LABEL: @test_mm512_mask_cvtpd_epi64
+ // CHECK-LABEL: test_mm512_mask_cvtpd_epi64
// CHECK: @llvm.x86.avx512.mask.cvtpd2qq.512
return _mm512_mask_cvtpd_epi64(__W, __U, __A);
}
__m512i test_mm512_maskz_cvtpd_epi64(__mmask8 __U, __m512d __A) {
- // CHECK-LABEL: @test_mm512_maskz_cvtpd_epi64
+ // CHECK-LABEL: test_mm512_maskz_cvtpd_epi64
// CHECK: @llvm.x86.avx512.mask.cvtpd2qq.512
return _mm512_maskz_cvtpd_epi64(__U, __A);
}
__m512i test_mm512_cvt_roundpd_epi64(__m512d __A) {
- // CHECK-LABEL: @test_mm512_cvt_roundpd_epi64
+ // CHECK-LABEL: test_mm512_cvt_roundpd_epi64
// CHECK: @llvm.x86.avx512.mask.cvtpd2qq.512
return _mm512_cvt_roundpd_epi64(__A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
__m512i test_mm512_mask_cvt_roundpd_epi64(__m512i __W, __mmask8 __U, __m512d __A) {
- // CHECK-LABEL: @test_mm512_mask_cvt_roundpd_epi64
+ // CHECK-LABEL: test_mm512_mask_cvt_roundpd_epi64
// CHECK: @llvm.x86.avx512.mask.cvtpd2qq.512
return _mm512_mask_cvt_roundpd_epi64(__W, __U, __A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
__m512i test_mm512_maskz_cvt_roundpd_epi64(__mmask8 __U, __m512d __A) {
- // CHECK-LABEL: @test_mm512_maskz_cvt_roundpd_epi64
+ // CHECK-LABEL: test_mm512_maskz_cvt_roundpd_epi64
// CHECK: @llvm.x86.avx512.mask.cvtpd2qq.512
return _mm512_maskz_cvt_roundpd_epi64(__U, __A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
__m512i test_mm512_cvtpd_epu64(__m512d __A) {
- // CHECK-LABEL: @test_mm512_cvtpd_epu64
+ // CHECK-LABEL: test_mm512_cvtpd_epu64
// CHECK: @llvm.x86.avx512.mask.cvtpd2uqq.512
return _mm512_cvtpd_epu64(__A);
}
__m512i test_mm512_mask_cvtpd_epu64(__m512i __W, __mmask8 __U, __m512d __A) {
- // CHECK-LABEL: @test_mm512_mask_cvtpd_epu64
+ // CHECK-LABEL: test_mm512_mask_cvtpd_epu64
// CHECK: @llvm.x86.avx512.mask.cvtpd2uqq.512
return _mm512_mask_cvtpd_epu64(__W, __U, __A);
}
__m512i test_mm512_maskz_cvtpd_epu64(__mmask8 __U, __m512d __A) {
- // CHECK-LABEL: @test_mm512_maskz_cvtpd_epu64
+ // CHECK-LABEL: test_mm512_maskz_cvtpd_epu64
// CHECK: @llvm.x86.avx512.mask.cvtpd2uqq.512
return _mm512_maskz_cvtpd_epu64(__U, __A);
}
__m512i test_mm512_cvt_roundpd_epu64(__m512d __A) {
- // CHECK-LABEL: @test_mm512_cvt_roundpd_epu64
+ // CHECK-LABEL: test_mm512_cvt_roundpd_epu64
// CHECK: @llvm.x86.avx512.mask.cvtpd2uqq.512
return _mm512_cvt_roundpd_epu64(__A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
__m512i test_mm512_mask_cvt_roundpd_epu64(__m512i __W, __mmask8 __U, __m512d __A) {
- // CHECK-LABEL: @test_mm512_mask_cvt_roundpd_epu64
+ // CHECK-LABEL: test_mm512_mask_cvt_roundpd_epu64
// CHECK: @llvm.x86.avx512.mask.cvtpd2uqq.512
return _mm512_mask_cvt_roundpd_epu64(__W, __U, __A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
__m512i test_mm512_maskz_cvt_roundpd_epu64(__mmask8 __U, __m512d __A) {
- // CHECK-LABEL: @test_mm512_maskz_cvt_roundpd_epu64
+ // CHECK-LABEL: test_mm512_maskz_cvt_roundpd_epu64
// CHECK: @llvm.x86.avx512.mask.cvtpd2uqq.512
return _mm512_maskz_cvt_roundpd_epu64(__U, __A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
__m512i test_mm512_cvtps_epi64(__m256 __A) {
- // CHECK-LABEL: @test_mm512_cvtps_epi64
+ // CHECK-LABEL: test_mm512_cvtps_epi64
// CHECK: @llvm.x86.avx512.mask.cvtps2qq.512
return _mm512_cvtps_epi64(__A);
}
__m512i test_mm512_mask_cvtps_epi64(__m512i __W, __mmask8 __U, __m256 __A) {
- // CHECK-LABEL: @test_mm512_mask_cvtps_epi64
+ // CHECK-LABEL: test_mm512_mask_cvtps_epi64
// CHECK: @llvm.x86.avx512.mask.cvtps2qq.512
return _mm512_mask_cvtps_epi64(__W, __U, __A);
}
__m512i test_mm512_maskz_cvtps_epi64(__mmask8 __U, __m256 __A) {
- // CHECK-LABEL: @test_mm512_maskz_cvtps_epi64
+ // CHECK-LABEL: test_mm512_maskz_cvtps_epi64
// CHECK: @llvm.x86.avx512.mask.cvtps2qq.512
return _mm512_maskz_cvtps_epi64(__U, __A);
}
__m512i test_mm512_cvt_roundps_epi64(__m256 __A) {
- // CHECK-LABEL: @test_mm512_cvt_roundps_epi64
+ // CHECK-LABEL: test_mm512_cvt_roundps_epi64
// CHECK: @llvm.x86.avx512.mask.cvtps2qq.512
return _mm512_cvt_roundps_epi64(__A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
__m512i test_mm512_mask_cvt_roundps_epi64(__m512i __W, __mmask8 __U, __m256 __A) {
- // CHECK-LABEL: @test_mm512_mask_cvt_roundps_epi64
+ // CHECK-LABEL: test_mm512_mask_cvt_roundps_epi64
// CHECK: @llvm.x86.avx512.mask.cvtps2qq.512
return _mm512_mask_cvt_roundps_epi64(__W, __U, __A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
__m512i test_mm512_maskz_cvt_roundps_epi64(__mmask8 __U, __m256 __A) {
- // CHECK-LABEL: @test_mm512_maskz_cvt_roundps_epi64
+ // CHECK-LABEL: test_mm512_maskz_cvt_roundps_epi64
// CHECK: @llvm.x86.avx512.mask.cvtps2qq.512
return _mm512_maskz_cvt_roundps_epi64(__U, __A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
__m512i test_mm512_cvtps_epu64(__m256 __A) {
- // CHECK-LABEL: @test_mm512_cvtps_epu64
+ // CHECK-LABEL: test_mm512_cvtps_epu64
// CHECK: @llvm.x86.avx512.mask.cvtps2uqq.512
return _mm512_cvtps_epu64(__A);
}
__m512i test_mm512_mask_cvtps_epu64(__m512i __W, __mmask8 __U, __m256 __A) {
- // CHECK-LABEL: @test_mm512_mask_cvtps_epu64
+ // CHECK-LABEL: test_mm512_mask_cvtps_epu64
// CHECK: @llvm.x86.avx512.mask.cvtps2uqq.512
return _mm512_mask_cvtps_epu64(__W, __U, __A);
}
__m512i test_mm512_maskz_cvtps_epu64(__mmask8 __U, __m256 __A) {
- // CHECK-LABEL: @test_mm512_maskz_cvtps_epu64
+ // CHECK-LABEL: test_mm512_maskz_cvtps_epu64
// CHECK: @llvm.x86.avx512.mask.cvtps2uqq.512
return _mm512_maskz_cvtps_epu64(__U, __A);
}
__m512i test_mm512_cvt_roundps_epu64(__m256 __A) {
- // CHECK-LABEL: @test_mm512_cvt_roundps_epu64
+ // CHECK-LABEL: test_mm512_cvt_roundps_epu64
// CHECK: @llvm.x86.avx512.mask.cvtps2uqq.512
return _mm512_cvt_roundps_epu64(__A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
__m512i test_mm512_mask_cvt_roundps_epu64(__m512i __W, __mmask8 __U, __m256 __A) {
- // CHECK-LABEL: @test_mm512_mask_cvt_roundps_epu64
+ // CHECK-LABEL: test_mm512_mask_cvt_roundps_epu64
// CHECK: @llvm.x86.avx512.mask.cvtps2uqq.512
return _mm512_mask_cvt_roundps_epu64(__W, __U, __A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
__m512i test_mm512_maskz_cvt_roundps_epu64(__mmask8 __U, __m256 __A) {
- // CHECK-LABEL: @test_mm512_maskz_cvt_roundps_epu64
+ // CHECK-LABEL: test_mm512_maskz_cvt_roundps_epu64
// CHECK: @llvm.x86.avx512.mask.cvtps2uqq.512
return _mm512_maskz_cvt_roundps_epu64(__U, __A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
__m512d test_mm512_cvtepi64_pd(__m512i __A) {
- // CHECK-LABEL: @test_mm512_cvtepi64_pd
+ // CHECK-LABEL: test_mm512_cvtepi64_pd
// CHECK: sitofp <8 x i64> %{{.*}} to <8 x double>
return _mm512_cvtepi64_pd(__A);
}
__m512d test_mm512_mask_cvtepi64_pd(__m512d __W, __mmask8 __U, __m512i __A) {
- // CHECK-LABEL: @test_mm512_mask_cvtepi64_pd
+ // CHECK-LABEL: test_mm512_mask_cvtepi64_pd
// CHECK: sitofp <8 x i64> %{{.*}} to <8 x double>
// CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
return _mm512_mask_cvtepi64_pd(__W, __U, __A);
}
__m512d test_mm512_maskz_cvtepi64_pd(__mmask8 __U, __m512i __A) {
- // CHECK-LABEL: @test_mm512_maskz_cvtepi64_pd
+ // CHECK-LABEL: test_mm512_maskz_cvtepi64_pd
// CHECK: sitofp <8 x i64> %{{.*}} to <8 x double>
// CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
return _mm512_maskz_cvtepi64_pd(__U, __A);
}
__m512d test_mm512_cvt_roundepi64_pd(__m512i __A) {
- // CHECK-LABEL: @test_mm512_cvt_roundepi64_pd
+ // CHECK-LABEL: test_mm512_cvt_roundepi64_pd
// CHECK: @llvm.x86.avx512.sitofp.round.v8f64.v8i64
return _mm512_cvt_roundepi64_pd(__A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
__m512d test_mm512_mask_cvt_roundepi64_pd(__m512d __W, __mmask8 __U, __m512i __A) {
- // CHECK-LABEL: @test_mm512_mask_cvt_roundepi64_pd
+ // CHECK-LABEL: test_mm512_mask_cvt_roundepi64_pd
// CHECK: @llvm.x86.avx512.sitofp.round.v8f64.v8i64
// CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
return _mm512_mask_cvt_roundepi64_pd(__W, __U, __A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
__m512d test_mm512_maskz_cvt_roundepi64_pd(__mmask8 __U, __m512i __A) {
- // CHECK-LABEL: @test_mm512_maskz_cvt_roundepi64_pd
+ // CHECK-LABEL: test_mm512_maskz_cvt_roundepi64_pd
// CHECK: @llvm.x86.avx512.sitofp.round.v8f64.v8i64
// CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
return _mm512_maskz_cvt_roundepi64_pd(__U, __A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
__m256 test_mm512_cvtepi64_ps(__m512i __A) {
- // CHECK-LABEL: @test_mm512_cvtepi64_ps
+ // CHECK-LABEL: test_mm512_cvtepi64_ps
// CHECK: sitofp <8 x i64> %{{.*}} to <8 x float>
return _mm512_cvtepi64_ps(__A);
}
__m256 test_mm512_mask_cvtepi64_ps(__m256 __W, __mmask8 __U, __m512i __A) {
- // CHECK-LABEL: @test_mm512_mask_cvtepi64_ps
+ // CHECK-LABEL: test_mm512_mask_cvtepi64_ps
// CHECK: sitofp <8 x i64> %{{.*}} to <8 x float>
// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
return _mm512_mask_cvtepi64_ps(__W, __U, __A);
}
__m256 test_mm512_maskz_cvtepi64_ps(__mmask8 __U, __m512i __A) {
- // CHECK-LABEL: @test_mm512_maskz_cvtepi64_ps
+ // CHECK-LABEL: test_mm512_maskz_cvtepi64_ps
// CHECK: sitofp <8 x i64> %{{.*}} to <8 x float>
// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
return _mm512_maskz_cvtepi64_ps(__U, __A);
}
__m256 test_mm512_cvt_roundepi64_ps(__m512i __A) {
- // CHECK-LABEL: @test_mm512_cvt_roundepi64_ps
+ // CHECK-LABEL: test_mm512_cvt_roundepi64_ps
// CHECK: @llvm.x86.avx512.sitofp.round.v8f32.v8i64
return _mm512_cvt_roundepi64_ps(__A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
__m256 test_mm512_mask_cvt_roundepi64_ps(__m256 __W, __mmask8 __U, __m512i __A) {
- // CHECK-LABEL: @test_mm512_mask_cvt_roundepi64_ps
+ // CHECK-LABEL: test_mm512_mask_cvt_roundepi64_ps
// CHECK: @llvm.x86.avx512.sitofp.round.v8f32.v8i64
// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
return _mm512_mask_cvt_roundepi64_ps(__W, __U, __A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
__m256 test_mm512_maskz_cvt_roundepi64_ps(__mmask8 __U, __m512i __A) {
- // CHECK-LABEL: @test_mm512_maskz_cvt_roundepi64_ps
+ // CHECK-LABEL: test_mm512_maskz_cvt_roundepi64_ps
// CHECK: @llvm.x86.avx512.sitofp.round.v8f32.v8i64
// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
return _mm512_maskz_cvt_roundepi64_ps(__U, __A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
__m512i test_mm512_cvttpd_epi64(__m512d __A) {
- // CHECK-LABEL: @test_mm512_cvttpd_epi64
+ // CHECK-LABEL: test_mm512_cvttpd_epi64
// CHECK: @llvm.x86.avx512.mask.cvttpd2qq.512
return _mm512_cvttpd_epi64(__A);
}
__m512i test_mm512_mask_cvttpd_epi64(__m512i __W, __mmask8 __U, __m512d __A) {
- // CHECK-LABEL: @test_mm512_mask_cvttpd_epi64
+ // CHECK-LABEL: test_mm512_mask_cvttpd_epi64
// CHECK: @llvm.x86.avx512.mask.cvttpd2qq.512
return _mm512_mask_cvttpd_epi64(__W, __U, __A);
}
__m512i test_mm512_maskz_cvttpd_epi64(__mmask8 __U, __m512d __A) {
- // CHECK-LABEL: @test_mm512_maskz_cvttpd_epi64
+ // CHECK-LABEL: test_mm512_maskz_cvttpd_epi64
// CHECK: @llvm.x86.avx512.mask.cvttpd2qq.512
return _mm512_maskz_cvttpd_epi64(__U, __A);
}
__m512i test_mm512_cvtt_roundpd_epi64(__m512d __A) {
- // CHECK-LABEL: @test_mm512_cvtt_roundpd_epi64
+ // CHECK-LABEL: test_mm512_cvtt_roundpd_epi64
// CHECK: @llvm.x86.avx512.mask.cvttpd2qq.512
return _mm512_cvtt_roundpd_epi64(__A, _MM_FROUND_NO_EXC);
}
__m512i test_mm512_mask_cvtt_roundpd_epi64(__m512i __W, __mmask8 __U, __m512d __A) {
- // CHECK-LABEL: @test_mm512_mask_cvtt_roundpd_epi64
+ // CHECK-LABEL: test_mm512_mask_cvtt_roundpd_epi64
// CHECK: @llvm.x86.avx512.mask.cvttpd2qq.512
return _mm512_mask_cvtt_roundpd_epi64(__W, __U, __A, _MM_FROUND_NO_EXC);
}
__m512i test_mm512_maskz_cvtt_roundpd_epi64(__mmask8 __U, __m512d __A) {
- // CHECK-LABEL: @test_mm512_maskz_cvtt_roundpd_epi64
+ // CHECK-LABEL: test_mm512_maskz_cvtt_roundpd_epi64
// CHECK: @llvm.x86.avx512.mask.cvttpd2qq.512
return _mm512_maskz_cvtt_roundpd_epi64(__U, __A, _MM_FROUND_NO_EXC);
}
__m512i test_mm512_cvttpd_epu64(__m512d __A) {
- // CHECK-LABEL: @test_mm512_cvttpd_epu64
+ // CHECK-LABEL: test_mm512_cvttpd_epu64
// CHECK: @llvm.x86.avx512.mask.cvttpd2uqq.512
return _mm512_cvttpd_epu64(__A);
}
__m512i test_mm512_mask_cvttpd_epu64(__m512i __W, __mmask8 __U, __m512d __A) {
- // CHECK-LABEL: @test_mm512_mask_cvttpd_epu64
+ // CHECK-LABEL: test_mm512_mask_cvttpd_epu64
// CHECK: @llvm.x86.avx512.mask.cvttpd2uqq.512
return _mm512_mask_cvttpd_epu64(__W, __U, __A);
}
__m512i test_mm512_maskz_cvttpd_epu64(__mmask8 __U, __m512d __A) {
- // CHECK-LABEL: @test_mm512_maskz_cvttpd_epu64
+ // CHECK-LABEL: test_mm512_maskz_cvttpd_epu64
// CHECK: @llvm.x86.avx512.mask.cvttpd2uqq.512
return _mm512_maskz_cvttpd_epu64(__U, __A);
}
__m512i test_mm512_cvtt_roundpd_epu64(__m512d __A) {
- // CHECK-LABEL: @test_mm512_cvtt_roundpd_epu64
+ // CHECK-LABEL: test_mm512_cvtt_roundpd_epu64
// CHECK: @llvm.x86.avx512.mask.cvttpd2uqq.512
return _mm512_cvtt_roundpd_epu64(__A, _MM_FROUND_NO_EXC);
}
__m512i test_mm512_mask_cvtt_roundpd_epu64(__m512i __W, __mmask8 __U, __m512d __A) {
- // CHECK-LABEL: @test_mm512_mask_cvtt_roundpd_epu64
+ // CHECK-LABEL: test_mm512_mask_cvtt_roundpd_epu64
// CHECK: @llvm.x86.avx512.mask.cvttpd2uqq.512
return _mm512_mask_cvtt_roundpd_epu64(__W, __U, __A, _MM_FROUND_NO_EXC);
}
__m512i test_mm512_maskz_cvtt_roundpd_epu64(__mmask8 __U, __m512d __A) {
- // CHECK-LABEL: @test_mm512_maskz_cvtt_roundpd_epu64
+ // CHECK-LABEL: test_mm512_maskz_cvtt_roundpd_epu64
// CHECK: @llvm.x86.avx512.mask.cvttpd2uqq.512
return _mm512_maskz_cvtt_roundpd_epu64(__U, __A, _MM_FROUND_NO_EXC);
}
__m512i test_mm512_cvttps_epi64(__m256 __A) {
- // CHECK-LABEL: @test_mm512_cvttps_epi64
+ // CHECK-LABEL: test_mm512_cvttps_epi64
// CHECK: @llvm.x86.avx512.mask.cvttps2qq.512
return _mm512_cvttps_epi64(__A);
}
__m512i test_mm512_mask_cvttps_epi64(__m512i __W, __mmask8 __U, __m256 __A) {
- // CHECK-LABEL: @test_mm512_mask_cvttps_epi64
+ // CHECK-LABEL: test_mm512_mask_cvttps_epi64
// CHECK: @llvm.x86.avx512.mask.cvttps2qq.512
return _mm512_mask_cvttps_epi64(__W, __U, __A);
}
__m512i test_mm512_maskz_cvttps_epi64(__mmask8 __U, __m256 __A) {
- // CHECK-LABEL: @test_mm512_maskz_cvttps_epi64
+ // CHECK-LABEL: test_mm512_maskz_cvttps_epi64
// CHECK: @llvm.x86.avx512.mask.cvttps2qq.512
return _mm512_maskz_cvttps_epi64(__U, __A);
}
__m512i test_mm512_cvtt_roundps_epi64(__m256 __A) {
- // CHECK-LABEL: @test_mm512_cvtt_roundps_epi64
+ // CHECK-LABEL: test_mm512_cvtt_roundps_epi64
// CHECK: @llvm.x86.avx512.mask.cvttps2qq.512
return _mm512_cvtt_roundps_epi64(__A, _MM_FROUND_NO_EXC);
}
__m512i test_mm512_mask_cvtt_roundps_epi64(__m512i __W, __mmask8 __U, __m256 __A) {
- // CHECK-LABEL: @test_mm512_mask_cvtt_roundps_epi64
+ // CHECK-LABEL: test_mm512_mask_cvtt_roundps_epi64
// CHECK: @llvm.x86.avx512.mask.cvttps2qq.512
return _mm512_mask_cvtt_roundps_epi64(__W, __U, __A, _MM_FROUND_NO_EXC);
}
__m512i test_mm512_maskz_cvtt_roundps_epi64(__mmask8 __U, __m256 __A) {
- // CHECK-LABEL: @test_mm512_maskz_cvtt_roundps_epi64
+ // CHECK-LABEL: test_mm512_maskz_cvtt_roundps_epi64
// CHECK: @llvm.x86.avx512.mask.cvttps2qq.512
return _mm512_maskz_cvtt_roundps_epi64(__U, __A, _MM_FROUND_NO_EXC);
}
__m512i test_mm512_cvttps_epu64(__m256 __A) {
- // CHECK-LABEL: @test_mm512_cvttps_epu64
+ // CHECK-LABEL: test_mm512_cvttps_epu64
// CHECK: @llvm.x86.avx512.mask.cvttps2uqq.512
return _mm512_cvttps_epu64(__A);
}
__m512i test_mm512_mask_cvttps_epu64(__m512i __W, __mmask8 __U, __m256 __A) {
- // CHECK-LABEL: @test_mm512_mask_cvttps_epu64
+ // CHECK-LABEL: test_mm512_mask_cvttps_epu64
// CHECK: @llvm.x86.avx512.mask.cvttps2uqq.512
return _mm512_mask_cvttps_epu64(__W, __U, __A);
}
__m512i test_mm512_maskz_cvttps_epu64(__mmask8 __U, __m256 __A) {
- // CHECK-LABEL: @test_mm512_maskz_cvttps_epu64
+ // CHECK-LABEL: test_mm512_maskz_cvttps_epu64
// CHECK: @llvm.x86.avx512.mask.cvttps2uqq.512
return _mm512_maskz_cvttps_epu64(__U, __A);
}
__m512i test_mm512_cvtt_roundps_epu64(__m256 __A) {
- // CHECK-LABEL: @test_mm512_cvtt_roundps_epu64
+ // CHECK-LABEL: test_mm512_cvtt_roundps_epu64
// CHECK: @llvm.x86.avx512.mask.cvttps2uqq.512
return _mm512_cvtt_roundps_epu64(__A, _MM_FROUND_NO_EXC);
}
__m512i test_mm512_mask_cvtt_roundps_epu64(__m512i __W, __mmask8 __U, __m256 __A) {
- // CHECK-LABEL: @test_mm512_mask_cvtt_roundps_epu64
+ // CHECK-LABEL: test_mm512_mask_cvtt_roundps_epu64
// CHECK: @llvm.x86.avx512.mask.cvttps2uqq.512
return _mm512_mask_cvtt_roundps_epu64(__W, __U, __A, _MM_FROUND_NO_EXC);
}
__m512i test_mm512_maskz_cvtt_roundps_epu64(__mmask8 __U, __m256 __A) {
- // CHECK-LABEL: @test_mm512_maskz_cvtt_roundps_epu64
+ // CHECK-LABEL: test_mm512_maskz_cvtt_roundps_epu64
// CHECK: @llvm.x86.avx512.mask.cvttps2uqq.512
return _mm512_maskz_cvtt_roundps_epu64(__U, __A, _MM_FROUND_NO_EXC);
}
__m512d test_mm512_cvtepu64_pd(__m512i __A) {
- // CHECK-LABEL: @test_mm512_cvtepu64_pd
+ // CHECK-LABEL: test_mm512_cvtepu64_pd
// CHECK: uitofp <8 x i64> %{{.*}} to <8 x double>
return _mm512_cvtepu64_pd(__A);
}
__m512d test_mm512_mask_cvtepu64_pd(__m512d __W, __mmask8 __U, __m512i __A) {
- // CHECK-LABEL: @test_mm512_mask_cvtepu64_pd
+ // CHECK-LABEL: test_mm512_mask_cvtepu64_pd
// CHECK: uitofp <8 x i64> %{{.*}} to <8 x double>
// CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
return _mm512_mask_cvtepu64_pd(__W, __U, __A);
}
__m512d test_mm512_maskz_cvtepu64_pd(__mmask8 __U, __m512i __A) {
- // CHECK-LABEL: @test_mm512_maskz_cvtepu64_pd
+ // CHECK-LABEL: test_mm512_maskz_cvtepu64_pd
// CHECK: uitofp <8 x i64> %{{.*}} to <8 x double>
// CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
return _mm512_maskz_cvtepu64_pd(__U, __A);
}
__m512d test_mm512_cvt_roundepu64_pd(__m512i __A) {
- // CHECK-LABEL: @test_mm512_cvt_roundepu64_pd
+ // CHECK-LABEL: test_mm512_cvt_roundepu64_pd
// CHECK: @llvm.x86.avx512.uitofp.round.v8f64.v8i64
return _mm512_cvt_roundepu64_pd(__A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
__m512d test_mm512_mask_cvt_roundepu64_pd(__m512d __W, __mmask8 __U, __m512i __A) {
- // CHECK-LABEL: @test_mm512_mask_cvt_roundepu64_pd
+ // CHECK-LABEL: test_mm512_mask_cvt_roundepu64_pd
// CHECK: @llvm.x86.avx512.uitofp.round.v8f64.v8i64
// CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
return _mm512_mask_cvt_roundepu64_pd(__W, __U, __A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
__m512d test_mm512_maskz_cvt_roundepu64_pd(__mmask8 __U, __m512i __A) {
- // CHECK-LABEL: @test_mm512_maskz_cvt_roundepu64_pd
+ // CHECK-LABEL: test_mm512_maskz_cvt_roundepu64_pd
// CHECK: @llvm.x86.avx512.uitofp.round.v8f64.v8i64
// CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
return _mm512_maskz_cvt_roundepu64_pd(__U, __A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
__m256 test_mm512_cvtepu64_ps(__m512i __A) {
- // CHECK-LABEL: @test_mm512_cvtepu64_ps
+ // CHECK-LABEL: test_mm512_cvtepu64_ps
// CHECK: uitofp <8 x i64> %{{.*}} to <8 x float>
return _mm512_cvtepu64_ps(__A);
}
__m256 test_mm512_mask_cvtepu64_ps(__m256 __W, __mmask8 __U, __m512i __A) {
- // CHECK-LABEL: @test_mm512_mask_cvtepu64_ps
+ // CHECK-LABEL: test_mm512_mask_cvtepu64_ps
// CHECK: uitofp <8 x i64> %{{.*}} to <8 x float>
// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
return _mm512_mask_cvtepu64_ps(__W, __U, __A);
}
__m256 test_mm512_maskz_cvtepu64_ps(__mmask8 __U, __m512i __A) {
- // CHECK-LABEL: @test_mm512_maskz_cvtepu64_ps
+ // CHECK-LABEL: test_mm512_maskz_cvtepu64_ps
// CHECK: uitofp <8 x i64> %{{.*}} to <8 x float>
// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
return _mm512_maskz_cvtepu64_ps(__U, __A);
}
__m256 test_mm512_cvt_roundepu64_ps(__m512i __A) {
- // CHECK-LABEL: @test_mm512_cvt_roundepu64_ps
+ // CHECK-LABEL: test_mm512_cvt_roundepu64_ps
// CHECK: @llvm.x86.avx512.uitofp.round.v8f32.v8i64
return _mm512_cvt_roundepu64_ps(__A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
__m256 test_mm512_mask_cvt_roundepu64_ps(__m256 __W, __mmask8 __U, __m512i __A) {
- // CHECK-LABEL: @test_mm512_mask_cvt_roundepu64_ps
+ // CHECK-LABEL: test_mm512_mask_cvt_roundepu64_ps
// CHECK: @llvm.x86.avx512.uitofp.round.v8f32.v8i64
// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
return _mm512_mask_cvt_roundepu64_ps(__W, __U, __A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
__m256 test_mm512_maskz_cvt_roundepu64_ps(__mmask8 __U, __m512i __A) {
- // CHECK-LABEL: @test_mm512_maskz_cvt_roundepu64_ps
+ // CHECK-LABEL: test_mm512_maskz_cvt_roundepu64_ps
// CHECK: @llvm.x86.avx512.uitofp.round.v8f32.v8i64
// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
return _mm512_maskz_cvt_roundepu64_ps(__U, __A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
__m512d test_mm512_range_pd(__m512d __A, __m512d __B) {
- // CHECK-LABEL: @test_mm512_range_pd
+ // CHECK-LABEL: test_mm512_range_pd
// CHECK: @llvm.x86.avx512.mask.range.pd.512
return _mm512_range_pd(__A, __B, 4);
}
__m512d test_mm512_mask_range_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
- // CHECK-LABEL: @test_mm512_mask_range_pd
+ // CHECK-LABEL: test_mm512_mask_range_pd
// CHECK: @llvm.x86.avx512.mask.range.pd.512
return _mm512_mask_range_pd(__W, __U, __A, __B, 4);
}
__m512d test_mm512_maskz_range_pd(__mmask8 __U, __m512d __A, __m512d __B) {
- // CHECK-LABEL: @test_mm512_maskz_range_pd
+ // CHECK-LABEL: test_mm512_maskz_range_pd
// CHECK: @llvm.x86.avx512.mask.range.pd.512
return _mm512_maskz_range_pd(__U, __A, __B, 4);
}
__m512d test_mm512_range_round_pd(__m512d __A, __m512d __B) {
- // CHECK-LABEL: @test_mm512_range_round_pd
+ // CHECK-LABEL: test_mm512_range_round_pd
// CHECK: @llvm.x86.avx512.mask.range.pd.512
return _mm512_range_round_pd(__A, __B, 4, 8);
}
__m512d test_mm512_mask_range_round_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
- // CHECK-LABEL: @test_mm512_mask_range_round_pd
+ // CHECK-LABEL: test_mm512_mask_range_round_pd
// CHECK: @llvm.x86.avx512.mask.range.pd.512
return _mm512_mask_range_round_pd(__W, __U, __A, __B, 4, 8);
}
__m512d test_mm512_maskz_range_round_pd(__mmask8 __U, __m512d __A, __m512d __B) {
- // CHECK-LABEL: @test_mm512_maskz_range_round_pd
+ // CHECK-LABEL: test_mm512_maskz_range_round_pd
// CHECK: @llvm.x86.avx512.mask.range.pd.512
return _mm512_maskz_range_round_pd(__U, __A, __B, 4, 8);
}
__m128d test_mm512_range_round_sd(__m128d __A, __m128d __B) {
- // CHECK-LABEL: @test_mm512_range_round_sd
+ // CHECK-LABEL: test_mm512_range_round_sd
// CHECK: @llvm.x86.avx512.mask.range.sd
return _mm_range_round_sd(__A, __B, 4, 8);
}
@@ -938,31 +941,31 @@ __m128d test_mm512_mask_range_round_sd(__m128d __W, __mmask8 __U, __m128d __A, _
}
__m128d test_mm512_maskz_range_round_sd(__mmask8 __U, __m128d __A, __m128d __B) {
- // CHECK-LABEL: @test_mm512_maskz_range_round_sd
+ // CHECK-LABEL: test_mm512_maskz_range_round_sd
// CHECK: @llvm.x86.avx512.mask.range.sd
return _mm_maskz_range_round_sd(__U, __A, __B, 4, 8);
}
__m128 test_mm512_range_round_ss(__m128 __A, __m128 __B) {
- // CHECK-LABEL: @test_mm512_range_round_ss
+ // CHECK-LABEL: test_mm512_range_round_ss
// CHECK: @llvm.x86.avx512.mask.range.ss
return _mm_range_round_ss(__A, __B, 4, 8);
}
__m128 test_mm512_mask_range_round_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
- // CHECK-LABEL: @test_mm512_mask_range_round_ss
+ // CHECK-LABEL: test_mm512_mask_range_round_ss
// CHECK: @llvm.x86.avx512.mask.range.ss
return _mm_mask_range_round_ss(__W, __U, __A, __B, 4, 8);
}
__m128 test_mm512_maskz_range_round_ss(__mmask8 __U, __m128 __A, __m128 __B) {
- // CHECK-LABEL: @test_mm512_maskz_range_round_ss
+ // CHECK-LABEL: test_mm512_maskz_range_round_ss
// CHECK: @llvm.x86.avx512.mask.range.ss
return _mm_maskz_range_round_ss(__U, __A, __B, 4, 8);
}
__m128d test_mm_range_sd(__m128d __A, __m128d __B) {
- // CHECK-LABEL: @test_mm_range_sd
+ // CHECK-LABEL: test_mm_range_sd
// CHECK: @llvm.x86.avx512.mask.range.sd
return _mm_range_sd(__A, __B, 4);
}
@@ -974,558 +977,558 @@ __m128d test_mm_mask_range_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __
}
__m128d test_mm_maskz_range_sd(__mmask8 __U, __m128d __A, __m128d __B) {
- // CHECK-LABEL: @test_mm_maskz_range_sd
+ // CHECK-LABEL: test_mm_maskz_range_sd
// CHECK: @llvm.x86.avx512.mask.range.sd
return _mm_maskz_range_sd(__U, __A, __B, 4);
}
__m128 test_mm_range_ss(__m128 __A, __m128 __B) {
- // CHECK-LABEL: @test_mm_range_ss
+ // CHECK-LABEL: test_mm_range_ss
// CHECK: @llvm.x86.avx512.mask.range.ss
return _mm_range_ss(__A, __B, 4);
}
__m128 test_mm_mask_range_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
- // CHECK-LABEL: @test_mm_mask_range_ss
+ // CHECK-LABEL: test_mm_mask_range_ss
// CHECK: @llvm.x86.avx512.mask.range.ss
return _mm_mask_range_ss(__W, __U, __A, __B, 4);
}
__m128 test_mm_maskz_range_ss(__mmask8 __U, __m128 __A, __m128 __B) {
- // CHECK-LABEL: @test_mm_maskz_range_ss
+ // CHECK-LABEL: test_mm_maskz_range_ss
// CHECK: @llvm.x86.avx512.mask.range.ss
return _mm_maskz_range_ss(__U, __A, __B, 4);
}
__m512 test_mm512_range_ps(__m512 __A, __m512 __B) {
- // CHECK-LABEL: @test_mm512_range_ps
+ // CHECK-LABEL: test_mm512_range_ps
// CHECK: @llvm.x86.avx512.mask.range.ps.512
return _mm512_range_ps(__A, __B, 4);
}
__m512 test_mm512_mask_range_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
- // CHECK-LABEL: @test_mm512_mask_range_ps
+ // CHECK-LABEL: test_mm512_mask_range_ps
// CHECK: @llvm.x86.avx512.mask.range.ps.512
return _mm512_mask_range_ps(__W, __U, __A, __B, 4);
}
__m512 test_mm512_maskz_range_ps(__mmask16 __U, __m512 __A, __m512 __B) {
- // CHECK-LABEL: @test_mm512_maskz_range_ps
+ // CHECK-LABEL: test_mm512_maskz_range_ps
// CHECK: @llvm.x86.avx512.mask.range.ps.512
return _mm512_maskz_range_ps(__U, __A, __B, 4);
}
__m512 test_mm512_range_round_ps(__m512 __A, __m512 __B) {
- // CHECK-LABEL: @test_mm512_range_round_ps
+ // CHECK-LABEL: test_mm512_range_round_ps
// CHECK: @llvm.x86.avx512.mask.range.ps.512
return _mm512_range_round_ps(__A, __B, 4, 8);
}
__m512 test_mm512_mask_range_round_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
- // CHECK-LABEL: @test_mm512_mask_range_round_ps
+ // CHECK-LABEL: test_mm512_mask_range_round_ps
// CHECK: @llvm.x86.avx512.mask.range.ps.512
return _mm512_mask_range_round_ps(__W, __U, __A, __B, 4, 8);
}
__m512 test_mm512_maskz_range_round_ps(__mmask16 __U, __m512 __A, __m512 __B) {
- // CHECK-LABEL: @test_mm512_maskz_range_round_ps
+ // CHECK-LABEL: test_mm512_maskz_range_round_ps
// CHECK: @llvm.x86.avx512.mask.range.ps.512
return _mm512_maskz_range_round_ps(__U, __A, __B, 4, 8);
}
__m512d test_mm512_reduce_pd(__m512d __A) {
- // CHECK-LABEL: @test_mm512_reduce_pd
+ // CHECK-LABEL: test_mm512_reduce_pd
// CHECK: @llvm.x86.avx512.mask.reduce.pd.512
return _mm512_reduce_pd(__A, 4);
}
__m512d test_mm512_mask_reduce_pd(__m512d __W, __mmask8 __U, __m512d __A) {
- // CHECK-LABEL: @test_mm512_mask_reduce_pd
+ // CHECK-LABEL: test_mm512_mask_reduce_pd
// CHECK: @llvm.x86.avx512.mask.reduce.pd.512
return _mm512_mask_reduce_pd(__W, __U, __A, 4);
}
__m512d test_mm512_maskz_reduce_pd(__mmask8 __U, __m512d __A) {
- // CHECK-LABEL: @test_mm512_maskz_reduce_pd
+ // CHECK-LABEL: test_mm512_maskz_reduce_pd
// CHECK: @llvm.x86.avx512.mask.reduce.pd.512
return _mm512_maskz_reduce_pd(__U, __A, 4);
}
__m512 test_mm512_reduce_ps(__m512 __A) {
- // CHECK-LABEL: @test_mm512_reduce_ps
+ // CHECK-LABEL: test_mm512_reduce_ps
// CHECK: @llvm.x86.avx512.mask.reduce.ps.512
return _mm512_reduce_ps(__A, 4);
}
__m512 test_mm512_mask_reduce_ps(__m512 __W, __mmask16 __U, __m512 __A) {
- // CHECK-LABEL: @test_mm512_mask_reduce_ps
+ // CHECK-LABEL: test_mm512_mask_reduce_ps
// CHECK: @llvm.x86.avx512.mask.reduce.ps.512
return _mm512_mask_reduce_ps(__W, __U, __A, 4);
}
__m512 test_mm512_maskz_reduce_ps(__mmask16 __U, __m512 __A) {
- // CHECK-LABEL: @test_mm512_maskz_reduce_ps
+ // CHECK-LABEL: test_mm512_maskz_reduce_ps
// CHECK: @llvm.x86.avx512.mask.reduce.ps.512
return _mm512_maskz_reduce_ps(__U, __A, 4);
}
__m512d test_mm512_reduce_round_pd(__m512d __A) {
- // CHECK-LABEL: @test_mm512_reduce_round_pd
+ // CHECK-LABEL: test_mm512_reduce_round_pd
// CHECK: @llvm.x86.avx512.mask.reduce.pd.512
return _mm512_reduce_round_pd(__A, 4, 8);
}
__m512d test_mm512_mask_reduce_round_pd(__m512d __W, __mmask8 __U, __m512d __A) {
- // CHECK-LABEL: @test_mm512_mask_reduce_round_pd
+ // CHECK-LABEL: test_mm512_mask_reduce_round_pd
// CHECK: @llvm.x86.avx512.mask.reduce.pd.512
return _mm512_mask_reduce_round_pd(__W, __U, __A, 4, 8);
}
__m512d test_mm512_maskz_reduce_round_pd(__mmask8 __U, __m512d __A) {
- // CHECK-LABEL: @test_mm512_maskz_reduce_round_pd
+ // CHECK-LABEL: test_mm512_maskz_reduce_round_pd
// CHECK: @llvm.x86.avx512.mask.reduce.pd.512
return _mm512_maskz_reduce_round_pd(__U, __A, 4, 8);
}
__m512 test_mm512_reduce_round_ps(__m512 __A) {
- // CHECK-LABEL: @test_mm512_reduce_round_ps
+ // CHECK-LABEL: test_mm512_reduce_round_ps
// CHECK: @llvm.x86.avx512.mask.reduce.ps.512
return _mm512_reduce_round_ps(__A, 4, 8);
}
__m512 test_mm512_mask_reduce_round_ps(__m512 __W, __mmask16 __U, __m512 __A) {
- // CHECK-LABEL: @test_mm512_mask_reduce_round_ps
+ // CHECK-LABEL: test_mm512_mask_reduce_round_ps
// CHECK: @llvm.x86.avx512.mask.reduce.ps.512
return _mm512_mask_reduce_round_ps(__W, __U, __A, 4, 8);
}
__m512 test_mm512_maskz_reduce_round_ps(__mmask16 __U, __m512 __A) {
- // CHECK-LABEL: @test_mm512_maskz_reduce_round_ps
+ // CHECK-LABEL: test_mm512_maskz_reduce_round_ps
// CHECK: @llvm.x86.avx512.mask.reduce.ps.512
return _mm512_maskz_reduce_round_ps(__U, __A, 4, 8);
}
__m128 test_mm_reduce_ss(__m128 __A, __m128 __B) {
- // CHECK-LABEL: @test_mm_reduce_ss
+ // CHECK-LABEL: test_mm_reduce_ss
// CHECK: @llvm.x86.avx512.mask.reduce.ss
return _mm_reduce_ss(__A, __B, 4);
}
__m128 test_mm_mask_reduce_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
- // CHECK-LABEL: @test_mm_mask_reduce_ss
+ // CHECK-LABEL: test_mm_mask_reduce_ss
// CHECK: @llvm.x86.avx512.mask.reduce.ss
return _mm_mask_reduce_ss(__W, __U, __A, __B, 4);
}
__m128 test_mm_maskz_reduce_ss(__mmask8 __U, __m128 __A, __m128 __B) {
- // CHECK-LABEL: @test_mm_maskz_reduce_ss
+ // CHECK-LABEL: test_mm_maskz_reduce_ss
// CHECK: @llvm.x86.avx512.mask.reduce.ss
return _mm_maskz_reduce_ss(__U, __A, __B, 4);
}
__m128 test_mm_reduce_round_ss(__m128 __A, __m128 __B) {
- // CHECK-LABEL: @test_mm_reduce_round_ss
+ // CHECK-LABEL: test_mm_reduce_round_ss
// CHECK: @llvm.x86.avx512.mask.reduce.ss
return _mm_reduce_round_ss(__A, __B, 4, 8);
}
__m128 test_mm_mask_reduce_round_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
- // CHECK-LABEL: @test_mm_mask_reduce_round_ss
+ // CHECK-LABEL: test_mm_mask_reduce_round_ss
// CHECK: @llvm.x86.avx512.mask.reduce.ss
return _mm_mask_reduce_round_ss(__W, __U, __A, __B, 4, 8);
}
__m128 test_mm_maskz_reduce_round_ss(__mmask8 __U, __m128 __A, __m128 __B) {
- // CHECK-LABEL: @test_mm_maskz_reduce_round_ss
+ // CHECK-LABEL: test_mm_maskz_reduce_round_ss
// CHECK: @llvm.x86.avx512.mask.reduce.ss
return _mm_maskz_reduce_round_ss(__U, __A, __B, 4, 8);
}
__m128d test_mm_reduce_sd(__m128d __A, __m128d __B) {
- // CHECK-LABEL: @test_mm_reduce_sd
+ // CHECK-LABEL: test_mm_reduce_sd
// CHECK: @llvm.x86.avx512.mask.reduce.sd
return _mm_reduce_sd(__A, __B, 4);
}
__m128d test_mm_mask_reduce_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
- // CHECK-LABEL: @test_mm_mask_reduce_sd
+ // CHECK-LABEL: test_mm_mask_reduce_sd
// CHECK: @llvm.x86.avx512.mask.reduce.sd
return _mm_mask_reduce_sd(__W, __U, __A, __B, 4);
}
__m128d test_mm_maskz_reduce_sd(__mmask8 __U, __m128d __A, __m128d __B) {
- // CHECK-LABEL: @test_mm_maskz_reduce_sd
+ // CHECK-LABEL: test_mm_maskz_reduce_sd
// CHECK: @llvm.x86.avx512.mask.reduce.sd
return _mm_maskz_reduce_sd(__U, __A, __B, 4);
}
__m128d test_mm_reduce_round_sd(__m128d __A, __m128d __B) {
- // CHECK-LABEL: @test_mm_reduce_round_sd
+ // CHECK-LABEL: test_mm_reduce_round_sd
// CHECK: @llvm.x86.avx512.mask.reduce.sd
return _mm_reduce_round_sd(__A, __B, 4, 8);
}
__m128d test_mm_mask_reduce_round_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
- // CHECK-LABEL: @test_mm_mask_reduce_round_sd
+ // CHECK-LABEL: test_mm_mask_reduce_round_sd
// CHECK: @llvm.x86.avx512.mask.reduce.sd
return _mm_mask_reduce_round_sd(__W, __U, __A, __B, 4, 8);
}
__m128d test_mm_maskz_reduce_round_sd(__mmask8 __U, __m128d __A, __m128d __B) {
- // CHECK-LABEL: @test_mm_maskz_reduce_round_sd
+ // CHECK-LABEL: test_mm_maskz_reduce_round_sd
// CHECK: @llvm.x86.avx512.mask.reduce.sd
return _mm_maskz_reduce_round_sd(__U, __A, __B, 4, 8);
}
__mmask16 test_mm512_movepi32_mask(__m512i __A) {
- // CHECK-LABEL: @test_mm512_movepi32_mask
+ // CHECK-LABEL: test_mm512_movepi32_mask
// CHECK: [[CMP:%.*]] = icmp slt <16 x i32> %{{.*}}, zeroinitializer
return _mm512_movepi32_mask(__A);
}
__m512i test_mm512_movm_epi32(__mmask16 __A) {
- // CHECK-LABEL: @test_mm512_movm_epi32
+ // CHECK-LABEL: test_mm512_movm_epi32
// CHECK: %{{.*}} = bitcast i16 %{{.*}} to <16 x i1>
// CHECK: %vpmovm2.i = sext <16 x i1> %{{.*}} to <16 x i32>
return _mm512_movm_epi32(__A);
}
__m512i test_mm512_movm_epi64(__mmask8 __A) {
- // CHECK-LABEL: @test_mm512_movm_epi64
+ // CHECK-LABEL: test_mm512_movm_epi64
// CHECK: %{{.*}} = bitcast i8 %{{.*}} to <8 x i1>
// CHECK: %vpmovm2.i = sext <8 x i1> %{{.*}} to <8 x i64>
return _mm512_movm_epi64(__A);
}
__mmask8 test_mm512_movepi64_mask(__m512i __A) {
- // CHECK-LABEL: @test_mm512_movepi64_mask
+ // CHECK-LABEL: test_mm512_movepi64_mask
// CHECK: [[CMP:%.*]] = icmp slt <8 x i64> %{{.*}}, zeroinitializer
return _mm512_movepi64_mask(__A);
}
__m512 test_mm512_broadcast_f32x2(__m128 __A) {
- // CHECK-LABEL: @test_mm512_broadcast_f32x2
+ // CHECK-LABEL: test_mm512_broadcast_f32x2
// CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
return _mm512_broadcast_f32x2(__A);
}
__m512 test_mm512_mask_broadcast_f32x2(__m512 __O, __mmask16 __M, __m128 __A) {
- // CHECK-LABEL: @test_mm512_mask_broadcast_f32x2
+ // CHECK-LABEL: test_mm512_mask_broadcast_f32x2
// CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
// CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
return _mm512_mask_broadcast_f32x2(__O, __M, __A);
}
__m512 test_mm512_maskz_broadcast_f32x2(__mmask16 __M, __m128 __A) {
- // CHECK-LABEL: @test_mm512_maskz_broadcast_f32x2
+ // CHECK-LABEL: test_mm512_maskz_broadcast_f32x2
// CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
// CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
return _mm512_maskz_broadcast_f32x2(__M, __A);
}
__m512 test_mm512_broadcast_f32x8(float const* __A) {
- // CHECK-LABEL: @test_mm512_broadcast_f32x8
+ // CHECK-LABEL: test_mm512_broadcast_f32x8
// CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
return _mm512_broadcast_f32x8(_mm256_loadu_ps(__A));
}
__m512 test_mm512_mask_broadcast_f32x8(__m512 __O, __mmask16 __M, float const* __A) {
- // CHECK-LABEL: @test_mm512_mask_broadcast_f32x8
+ // CHECK-LABEL: test_mm512_mask_broadcast_f32x8
// CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
return _mm512_mask_broadcast_f32x8(__O, __M, _mm256_loadu_ps(__A));
}
__m512 test_mm512_maskz_broadcast_f32x8(__mmask16 __M, float const* __A) {
- // CHECK-LABEL: @test_mm512_maskz_broadcast_f32x8
+ // CHECK-LABEL: test_mm512_maskz_broadcast_f32x8
// CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
return _mm512_maskz_broadcast_f32x8(__M, _mm256_loadu_ps(__A));
}
__m512d test_mm512_broadcast_f64x2(double const* __A) {
- // CHECK-LABEL: @test_mm512_broadcast_f64x2
+ // CHECK-LABEL: test_mm512_broadcast_f64x2
// CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
return _mm512_broadcast_f64x2(_mm_loadu_pd(__A));
}
__m512d test_mm512_mask_broadcast_f64x2(__m512d __O, __mmask8 __M, double const* __A) {
- // CHECK-LABEL: @test_mm512_mask_broadcast_f64x2
+ // CHECK-LABEL: test_mm512_mask_broadcast_f64x2
// CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
// CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
return _mm512_mask_broadcast_f64x2(__O, __M, _mm_loadu_pd(__A));
}
__m512d test_mm512_maskz_broadcast_f64x2(__mmask8 __M, double const* __A) {
- // CHECK-LABEL: @test_mm512_maskz_broadcast_f64x2
+ // CHECK-LABEL: test_mm512_maskz_broadcast_f64x2
// CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
// CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
return _mm512_maskz_broadcast_f64x2(__M, _mm_loadu_pd(__A));
}
__m512i test_mm512_broadcast_i32x2(__m128i __A) {
- // CHECK-LABEL: @test_mm512_broadcast_i32x2
+ // CHECK-LABEL: test_mm512_broadcast_i32x2
// CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
return _mm512_broadcast_i32x2(__A);
}
__m512i test_mm512_mask_broadcast_i32x2(__m512i __O, __mmask16 __M, __m128i __A) {
- // CHECK-LABEL: @test_mm512_mask_broadcast_i32x2
+ // CHECK-LABEL: test_mm512_mask_broadcast_i32x2
// CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
// CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
return _mm512_mask_broadcast_i32x2(__O, __M, __A);
}
__m512i test_mm512_maskz_broadcast_i32x2(__mmask16 __M, __m128i __A) {
- // CHECK-LABEL: @test_mm512_maskz_broadcast_i32x2
+ // CHECK-LABEL: test_mm512_maskz_broadcast_i32x2
// CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
// CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
return _mm512_maskz_broadcast_i32x2(__M, __A);
}
__m512i test_mm512_broadcast_i32x8(__m256i const* __A) {
- // CHECK-LABEL: @test_mm512_broadcast_i32x8
+ // CHECK-LABEL: test_mm512_broadcast_i32x8
// CHECK: shufflevector <8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
return _mm512_broadcast_i32x8(_mm256_loadu_si256(__A));
}
__m512i test_mm512_mask_broadcast_i32x8(__m512i __O, __mmask16 __M, __m256i const* __A) {
- // CHECK-LABEL: @test_mm512_mask_broadcast_i32x8
+ // CHECK-LABEL: test_mm512_mask_broadcast_i32x8
// CHECK: shufflevector <8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
return _mm512_mask_broadcast_i32x8(__O, __M, _mm256_loadu_si256(__A));
}
__m512i test_mm512_maskz_broadcast_i32x8(__mmask16 __M, __m256i const* __A) {
- // CHECK-LABEL: @test_mm512_maskz_broadcast_i32x8
+ // CHECK-LABEL: test_mm512_maskz_broadcast_i32x8
// CHECK: shufflevector <8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
return _mm512_maskz_broadcast_i32x8(__M, _mm256_loadu_si256(__A));
}
__m512i test_mm512_broadcast_i64x2(__m128i const* __A) {
- // CHECK-LABEL: @test_mm512_broadcast_i64x2
+ // CHECK-LABEL: test_mm512_broadcast_i64x2
// CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
return _mm512_broadcast_i64x2(_mm_loadu_si128(__A));
}
__m512i test_mm512_mask_broadcast_i64x2(__m512i __O, __mmask8 __M, __m128i const* __A) {
- // CHECK-LABEL: @test_mm512_mask_broadcast_i64x2
+ // CHECK-LABEL: test_mm512_mask_broadcast_i64x2
// CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
// CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
return _mm512_mask_broadcast_i64x2(__O, __M, _mm_loadu_si128(__A));
}
__m512i test_mm512_maskz_broadcast_i64x2(__mmask8 __M, __m128i const* __A) {
- // CHECK-LABEL: @test_mm512_maskz_broadcast_i64x2
+ // CHECK-LABEL: test_mm512_maskz_broadcast_i64x2
// CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
// CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
return _mm512_maskz_broadcast_i64x2(__M, _mm_loadu_si128(__A));
}
__m256 test_mm512_extractf32x8_ps(__m512 __A) {
- // CHECK-LABEL: @test_mm512_extractf32x8_ps
+ // CHECK-LABEL: test_mm512_extractf32x8_ps
// CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
return _mm512_extractf32x8_ps(__A, 1);
}
__m256 test_mm512_mask_extractf32x8_ps(__m256 __W, __mmask8 __U, __m512 __A) {
- // CHECK-LABEL: @test_mm512_mask_extractf32x8_ps
+ // CHECK-LABEL: test_mm512_mask_extractf32x8_ps
// CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
return _mm512_mask_extractf32x8_ps(__W, __U, __A, 1);
}
__m256 test_mm512_maskz_extractf32x8_ps(__mmask8 __U, __m512 __A) {
- // CHECK-LABEL: @test_mm512_maskz_extractf32x8_ps
+ // CHECK-LABEL: test_mm512_maskz_extractf32x8_ps
// CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
return _mm512_maskz_extractf32x8_ps(__U, __A, 1);
}
__m128d test_mm512_extractf64x2_pd(__m512d __A) {
- // CHECK-LABEL: @test_mm512_extractf64x2_pd
+ // CHECK-LABEL: test_mm512_extractf64x2_pd
// CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> poison, <2 x i32> <i32 6, i32 7>
return _mm512_extractf64x2_pd(__A, 3);
}
__m128d test_mm512_mask_extractf64x2_pd(__m128d __W, __mmask8 __U, __m512d __A) {
- // CHECK-LABEL: @test_mm512_mask_extractf64x2_pd
+ // CHECK-LABEL: test_mm512_mask_extractf64x2_pd
// CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> poison, <2 x i32> <i32 6, i32 7>
// CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}
return _mm512_mask_extractf64x2_pd(__W, __U, __A, 3);
}
__m128d test_mm512_maskz_extractf64x2_pd(__mmask8 __U, __m512d __A) {
- // CHECK-LABEL: @test_mm512_maskz_extractf64x2_pd
+ // CHECK-LABEL: test_mm512_maskz_extractf64x2_pd
// CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> poison, <2 x i32> <i32 6, i32 7>
// CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}
return _mm512_maskz_extractf64x2_pd(__U, __A, 3);
}
__m256i test_mm512_extracti32x8_epi32(__m512i __A) {
- // CHECK-LABEL: @test_mm512_extracti32x8_epi32
+ // CHECK-LABEL: test_mm512_extracti32x8_epi32
// CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
return _mm512_extracti32x8_epi32(__A, 1);
}
__m256i test_mm512_mask_extracti32x8_epi32(__m256i __W, __mmask8 __U, __m512i __A) {
- // CHECK-LABEL: @test_mm512_mask_extracti32x8_epi32
+ // CHECK-LABEL: test_mm512_mask_extracti32x8_epi32
// CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
return _mm512_mask_extracti32x8_epi32(__W, __U, __A, 1);
}
__m256i test_mm512_maskz_extracti32x8_epi32(__mmask8 __U, __m512i __A) {
- // CHECK-LABEL: @test_mm512_maskz_extracti32x8_epi32
+ // CHECK-LABEL: test_mm512_maskz_extracti32x8_epi32
// CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
return _mm512_maskz_extracti32x8_epi32(__U, __A, 1);
}
__m128i test_mm512_extracti64x2_epi64(__m512i __A) {
- // CHECK-LABEL: @test_mm512_extracti64x2_epi64
+ // CHECK-LABEL: test_mm512_extracti64x2_epi64
// CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> poison, <2 x i32> <i32 6, i32 7>
return _mm512_extracti64x2_epi64(__A, 3);
}
__m128i test_mm512_mask_extracti64x2_epi64(__m128i __W, __mmask8 __U, __m512i __A) {
- // CHECK-LABEL: @test_mm512_mask_extracti64x2_epi64
+ // CHECK-LABEL: test_mm512_mask_extracti64x2_epi64
// CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> poison, <2 x i32> <i32 6, i32 7>
// CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}
return _mm512_mask_extracti64x2_epi64(__W, __U, __A, 3);
}
__m128i test_mm512_maskz_extracti64x2_epi64(__mmask8 __U, __m512i __A) {
- // CHECK-LABEL: @test_mm512_maskz_extracti64x2_epi64
+ // CHECK-LABEL: test_mm512_maskz_extracti64x2_epi64
// CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> poison, <2 x i32> <i32 6, i32 7>
// CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}
return _mm512_maskz_extracti64x2_epi64(__U, __A, 3);
}
__m512 test_mm512_insertf32x8(__m512 __A, __m256 __B) {
- // CHECK-LABEL: @test_mm512_insertf32x8
+ // CHECK-LABEL: test_mm512_insertf32x8
// CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
return _mm512_insertf32x8(__A, __B, 1);
}
__m512 test_mm512_mask_insertf32x8(__m512 __W, __mmask16 __U, __m512 __A, __m256 __B) {
- // CHECK-LABEL: @test_mm512_mask_insertf32x8
+ // CHECK-LABEL: test_mm512_mask_insertf32x8
// CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
// CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
return _mm512_mask_insertf32x8(__W, __U, __A, __B, 1);
}
__m512 test_mm512_maskz_insertf32x8(__mmask16 __U, __m512 __A, __m256 __B) {
- // CHECK-LABEL: @test_mm512_maskz_insertf32x8
+ // CHECK-LABEL: test_mm512_maskz_insertf32x8
// CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
// CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
return _mm512_maskz_insertf32x8(__U, __A, __B, 1);
}
__m512d test_mm512_insertf64x2(__m512d __A, __m128d __B) {
- // CHECK-LABEL: @test_mm512_insertf64x2
+ // CHECK-LABEL: test_mm512_insertf64x2
// CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 9>
return _mm512_insertf64x2(__A, __B, 3);
}
__m512d test_mm512_mask_insertf64x2(__m512d __W, __mmask8 __U, __m512d __A, __m128d __B) {
- // CHECK-LABEL: @test_mm512_mask_insertf64x2
+ // CHECK-LABEL: test_mm512_mask_insertf64x2
// CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 9>
// CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
return _mm512_mask_insertf64x2(__W, __U, __A, __B, 3);
}
__m512d test_mm512_maskz_insertf64x2(__mmask8 __U, __m512d __A, __m128d __B) {
- // CHECK-LABEL: @test_mm512_maskz_insertf64x2
+ // CHECK-LABEL: test_mm512_maskz_insertf64x2
// CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 9>
// CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
return _mm512_maskz_insertf64x2(__U, __A, __B, 3);
}
__m512i test_mm512_inserti32x8(__m512i __A, __m256i __B) {
- // CHECK-LABEL: @test_mm512_inserti32x8
+ // CHECK-LABEL: test_mm512_inserti32x8
// CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
return _mm512_inserti32x8(__A, __B, 1);
}
__m512i test_mm512_mask_inserti32x8(__m512i __W, __mmask16 __U, __m512i __A, __m256i __B) {
- // CHECK-LABEL: @test_mm512_mask_inserti32x8
+ // CHECK-LABEL: test_mm512_mask_inserti32x8
// CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
// CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
return _mm512_mask_inserti32x8(__W, __U, __A, __B, 1);
}
__m512i test_mm512_maskz_inserti32x8(__mmask16 __U, __m512i __A, __m256i __B) {
- // CHECK-LABEL: @test_mm512_maskz_inserti32x8
+ // CHECK-LABEL: test_mm512_maskz_inserti32x8
// CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
// CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
return _mm512_maskz_inserti32x8(__U, __A, __B, 1);
}
__m512i test_mm512_inserti64x2(__m512i __A, __m128i __B) {
- // CHECK-LABEL: @test_mm512_inserti64x2
+ // CHECK-LABEL: test_mm512_inserti64x2
// CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 4, i32 5, i32 6, i32 7>
return _mm512_inserti64x2(__A, __B, 1);
}
__m512i test_mm512_mask_inserti64x2(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B) {
- // CHECK-LABEL: @test_mm512_mask_inserti64x2
+ // CHECK-LABEL: test_mm512_mask_inserti64x2
// CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 4, i32 5, i32 6, i32 7>
// CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
return _mm512_mask_inserti64x2(__W, __U, __A, __B, 1);
}
__m512i test_mm512_maskz_inserti64x2(__mmask8 __U, __m512i __A, __m128i __B) {
- // CHECK-LABEL: @test_mm512_maskz_inserti64x2
+ // CHECK-LABEL: test_mm512_maskz_inserti64x2
// CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 4, i32 5, i32 6, i32 7>
// CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
return _mm512_maskz_inserti64x2(__U, __A, __B, 1);
}
__mmask8 test_mm512_mask_fpclass_pd_mask(__mmask8 __U, __m512d __A) {
- // CHECK-LABEL: @test_mm512_mask_fpclass_pd_mask
+ // CHECK-LABEL: test_mm512_mask_fpclass_pd_mask
// CHECK: @llvm.x86.avx512.fpclass.pd.512
return _mm512_mask_fpclass_pd_mask(__U, __A, 4);
}
__mmask8 test_mm512_fpclass_pd_mask(__m512d __A) {
- // CHECK-LABEL: @test_mm512_fpclass_pd_mask
+ // CHECK-LABEL: test_mm512_fpclass_pd_mask
// CHECK: @llvm.x86.avx512.fpclass.pd.512
return _mm512_fpclass_pd_mask(__A, 4);
}
__mmask16 test_mm512_mask_fpclass_ps_mask(__mmask16 __U, __m512 __A) {
- // CHECK-LABEL: @test_mm512_mask_fpclass_ps_mask
+ // CHECK-LABEL: test_mm512_mask_fpclass_ps_mask
// CHECK: @llvm.x86.avx512.fpclass.ps.512
return _mm512_mask_fpclass_ps_mask(__U, __A, 4);
}
__mmask16 test_mm512_fpclass_ps_mask(__m512 __A) {
- // CHECK-LABEL: @test_mm512_fpclass_ps_mask
+ // CHECK-LABEL: test_mm512_fpclass_ps_mask
// CHECK: @llvm.x86.avx512.fpclass.ps.512
return _mm512_fpclass_ps_mask(__A, 4);
}
__mmask8 test_mm_fpclass_sd_mask(__m128d __A) {
- // CHECK-LABEL: @test_mm_fpclass_sd_mask
+ // CHECK-LABEL: test_mm_fpclass_sd_mask
// CHECK: @llvm.x86.avx512.mask.fpclass.sd
return _mm_fpclass_sd_mask (__A, 2);
}
__mmask8 test_mm_mask_fpclass_sd_mask(__mmask8 __U, __m128d __A) {
- // CHECK-LABEL: @test_mm_mask_fpclass_sd_mask
+ // CHECK-LABEL: test_mm_mask_fpclass_sd_mask
// CHECK: @llvm.x86.avx512.mask.fpclass.sd
return _mm_mask_fpclass_sd_mask (__U, __A, 2);
}
__mmask8 test_mm_fpclass_ss_mask(__m128 __A) {
- // CHECK-LABEL: @test_mm_fpclass_ss_mask
+ // CHECK-LABEL: test_mm_fpclass_ss_mask
// CHECK: @llvm.x86.avx512.mask.fpclass.ss
return _mm_fpclass_ss_mask ( __A, 2);
}
__mmask8 test_mm_mask_fpclass_ss_mask(__mmask8 __U, __m128 __A) {
- // CHECK-LABEL: @test_mm_mask_fpclass_ss_mask
+ // CHECK-LABEL: test_mm_mask_fpclass_ss_mask
// CHECK: @llvm.x86.avx512.mask.fpclass.ss
return _mm_mask_fpclass_ss_mask (__U, __A, 2);
}
diff --git a/clang/test/CodeGen/X86/avx512f-builtins.c b/clang/test/CodeGen/X86/avx512f-builtins.c
index 5447035..8c14c57 100644
--- a/clang/test/CodeGen/X86/avx512f-builtins.c
+++ b/clang/test/CodeGen/X86/avx512f-builtins.c
@@ -154,6 +154,7 @@ __m512 test_mm512_add_ps(__m512 a, __m512 b)
// CHECK: fadd <16 x float>
return _mm512_add_ps(a, b);
}
+TEST_CONSTEXPR(match_m512(_mm512_add_ps((__m512){-1.0f, -2.0f, -3.0f, -4.0f, -5.0f, -6.0f, -7.0f, -8.0f, +1.0f, +2.0f, +3.0f, +4.0f, +5.0f, +6.0f, +7.0f, +8.0f}, (__m512){-1.0f, -2.0f, -3.0f, -4.0f, -5.0f, -6.0f, -7.0f, -8.0f, +1.0f, +2.0f, +3.0f, +4.0f, +5.0f, +6.0f, +7.0f, +8.0f}), -2.0f, -4.0f, -6.0f, -8.0f, -10.0f, -12.0f, -14.0f, -16.0f, +2.0f, +4.0f, +6.0f, +8.0f, +10.0f, +12.0f, +14.0f, +16.0f));
__m512d test_mm512_add_pd(__m512d a, __m512d b)
{
@@ -161,6 +162,7 @@ __m512d test_mm512_add_pd(__m512d a, __m512d b)
// CHECK: fadd <8 x double>
return _mm512_add_pd(a, b);
}
+TEST_CONSTEXPR(match_m512d(_mm512_add_pd((__m512d){-1.0, -2.0, -3.0, -4.0, +1.0, +2.0, +3.0, +4.0}, (__m512d){-1.0, -2.0, -3.0, -4.0, +1.0, +2.0, +3.0, +4.0}), -2.0, -4.0, -6.0, -8.0, +2.0, +4.0, +6.0, +8.0));
__m512 test_mm512_mul_ps(__m512 a, __m512 b)
{
@@ -168,6 +170,7 @@ __m512 test_mm512_mul_ps(__m512 a, __m512 b)
// CHECK: fmul <16 x float>
return _mm512_mul_ps(a, b);
}
+TEST_CONSTEXPR(match_m512(_mm512_mul_ps((__m512){-1.0f, -2.0f, -3.0f, -4.0f, -5.0f, -6.0f, -7.0f, -8.0f, +1.0f, +2.0f, +3.0f, +4.0f, +5.0f, +6.0f, +7.0f, +8.0f}, (__m512){-1.0f, -2.0f, -3.0f, -4.0f, -5.0f, -6.0f, -7.0f, -8.0f, +1.0f, +2.0f, +3.0f, +4.0f, +5.0f, +6.0f, +7.0f, +8.0f}), +1.0f, +4.0f, +9.0f, +16.0f, +25.0f, +36.0f, +49.0f, +64.0f, +1.0f, +4.0f, +9.0f, +16.0f, +25.0f, +36.0f, +49.0f, +64.0f));
__m512d test_mm512_mul_pd(__m512d a, __m512d b)
{
@@ -175,6 +178,7 @@ __m512d test_mm512_mul_pd(__m512d a, __m512d b)
// CHECK: fmul <8 x double>
return _mm512_mul_pd(a, b);
}
+TEST_CONSTEXPR(match_m512d(_mm512_mul_pd((__m512d){-1.0, -2.0, -3.0, -4.0, +1.0, +2.0, +3.0, +4.0}, (__m512d){-1.0, -2.0, -3.0, -4.0, +1.0, +2.0, +3.0, +4.0}), +1.0, +4.0, +9.0, +16.0, +1.0, +4.0, +9.0, +16.0));
void test_mm512_storeu_si512 (void *__P, __m512i __A)
{
@@ -1261,6 +1265,7 @@ __m512d test_mm512_unpackhi_pd(__m512d a, __m512d b)
// CHECK: shufflevector <8 x double> {{.*}} <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
return _mm512_unpackhi_pd(a, b);
}
+TEST_CONSTEXPR(match_m512d(_mm512_unpackhi_pd((__m512d){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}, (__m512d){9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}), +2.0, +10.0, +4.0, +12.0, +6.0, +14.0, +8.0, +16.0));
__m512d test_mm512_unpacklo_pd(__m512d a, __m512d b)
{
@@ -1268,6 +1273,7 @@ __m512d test_mm512_unpacklo_pd(__m512d a, __m512d b)
// CHECK: shufflevector <8 x double> {{.*}} <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
return _mm512_unpacklo_pd(a, b);
}
+TEST_CONSTEXPR(match_m512d(_mm512_unpacklo_pd((__m512d){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}, (__m512d){9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}), +1.0, +9.0, +3.0, +11.0, +5.0, +13.0, +7.0, +15.0));
__m512 test_mm512_unpackhi_ps(__m512 a, __m512 b)
{
@@ -1275,6 +1281,7 @@ __m512 test_mm512_unpackhi_ps(__m512 a, __m512 b)
// CHECK: shufflevector <16 x float> {{.*}} <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31>
return _mm512_unpackhi_ps(a, b);
}
+TEST_CONSTEXPR(match_m512(_mm512_unpackhi_ps((__m512){0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f}, (__m512){16.0f, 17.0f, 18.0f, 19.0f, 20.0f, 21.0f, 22.0f, 23.0f, 24.0f, 25.0f, 26.0f, 27.0f, 28.0f, 29.0f, 30.0f, 31.0f}), +2.0f, +18.0f, +3.0f, +19.0f, +6.0f, +22.0f, +7.0f, +23.0f, +10.0f, +26.0f, +11.0f, +27.0f, +14.0f, +30.0f, +15.0f, +31.0f));
__m512 test_mm512_unpacklo_ps(__m512 a, __m512 b)
{
@@ -1282,6 +1289,7 @@ __m512 test_mm512_unpacklo_ps(__m512 a, __m512 b)
// CHECK: shufflevector <16 x float> {{.*}} <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29>
return _mm512_unpacklo_ps(a, b);
}
+TEST_CONSTEXPR(match_m512(_mm512_unpacklo_ps((__m512){0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f}, (__m512){16.0f, 17.0f, 18.0f, 19.0f, 20.0f, 21.0f, 22.0f, 23.0f, 24.0f, 25.0f, 26.0f, 27.0f, 28.0f, 29.0f, 30.0f, 31.0f}), +0.0f, +16.0f, +1.0f, +17.0f, +4.0f, +20.0f, +5.0f, +21.0f, +8.0f, +24.0f, +9.0f, +25.0f, +12.0f, +28.0f, +13.0f, +29.0f));
__mmask16 test_mm512_cmp_round_ps_mask(__m512 a, __m512 b) {
// CHECK-LABEL: test_mm512_cmp_round_ps_mask
@@ -3551,6 +3559,7 @@ __m512d test_mm512_div_pd(__m512d __a, __m512d __b) {
// CHECK: fdiv <8 x double>
return _mm512_div_pd(__a,__b);
}
+TEST_CONSTEXPR(match_m512d(_mm512_div_pd((__m512d){+8.0, +6.0, +4.0, +2.0, -8.0, -6.0, -4.0, -2.0}, (__m512d){+2.0, +2.0, +2.0, +2.0, -2.0, -2.0, -2.0, -2.0}), +4.0, +3.0, +2.0, +1.0, +4.0, +3.0, +2.0, +1.0));
__m512d test_mm512_mask_div_pd(__m512d __w, __mmask8 __u, __m512d __a, __m512d __b) {
// CHECK-LABEL: test_mm512_mask_div_pd
// CHECK: fdiv <8 x double> %{{.*}}, %{{.*}}
@@ -3585,6 +3594,7 @@ __m512 test_mm512_div_ps(__m512 __A, __m512 __B) {
// CHECK: fdiv <16 x float>
return _mm512_div_ps(__A,__B);
}
+TEST_CONSTEXPR(match_m512(_mm512_div_ps((__m512){+16.0f, +14.0f, +12.0f, +10.0f, +8.0f, +6.0f, +4.0f, +2.0f, -16.0f, -14.0f, -12.0f, -10.0f, -8.0f, -6.0f, -4.0f, -2.0f}, (__m512){+2.0f, +2.0f, +2.0f, +2.0f, +2.0f, +2.0f, +2.0f, +2.0f, -2.0f, -2.0f, -2.0f, -2.0f, -2.0f, -2.0f, -2.0f, -2.0f}), +8.0f, +7.0f, +6.0f, +5.0f, +4.0f, +3.0f, +2.0f, +1.0f, +8.0f, +7.0f, +6.0f, +5.0f, +4.0f, +3.0f, +2.0f, +1.0f));
__m512 test_mm512_mask_div_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
// CHECK-LABEL: test_mm512_mask_div_ps
// CHECK: fdiv <16 x float> %{{.*}}, %{{.*}}
diff --git a/clang/test/CodeGen/X86/avx512vpopcntdq-builtins.c b/clang/test/CodeGen/X86/avx512vpopcntdq-builtins.c
index ca8f5e4..8927ae2 100644
--- a/clang/test/CodeGen/X86/avx512vpopcntdq-builtins.c
+++ b/clang/test/CodeGen/X86/avx512vpopcntdq-builtins.c
@@ -1,45 +1,48 @@
-// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512vpopcntdq -emit-llvm -o - -Wall -Werror | FileCheck %s
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512vpopcntdq -emit-llvm -o - -Wall -Werror | FileCheck %s
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +avx512vpopcntdq -emit-llvm -o - -Wall -Werror | FileCheck %s
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512vpopcntdq -emit-llvm -o - -Wall -Werror | FileCheck %s
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +avx512vpopcntdq -emit-llvm -o - -Wall -Werror | FileCheck %s
#include <immintrin.h>
#include "builtin_test_helpers.h"
__m512i test_mm512_popcnt_epi64(__m512i __A) {
- // CHECK-LABEL: @test_mm512_popcnt_epi64
+ // CHECK-LABEL: test_mm512_popcnt_epi64
// CHECK: @llvm.ctpop.v8i64
return _mm512_popcnt_epi64(__A);
}
-TEST_CONSTEXPR(match_v8di(_mm512_popcnt_epi64((__m512i)(__v8di){+5, -3, -10, +8, 0, -256, +256, -128}), 2, 31, 30, 1, 0, 24, 1, 25));
+TEST_CONSTEXPR(match_v8di(_mm512_popcnt_epi64((__m512i)(__v8di){+5, -3, -10, +8, 0, -256, +256, -128}), 2, 63, 62, 1, 0, 56, 1, 57));
__m512i test_mm512_mask_popcnt_epi64(__m512i __W, __mmask8 __U, __m512i __A) {
- // CHECK-LABEL: @test_mm512_mask_popcnt_epi64
+ // CHECK-LABEL: test_mm512_mask_popcnt_epi64
// CHECK: @llvm.ctpop.v8i64
// CHECK: select <8 x i1> %{{[0-9]+}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
return _mm512_mask_popcnt_epi64(__W, __U, __A);
}
__m512i test_mm512_maskz_popcnt_epi64(__mmask8 __U, __m512i __A) {
- // CHECK-LABEL: @test_mm512_maskz_popcnt_epi64
+ // CHECK-LABEL: test_mm512_maskz_popcnt_epi64
// CHECK: @llvm.ctpop.v8i64
// CHECK: select <8 x i1> %{{[0-9]+}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
return _mm512_maskz_popcnt_epi64(__U, __A);
}
__m512i test_mm512_popcnt_epi32(__m512i __A) {
- // CHECK-LABEL: @test_mm512_popcnt_epi32
+ // CHECK-LABEL: test_mm512_popcnt_epi32
// CHECK: @llvm.ctpop.v16i32
return _mm512_popcnt_epi32(__A);
}
TEST_CONSTEXPR(match_v16si(_mm512_popcnt_epi32((__m512i)(__v16si){+5, -3, -10, +8, 0, -256, +256, -128, +3, +9, +15, +33, +63, +129, +511, +1025}), 2, 31, 30, 1, 0, 24, 1, 25, 2, 2, 4, 2, 6, 2, 9, 2));
__m512i test_mm512_mask_popcnt_epi32(__m512i __W, __mmask16 __U, __m512i __A) {
- // CHECK-LABEL: @test_mm512_mask_popcnt_epi32
+ // CHECK-LABEL: test_mm512_mask_popcnt_epi32
// CHECK: @llvm.ctpop.v16i32
// CHECK: select <16 x i1> %{{[0-9]+}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
return _mm512_mask_popcnt_epi32(__W, __U, __A);
}
__m512i test_mm512_maskz_popcnt_epi32(__mmask16 __U, __m512i __A) {
- // CHECK-LABEL: @test_mm512_maskz_popcnt_epi32
+ // CHECK-LABEL: test_mm512_maskz_popcnt_epi32
// CHECK: @llvm.ctpop.v16i32
// CHECK: select <16 x i1> %{{[0-9]+}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
return _mm512_maskz_popcnt_epi32(__U, __A);
diff --git a/clang/test/CodeGen/X86/avx512vpopcntdqvl-builtins.c b/clang/test/CodeGen/X86/avx512vpopcntdqvl-builtins.c
index 5d18b68..d9fbd76 100644
--- a/clang/test/CodeGen/X86/avx512vpopcntdqvl-builtins.c
+++ b/clang/test/CodeGen/X86/avx512vpopcntdqvl-builtins.c
@@ -1,87 +1,90 @@
-// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512vpopcntdq -target-feature +avx512vl -emit-llvm -o - -Wall -Werror | FileCheck %s
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512vpopcntdq -target-feature +avx512vl -emit-llvm -o - -Wall -Werror | FileCheck %s
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +avx512vpopcntdq -target-feature +avx512vl -emit-llvm -o - -Wall -Werror | FileCheck %s
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512vpopcntdq -target-feature +avx512vl -emit-llvm -o - -Wall -Werror | FileCheck %s
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +avx512vpopcntdq -target-feature +avx512vl -emit-llvm -o - -Wall -Werror | FileCheck %s
#include <immintrin.h>
#include "builtin_test_helpers.h"
__m128i test_mm_popcnt_epi64(__m128i __A) {
- // CHECK-LABEL: @test_mm_popcnt_epi64
+ // CHECK-LABEL: test_mm_popcnt_epi64
// CHECK: @llvm.ctpop.v2i64
return _mm_popcnt_epi64(__A);
}
TEST_CONSTEXPR(match_v2di(_mm_popcnt_epi64((__m128i)(__v2di){+5, -3}), 2, 63));
__m128i test_mm_mask_popcnt_epi64(__m128i __W, __mmask8 __U, __m128i __A) {
- // CHECK-LABEL: @test_mm_mask_popcnt_epi64
+ // CHECK-LABEL: test_mm_mask_popcnt_epi64
// CHECK: @llvm.ctpop.v2i64
// CHECK: select <2 x i1> %{{.+}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}
return _mm_mask_popcnt_epi64(__W, __U, __A);
}
__m128i test_mm_maskz_popcnt_epi64(__mmask8 __U, __m128i __A) {
- // CHECK-LABEL: @test_mm_maskz_popcnt_epi64
+ // CHECK-LABEL: test_mm_maskz_popcnt_epi64
// CHECK: @llvm.ctpop.v2i64
// CHECK: select <2 x i1> %{{.+}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}
return _mm_maskz_popcnt_epi64(__U, __A);
}
__m128i test_mm_popcnt_epi32(__m128i __A) {
- // CHECK-LABEL: @test_mm_popcnt_epi32
+ // CHECK-LABEL: test_mm_popcnt_epi32
// CHECK: @llvm.ctpop.v4i32
return _mm_popcnt_epi32(__A);
}
TEST_CONSTEXPR(match_v4si(_mm_popcnt_epi32((__m128i)(__v4si){+5, -3, -10, +8}), 2, 31, 30, 1));
__m128i test_mm_mask_popcnt_epi32(__m128i __W, __mmask8 __U, __m128i __A) {
- // CHECK-LABEL: @test_mm_mask_popcnt_epi32
+ // CHECK-LABEL: test_mm_mask_popcnt_epi32
// CHECK: @llvm.ctpop.v4i32
// CHECK: select <4 x i1> %{{.+}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
return _mm_mask_popcnt_epi32(__W, __U, __A);
}
__m128i test_mm_maskz_popcnt_epi32(__mmask8 __U, __m128i __A) {
- // CHECK-LABEL: @test_mm_maskz_popcnt_epi32
+ // CHECK-LABEL: test_mm_maskz_popcnt_epi32
// CHECK: @llvm.ctpop.v4i32
// CHECK: select <4 x i1> %{{.+}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
return _mm_maskz_popcnt_epi32(__U, __A);
}
__m256i test_mm256_popcnt_epi64(__m256i __A) {
- // CHECK-LABEL: @test_mm256_popcnt_epi64
+ // CHECK-LABEL: test_mm256_popcnt_epi64
// CHECK: @llvm.ctpop.v4i64
return _mm256_popcnt_epi64(__A);
}
TEST_CONSTEXPR(match_v4di(_mm256_popcnt_epi64((__m256i)(__v4di){+5, -3, -10, +8}), 2, 63, 62, 1));
__m256i test_mm256_mask_popcnt_epi64(__m256i __W, __mmask8 __U, __m256i __A) {
- // CHECK-LABEL: @test_mm256_mask_popcnt_epi64
+ // CHECK-LABEL: test_mm256_mask_popcnt_epi64
// CHECK: @llvm.ctpop.v4i64
// CHECK: select <4 x i1> %{{.+}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}
return _mm256_mask_popcnt_epi64(__W, __U, __A);
}
__m256i test_mm256_maskz_popcnt_epi64(__mmask8 __U, __m256i __A) {
- // CHECK-LABEL: @test_mm256_maskz_popcnt_epi64
+ // CHECK-LABEL: test_mm256_maskz_popcnt_epi64
// CHECK: @llvm.ctpop.v4i64
// CHECK: select <4 x i1> %{{.+}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}
return _mm256_maskz_popcnt_epi64(__U, __A);
}
__m256i test_mm256_popcnt_epi32(__m256i __A) {
- // CHECK-LABEL: @test_mm256_popcnt_epi32
+ // CHECK-LABEL: test_mm256_popcnt_epi32
// CHECK: @llvm.ctpop.v8i32
return _mm256_popcnt_epi32(__A);
}
TEST_CONSTEXPR(match_v8si(_mm256_popcnt_epi32((__m256i)(__v8si){+5, -3, -10, +8, 0, -256, +256, -128}), 2, 31, 30, 1, 0, 24, 1, 25));
__m256i test_mm256_mask_popcnt_epi32(__m256i __W, __mmask8 __U, __m256i __A) {
- // CHECK-LABEL: @test_mm256_mask_popcnt_epi32
+ // CHECK-LABEL: test_mm256_mask_popcnt_epi32
// CHECK: @llvm.ctpop.v8i32
// CHECK: select <8 x i1> %{{.+}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
return _mm256_mask_popcnt_epi32(__W, __U, __A);
}
__m256i test_mm256_maskz_popcnt_epi32(__mmask8 __U, __m256i __A) {
- // CHECK-LABEL: @test_mm256_maskz_popcnt_epi32
+ // CHECK-LABEL: test_mm256_maskz_popcnt_epi32
// CHECK: @llvm.ctpop.v8i32
// CHECK: select <8 x i1> %{{.+}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
return _mm256_maskz_popcnt_epi32(__U, __A);
diff --git a/clang/test/CodeGen/builtins-wasm.c b/clang/test/CodeGen/builtins-wasm.c
index f201dfe..375664b 100644
--- a/clang/test/CodeGen/builtins-wasm.c
+++ b/clang/test/CodeGen/builtins-wasm.c
@@ -751,24 +751,3 @@ void *tp (void) {
return __builtin_thread_pointer ();
// WEBASSEMBLY: call {{.*}} @llvm.thread.pointer.p0()
}
-
-typedef void (*Fvoid)(void);
-typedef float (*Ffloats)(float, double, int);
-typedef void (*Fpointers)(Fvoid, Ffloats, void*, int*, int***, char[5]);
-
-void use(int);
-
-void test_function_pointer_signature_void(Fvoid func) {
- // WEBASSEMBLY: %0 = tail call i32 (ptr, ...) @llvm.wasm.ref.test.func(ptr %func, token poison)
- use(__builtin_wasm_test_function_pointer_signature(func));
-}
-
-void test_function_pointer_signature_floats(Ffloats func) {
- // WEBASSEMBLY: tail call i32 (ptr, ...) @llvm.wasm.ref.test.func(ptr %func, float 0.000000e+00, token poison, float 0.000000e+00, double 0.000000e+00, i32 0)
- use(__builtin_wasm_test_function_pointer_signature(func));
-}
-
-void test_function_pointer_signature_pointers(Fpointers func) {
- // WEBASSEMBLY: %0 = tail call i32 (ptr, ...) @llvm.wasm.ref.test.func(ptr %func, token poison, ptr null, ptr null, ptr null, ptr null, ptr null, ptr null)
- use(__builtin_wasm_test_function_pointer_signature(func));
-}
diff --git a/clang/test/Headers/__cpuidex_conflict.c b/clang/test/Headers/__cpuidex_conflict.c
index 74f4532..d14ef29 100644
--- a/clang/test/Headers/__cpuidex_conflict.c
+++ b/clang/test/Headers/__cpuidex_conflict.c
@@ -5,6 +5,7 @@
// Ensure that we do not run into conflicts when offloading.
// RUN: %clang_cc1 %s -DIS_STATIC=static -ffreestanding -fopenmp -fopenmp-is-target-device -aux-triple x86_64-unknown-linux-gnu
+// RUN: %clang_cc1 -DIS_STATIC="" -triple nvptx64-nvidia-cuda -aux-triple x86_64-unknown-linux-gnu -aux-target-cpu x86-64 -fcuda-is-device -internal-isystem /home/gha/llvm-project/build/lib/clang/22/include -x cuda %s -o -
typedef __SIZE_TYPE__ size_t;
diff --git a/clang/test/Sema/builtins-wasm.c b/clang/test/Sema/builtins-wasm.c
index a3486b1..9075e9e 100644
--- a/clang/test/Sema/builtins-wasm.c
+++ b/clang/test/Sema/builtins-wasm.c
@@ -1,4 +1,5 @@
// RUN: %clang_cc1 -fsyntax-only -verify -triple wasm32 -target-feature +reference-types %s
+// RUN: %clang_cc1 -fsyntax-only -verify -triple wasm32 -target-abi experimental-mv -DMULTIVALUE -target-feature +reference-types %s
#define EXPR_HAS_TYPE(expr, type) _Generic((expr), type : 1, default : 0)
@@ -57,8 +58,8 @@ void test_table_copy(int dst_idx, int src_idx, int nelem) {
typedef void (*F1)(void);
typedef int (*F2)(int);
-typedef int (*F3)(__externref_t);
-typedef __externref_t (*F4)(int);
+typedef void (*F3)(struct {int x; double y;});
+typedef struct {int x; double y;} (*F4)(void);
void test_function_pointer_signature() {
// Test argument count validation
@@ -68,8 +69,6 @@ void test_function_pointer_signature() {
// // Test argument type validation - should require function pointer
(void)__builtin_wasm_test_function_pointer_signature((void*)0); // expected-error {{used type 'void *' where function pointer is required}}
(void)__builtin_wasm_test_function_pointer_signature((int)0); // expected-error {{used type 'int' where function pointer is required}}
- (void)__builtin_wasm_test_function_pointer_signature((F3)0); // expected-error {{not supported for function pointers with a reference type parameter}}
- (void)__builtin_wasm_test_function_pointer_signature((F4)0); // expected-error {{not supported for function pointers with a reference type return value}}
// // Test valid usage
int res = __builtin_wasm_test_function_pointer_signature((F1)0);
@@ -77,4 +76,14 @@ void test_function_pointer_signature() {
// Test return type
_Static_assert(EXPR_HAS_TYPE(__builtin_wasm_test_function_pointer_signature((F1)0), int), "");
+
+#ifdef MULTIVALUE
+ // Test that struct arguments and returns are rejected with multivalue abi
+ (void)__builtin_wasm_test_function_pointer_signature((F3)0); // expected-error {{not supported with the multivalue ABI for function pointers with a struct/union as parameter}}
+ (void)__builtin_wasm_test_function_pointer_signature((F4)0); // expected-error {{not supported with the multivalue ABI for function pointers with a struct/union as return value}}
+#else
+ // with default abi they are fine
+ (void)__builtin_wasm_test_function_pointer_signature((F3)0);
+ (void)__builtin_wasm_test_function_pointer_signature((F4)0);
+#endif
}