path: root/clang/test/CodeGen
Diffstat (limited to 'clang/test/CodeGen')
-rw-r--r--  clang/test/CodeGen/RISCV/bitint.c                   | 342
-rw-r--r--  clang/test/CodeGen/X86/avx-builtins.c               |  11
-rw-r--r--  clang/test/CodeGen/X86/avx2-builtins.c              |  26
-rw-r--r--  clang/test/CodeGen/X86/avx512bw-builtins.c          |   9
-rw-r--r--  clang/test/CodeGen/X86/avx512cd-builtins.c          |   4
-rw-r--r--  clang/test/CodeGen/X86/avx512ifma-builtins.c        | 199
-rw-r--r--  clang/test/CodeGen/X86/avx512ifmavl-builtins.c      | 182
-rw-r--r--  clang/test/CodeGen/X86/avx512vlbw-builtins.c        |  13
-rw-r--r--  clang/test/CodeGen/X86/avx512vlcd-builtins.c        |   4
-rw-r--r--  clang/test/CodeGen/X86/avxifma-builtins.c           | 182
-rw-r--r--  clang/test/CodeGen/X86/mmx-builtins.c               |   8
-rw-r--r--  clang/test/CodeGen/X86/sse3-builtins.c              |   4
-rw-r--r--  clang/test/CodeGen/X86/ssse3-builtins.c             |   9
-rw-r--r--  clang/test/CodeGen/attr-target-mv.c                 |   5
-rw-r--r--  clang/test/CodeGen/ext-int-cc.c                     |   8
-rw-r--r--  clang/test/CodeGen/inline-asm-systemz-flag-output.c |  57
-rw-r--r--  clang/test/CodeGen/target-builtin-noerror.c         |   1
-rw-r--r--  clang/test/CodeGen/unified-lto-module-flag.ll       |  11
18 files changed, 1042 insertions, 33 deletions
diff --git a/clang/test/CodeGen/RISCV/bitint.c b/clang/test/CodeGen/RISCV/bitint.c
new file mode 100644
index 0000000..1ad43af
--- /dev/null
+++ b/clang/test/CodeGen/RISCV/bitint.c
@@ -0,0 +1,342 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature
+// RUN: %clang_cc1 -triple riscv64 -O2 -emit-llvm -o - %s | FileCheck %s --check-prefix=RISCV64
+// RUN: %clang_cc1 -triple riscv32 -O2 -emit-llvm -o - %s | FileCheck %s --check-prefix=RISCV32
+// RUN: %clang_cc1 -triple riscv32 -fforce-enable-int128 -O2 -emit-llvm -o - %s | FileCheck %s --check-prefix=RISCV32_INT128
+
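+// A short summary of what the CHECK lines below pin down (inferred from the
+// generated IR, not quoted from the psABI): _BitInt(N) values no wider than
+// 2*XLEN are passed and returned directly as iN with the matching extension
+// attribute, while wider values on riscv32 are widened to i128 and passed
+// indirectly through memory.
+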
+// RISCV64-LABEL: define {{[^@]+}}@test_bitint_17_add_unsigned
+// RISCV64-SAME: (i17 noundef zeroext [[A:%.*]], i17 noundef zeroext [[B:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// RISCV64-NEXT: entry:
+// RISCV64-NEXT: [[ADD:%.*]] = add i17 [[B]], [[A]]
+// RISCV64-NEXT: ret i17 [[ADD]]
+//
+// RISCV32-LABEL: define {{[^@]+}}@test_bitint_17_add_unsigned
+// RISCV32-SAME: (i17 noundef zeroext [[A:%.*]], i17 noundef zeroext [[B:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// RISCV32-NEXT: entry:
+// RISCV32-NEXT: [[ADD:%.*]] = add i17 [[B]], [[A]]
+// RISCV32-NEXT: ret i17 [[ADD]]
+//
+// RISCV32_INT128-LABEL: define {{[^@]+}}@test_bitint_17_add_unsigned
+// RISCV32_INT128-SAME: (i17 noundef zeroext [[A:%.*]], i17 noundef zeroext [[B:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// RISCV32_INT128-NEXT: entry:
+// RISCV32_INT128-NEXT: [[ADD:%.*]] = add i17 [[B]], [[A]]
+// RISCV32_INT128-NEXT: ret i17 [[ADD]]
+//
+unsigned _BitInt(17) test_bitint_17_add_unsigned(unsigned _BitInt(17) a, unsigned _BitInt(17) b) {
+ return a + b;
+}
+
+// RISCV64-LABEL: define {{[^@]+}}@test_bitint_17_add_signed
+// RISCV64-SAME: (i17 noundef signext [[A:%.*]], i17 noundef signext [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// RISCV64-NEXT: entry:
+// RISCV64-NEXT: [[ADD:%.*]] = add nsw i17 [[B]], [[A]]
+// RISCV64-NEXT: ret i17 [[ADD]]
+//
+// RISCV32-LABEL: define {{[^@]+}}@test_bitint_17_add_signed
+// RISCV32-SAME: (i17 noundef signext [[A:%.*]], i17 noundef signext [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// RISCV32-NEXT: entry:
+// RISCV32-NEXT: [[ADD:%.*]] = add nsw i17 [[B]], [[A]]
+// RISCV32-NEXT: ret i17 [[ADD]]
+//
+// RISCV32_INT128-LABEL: define {{[^@]+}}@test_bitint_17_add_signed
+// RISCV32_INT128-SAME: (i17 noundef signext [[A:%.*]], i17 noundef signext [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// RISCV32_INT128-NEXT: entry:
+// RISCV32_INT128-NEXT: [[ADD:%.*]] = add nsw i17 [[B]], [[A]]
+// RISCV32_INT128-NEXT: ret i17 [[ADD]]
+//
+signed _BitInt(17) test_bitint_17_add_signed(signed _BitInt(17) a, signed _BitInt(17) b) {
+ return a + b;
+}
+
+// RISCV64-LABEL: define {{[^@]+}}@test_bitint_17_add_default
+// RISCV64-SAME: (i17 noundef signext [[A:%.*]], i17 noundef signext [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// RISCV64-NEXT: entry:
+// RISCV64-NEXT: [[ADD:%.*]] = add nsw i17 [[B]], [[A]]
+// RISCV64-NEXT: ret i17 [[ADD]]
+//
+// RISCV32-LABEL: define {{[^@]+}}@test_bitint_17_add_default
+// RISCV32-SAME: (i17 noundef signext [[A:%.*]], i17 noundef signext [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// RISCV32-NEXT: entry:
+// RISCV32-NEXT: [[ADD:%.*]] = add nsw i17 [[B]], [[A]]
+// RISCV32-NEXT: ret i17 [[ADD]]
+//
+// RISCV32_INT128-LABEL: define {{[^@]+}}@test_bitint_17_add_default
+// RISCV32_INT128-SAME: (i17 noundef signext [[A:%.*]], i17 noundef signext [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// RISCV32_INT128-NEXT: entry:
+// RISCV32_INT128-NEXT: [[ADD:%.*]] = add nsw i17 [[B]], [[A]]
+// RISCV32_INT128-NEXT: ret i17 [[ADD]]
+//
+_BitInt(17) test_bitint_17_add_default(_BitInt(17) a, _BitInt(17) b) {
+ return a + b;
+}
+
+// RISCV64-LABEL: define {{[^@]+}}@test_bitint_32_add_unsigned
+// RISCV64-SAME: (i32 noundef signext [[A:%.*]], i32 noundef signext [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// RISCV64-NEXT: entry:
+// RISCV64-NEXT: [[ADD:%.*]] = add i32 [[B]], [[A]]
+// RISCV64-NEXT: ret i32 [[ADD]]
+//
+// RISCV32-LABEL: define {{[^@]+}}@test_bitint_32_add_unsigned
+// RISCV32-SAME: (i32 noundef zeroext [[A:%.*]], i32 noundef zeroext [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// RISCV32-NEXT: entry:
+// RISCV32-NEXT: [[ADD:%.*]] = add i32 [[B]], [[A]]
+// RISCV32-NEXT: ret i32 [[ADD]]
+//
+// RISCV32_INT128-LABEL: define {{[^@]+}}@test_bitint_32_add_unsigned
+// RISCV32_INT128-SAME: (i32 noundef zeroext [[A:%.*]], i32 noundef zeroext [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// RISCV32_INT128-NEXT: entry:
+// RISCV32_INT128-NEXT: [[ADD:%.*]] = add i32 [[B]], [[A]]
+// RISCV32_INT128-NEXT: ret i32 [[ADD]]
+//
+unsigned _BitInt(32) test_bitint_32_add_unsigned(unsigned _BitInt(32) a, unsigned _BitInt(32) b) {
+ return a + b;
+}
+
+// RISCV64-LABEL: define {{[^@]+}}@test_bitint_32_add_signed
+// RISCV64-SAME: (i32 noundef signext [[A:%.*]], i32 noundef signext [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// RISCV64-NEXT: entry:
+// RISCV64-NEXT: [[ADD:%.*]] = add nsw i32 [[B]], [[A]]
+// RISCV64-NEXT: ret i32 [[ADD]]
+//
+// RISCV32-LABEL: define {{[^@]+}}@test_bitint_32_add_signed
+// RISCV32-SAME: (i32 noundef signext [[A:%.*]], i32 noundef signext [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// RISCV32-NEXT: entry:
+// RISCV32-NEXT: [[ADD:%.*]] = add nsw i32 [[B]], [[A]]
+// RISCV32-NEXT: ret i32 [[ADD]]
+//
+// RISCV32_INT128-LABEL: define {{[^@]+}}@test_bitint_32_add_signed
+// RISCV32_INT128-SAME: (i32 noundef signext [[A:%.*]], i32 noundef signext [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// RISCV32_INT128-NEXT: entry:
+// RISCV32_INT128-NEXT: [[ADD:%.*]] = add nsw i32 [[B]], [[A]]
+// RISCV32_INT128-NEXT: ret i32 [[ADD]]
+//
+signed _BitInt(32) test_bitint_32_add_signed(signed _BitInt(32) a, signed _BitInt(32) b) {
+ return a + b;
+}
+
+// RISCV64-LABEL: define {{[^@]+}}@test_bitint_32_add_default
+// RISCV64-SAME: (i32 noundef signext [[A:%.*]], i32 noundef signext [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// RISCV64-NEXT: entry:
+// RISCV64-NEXT: [[ADD:%.*]] = add nsw i32 [[B]], [[A]]
+// RISCV64-NEXT: ret i32 [[ADD]]
+//
+// RISCV32-LABEL: define {{[^@]+}}@test_bitint_32_add_default
+// RISCV32-SAME: (i32 noundef signext [[A:%.*]], i32 noundef signext [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// RISCV32-NEXT: entry:
+// RISCV32-NEXT: [[ADD:%.*]] = add nsw i32 [[B]], [[A]]
+// RISCV32-NEXT: ret i32 [[ADD]]
+//
+// RISCV32_INT128-LABEL: define {{[^@]+}}@test_bitint_32_add_default
+// RISCV32_INT128-SAME: (i32 noundef signext [[A:%.*]], i32 noundef signext [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// RISCV32_INT128-NEXT: entry:
+// RISCV32_INT128-NEXT: [[ADD:%.*]] = add nsw i32 [[B]], [[A]]
+// RISCV32_INT128-NEXT: ret i32 [[ADD]]
+//
+_BitInt(32) test_bitint_32_add_default(_BitInt(32) a, _BitInt(32) b) {
+ return a + b;
+}
+
+
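+// Above 64 bits the two targets diverge: riscv64 still passes i65/i77
+// directly in registers, while riscv32 (with or without
+// -fforce-enable-int128) returns through an sret i128 and takes both
+// operands indirectly, as the CHECK lines below show.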
+// RISCV64-LABEL: define {{[^@]+}}@test_bitint_65_add_unsigned
+// RISCV64-SAME: (i65 noundef zeroext [[A:%.*]], i65 noundef zeroext [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// RISCV64-NEXT: entry:
+// RISCV64-NEXT: [[ADD:%.*]] = add i65 [[B]], [[A]]
+// RISCV64-NEXT: ret i65 [[ADD]]
+//
+// RISCV32-LABEL: define {{[^@]+}}@test_bitint_65_add_unsigned
+// RISCV32-SAME: (ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] {
+// RISCV32-NEXT: entry:
+// RISCV32-NEXT: [[TMP2:%.*]] = load i128, ptr [[TMP0]], align 8, !tbaa [[TBAA6:![0-9]+]]
+// RISCV32-NEXT: [[A:%.*]] = trunc i128 [[TMP2]] to i65
+// RISCV32-NEXT: [[TMP3:%.*]] = load i128, ptr [[TMP1]], align 8, !tbaa [[TBAA6]]
+// RISCV32-NEXT: [[B:%.*]] = trunc i128 [[TMP3]] to i65
+// RISCV32-NEXT: [[ADD:%.*]] = add i65 [[B]], [[A]]
+// RISCV32-NEXT: [[STOREDV4:%.*]] = zext i65 [[ADD]] to i128
+// RISCV32-NEXT: store i128 [[STOREDV4]], ptr [[AGG_RESULT]], align 8, !tbaa [[TBAA6]]
+// RISCV32-NEXT: ret void
+//
+// RISCV32_INT128-LABEL: define {{[^@]+}}@test_bitint_65_add_unsigned
+// RISCV32_INT128-SAME: (ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] {
+// RISCV32_INT128-NEXT: entry:
+// RISCV32_INT128-NEXT: [[TMP2:%.*]] = load i128, ptr [[TMP0]], align 8, !tbaa [[TBAA6:![0-9]+]]
+// RISCV32_INT128-NEXT: [[A:%.*]] = trunc i128 [[TMP2]] to i65
+// RISCV32_INT128-NEXT: [[TMP3:%.*]] = load i128, ptr [[TMP1]], align 8, !tbaa [[TBAA6]]
+// RISCV32_INT128-NEXT: [[B:%.*]] = trunc i128 [[TMP3]] to i65
+// RISCV32_INT128-NEXT: [[ADD:%.*]] = add i65 [[B]], [[A]]
+// RISCV32_INT128-NEXT: [[STOREDV4:%.*]] = zext i65 [[ADD]] to i128
+// RISCV32_INT128-NEXT: store i128 [[STOREDV4]], ptr [[AGG_RESULT]], align 8, !tbaa [[TBAA6]]
+// RISCV32_INT128-NEXT: ret void
+//
+unsigned _BitInt(65) test_bitint_65_add_unsigned(unsigned _BitInt(65) a, unsigned _BitInt(65) b) {
+ return a + b;
+}
+
+// RISCV64-LABEL: define {{[^@]+}}@test_bitint_65_add_signed
+// RISCV64-SAME: (i65 noundef signext [[A:%.*]], i65 noundef signext [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// RISCV64-NEXT: entry:
+// RISCV64-NEXT: [[ADD:%.*]] = add nsw i65 [[B]], [[A]]
+// RISCV64-NEXT: ret i65 [[ADD]]
+//
+// RISCV32-LABEL: define {{[^@]+}}@test_bitint_65_add_signed
+// RISCV32-SAME: (ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR1]] {
+// RISCV32-NEXT: entry:
+// RISCV32-NEXT: [[TMP2:%.*]] = load i128, ptr [[TMP0]], align 8, !tbaa [[TBAA6]]
+// RISCV32-NEXT: [[A:%.*]] = trunc i128 [[TMP2]] to i65
+// RISCV32-NEXT: [[TMP3:%.*]] = load i128, ptr [[TMP1]], align 8, !tbaa [[TBAA6]]
+// RISCV32-NEXT: [[B:%.*]] = trunc i128 [[TMP3]] to i65
+// RISCV32-NEXT: [[ADD:%.*]] = add nsw i65 [[B]], [[A]]
+// RISCV32-NEXT: [[STOREDV4:%.*]] = sext i65 [[ADD]] to i128
+// RISCV32-NEXT: store i128 [[STOREDV4]], ptr [[AGG_RESULT]], align 8, !tbaa [[TBAA6]]
+// RISCV32-NEXT: ret void
+//
+// RISCV32_INT128-LABEL: define {{[^@]+}}@test_bitint_65_add_signed
+// RISCV32_INT128-SAME: (ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR1]] {
+// RISCV32_INT128-NEXT: entry:
+// RISCV32_INT128-NEXT: [[TMP2:%.*]] = load i128, ptr [[TMP0]], align 8, !tbaa [[TBAA6]]
+// RISCV32_INT128-NEXT: [[A:%.*]] = trunc i128 [[TMP2]] to i65
+// RISCV32_INT128-NEXT: [[TMP3:%.*]] = load i128, ptr [[TMP1]], align 8, !tbaa [[TBAA6]]
+// RISCV32_INT128-NEXT: [[B:%.*]] = trunc i128 [[TMP3]] to i65
+// RISCV32_INT128-NEXT: [[ADD:%.*]] = add nsw i65 [[B]], [[A]]
+// RISCV32_INT128-NEXT: [[STOREDV4:%.*]] = sext i65 [[ADD]] to i128
+// RISCV32_INT128-NEXT: store i128 [[STOREDV4]], ptr [[AGG_RESULT]], align 8, !tbaa [[TBAA6]]
+// RISCV32_INT128-NEXT: ret void
+//
+signed _BitInt(65) test_bitint_65_add_signed(signed _BitInt(65) a, signed _BitInt(65) b) {
+ return a + b;
+}
+
+// RISCV64-LABEL: define {{[^@]+}}@test_bitint_65_add_default
+// RISCV64-SAME: (i65 noundef signext [[A:%.*]], i65 noundef signext [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// RISCV64-NEXT: entry:
+// RISCV64-NEXT: [[ADD:%.*]] = add nsw i65 [[B]], [[A]]
+// RISCV64-NEXT: ret i65 [[ADD]]
+//
+// RISCV32-LABEL: define {{[^@]+}}@test_bitint_65_add_default
+// RISCV32-SAME: (ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR1]] {
+// RISCV32-NEXT: entry:
+// RISCV32-NEXT: [[TMP2:%.*]] = load i128, ptr [[TMP0]], align 8, !tbaa [[TBAA6]]
+// RISCV32-NEXT: [[A:%.*]] = trunc i128 [[TMP2]] to i65
+// RISCV32-NEXT: [[TMP3:%.*]] = load i128, ptr [[TMP1]], align 8, !tbaa [[TBAA6]]
+// RISCV32-NEXT: [[B:%.*]] = trunc i128 [[TMP3]] to i65
+// RISCV32-NEXT: [[ADD:%.*]] = add nsw i65 [[B]], [[A]]
+// RISCV32-NEXT: [[STOREDV4:%.*]] = sext i65 [[ADD]] to i128
+// RISCV32-NEXT: store i128 [[STOREDV4]], ptr [[AGG_RESULT]], align 8, !tbaa [[TBAA6]]
+// RISCV32-NEXT: ret void
+//
+// RISCV32_INT128-LABEL: define {{[^@]+}}@test_bitint_65_add_default
+// RISCV32_INT128-SAME: (ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR1]] {
+// RISCV32_INT128-NEXT: entry:
+// RISCV32_INT128-NEXT: [[TMP2:%.*]] = load i128, ptr [[TMP0]], align 8, !tbaa [[TBAA6]]
+// RISCV32_INT128-NEXT: [[A:%.*]] = trunc i128 [[TMP2]] to i65
+// RISCV32_INT128-NEXT: [[TMP3:%.*]] = load i128, ptr [[TMP1]], align 8, !tbaa [[TBAA6]]
+// RISCV32_INT128-NEXT: [[B:%.*]] = trunc i128 [[TMP3]] to i65
+// RISCV32_INT128-NEXT: [[ADD:%.*]] = add nsw i65 [[B]], [[A]]
+// RISCV32_INT128-NEXT: [[STOREDV4:%.*]] = sext i65 [[ADD]] to i128
+// RISCV32_INT128-NEXT: store i128 [[STOREDV4]], ptr [[AGG_RESULT]], align 8, !tbaa [[TBAA6]]
+// RISCV32_INT128-NEXT: ret void
+//
+_BitInt(65) test_bitint_65_add_default(_BitInt(65) a, _BitInt(65) b) {
+ return a + b;
+}
+
+
+// RISCV64-LABEL: define {{[^@]+}}@test_bitint_77_add_unsigned
+// RISCV64-SAME: (i77 noundef zeroext [[A:%.*]], i77 noundef zeroext [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// RISCV64-NEXT: entry:
+// RISCV64-NEXT: [[ADD:%.*]] = add i77 [[B]], [[A]]
+// RISCV64-NEXT: ret i77 [[ADD]]
+//
+// RISCV32-LABEL: define {{[^@]+}}@test_bitint_77_add_unsigned
+// RISCV32-SAME: (ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR1]] {
+// RISCV32-NEXT: entry:
+// RISCV32-NEXT: [[TMP2:%.*]] = load i128, ptr [[TMP0]], align 8, !tbaa [[TBAA10:![0-9]+]]
+// RISCV32-NEXT: [[A:%.*]] = trunc i128 [[TMP2]] to i77
+// RISCV32-NEXT: [[TMP3:%.*]] = load i128, ptr [[TMP1]], align 8, !tbaa [[TBAA10]]
+// RISCV32-NEXT: [[B:%.*]] = trunc i128 [[TMP3]] to i77
+// RISCV32-NEXT: [[ADD:%.*]] = add i77 [[B]], [[A]]
+// RISCV32-NEXT: [[STOREDV4:%.*]] = zext i77 [[ADD]] to i128
+// RISCV32-NEXT: store i128 [[STOREDV4]], ptr [[AGG_RESULT]], align 8, !tbaa [[TBAA10]]
+// RISCV32-NEXT: ret void
+//
+// RISCV32_INT128-LABEL: define {{[^@]+}}@test_bitint_77_add_unsigned
+// RISCV32_INT128-SAME: (ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR1]] {
+// RISCV32_INT128-NEXT: entry:
+// RISCV32_INT128-NEXT: [[TMP2:%.*]] = load i128, ptr [[TMP0]], align 8, !tbaa [[TBAA10:![0-9]+]]
+// RISCV32_INT128-NEXT: [[A:%.*]] = trunc i128 [[TMP2]] to i77
+// RISCV32_INT128-NEXT: [[TMP3:%.*]] = load i128, ptr [[TMP1]], align 8, !tbaa [[TBAA10]]
+// RISCV32_INT128-NEXT: [[B:%.*]] = trunc i128 [[TMP3]] to i77
+// RISCV32_INT128-NEXT: [[ADD:%.*]] = add i77 [[B]], [[A]]
+// RISCV32_INT128-NEXT: [[STOREDV4:%.*]] = zext i77 [[ADD]] to i128
+// RISCV32_INT128-NEXT: store i128 [[STOREDV4]], ptr [[AGG_RESULT]], align 8, !tbaa [[TBAA10]]
+// RISCV32_INT128-NEXT: ret void
+//
+unsigned _BitInt(77) test_bitint_77_add_unsigned(unsigned _BitInt(77) a, unsigned _BitInt(77) b) {
+ return a + b;
+}
+
+// RISCV64-LABEL: define {{[^@]+}}@test_bitint_77_add_signed
+// RISCV64-SAME: (i77 noundef signext [[A:%.*]], i77 noundef signext [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// RISCV64-NEXT: entry:
+// RISCV64-NEXT: [[ADD:%.*]] = add nsw i77 [[B]], [[A]]
+// RISCV64-NEXT: ret i77 [[ADD]]
+//
+// RISCV32-LABEL: define {{[^@]+}}@test_bitint_77_add_signed
+// RISCV32-SAME: (ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR1]] {
+// RISCV32-NEXT: entry:
+// RISCV32-NEXT: [[TMP2:%.*]] = load i128, ptr [[TMP0]], align 8, !tbaa [[TBAA10]]
+// RISCV32-NEXT: [[A:%.*]] = trunc i128 [[TMP2]] to i77
+// RISCV32-NEXT: [[TMP3:%.*]] = load i128, ptr [[TMP1]], align 8, !tbaa [[TBAA10]]
+// RISCV32-NEXT: [[B:%.*]] = trunc i128 [[TMP3]] to i77
+// RISCV32-NEXT: [[ADD:%.*]] = add nsw i77 [[B]], [[A]]
+// RISCV32-NEXT: [[STOREDV4:%.*]] = sext i77 [[ADD]] to i128
+// RISCV32-NEXT: store i128 [[STOREDV4]], ptr [[AGG_RESULT]], align 8, !tbaa [[TBAA10]]
+// RISCV32-NEXT: ret void
+//
+// RISCV32_INT128-LABEL: define {{[^@]+}}@test_bitint_77_add_signed
+// RISCV32_INT128-SAME: (ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR1]] {
+// RISCV32_INT128-NEXT: entry:
+// RISCV32_INT128-NEXT: [[TMP2:%.*]] = load i128, ptr [[TMP0]], align 8, !tbaa [[TBAA10]]
+// RISCV32_INT128-NEXT: [[A:%.*]] = trunc i128 [[TMP2]] to i77
+// RISCV32_INT128-NEXT: [[TMP3:%.*]] = load i128, ptr [[TMP1]], align 8, !tbaa [[TBAA10]]
+// RISCV32_INT128-NEXT: [[B:%.*]] = trunc i128 [[TMP3]] to i77
+// RISCV32_INT128-NEXT: [[ADD:%.*]] = add nsw i77 [[B]], [[A]]
+// RISCV32_INT128-NEXT: [[STOREDV4:%.*]] = sext i77 [[ADD]] to i128
+// RISCV32_INT128-NEXT: store i128 [[STOREDV4]], ptr [[AGG_RESULT]], align 8, !tbaa [[TBAA10]]
+// RISCV32_INT128-NEXT: ret void
+//
+signed _BitInt(77) test_bitint_77_add_signed(signed _BitInt(77) a, signed _BitInt(77) b) {
+ return a + b;
+}
+
+// RISCV64-LABEL: define {{[^@]+}}@test_bitint_77_add_default
+// RISCV64-SAME: (i77 noundef signext [[A:%.*]], i77 noundef signext [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// RISCV64-NEXT: entry:
+// RISCV64-NEXT: [[ADD:%.*]] = add nsw i77 [[B]], [[A]]
+// RISCV64-NEXT: ret i77 [[ADD]]
+//
+// RISCV32-LABEL: define {{[^@]+}}@test_bitint_77_add_default
+// RISCV32-SAME: (ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR1]] {
+// RISCV32-NEXT: entry:
+// RISCV32-NEXT: [[TMP2:%.*]] = load i128, ptr [[TMP0]], align 8, !tbaa [[TBAA10]]
+// RISCV32-NEXT: [[A:%.*]] = trunc i128 [[TMP2]] to i77
+// RISCV32-NEXT: [[TMP3:%.*]] = load i128, ptr [[TMP1]], align 8, !tbaa [[TBAA10]]
+// RISCV32-NEXT: [[B:%.*]] = trunc i128 [[TMP3]] to i77
+// RISCV32-NEXT: [[ADD:%.*]] = add nsw i77 [[B]], [[A]]
+// RISCV32-NEXT: [[STOREDV4:%.*]] = sext i77 [[ADD]] to i128
+// RISCV32-NEXT: store i128 [[STOREDV4]], ptr [[AGG_RESULT]], align 8, !tbaa [[TBAA10]]
+// RISCV32-NEXT: ret void
+//
+// RISCV32_INT128-LABEL: define {{[^@]+}}@test_bitint_77_add_default
+// RISCV32_INT128-SAME: (ptr dead_on_unwind noalias writable writeonly sret(i128) align 8 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR1]] {
+// RISCV32_INT128-NEXT: entry:
+// RISCV32_INT128-NEXT: [[TMP2:%.*]] = load i128, ptr [[TMP0]], align 8, !tbaa [[TBAA10]]
+// RISCV32_INT128-NEXT: [[A:%.*]] = trunc i128 [[TMP2]] to i77
+// RISCV32_INT128-NEXT: [[TMP3:%.*]] = load i128, ptr [[TMP1]], align 8, !tbaa [[TBAA10]]
+// RISCV32_INT128-NEXT: [[B:%.*]] = trunc i128 [[TMP3]] to i77
+// RISCV32_INT128-NEXT: [[ADD:%.*]] = add nsw i77 [[B]], [[A]]
+// RISCV32_INT128-NEXT: [[STOREDV4:%.*]] = sext i77 [[ADD]] to i128
+// RISCV32_INT128-NEXT: store i128 [[STOREDV4]], ptr [[AGG_RESULT]], align 8, !tbaa [[TBAA10]]
+// RISCV32_INT128-NEXT: ret void
+//
+_BitInt(77) test_bitint_77_add_default(_BitInt(77) a, _BitInt(77) b) {
+ return a + b;
+}
diff --git a/clang/test/CodeGen/X86/avx-builtins.c b/clang/test/CodeGen/X86/avx-builtins.c
index 8f3d459..bcffd861 100644
--- a/clang/test/CodeGen/X86/avx-builtins.c
+++ b/clang/test/CodeGen/X86/avx-builtins.c
@@ -1100,6 +1100,7 @@ __m256d test_mm256_hadd_pd(__m256d A, __m256d B) {
// CHECK: call {{.*}}<4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}})
return _mm256_hadd_pd(A, B);
}
+TEST_CONSTEXPR(match_m256d(_mm256_hadd_pd((__m256d){+1.0, +2.0, +3.0, +4.0}, (__m256d){+5.0, +6.0, +7.0, +8.0}), +3.0, +11.0, +7.0, +15.0));
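+// Within each 128-bit lane, hadd_pd interleaves pairwise sums of the two
+// sources: {A0+A1, B0+B1, A2+A3, B2+B3}, hence {3, 11, 7, 15} above.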
__m256 test_mm256_hadd_ps(__m256 A, __m256 B) {
// CHECK-LABEL: test_mm256_hadd_ps
@@ -1107,17 +1108,27 @@ __m256 test_mm256_hadd_ps(__m256 A, __m256 B) {
return _mm256_hadd_ps(A, B);
}
+TEST_CONSTEXPR(match_m256(_mm256_hadd_ps(
+ (__m256){+1.0f, +2.0f, +3.0f, +4.0f, +5.0f, +6.0f, +7.0f, +8.0f},
+ (__m256){+9.0f, +10.0f, +11.0f, +12.0f, +13.0f, +14.0f, +15.0f, +16.0f}),
+ +3.0f, +7.0f, +19.0f, +23.0f, +11.0f, +15.0f, +27.0f, +31.0f));
+
__m256d test_mm256_hsub_pd(__m256d A, __m256d B) {
// CHECK-LABEL: test_mm256_hsub_pd
// CHECK: call {{.*}}<4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}})
return _mm256_hsub_pd(A, B);
}
+TEST_CONSTEXPR(match_m256d(_mm256_hsub_pd((__m256d){+1.0, +2.0, +4.0, +3.0}, (__m256d){+10.0, +6.0, +16.0, +8.0}), -1.0, +4.0, +1.0, +8.0));
__m256 test_mm256_hsub_ps(__m256 A, __m256 B) {
// CHECK-LABEL: test_mm256_hsub_ps
// CHECK: call {{.*}}<8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}})
return _mm256_hsub_ps(A, B);
}
+TEST_CONSTEXPR(match_m256(_mm256_hsub_ps(
+ (__m256){1.0f, 2.0f, 4.0f, 3.0f, 5.0f, 7.0f, 7.0f, 5.0f},
+ (__m256){6.0f, 9.0f, 11.0f, 8.0f, 13.0f, 17.0f, 15.0f, 11.0f}),
+ -1.0f, 1.0f, -3.0f, 3.0f, -2.0f, 2.0f, -4.0f, 4.0f));
__m256i test_mm256_insert_epi8(__m256i x, char b) {
// CHECK-LABEL: test_mm256_insert_epi8
diff --git a/clang/test/CodeGen/X86/avx2-builtins.c b/clang/test/CodeGen/X86/avx2-builtins.c
index 55f18f9..b798618 100644
--- a/clang/test/CodeGen/X86/avx2-builtins.c
+++ b/clang/test/CodeGen/X86/avx2-builtins.c
@@ -485,36 +485,60 @@ __m256i test_mm256_hadd_epi16(__m256i a, __m256i b) {
// CHECK: call <16 x i16> @llvm.x86.avx2.phadd.w(<16 x i16> %{{.*}}, <16 x i16> %{{.*}})
return _mm256_hadd_epi16(a, b);
}
+TEST_CONSTEXPR(match_v16hi(_mm256_hadd_epi16(
+ (__m256i)(__v16hi){1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16},
+ (__m256i)(__v16hi){17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32}),
+ 3,7,11,15,35,39,43,47,19,23,27,31,51,55,59,63));
__m256i test_mm256_hadd_epi32(__m256i a, __m256i b) {
// CHECK-LABEL: test_mm256_hadd_epi32
// CHECK: call <8 x i32> @llvm.x86.avx2.phadd.d(<8 x i32> %{{.*}}, <8 x i32> %{{.*}})
return _mm256_hadd_epi32(a, b);
}
+TEST_CONSTEXPR(match_v8si(_mm256_hadd_epi32(
+ (__m256i)(__v8si){10, 20, 30, 40, 50, 60, 70, 80},
+ (__m256i)(__v8si){5, 15, 25, 35, 45, 55, 65, 75}),
+ 30,70,20,60,110,150,100,140));
__m256i test_mm256_hadds_epi16(__m256i a, __m256i b) {
// CHECK-LABEL: test_mm256_hadds_epi16
// CHECK:call <16 x i16> @llvm.x86.avx2.phadd.sw(<16 x i16> %{{.*}}, <16 x i16> %{{.*}})
return _mm256_hadds_epi16(a, b);
}
+TEST_CONSTEXPR(match_v16hi(_mm256_hadds_epi16(
+ (__m256i)(__v16hi){32767, 32767, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14},
+ (__m256i)(__v16hi){19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 32767, 5}),
+ 32767, 3, 7, 11, 39, 43, 47, 51, 15, 19, 23, 27, 55, 59, 63, 32767));
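+// hadds saturates each 16-bit pair sum to [-32768, 32767]: both
+// 32767 + 32767 and 32767 + 5 clamp to 32767 in the vector above.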
__m256i test_mm256_hsub_epi16(__m256i a, __m256i b) {
// CHECK-LABEL: test_mm256_hsub_epi16
// CHECK: call <16 x i16> @llvm.x86.avx2.phsub.w(<16 x i16> %{{.*}}, <16 x i16> %{{.*}})
return _mm256_hsub_epi16(a, b);
}
+TEST_CONSTEXPR(match_v16hi(_mm256_hsub_epi16(
+ (__m256i)(__v16hi){2,1,1,2,5,3,3,5,7,4,4,7,9,5,5,9},
+ (__m256i)(__v16hi){10,5,5,10,12,6,6,12,21,14,14,21,24,16,16,24}),
+ 1, -1, 2, -2, 5, -5, 6, -6, 3, -3, 4, -4, 7, -7, 8, -8));
__m256i test_mm256_hsub_epi32(__m256i a, __m256i b) {
// CHECK-LABEL: test_mm256_hsub_epi32
// CHECK: call <8 x i32> @llvm.x86.avx2.phsub.d(<8 x i32> %{{.*}}, <8 x i32> %{{.*}})
return _mm256_hsub_epi32(a, b);
}
+TEST_CONSTEXPR(match_v8si(_mm256_hsub_epi32(
+ (__m256i)(__v8si){10, 20, 30,50,60,90,100,140},
+ (__m256i)(__v8si){200,150,260,200,420,350,800,720}),
+ -10, -20, 50, 60, -30, -40, 70, 80));
__m256i test_mm256_hsubs_epi16(__m256i a, __m256i b) {
// CHECK-LABEL: test_mm256_hsubs_epi16
// CHECK:call <16 x i16> @llvm.x86.avx2.phsub.sw(<16 x i16> %{{.*}}, <16 x i16> %{{.*}})
return _mm256_hsubs_epi16(a, b);
}
+TEST_CONSTEXPR(match_v16hi(_mm256_hsubs_epi16(
+ (__m256i)(__v16hi){32726, -100, 3, 2, 6, 4, 8, 5, 15, 10, 21, 14, 27, 18, 100, 90},
+ (__m256i)(__v16hi){40, 20, 100, 70, 200, 150, 100, 40, 1000, 900, 300, 150, 500, 300, 1, 1}),
+ 32767, 1, 2, 3, 20, 30, 50, 60, 5, 7, 9, 10, 100, 150, 200, 0));
__m128i test_mm_i32gather_epi32(int const *b, __m128i c) {
// CHECK-LABEL: test_mm_i32gather_epi32
@@ -1106,6 +1130,8 @@ __m256i test_mm256_shuffle_epi8(__m256i a, __m256i b) {
return _mm256_shuffle_epi8(a, b);
}
+TEST_CONSTEXPR(match_v32qi(_mm256_shuffle_epi8((__m256i)(__v32qi){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31}, (__m256i)(__v32qs){0,33,2,35,4,37,6,-39,8,41,10,43,12,45,14,-47,16,49,18,51,20,53,22,-55,24,57,26,59,28,61,30,-63}), 0,1,2,3,4,5,6,0,8,9,10,11,12,13,14,0,16,17,18,19,20,21,22,0,24,25,26,27,28,29,30,0));
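+// pshufb semantics: within each 16-byte lane the low four bits of a control
+// byte select the source byte, and a set sign bit (the negative controls
+// above) forces that result byte to zero.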
+
__m256i test_mm256_shuffle_epi32(__m256i a) {
// CHECK-LABEL: test_mm256_shuffle_epi32
// CHECK: shufflevector <8 x i32> %{{.*}}, <8 x i32> poison, <8 x i32> <i32 3, i32 3, i32 0, i32 0, i32 7, i32 7, i32 4, i32 4>
diff --git a/clang/test/CodeGen/X86/avx512bw-builtins.c b/clang/test/CodeGen/X86/avx512bw-builtins.c
index af1c904..fddf17d 100644
--- a/clang/test/CodeGen/X86/avx512bw-builtins.c
+++ b/clang/test/CodeGen/X86/avx512bw-builtins.c
@@ -1466,18 +1466,27 @@ __m512i test_mm512_shuffle_epi8(__m512i __A, __m512i __B) {
// CHECK: @llvm.x86.avx512.pshuf.b.512
return _mm512_shuffle_epi8(__A,__B);
}
+
+TEST_CONSTEXPR(match_v64qi(_mm512_shuffle_epi8((__m512i)(__v64qi){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63}, (__m512i)(__v64qs){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,-15,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,-15,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,-79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,-95}), 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,0,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,0,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,0,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,0));
+
__m512i test_mm512_mask_shuffle_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_mask_shuffle_epi8
// CHECK: @llvm.x86.avx512.pshuf.b.512
// CHECK: select <64 x i1> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}}
return _mm512_mask_shuffle_epi8(__W,__U,__A,__B);
}
+
+TEST_CONSTEXPR(match_v64qi(_mm512_mask_shuffle_epi8((__m512i)(__v64qi){1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,6,6,6,6,6,6,6,6,7,7,7,7,7,7,7,7,8,8,8,8,8,8,8,8}, 0xFFFFFFFF00000000, (__m512i)(__v64qi){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63}, (__m512i)(__v64qi){63,62,61,60,59,58,57,56,55,54,53,52,51,50,49,48,47,46,45,44,43,42,41,40,39,38,37,36,35,34,33,32,31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0}), 1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,47,46,45,44,43,42,41,40,39,38,37,36,35,34,33,32,63,62,61,60,59,58,57,56,55,54,53,52,51,50,49,48));
+
__m512i test_mm512_maskz_shuffle_epi8(__mmask64 __U, __m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_maskz_shuffle_epi8
// CHECK: @llvm.x86.avx512.pshuf.b.512
// CHECK: select <64 x i1> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}}
return _mm512_maskz_shuffle_epi8(__U,__A,__B);
}
+
+TEST_CONSTEXPR(match_v64qi(_mm512_maskz_shuffle_epi8(0x8888888888888888,(__m512i)(__v64qi){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63}, (__m512i)(__v64qi){127,126,125,124,123,122,121,120,119,118,117,116,115,114,113,112,111,110,109,108,107,106,105,104,103,102,101,100,99,98,97,96,95,94,93,92,91,90,89,88,87,86,85,84,83,82,81,80,79,78,77,76,75,74,73,72,71,70,69,68,67,66,65,64}), 0,0,0,12,0,0,0,8,0,0,0,4,0,0,0,0,0,0,0,28,0,0,0,24,0,0,0,20,0,0,0,16,0,0,0,44,0,0,0,40,0,0,0,36,0,0,0,32,0,0,0,60,0,0,0,56,0,0,0,52,0,0,0,48));
+
__m512i test_mm512_subs_epi8(__m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_subs_epi8
// CHECK: @llvm.ssub.sat.v64i8
diff --git a/clang/test/CodeGen/X86/avx512cd-builtins.c b/clang/test/CodeGen/X86/avx512cd-builtins.c
index b9d42b7..2890889 100644
--- a/clang/test/CodeGen/X86/avx512cd-builtins.c
+++ b/clang/test/CodeGen/X86/avx512cd-builtins.c
@@ -125,6 +125,8 @@ __m512i test_mm512_broadcastmb_epi64(__m512i a, __m512i b) {
// CHECK: insertelement <8 x i64> %{{.*}}, i64 %{{.*}}, i32 7
return _mm512_broadcastmb_epi64(_mm512_cmpeq_epu64_mask ( a, b));
}
+TEST_CONSTEXPR(match_v8di(_mm512_broadcastmb_epi64((__mmask8)(0)), 0,0,0,0, 0,0,0,0));
+TEST_CONSTEXPR(match_v8di(_mm512_broadcastmb_epi64((__mmask8)(0xab)), 0xab,0xab,0xab,0xab, 0xab,0xab,0xab,0xab));
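+// broadcastmb zero-extends the 8-bit mask to 64 bits and splats it into
+// every qword element, so 0xab lands in all eight lanes above.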
__m512i test_mm512_broadcastmw_epi32(__m512i a, __m512i b) {
// CHECK-LABEL: test_mm512_broadcastmw_epi32
@@ -148,3 +150,5 @@ __m512i test_mm512_broadcastmw_epi32(__m512i a, __m512i b) {
// CHECK: insertelement <16 x i32> %{{.*}}, i32 %{{.*}}
return _mm512_broadcastmw_epi32(_mm512_cmpeq_epi32_mask ( a, b));
}
+TEST_CONSTEXPR(match_v16si(_mm512_broadcastmw_epi32((__mmask16)(0xff)), 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff));
+TEST_CONSTEXPR(match_v16si(_mm512_broadcastmw_epi32((__mmask16)(0x0FA1L)), 0x0FA1L,0x0FA1L,0x0FA1L,0x0FA1L, 0x0FA1L,0x0FA1L,0x0FA1L,0x0FA1L, 0x0FA1L,0x0FA1L,0x0FA1L,0x0FA1L, 0x0FA1L,0x0FA1L,0x0FA1L,0x0FA1L));
diff --git a/clang/test/CodeGen/X86/avx512ifma-builtins.c b/clang/test/CodeGen/X86/avx512ifma-builtins.c
index eebefb0..f90697e 100644
--- a/clang/test/CodeGen/X86/avx512ifma-builtins.c
+++ b/clang/test/CodeGen/X86/avx512ifma-builtins.c
@@ -8,45 +8,230 @@
// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512ifma -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s
// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +avx512ifma -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s
-
#include <immintrin.h>
+#include "builtin_test_helpers.h"
__m512i test_mm512_madd52hi_epu64(__m512i __X, __m512i __Y, __m512i __Z) {
// CHECK-LABEL: test_mm512_madd52hi_epu64
// CHECK: call {{.*}}<8 x i64> @llvm.x86.avx512.vpmadd52h.uq.512(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}})
- return _mm512_madd52hi_epu64(__X, __Y, __Z);
+ return _mm512_madd52hi_epu64(__X, __Y, __Z);
}
+TEST_CONSTEXPR(match_v8di(_mm512_madd52hi_epu64(
+ (__m512i)(__v8du){100, 0, 0, 0, 0, 0, 0, 0},
+ (__m512i)(__v8du){10, 0, 0, 0, 0, 0, 0, 0},
+ (__m512i)(__v8du){5, 0, 0, 0, 0, 0, 0, 0}),
+ 100, 0, 0, 0, 0, 0, 0, 0));
+
+TEST_CONSTEXPR(match_v8di(_mm512_madd52hi_epu64(
+ (__m512i)(__v8du){0, 0, 0, 0, 0, 0, 0, 0},
+ (__m512i)(__v8du){0xFFFFFFFFFFFFFull, 0, 0, 0,
+ 0, 0, 0, 0},
+ (__m512i)(__v8du){0xFFFFFFFFFFFFFull, 0, 0, 0,
+ 0, 0, 0, 0}),
+ 0xFFFFFFFFFFFFEull, 0, 0, 0, 0, 0, 0, 0));
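+// madd52hi multiplies the low 52 bits of each 64-bit element of Y and Z,
+// then adds the high 52 bits of the 104-bit product to X; (2^52-1)^2 has
+// high half 2^52-2 = 0xFFFFFFFFFFFFE, as checked above.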
+
+TEST_CONSTEXPR(match_v8di(_mm512_madd52hi_epu64(
+ (__m512i)(__v8du){1, 2, 3, 4, 5, 6, 7, 8},
+ (__m512i)(__v8du){0xFFFFFFFFFFFFFull,
+ 0xFFFFFFFFFFFFFull,
+ 0xFFFFFFFFFFFFFull,
+ 0xFFFFFFFFFFFFFull,
+ 0xFFFFFFFFFFFFFull,
+ 0xFFFFFFFFFFFFFull,
+ 0xFFFFFFFFFFFFFull,
+ 0xFFFFFFFFFFFFFull},
+ (__m512i)(__v8du){0xFFFFFFFFFFFFFull,
+ 0xFFFFFFFFFFFFFull,
+ 0xFFFFFFFFFFFFFull,
+ 0xFFFFFFFFFFFFFull,
+ 0xFFFFFFFFFFFFFull,
+ 0xFFFFFFFFFFFFFull,
+ 0xFFFFFFFFFFFFFull,
+ 0xFFFFFFFFFFFFFull}),
+ 4503599627370495ull, 4503599627370496ull,
+ 4503599627370497ull, 4503599627370498ull,
+ 4503599627370499ull, 4503599627370500ull,
+ 4503599627370501ull, 4503599627370502ull));
+
__m512i test_mm512_mask_madd52hi_epu64(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y) {
// CHECK-LABEL: test_mm512_mask_madd52hi_epu64
// CHECK: call {{.*}}<8 x i64> @llvm.x86.avx512.vpmadd52h.uq.512(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}})
// CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
- return _mm512_mask_madd52hi_epu64(__W, __M, __X, __Y);
+ return _mm512_mask_madd52hi_epu64(__W, __M, __X, __Y);
}
+TEST_CONSTEXPR(match_v8di(_mm512_mask_madd52hi_epu64(
+ (__m512i)(__v8du){111, 222, 333, 444, 555, 666,
+ 777, 888},
+ 0x00,
+ (__m512i)(__v8du){1, 2, 3, 4, 5, 6, 7, 8},
+ (__m512i)(__v8du){10, 20, 30, 40, 50, 60, 70,
+ 80}),
+ 111, 222, 333, 444, 555, 666, 777, 888));
+
+TEST_CONSTEXPR(match_v8di(_mm512_mask_madd52hi_epu64(
+ (__m512i)(__v8du){10, 20, 30, 40, 50, 60, 70,
+ 80},
+ 0xFF,
+ (__m512i)(__v8du){100, 200, 300, 400, 500, 600,
+ 700, 800},
+ (__m512i)(__v8du){10, 20, 30, 40, 50, 60, 70,
+ 80}),
+ 10, 20, 30, 40, 50, 60, 70, 80));
+
__m512i test_mm512_maskz_madd52hi_epu64(__mmask8 __M, __m512i __X, __m512i __Y, __m512i __Z) {
// CHECK-LABEL: test_mm512_maskz_madd52hi_epu64
// CHECK: call {{.*}}<8 x i64> @llvm.x86.avx512.vpmadd52h.uq.512(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}})
// CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
- return _mm512_maskz_madd52hi_epu64(__M, __X, __Y, __Z);
+ return _mm512_maskz_madd52hi_epu64(__M, __X, __Y, __Z);
}
+TEST_CONSTEXPR(match_v8di(_mm512_maskz_madd52hi_epu64(
+ 0x00,
+ (__m512i)(__v8du){1, 2, 3, 4, 5, 6, 7, 8},
+ (__m512i)(__v8du){10, 20, 30, 40, 50, 60, 70,
+ 80},
+ (__m512i)(__v8du){100, 200, 300, 400, 500, 600,
+ 700, 800}),
+ 0, 0, 0, 0, 0, 0, 0, 0));
+
+TEST_CONSTEXPR(match_v8di(_mm512_maskz_madd52hi_epu64(
+ 0xFF,
+ (__m512i)(__v8du){1, 2, 3, 4, 5, 6, 7, 8},
+ (__m512i)(__v8du){10, 20, 30, 40, 50, 60, 70,
+ 80},
+ (__m512i)(__v8du){100, 200, 300, 400, 500, 600,
+ 700, 800}),
+ 1, 2, 3, 4, 5, 6, 7, 8));
+
__m512i test_mm512_madd52lo_epu64(__m512i __X, __m512i __Y, __m512i __Z) {
// CHECK-LABEL: test_mm512_madd52lo_epu64
// CHECK: call {{.*}}<8 x i64> @llvm.x86.avx512.vpmadd52l.uq.512(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}})
- return _mm512_madd52lo_epu64(__X, __Y, __Z);
+ return _mm512_madd52lo_epu64(__X, __Y, __Z);
}
+TEST_CONSTEXPR(match_v8di(_mm512_madd52lo_epu64(
+ (__m512i)(__v8du){0, 0, 0, 0, 0, 0, 0, 0},
+ (__m512i)(__v8du){10, 0, 0, 0, 0, 0, 0, 0},
+ (__m512i)(__v8du){5, 0, 0, 0, 0, 0, 0, 0}),
+ 50, 0, 0, 0, 0, 0, 0, 0));
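+// madd52lo adds the low 52 bits of the 104-bit product to the accumulator
+// instead, so 10 * 5 contributes 50 directly here.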
+
+TEST_CONSTEXPR(match_v8di(_mm512_madd52lo_epu64(
+ (__m512i)(__v8du){100, 0, 0, 0, 0, 0, 0, 0},
+ (__m512i)(__v8du){20, 0, 0, 0, 0, 0, 0, 0},
+ (__m512i)(__v8du){30, 0, 0, 0, 0, 0, 0, 0}),
+ 700, 0, 0, 0, 0, 0, 0, 0));
+
+TEST_CONSTEXPR(match_v8di(_mm512_madd52lo_epu64(
+ (__m512i)(__v8du){0, 0, 0, 0, 0, 0, 0, 0},
+ (__m512i)(__v8du){0xFFFFFFFFFFFFFull, 0, 0, 0,
+ 0, 0, 0, 0},
+ (__m512i)(__v8du){1, 0, 0, 0, 0, 0, 0, 0}),
+ 0xFFFFFFFFFFFFFull, 0, 0, 0, 0, 0, 0, 0));
+
+TEST_CONSTEXPR(match_v8di(_mm512_madd52lo_epu64(
+ (__m512i)(__v8du){0, 0, 0, 0, 0, 0, 0, 0},
+ (__m512i)(__v8du){0x1F000000000000ull, 0, 0, 0,
+ 0, 0, 0, 0},
+ (__m512i)(__v8du){2, 0, 0, 0, 0, 0, 0, 0}),
+ 0xE000000000000ull, 0, 0, 0, 0, 0, 0, 0));
+
+TEST_CONSTEXPR(match_v8di(_mm512_madd52lo_epu64(
+ (__m512i)(__v8du){1, 2, 3, 4, 5, 6, 7, 8},
+ (__m512i)(__v8du){10, 20, 30, 40, 50, 60, 70,
+ 80},
+ (__m512i)(__v8du){2, 3, 4, 5, 6, 7, 8, 9}),
+ 21, 62, 123, 204, 305, 426, 567, 728));
+
+TEST_CONSTEXPR(match_v8di(_mm512_madd52lo_epu64(
+ (__m512i)(__v8du){0xFFFFFFFFFFFFFull, 0, 0, 0,
+ 0, 0, 0, 0},
+ (__m512i)(__v8du){10, 0, 0, 0, 0, 0, 0, 0},
+ (__m512i)(__v8du){5, 0, 0, 0, 0, 0, 0, 0}),
+ 4503599627370545ull, 0, 0, 0, 0, 0, 0, 0));
+
+TEST_CONSTEXPR(match_v8di(_mm512_madd52lo_epu64(
+ (__m512i)(__v8du){10, 20, 30, 40, 50, 60, 70,
+ 80},
+ (__m512i)(__v8du){100, 200, 300, 400, 500, 600,
+ 700, 800},
+ (__m512i)(__v8du){2, 3, 4, 5, 6, 7, 8, 9}),
+ 210, 620, 1230, 2040, 3050, 4260, 5670, 7280));
+
+TEST_CONSTEXPR(match_v8di(_mm512_madd52lo_epu64(
+ (__m512i)(__v8du){0, 0, 0, 0, 0, 0, 0, 0},
+ (__m512i)(__v8du){0x1F000000000000ull,
+ 0x1F000000000000ull, 0, 0, 0,
+ 0, 0, 0},
+ (__m512i)(__v8du){2, 3, 0, 0, 0, 0, 0, 0}),
+ 0xE000000000000ull, 0xD000000000000ull, 0, 0, 0, 0,
+ 0, 0));
+
+TEST_CONSTEXPR(match_v8di(_mm512_madd52lo_epu64(
+ (__m512i)(__v8du){0, 0, 0, 0, 0, 0, 0, 0},
+ (__m512i)(__v8du){0xFFFFFFFFFFFFFull,
+ 0xFFFFFFFFFFFFFull,
+ 0xFFFFFFFFFFFFFull,
+ 0xFFFFFFFFFFFFFull,
+ 0xFFFFFFFFFFFFFull,
+ 0xFFFFFFFFFFFFFull,
+ 0xFFFFFFFFFFFFFull,
+ 0xFFFFFFFFFFFFFull},
+ (__m512i)(__v8du){1, 1, 1, 1, 1, 1, 1, 1}),
+ 0xFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFull,
+ 0xFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFull,
+ 0xFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFull,
+ 0xFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFull));
+
__m512i test_mm512_mask_madd52lo_epu64(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y) {
// CHECK-LABEL: test_mm512_mask_madd52lo_epu64
// CHECK: call {{.*}}<8 x i64> @llvm.x86.avx512.vpmadd52l.uq.512(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}})
// CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
- return _mm512_mask_madd52lo_epu64(__W, __M, __X, __Y);
+ return _mm512_mask_madd52lo_epu64(__W, __M, __X, __Y);
}
+TEST_CONSTEXPR(match_v8di(_mm512_mask_madd52lo_epu64(
+ (__m512i)(__v8du){111, 222, 333, 444, 555, 666,
+ 777, 888},
+ 0x00,
+ (__m512i)(__v8du){1, 2, 3, 4, 5, 6, 7, 8},
+ (__m512i)(__v8du){10, 20, 30, 40, 50, 60, 70,
+ 80}),
+ 111, 222, 333, 444, 555, 666, 777, 888));
+
+TEST_CONSTEXPR(match_v8di(_mm512_mask_madd52lo_epu64(
+ (__m512i)(__v8du){1000, 2000, 3000, 4000, 5000,
+ 6000, 7000, 8000},
+ 0xFF,
+ (__m512i)(__v8du){100, 200, 300, 400, 500, 600,
+ 700, 800},
+ (__m512i)(__v8du){20, 30, 40, 50, 60, 70, 80,
+ 90}),
+ 3000, 8000, 15000, 24000, 35000, 48000, 63000,
+ 80000));
+
__m512i test_mm512_maskz_madd52lo_epu64(__mmask8 __M, __m512i __X, __m512i __Y, __m512i __Z) {
// CHECK-LABEL: test_mm512_maskz_madd52lo_epu64
// CHECK: call {{.*}}<8 x i64> @llvm.x86.avx512.vpmadd52l.uq.512(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}})
// CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
- return _mm512_maskz_madd52lo_epu64(__M, __X, __Y, __Z);
+ return _mm512_maskz_madd52lo_epu64(__M, __X, __Y, __Z);
}
+
+TEST_CONSTEXPR(match_v8di(_mm512_maskz_madd52lo_epu64(
+ 0x00,
+ (__m512i)(__v8du){1, 2, 3, 4, 5, 6, 7, 8},
+ (__m512i)(__v8du){10, 20, 30, 40, 50, 60, 70,
+ 80},
+ (__m512i)(__v8du){2, 3, 4, 5, 6, 7, 8, 9}),
+ 0, 0, 0, 0, 0, 0, 0, 0));
+
+TEST_CONSTEXPR(match_v8di(_mm512_maskz_madd52lo_epu64(
+ 0xFF,
+ (__m512i)(__v8du){100, 200, 300, 400, 500, 600,
+ 700, 800},
+ (__m512i)(__v8du){20, 30, 40, 50, 60, 70, 80,
+ 90},
+ (__m512i)(__v8du){30, 40, 50, 60, 70, 80, 90,
+ 100}),
+ 700, 1400, 2300, 3400, 4700, 6200, 7900, 9800));
diff --git a/clang/test/CodeGen/X86/avx512ifmavl-builtins.c b/clang/test/CodeGen/X86/avx512ifmavl-builtins.c
index 89108fc..1cbb580 100644
--- a/clang/test/CodeGen/X86/avx512ifmavl-builtins.c
+++ b/clang/test/CodeGen/X86/avx512ifmavl-builtins.c
@@ -8,85 +8,241 @@
// RUN: %clang_cc1 -x c++ %s -flax-vector-conversions=none -ffreestanding -triple=x86_64-apple-darwin -target-feature +avx512ifma -target-feature +avx512vl -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s
// RUN: %clang_cc1 -x c++ %s -flax-vector-conversions=none -ffreestanding -triple=i386-apple-darwin -target-feature +avx512ifma -target-feature +avx512vl -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s
-
#include <immintrin.h>
+#include "builtin_test_helpers.h"
__m128i test_mm_madd52hi_epu64(__m128i __X, __m128i __Y, __m128i __Z) {
// CHECK-LABEL: test_mm_madd52hi_epu64
// CHECK: call {{.*}}<2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}})
- return _mm_madd52hi_epu64(__X, __Y, __Z);
+ return _mm_madd52hi_epu64(__X, __Y, __Z);
}
+TEST_CONSTEXPR(match_v2di(_mm_madd52hi_epu64(
+ (__m128i)((__v2du){100, 0}),
+ (__m128i)((__v2du){10, 0}),
+ (__m128i)((__v2du){5, 0})),
+ 100, 0));
+
+TEST_CONSTEXPR(match_v2di(_mm_madd52hi_epu64(
+ (__m128i)((__v2du){0, 0}),
+ (__m128i)((__v2du){0xFFFFFFFFFFFFFull, 0}),
+ (__m128i)((__v2du){0xFFFFFFFFFFFFFull, 0})),
+ 0xFFFFFFFFFFFFEull, 0));
+
__m128i test_mm_mask_madd52hi_epu64(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y) {
// CHECK-LABEL: test_mm_mask_madd52hi_epu64
// CHECK: call {{.*}}<2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}})
// CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}
- return _mm_mask_madd52hi_epu64(__W, __M, __X, __Y);
+ return _mm_mask_madd52hi_epu64(__W, __M, __X, __Y);
}
+TEST_CONSTEXPR(match_v2di(_mm_mask_madd52hi_epu64((__m128i)((__v2du){111, 222}),
+ 0x0,
+ (__m128i)((__v2du){1, 2}),
+ (__m128i)((__v2du){10, 20})),
+ 111, 222));
+
+TEST_CONSTEXPR(match_v2di(_mm_mask_madd52hi_epu64((__m128i)((__v2du){10, 20}),
+ 0x2,
+ (__m128i)((__v2du){0x1000000000000ULL, 0x1000000000000ULL}),
+ (__m128i)((__v2du){0x1000000000000ULL, 0x1000000000000ULL})),
+ 10, 0x100000000014ULL));
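+// 2^48 * 2^48 = 2^96, whose bits [52, 103] are 2^44 = 0x100000000000; mask
+// 0x2 selects only lane 1, giving 20 + 2^44 = 0x100000000014.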
+
__m128i test_mm_maskz_madd52hi_epu64(__mmask8 __M, __m128i __X, __m128i __Y, __m128i __Z) {
// CHECK-LABEL: test_mm_maskz_madd52hi_epu64
// CHECK: call {{.*}}<2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}})
// CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}
- return _mm_maskz_madd52hi_epu64(__M, __X, __Y, __Z);
+ return _mm_maskz_madd52hi_epu64(__M, __X, __Y, __Z);
}
+TEST_CONSTEXPR(match_v2di(_mm_maskz_madd52hi_epu64(0x3,
+ (__m128i)((__v2du){1, 2}),
+ (__m128i)((__v2du){10, 20}),
+ (__m128i)((__v2du){100, 200})),
+ 1, 2));
+
+TEST_CONSTEXPR(match_v2di(_mm_maskz_madd52hi_epu64(0x1,
+ (__m128i)((__v2du){0x1000000000000ULL, 0x1000000000000ULL}),
+ (__m128i)((__v2du){0x1000000000000ULL, 0x1000000000000ULL}),
+ (__m128i)((__v2du){0, 0})),
+ 0x1000000000000ULL, 0));
+
__m256i test_mm256_madd52hi_epu64(__m256i __X, __m256i __Y, __m256i __Z) {
// CHECK-LABEL: test_mm256_madd52hi_epu64
// CHECK: call {{.*}}<4 x i64> @llvm.x86.avx512.vpmadd52h.uq.256(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}})
- return _mm256_madd52hi_epu64(__X, __Y, __Z);
+ return _mm256_madd52hi_epu64(__X, __Y, __Z);
}
+TEST_CONSTEXPR(match_v4di(_mm256_madd52hi_epu64(
+ (__m256i)((__v4du){100, 200, 300, 400}),
+ (__m256i)((__v4du){10, 20, 30, 40}),
+ (__m256i)((__v4du){5, 6, 7, 8})),
+ 100, 200, 300, 400));
+
+TEST_CONSTEXPR(match_v4di(_mm256_madd52hi_epu64(
+ (__m256i)((__v4du){0, 0, 0, 0}),
+ (__m256i)((__v4du){0xFFFFFFFFFFFFFull, 0, 0,
+ 0}),
+ (__m256i)((__v4du){0xFFFFFFFFFFFFFull, 0, 0,
+ 0})),
+ 0xFFFFFFFFFFFFEull, 0, 0, 0));
+
__m256i test_mm256_mask_madd52hi_epu64(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y) {
// CHECK-LABEL: test_mm256_mask_madd52hi_epu64
// CHECK: call {{.*}}<4 x i64> @llvm.x86.avx512.vpmadd52h.uq.256(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}})
// CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}
- return _mm256_mask_madd52hi_epu64(__W, __M, __X, __Y);
+ return _mm256_mask_madd52hi_epu64(__W, __M, __X, __Y);
}
+TEST_CONSTEXPR(match_v4di(_mm256_mask_madd52hi_epu64((__m256i)((__v4du){111, 222, 333, 444}),
+ 0x0,
+ (__m256i)((__v4du){1, 2, 3, 4}),
+ (__m256i)((__v4du){10, 20, 30, 40})),
+ 111, 222, 333, 444));
+
+TEST_CONSTEXPR(match_v4di(_mm256_mask_madd52hi_epu64((__m256i)((__v4du){10, 20, 30, 40}),
+ 0xA,
+ (__m256i)((__v4du){0x1000000000000ULL, 0x1000000000000ULL,
+ 0x1000000000000ULL, 0x1000000000000ULL}),
+ (__m256i)((__v4du){0x1000000000000ULL, 0x1000000000000ULL,
+ 0x1000000000000ULL, 0x1000000000000ULL})),
+ 10, 0x100000000014ULL, 30, 0x100000000028ULL));
+
__m256i test_mm256_maskz_madd52hi_epu64(__mmask8 __M, __m256i __X, __m256i __Y, __m256i __Z) {
// CHECK-LABEL: test_mm256_maskz_madd52hi_epu64
// CHECK: call {{.*}}<4 x i64> @llvm.x86.avx512.vpmadd52h.uq.256(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}})
// CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}
- return _mm256_maskz_madd52hi_epu64(__M, __X, __Y, __Z);
+ return _mm256_maskz_madd52hi_epu64(__M, __X, __Y, __Z);
}
+TEST_CONSTEXPR(match_v4di(_mm256_maskz_madd52hi_epu64(0xF,
+ (__m256i)((__v4du){1, 2, 3, 4}),
+ (__m256i)((__v4du){10, 20, 30, 40}),
+ (__m256i)((__v4du){100, 200, 300, 400})),
+ 1, 2, 3, 4));
+
+TEST_CONSTEXPR(match_v4di(_mm256_maskz_madd52hi_epu64(0x5,
+ (__m256i)((__v4du){0x1000000000000ULL, 0x1000000000000ULL,
+ 0x1000000000000ULL, 0x1000000000000ULL}),
+ (__m256i)((__v4du){0x1000000000000ULL, 0x1000000000000ULL,
+ 0x1000000000000ULL, 0x1000000000000ULL}),
+ (__m256i)((__v4du){0, 0, 0, 0})),
+ 0x1000000000000ULL, 0, 0x1000000000000ULL, 0));
+
__m128i test_mm_madd52lo_epu64(__m128i __X, __m128i __Y, __m128i __Z) {
// CHECK-LABEL: test_mm_madd52lo_epu64
// CHECK: call {{.*}}<2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}})
- return _mm_madd52lo_epu64(__X, __Y, __Z);
+ return _mm_madd52lo_epu64(__X, __Y, __Z);
}
+TEST_CONSTEXPR(match_v2di(_mm_madd52lo_epu64(
+ (__m128i)((__v2du){0, 0}),
+ (__m128i)((__v2du){10, 0}),
+ (__m128i)((__v2du){5, 0})),
+ 50, 0));
+
+TEST_CONSTEXPR(match_v2di(_mm_madd52lo_epu64(
+ (__m128i)((__v2du){100, 0}),
+ (__m128i)((__v2du){20, 0}),
+ (__m128i)((__v2du){30, 0})),
+ 700, 0));
+
+TEST_CONSTEXPR(match_v2di(_mm_madd52lo_epu64(
+ (__m128i)((__v2du){1, 2}),
+ (__m128i)((__v2du){10, 20}),
+ (__m128i)((__v2du){2, 3})),
+ 21, 62));
+
__m128i test_mm_mask_madd52lo_epu64(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y) {
// CHECK-LABEL: test_mm_mask_madd52lo_epu64
// CHECK: call {{.*}}<2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}})
// CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}
- return _mm_mask_madd52lo_epu64(__W, __M, __X, __Y);
+ return _mm_mask_madd52lo_epu64(__W, __M, __X, __Y);
}
+TEST_CONSTEXPR(match_v2di(_mm_mask_madd52lo_epu64((__m128i)((__v2du){1000, 2000}),
+ 0x3,
+ (__m128i)((__v2du){100, 200}),
+ (__m128i)((__v2du){20, 30})),
+ 3000, 8000));
+
+TEST_CONSTEXPR(match_v2di(_mm_mask_madd52lo_epu64((__m128i)((__v2du){111, 222}),
+ 0x0,
+ (__m128i)((__v2du){1, 2}),
+ (__m128i)((__v2du){10, 20})),
+ 111, 222));
+
__m128i test_mm_maskz_madd52lo_epu64(__mmask8 __M, __m128i __X, __m128i __Y, __m128i __Z) {
// CHECK-LABEL: test_mm_maskz_madd52lo_epu64
// CHECK: call {{.*}}<2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}})
// CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}
- return _mm_maskz_madd52lo_epu64(__M, __X, __Y, __Z);
+ return _mm_maskz_madd52lo_epu64(__M, __X, __Y, __Z);
}
+TEST_CONSTEXPR(match_v2di(_mm_maskz_madd52lo_epu64(0x3,
+ (__m128i)((__v2du){100, 200}),
+ (__m128i)((__v2du){20, 30}),
+ (__m128i)((__v2du){30, 40})),
+ 700, 1400));
+
+TEST_CONSTEXPR(match_v2di(_mm_maskz_madd52lo_epu64(0x1,
+ (__m128i)((__v2du){100, 0}),
+ (__m128i)((__v2du){20, 0}),
+ (__m128i)((__v2du){30, 0})),
+ 700, 0));
+
__m256i test_mm256_madd52lo_epu64(__m256i __X, __m256i __Y, __m256i __Z) {
// CHECK-LABEL: test_mm256_madd52lo_epu64
// CHECK: call {{.*}}<4 x i64> @llvm.x86.avx512.vpmadd52l.uq.256(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}})
- return _mm256_madd52lo_epu64(__X, __Y, __Z);
+ return _mm256_madd52lo_epu64(__X, __Y, __Z);
}
+TEST_CONSTEXPR(match_v4di(_mm256_madd52lo_epu64(
+ (__m256i)((__v4du){1, 2, 3, 4}),
+ (__m256i)((__v4du){10, 20, 30, 40}),
+ (__m256i)((__v4du){2, 3, 4, 5})),
+ 21, 62, 123, 204));
+
__m256i test_mm256_mask_madd52lo_epu64(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y) {
// CHECK-LABEL: test_mm256_mask_madd52lo_epu64
// CHECK: call {{.*}}<4 x i64> @llvm.x86.avx512.vpmadd52l.uq.256(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}})
// CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}
- return _mm256_mask_madd52lo_epu64(__W, __M, __X, __Y);
+ return _mm256_mask_madd52lo_epu64(__W, __M, __X, __Y);
}
+TEST_CONSTEXPR(match_v4di(_mm256_mask_madd52lo_epu64((__m256i)((__v4du){1000, 2000, 3000, 4000}),
+ 0xF,
+ (__m256i)((__v4du){100, 200, 300, 400}),
+ (__m256i)((__v4du){20, 30, 40, 50})),
+ 3000, 8000, 15000, 24000));
+
+TEST_CONSTEXPR(match_v4di(_mm256_mask_madd52lo_epu64((__m256i)((__v4du){111, 222, 333, 444}),
+ 0x0,
+ (__m256i)((__v4du){1, 2, 3, 4}),
+ (__m256i)((__v4du){10, 20, 30, 40})),
+ 111, 222, 333, 444));
+
+TEST_CONSTEXPR(match_v4di(_mm256_mask_madd52lo_epu64((__m256i)((__v4du){11, 22, 33, 44}),
+ 0x5,
+ (__m256i)((__v4du){100, 200, 300, 400}),
+ (__m256i)((__v4du){10, 20, 30, 40})),
+ 1011, 22, 9033, 44));
+
__m256i test_mm256_maskz_madd52lo_epu64(__mmask8 __M, __m256i __X, __m256i __Y, __m256i __Z) {
// CHECK-LABEL: test_mm256_maskz_madd52lo_epu64
// CHECK: call {{.*}}<4 x i64> @llvm.x86.avx512.vpmadd52l.uq.256(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}})
// CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}
- return _mm256_maskz_madd52lo_epu64(__M, __X, __Y, __Z);
+ return _mm256_maskz_madd52lo_epu64(__M, __X, __Y, __Z);
}
+
+TEST_CONSTEXPR(match_v4di(_mm256_maskz_madd52lo_epu64(0xF,
+ (__m256i)((__v4du){100, 200, 300, 400}),
+ (__m256i)((__v4du){20, 30, 40, 50}),
+ (__m256i)((__v4du){30, 40, 50, 60})),
+ 700, 1400, 2300, 3400));
+
+TEST_CONSTEXPR(match_v4di(_mm256_maskz_madd52lo_epu64(0x9,
+ (__m256i)((__v4du){100, 200, 300, 400}),
+ (__m256i)((__v4du){10, 20, 30, 40}),
+ (__m256i)((__v4du){5, 10, 15, 20})),
+ 150, 0, 0, 1200));
diff --git a/clang/test/CodeGen/X86/avx512vlbw-builtins.c b/clang/test/CodeGen/X86/avx512vlbw-builtins.c
index c0e46de..d569283 100644
--- a/clang/test/CodeGen/X86/avx512vlbw-builtins.c
+++ b/clang/test/CodeGen/X86/avx512vlbw-builtins.c
@@ -1688,24 +1688,37 @@ __m128i test_mm_mask_shuffle_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m12
// CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}
return _mm_mask_shuffle_epi8(__W,__U,__A,__B);
}
+
+TEST_CONSTEXPR(match_v16qi(_mm_mask_shuffle_epi8((__m128i)(__v16qi){1,1,1,1,1,1,1,1,2,2,4,4,6,6,8,8}, 0x00FF, (__m128i)(__v16qi){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}, (__m128i)(__v16qi){15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0}), 15,14,13,12,11,10,9,8,2,2,4,4,6,6,8,8));
+
__m128i test_mm_maskz_shuffle_epi8(__mmask16 __U, __m128i __A, __m128i __B) {
// CHECK-LABEL: test_mm_maskz_shuffle_epi8
// CHECK: @llvm.x86.ssse3.pshuf.b
// CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}
return _mm_maskz_shuffle_epi8(__U,__A,__B);
}
+
+TEST_CONSTEXPR(match_v16qi(_mm_maskz_shuffle_epi8(0xAAAA, (__m128i)(__v16qi){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}, (__m128i)(__v16qi){15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0}), 0,14,0,12,0,10,0,8,0,6,0,4,0,2,0,0));
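+// The masked forms select per byte between the shuffle result and either
+// the passthrough (mask_*) or zero (maskz_*); 0xAAAA above keeps only the
+// odd result positions.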
+
__m256i test_mm256_mask_shuffle_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) {
// CHECK-LABEL: test_mm256_mask_shuffle_epi8
// CHECK: @llvm.x86.avx2.pshuf.b
// CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}}
return _mm256_mask_shuffle_epi8(__W,__U,__A,__B);
}
+
+TEST_CONSTEXPR(match_v32qi(_mm256_mask_shuffle_epi8((__m256i)(__v32qi){1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4}, 0x80808080, (__m256i)(__v32qi){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31}, (__m256i)(__v32qi){31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0}), 1,1,1,1,1,1,1,8,2,2,2,2,2,2,2,0,3,3,3,3,3,3,3,24,4,4,4,4,4,4,4,16));
+
+
__m256i test_mm256_maskz_shuffle_epi8(__mmask32 __U, __m256i __A, __m256i __B) {
// CHECK-LABEL: test_mm256_maskz_shuffle_epi8
// CHECK: @llvm.x86.avx2.pshuf.b
// CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}}
return _mm256_maskz_shuffle_epi8(__U,__A,__B);
}
+
+TEST_CONSTEXPR(match_v32qi(_mm256_maskz_shuffle_epi8(0x0000FFFF, (__m256i)(__v32qi){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31}, (__m256i)(__v32qi){31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0}), 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0));
+
__m128i test_mm_mask_subs_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) {
// CHECK-LABEL: test_mm_mask_subs_epi8
// CHECK: @llvm.ssub.sat.v16i8
diff --git a/clang/test/CodeGen/X86/avx512vlcd-builtins.c b/clang/test/CodeGen/X86/avx512vlcd-builtins.c
index 1619305..56c04a0 100644
--- a/clang/test/CodeGen/X86/avx512vlcd-builtins.c
+++ b/clang/test/CodeGen/X86/avx512vlcd-builtins.c
@@ -20,6 +20,7 @@ __m128i test_mm_broadcastmb_epi64(__m128i a,__m128i b) {
// CHECK: insertelement <2 x i64> %{{.*}}, i64 %{{.*}}, i32 1
return _mm_broadcastmb_epi64(_mm_cmpeq_epi32_mask (a, b));
}
+TEST_CONSTEXPR(match_v2du(_mm_broadcastmb_epi64((__mmask8)(76)), 76, 76));
__m256i test_mm256_broadcastmb_epi64(__m256i a, __m256i b) {
// CHECK-LABEL: test_mm256_broadcastmb_epi64
@@ -32,6 +33,7 @@ __m256i test_mm256_broadcastmb_epi64(__m256i a, __m256i b) {
// CHECK: insertelement <4 x i64> %{{.*}}, i64 %{{.*}}, i32 3
return _mm256_broadcastmb_epi64(_mm256_cmpeq_epi64_mask ( a, b));
}
+TEST_CONSTEXPR(match_v4di(_mm256_broadcastmb_epi64((__mmask8)(67)), 67, 67, 67, 67));
__m128i test_mm_broadcastmw_epi32(__m512i a, __m512i b) {
// CHECK-LABEL: test_mm_broadcastmw_epi32
@@ -43,6 +45,7 @@ __m128i test_mm_broadcastmw_epi32(__m512i a, __m512i b) {
// CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 3
return _mm_broadcastmw_epi32(_mm512_cmpeq_epi32_mask ( a, b));
}
+TEST_CONSTEXPR(match_v4su(_mm_broadcastmw_epi32((__mmask16)(0xbabe)), 0xbabe, 0xbabe, 0xbabe, 0xbabe));
__m256i test_mm256_broadcastmw_epi32(__m512i a, __m512i b) {
// CHECK-LABEL: test_mm256_broadcastmw_epi32
@@ -58,6 +61,7 @@ __m256i test_mm256_broadcastmw_epi32(__m512i a, __m512i b) {
// CHECK: insertelement <8 x i32> %{{.*}}, i32 %{{.*}}, i32 7
return _mm256_broadcastmw_epi32(_mm512_cmpeq_epi32_mask ( a, b));
}
+TEST_CONSTEXPR(match_v8si(_mm256_broadcastmw_epi32((__mmask16)(0xcafe)), 0xcafe,0xcafe,0xcafe,0xcafe, 0xcafe,0xcafe,0xcafe,0xcafe));
__m128i test_mm_conflict_epi64(__m128i __A) {
// CHECK-LABEL: test_mm_conflict_epi64
diff --git a/clang/test/CodeGen/X86/avxifma-builtins.c b/clang/test/CodeGen/X86/avxifma-builtins.c
index aa15159..70531da 100644
--- a/clang/test/CodeGen/X86/avxifma-builtins.c
+++ b/clang/test/CodeGen/X86/avxifma-builtins.c
@@ -8,8 +8,9 @@
// RUN: %clang_cc1 -x c++ -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avxifma -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s
// RUN: %clang_cc1 -x c++ -ffreestanding %s -triple=i386-apple-darwin -target-feature +avxifma -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s
-
#include <immintrin.h>
+#include "builtin_test_helpers.h"
+
__m128i test_mm_madd52hi_epu64(__m128i __X, __m128i __Y, __m128i __Z) {
// CHECK-LABEL: test_mm_madd52hi_epu64
@@ -17,44 +18,207 @@ __m128i test_mm_madd52hi_epu64(__m128i __X, __m128i __Y, __m128i __Z) {
return _mm_madd52hi_epu64(__X, __Y, __Z);
}
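+// vpmadd52huq computes, per 64-bit lane, X + ((lo52(Y) * lo52(Z)) >> 52).
+// Worked example for the max-input case below: (2^52 - 1)^2 is
+// 2^104 - 2^53 + 1, whose high 52 bits are 2^52 - 2 = 0xFFFFFFFFFFFFE.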
+TEST_CONSTEXPR(match_v2di(_mm_madd52hi_epu64(
+ (__m128i)((__v2du){50, 100}),
+ (__m128i)((__v2du){10, 20}),
+ (__m128i)((__v2du){5, 6})),
+ 50, 100));
+
+TEST_CONSTEXPR(match_v2di(_mm_madd52hi_epu64(
+ (__m128i)((__v2du){0, 0}),
+ (__m128i)((__v2du){0xFFFFFFFFFFFFFull, 0}),
+ (__m128i)((__v2du){0xFFFFFFFFFFFFFull, 0})),
+ 0xFFFFFFFFFFFFEull, 0));
+
__m256i test_mm256_madd52hi_epu64(__m256i __X, __m256i __Y, __m256i __Z) {
// CHECK-LABEL: test_mm256_madd52hi_epu64
// CHECK: call {{.*}}<4 x i64> @llvm.x86.avx512.vpmadd52h.uq.256(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}})
return _mm256_madd52hi_epu64(__X, __Y, __Z);
}
+TEST_CONSTEXPR(match_v4di(_mm256_madd52hi_epu64(
+ (__m256i)((__v4du){100, 200, 300, 400}),
+ (__m256i)((__v4du){10, 20, 30, 40}),
+ (__m256i)((__v4du){5, 6, 7, 8})),
+ 100, 200, 300, 400));
+
+TEST_CONSTEXPR(match_v4di(_mm256_madd52hi_epu64(
+ (__m256i)((__v4du){0, 0, 0, 0}),
+ (__m256i)((__v4du){0xFFFFFFFFFFFFFull,
+ 0xFFFFFFFFFFFFFull, 0, 0}),
+ (__m256i)((__v4du){0xFFFFFFFFFFFFFull,
+ 0xFFFFFFFFFFFFFull, 0, 0})),
+ 0xFFFFFFFFFFFFEull, 0xFFFFFFFFFFFFEull, 0, 0));
+
__m128i test_mm_madd52lo_epu64(__m128i __X, __m128i __Y, __m128i __Z) {
// CHECK-LABEL: test_mm_madd52lo_epu64
// CHECK: call {{.*}}<2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}})
return _mm_madd52lo_epu64(__X, __Y, __Z);
}
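+// vpmadd52luq computes, per 64-bit lane, X + ((lo52(Y) * lo52(Z)) mod 2^52),
+// so a 52-bit operand multiplied by 1 passes through unchanged.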
+TEST_CONSTEXPR(match_v2di(_mm_madd52lo_epu64(
+ (__m128i)((__v2du){0, 0}),
+ (__m128i)((__v2du){10, 0}),
+ (__m128i)((__v2du){5, 0})),
+ 50, 0));
+
+TEST_CONSTEXPR(match_v2di(_mm_madd52lo_epu64(
+ (__m128i)((__v2du){1, 2}),
+ (__m128i)((__v2du){10, 20}),
+ (__m128i)((__v2du){2, 3})),
+ 21, 62));
+
+TEST_CONSTEXPR(match_v2di(_mm_madd52lo_epu64(
+ (__m128i)((__v2du){0, 0}),
+ (__m128i)((__v2du){0xFFFFFFFFFFFFFull, 0}),
+ (__m128i)((__v2du){1, 0})),
+ 0xFFFFFFFFFFFFFull, 0));
+
__m256i test_mm256_madd52lo_epu64(__m256i __X, __m256i __Y, __m256i __Z) {
// CHECK-LABEL: test_mm256_madd52lo_epu64
// CHECK: call {{.*}}<4 x i64> @llvm.x86.avx512.vpmadd52l.uq.256(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}})
return _mm256_madd52lo_epu64(__X, __Y, __Z);
}
+TEST_CONSTEXPR(match_v4di(_mm256_madd52lo_epu64(
+ (__m256i)((__v4du){1, 2, 3, 4}),
+ (__m256i)((__v4du){10, 20, 30, 40}),
+ (__m256i)((__v4du){2, 3, 4, 5})),
+ 21, 62, 123, 204));
+
+TEST_CONSTEXPR(match_v4di(_mm256_madd52lo_epu64(
+ (__m256i)((__v4du){0, 0, 0, 0}),
+ (__m256i)((__v4du){0xFFFFFFFFFFFFFull, 0, 0,
+ 0}),
+ (__m256i)((__v4du){1, 0, 0, 0})),
+ 0xFFFFFFFFFFFFFull, 0, 0, 0));
+
+TEST_CONSTEXPR(match_v4di(_mm256_madd52lo_epu64(
+ (__m256i)((__v4du){0, 0, 0, 0}),
+ (__m256i)((__v4du){0x1F000000000000ull, 0, 0,
+ 0}),
+ (__m256i)((__v4du){2, 0, 0, 0})),
+ 0xE000000000000ull, 0, 0, 0));
+
__m128i test_mm_madd52hi_avx_epu64(__m128i __X, __m128i __Y, __m128i __Z) {
-// CHECK-LABEL: test_mm_madd52hi_avx_epu64
-// CHECK: call {{.*}}<2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+ // CHECK-LABEL: test_mm_madd52hi_avx_epu64
+ // CHECK: call {{.*}}<2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}})
return _mm_madd52hi_avx_epu64(__X, __Y, __Z);
}
+TEST_CONSTEXPR(match_v2di(_mm_madd52hi_avx_epu64(
+ (__m128i)((__v2du){50, 100}),
+ (__m128i)((__v2du){10, 20}),
+ (__m128i)((__v2du){5, 6})),
+ 50, 100));
+
+TEST_CONSTEXPR(match_v2di(_mm_madd52hi_avx_epu64(
+ (__m128i)((__v2du){100, 0}),
+ (__m128i)((__v2du){10, 0}),
+ (__m128i)((__v2du){5, 0})),
+ 100, 0));
+
+TEST_CONSTEXPR(match_v2di(_mm_madd52hi_avx_epu64(
+ (__m128i)((__v2du){0, 0}),
+ (__m128i)((__v2du){0xFFFFFFFFFFFFFull, 0}),
+ (__m128i)((__v2du){0xFFFFFFFFFFFFFull, 0})),
+ 0xFFFFFFFFFFFFEull, 0));
+
__m256i test_mm256_madd52hi_avx_epu64(__m256i __X, __m256i __Y, __m256i __Z) {
-// CHECK-LABEL: test_mm256_madd52hi_avx_epu64
-// CHECK: call {{.*}}<4 x i64> @llvm.x86.avx512.vpmadd52h.uq.256(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}})
+ // CHECK-LABEL: test_mm256_madd52hi_avx_epu64
+ // CHECK: call {{.*}}<4 x i64> @llvm.x86.avx512.vpmadd52h.uq.256(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}})
return _mm256_madd52hi_avx_epu64(__X, __Y, __Z);
}
+TEST_CONSTEXPR(match_v4di(_mm256_madd52hi_avx_epu64(
+ (__m256i)((__v4du){0, 0, 0, 0}),
+ (__m256i)((__v4du){0xFFFFFFFFFFFFFull,
+ 0xFFFFFFFFFFFFFull, 0, 0}),
+ (__m256i)((__v4du){0xFFFFFFFFFFFFFull,
+ 0xFFFFFFFFFFFFFull, 0, 0})),
+ 0xFFFFFFFFFFFFEull, 0xFFFFFFFFFFFFEull, 0, 0));
+
+TEST_CONSTEXPR(match_v4di(_mm256_madd52hi_avx_epu64(
+ (__m256i)((__v4du){100, 200, 300, 400}),
+ (__m256i)((__v4du){10, 20, 30, 40}),
+ (__m256i)((__v4du){5, 6, 7, 8})),
+ 100, 200, 300, 400));
+
+TEST_CONSTEXPR(match_v4di(_mm256_madd52hi_avx_epu64(
+ (__m256i)((__v4du){0, 0, 0, 0}),
+ (__m256i)((__v4du){0xFFFFFFFFFFFFFull, 0, 0,
+ 0}),
+ (__m256i)((__v4du){0xFFFFFFFFFFFFFull, 0, 0,
+ 0})),
+ 0xFFFFFFFFFFFFEull, 0, 0, 0));
+
__m128i test_mm_madd52lo_avx_epu64(__m128i __X, __m128i __Y, __m128i __Z) {
-// CHECK-LABEL: test_mm_madd52lo_avx_epu64
-// CHECK: call {{.*}}<2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+ // CHECK-LABEL: test_mm_madd52lo_avx_epu64
+ // CHECK: call {{.*}}<2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}})
return _mm_madd52lo_avx_epu64(__X, __Y, __Z);
}
+TEST_CONSTEXPR(match_v2di(_mm_madd52lo_avx_epu64(
+ (__m128i)((__v2du){0, 0}),
+ (__m128i)((__v2du){10, 0}),
+ (__m128i)((__v2du){5, 0})),
+ 50, 0));
+
+TEST_CONSTEXPR(match_v2di(_mm_madd52lo_avx_epu64(
+ (__m128i)((__v2du){100, 0}),
+ (__m128i)((__v2du){20, 0}),
+ (__m128i)((__v2du){30, 0})),
+ 700, 0));
+
+TEST_CONSTEXPR(match_v2di(_mm_madd52lo_avx_epu64(
+ (__m128i)((__v2du){1, 2}),
+ (__m128i)((__v2du){10, 20}),
+ (__m128i)((__v2du){2, 3})),
+ 21, 62));
+
+TEST_CONSTEXPR(match_v2di(_mm_madd52lo_avx_epu64(
+ (__m128i)((__v2du){0, 0}),
+ (__m128i)((__v2du){0xFFFFFFFFFFFFFull, 0}),
+ (__m128i)((__v2du){1, 0})),
+ 0xFFFFFFFFFFFFFull, 0));
+
__m256i test_mm256_madd52lo_avx_epu64(__m256i __X, __m256i __Y, __m256i __Z) {
-// CHECK-LABEL: test_mm256_madd52lo_avx_epu64
-// CHECK: call {{.*}}<4 x i64> @llvm.x86.avx512.vpmadd52l.uq.256(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}})
+ // CHECK-LABEL: test_mm256_madd52lo_avx_epu64
+ // CHECK: call {{.*}}<4 x i64> @llvm.x86.avx512.vpmadd52l.uq.256(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}})
return _mm256_madd52lo_avx_epu64(__X, __Y, __Z);
}
+TEST_CONSTEXPR(match_v4di(_mm256_madd52lo_avx_epu64(
+ (__m256i)((__v4du){1, 2, 3, 4}),
+ (__m256i)((__v4du){10, 20, 30, 40}),
+ (__m256i)((__v4du){2, 3, 4, 5})),
+ 21, 62, 123, 204));
+
+TEST_CONSTEXPR(match_v4di(_mm256_madd52lo_avx_epu64(
+ (__m256i)((__v4du){0, 0, 0, 0}),
+ (__m256i)((__v4du){0xFFFFFFFFFFFFFull, 0, 0,
+ 0}),
+ (__m256i)((__v4du){1, 0, 0, 0})),
+ 0xFFFFFFFFFFFFFull, 0, 0, 0));
+
+TEST_CONSTEXPR(match_v4di(_mm256_madd52lo_avx_epu64(
+ (__m256i)((__v4du){0, 0, 0, 0}),
+ (__m256i)((__v4du){0x1F000000000000ull, 0, 0,
+ 0}),
+ (__m256i)((__v4du){2, 0, 0, 0})),
+ 0xE000000000000ull, 0, 0, 0));
+
+TEST_CONSTEXPR(match_v2di(_mm_madd52lo_avx_epu64(
+ (__m128i)((__v2du){5, 10}),
+ (__m128i)((__v2du){100, 200}),
+ (__m128i)((__v2du){7, 8})),
+ 705, 1610));
+
diff --git a/clang/test/CodeGen/X86/mmx-builtins.c b/clang/test/CodeGen/X86/mmx-builtins.c
index 2b45b92..d9041d4 100644
--- a/clang/test/CodeGen/X86/mmx-builtins.c
+++ b/clang/test/CodeGen/X86/mmx-builtins.c
@@ -312,36 +312,42 @@ __m64 test_mm_hadd_pi16(__m64 a, __m64 b) {
// CHECK: call <8 x i16> @llvm.x86.ssse3.phadd.w.128(
return _mm_hadd_pi16(a, b);
}
+TEST_CONSTEXPR(match_v4hi(_mm_hadd_pi16((__m64)(__v4hi){1,2,3,4},(__m64)(__v4hi){5,6,7,8}),3,7,11,15));
__m64 test_mm_hadd_pi32(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_hadd_pi32
// CHECK: call <4 x i32> @llvm.x86.ssse3.phadd.d.128(
return _mm_hadd_pi32(a, b);
}
+TEST_CONSTEXPR(match_v2si(_mm_hadd_pi32((__m64)(__v2si){1,2},(__m64)(__v2si){3,4}),3,7));
__m64 test_mm_hadds_pi16(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_hadds_pi16
// CHECK: call <8 x i16> @llvm.x86.ssse3.phadd.sw.128(
return _mm_hadds_pi16(a, b);
}
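+// 32767 + 32767 exceeds INT16_MAX and saturates to 32767 in the first lane.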
+TEST_CONSTEXPR(match_v4hi(_mm_hadds_pi16((__m64)(__v4hi){32767, 32767, 1,3},(__m64)(__v4hi){-1,3, 40, 60}),32767, 4, 2,100));
__m64 test_mm_hsub_pi16(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_hsub_pi16
// CHECK: call <8 x i16> @llvm.x86.ssse3.phsub.w.128(
return _mm_hsub_pi16(a, b);
}
+TEST_CONSTEXPR(match_v4hi(_mm_hsub_pi16((__m64)(__v4hi){1,2,4,3},(__m64)(__v4hi){10,5,0,-10}),-1,1,5,10));
__m64 test_mm_hsub_pi32(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_hsub_pi32
// CHECK: call <4 x i32> @llvm.x86.ssse3.phsub.d.128(
return _mm_hsub_pi32(a, b);
}
+TEST_CONSTEXPR(match_v2si(_mm_hsub_pi32((__m64)(__v2si){1,2},(__m64)(__v2si){4,3}),-1,1));
__m64 test_mm_hsubs_pi16(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_hsubs_pi16
// CHECK: call <8 x i16> @llvm.x86.ssse3.phsub.sw.128(
return _mm_hsubs_pi16(a, b);
}
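+// 5 - (-32767) = 32772 exceeds INT16_MAX and saturates to 32767.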
+TEST_CONSTEXPR(match_v4hi(_mm_hsubs_pi16((__m64)(__v4hi){32767, 32767, 5, -32767},(__m64)(__v4hi){4,5,10,5}),0,32767,-1,5));
__m64 test_mm_insert_pi16(__m64 a, int d) {
// CHECK-LABEL: test_mm_insert_pi16
@@ -583,6 +589,8 @@ __m64 test_mm_shuffle_pi8(__m64 a, __m64 b) {
return _mm_shuffle_pi8(a, b);
}
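+// For the 64-bit pshufb only the low 3 bits of each control byte select a
+// source byte (bit 7 still zeroes), so 10,20,30,... reduce to 2,4,6,0,...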
+TEST_CONSTEXPR(match_v8qi(_mm_shuffle_pi8((__m64)(__v8qi){0,1,2,3,4,5,6,7}, (__m64)(__v8qi){10,20,30,40,50,60,70,80}), 2,4,6,0,2,4,6,0));
+
__m64 test_mm_shuffle_pi16(__m64 a) {
// CHECK-LABEL: test_mm_shuffle_pi16
// CHECK: shufflevector <4 x i16> {{%.*}}, <4 x i16> {{%.*}}, <4 x i32> <i32 3, i32 0, i32 0, i32 0>
diff --git a/clang/test/CodeGen/X86/sse3-builtins.c b/clang/test/CodeGen/X86/sse3-builtins.c
index c53afc5..a82dd40 100644
--- a/clang/test/CodeGen/X86/sse3-builtins.c
+++ b/clang/test/CodeGen/X86/sse3-builtins.c
@@ -31,24 +31,28 @@ __m128d test_mm_hadd_pd(__m128d A, __m128d B) {
// CHECK: call {{.*}}<2 x double> @llvm.x86.sse3.hadd.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
return _mm_hadd_pd(A, B);
}
+TEST_CONSTEXPR(match_m128d(_mm_hadd_pd((__m128d){+1.0, +2.0}, (__m128d){+3.0, +4.0}), +3.0, +7.0));
__m128 test_mm_hadd_ps(__m128 A, __m128 B) {
// CHECK-LABEL: test_mm_hadd_ps
// CHECK: call {{.*}}<4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}})
return _mm_hadd_ps(A, B);
}
+TEST_CONSTEXPR(match_m128(_mm_hadd_ps((__m128){+1.0f, +2.0f, +3.0f, +4.0f}, (__m128){+5.0f,+6.0f,+7.0f,+8.0f}), +3.0f, +7.0f, +11.0f, +15.0f));
__m128d test_mm_hsub_pd(__m128d A, __m128d B) {
// CHECK-LABEL: test_mm_hsub_pd
// CHECK: call {{.*}}<2 x double> @llvm.x86.sse3.hsub.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
return _mm_hsub_pd(A, B);
}
+TEST_CONSTEXPR(match_m128d(_mm_hsub_pd((__m128d){+1.0, +2.0}, (__m128d){+4.0, +3.0}), -1.0, +1.0));
__m128 test_mm_hsub_ps(__m128 A, __m128 B) {
// CHECK-LABEL: test_mm_hsub_ps
// CHECK: call {{.*}}<4 x float> @llvm.x86.sse3.hsub.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}})
return _mm_hsub_ps(A, B);
}
+TEST_CONSTEXPR(match_m128(_mm_hsub_ps((__m128){+1.0f, +2.0f, +4.0f, +3.0f}, (__m128){+5.0f,+7.0f,+10.0f,+8.0f}), -1.0f, +1.0f, -2.0f, +2.0f));
__m128i test_mm_lddqu_si128(__m128i const* P) {
// CHECK-LABEL: test_mm_lddqu_si128
diff --git a/clang/test/CodeGen/X86/ssse3-builtins.c b/clang/test/CodeGen/X86/ssse3-builtins.c
index 5885768..32abd9d 100644
--- a/clang/test/CodeGen/X86/ssse3-builtins.c
+++ b/clang/test/CodeGen/X86/ssse3-builtins.c
@@ -60,36 +60,43 @@ __m128i test_mm_hadd_epi16(__m128i a, __m128i b) {
// CHECK: call <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
return _mm_hadd_epi16(a, b);
}
+TEST_CONSTEXPR(match_v8hi(_mm_hadd_epi16((__m128i)(__v8hi){1,2,3,4,5,6,7,8}, (__m128i)(__v8hi){17,18,19,20,21,22,23,24}), 3,7,11,15,35,39,43,47));
__m128i test_mm_hadd_epi32(__m128i a, __m128i b) {
// CHECK-LABEL: test_mm_hadd_epi32
// CHECK: call <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
return _mm_hadd_epi32(a, b);
}
+TEST_CONSTEXPR(match_v4si(_mm_hadd_epi32((__m128i)(__v4si){1,2,3,4}, (__m128i)(__v4si){5,6,7,8}), 3,7,11,15));
__m128i test_mm_hadds_epi16(__m128i a, __m128i b) {
// CHECK-LABEL: test_mm_hadds_epi16
// CHECK: call <8 x i16> @llvm.x86.ssse3.phadd.sw.128(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
return _mm_hadds_epi16(a, b);
}
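+// The first pair, 30000 + 30000, saturates to INT16_MAX (32767).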
+TEST_CONSTEXPR(match_v8hi(_mm_hadds_epi16((__m128i)(__v8hi){30000,30000,-1,2,-3,3,1,4}, (__m128i)(__v8hi){2,6,1,9,-4,16,7,8}), 32767, 1,0,5,8,10,12,15));
+
__m128i test_mm_hsub_epi16(__m128i a, __m128i b) {
// CHECK-LABEL: test_mm_hsub_epi16
// CHECK: call <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
return _mm_hsub_epi16(a, b);
}
+TEST_CONSTEXPR(match_v8hi(_mm_hsub_epi16((__m128i)(__v8hi){20,15,16,12,9,6,4,2}, (__m128i)(__v8hi){3,2,1,1,4,5,0,2}), 5,4,3,2,1,0,-1,-2));
__m128i test_mm_hsub_epi32(__m128i a, __m128i b) {
// CHECK-LABEL: test_mm_hsub_epi32
// CHECK: call <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
return _mm_hsub_epi32(a, b);
}
+TEST_CONSTEXPR(match_v4si(_mm_hsub_epi32((__m128i)(__v4si){4,3,1,1}, (__m128i)(__v4si){7,5,10,5}), 1,0,2,5));
__m128i test_mm_hsubs_epi16(__m128i a, __m128i b) {
// CHECK-LABEL: test_mm_hsubs_epi16
// CHECK: call <8 x i16> @llvm.x86.ssse3.phsub.sw.128(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
return _mm_hsubs_epi16(a, b);
}
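+// 32767 - (-15) = 32782 exceeds INT16_MAX and saturates to 32767.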
+TEST_CONSTEXPR(match_v8hi(_mm_hsubs_epi16((__m128i)(__v8hi){32767, -15,16,12,9,6,4,2},(__m128i)(__v8hi){3,2,1,1,4,5,0,2}), 32767,4,3,2,1,0,-1,-2));
__m128i test_mm_maddubs_epi16(__m128i a, __m128i b) {
// CHECK-LABEL: test_mm_maddubs_epi16
@@ -110,6 +117,8 @@ __m128i test_mm_shuffle_epi8(__m128i a, __m128i b) {
return _mm_shuffle_epi8(a, b);
}
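+// Control bytes with bit 7 set force the result byte to zero, hence the
+// alternating zeros expected below.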
+TEST_CONSTEXPR(match_v16qi(_mm_shuffle_epi8((__m128i)(__v16qs){0,-1,-2,-3,-4,-5,-6,-7,-8,-9,-10,-11,-12,-13,-14,-15}, (__m128i)(__v16qs){15,-14,13,-12,11,-10,9,-8,7,-6,5,-4,3,-2,1,0}), -15,0,-13,0,-11,0,-9,0,-7,0,-5,0,-3,0,-1,0));
+
__m128i test_mm_sign_epi8(__m128i a, __m128i b) {
// CHECK-LABEL: test_mm_sign_epi8
// CHECK: call <16 x i8> @llvm.x86.ssse3.psign.b.128(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
diff --git a/clang/test/CodeGen/attr-target-mv.c b/clang/test/CodeGen/attr-target-mv.c
index 07f47d9..607e3e4 100644
--- a/clang/test/CodeGen/attr-target-mv.c
+++ b/clang/test/CodeGen/attr-target-mv.c
@@ -30,6 +30,7 @@ int __attribute__((target("arch=gracemont"))) foo(void) {return 24;}
int __attribute__((target("arch=pantherlake"))) foo(void) {return 25;}
int __attribute__((target("arch=clearwaterforest"))) foo(void) {return 26;}
int __attribute__((target("arch=diamondrapids"))) foo(void) {return 27;}
+int __attribute__((target("arch=wildcatlake"))) foo(void) {return 28;}
int __attribute__((target("default"))) foo(void) { return 2; }
int bar(void) {
@@ -203,6 +204,8 @@ void calls_pr50025c(void) { pr50025c(); }
// ITANIUM: ret i32 26
// ITANIUM: define{{.*}} i32 @foo.arch_diamondrapids()
// ITANIUM: ret i32 27
+// ITANIUM: define{{.*}} i32 @foo.arch_wildcatlake()
+// ITANIUM: ret i32 28
// ITANIUM: define{{.*}} i32 @foo()
// ITANIUM: ret i32 2
// ITANIUM: define{{.*}} i32 @bar()
@@ -262,6 +265,8 @@ void calls_pr50025c(void) { pr50025c(); }
// WINDOWS: ret i32 26
// WINDOWS: define dso_local i32 @foo.arch_diamondrapids()
// WINDOWS: ret i32 27
+// WINDOWS: define dso_local i32 @foo.arch_wildcatlake()
+// WINDOWS: ret i32 28
// WINDOWS: define dso_local i32 @foo()
// WINDOWS: ret i32 2
// WINDOWS: define dso_local i32 @bar()
diff --git a/clang/test/CodeGen/ext-int-cc.c b/clang/test/CodeGen/ext-int-cc.c
index 7cfd992..f845afc 100644
--- a/clang/test/CodeGen/ext-int-cc.c
+++ b/clang/test/CodeGen/ext-int-cc.c
@@ -49,8 +49,8 @@ void ParamPassing(_BitInt(128) b, _BitInt(64) c) {}
// R600: define{{.*}} void @ParamPassing(ptr addrspace(5) byval(i128) align 8 %{{.+}}, i64 %{{.+}})
// ARC: define{{.*}} void @ParamPassing(ptr byval(i128) align 4 %{{.+}}, i64 inreg %{{.+}})
// XCORE: define{{.*}} void @ParamPassing(ptr byval(i128) align 4 %{{.+}}, i64 %{{.+}})
-// RISCV64: define{{.*}} void @ParamPassing(i128 %{{.+}}, i64 %{{.+}})
-// RISCV32: define{{.*}} void @ParamPassing(ptr dead_on_return %{{.+}}, i64 %{{.+}})
+// RISCV64: define{{.*}} void @ParamPassing(i128 signext %{{.+}}, i64 signext %{{.+}})
+// RISCV32: define{{.*}} void @ParamPassing(ptr dead_on_return %{{.+}}, i64 signext %{{.+}})
// WASM: define{{.*}} void @ParamPassing(i128 %{{.+}}, i64 %{{.+}})
// SYSTEMZ: define{{.*}} void @ParamPassing(ptr dead_on_return %{{.+}}, i64 %{{.+}})
// PPC64: define{{.*}} void @ParamPassing(i128 %{{.+}}, i64 %{{.+}})
@@ -79,8 +79,8 @@ void ParamPassing2(_BitInt(127) b, _BitInt(63) c) {}
// R600: define{{.*}} void @ParamPassing2(ptr addrspace(5) byval(i128) align 8 %{{.+}}, i63 %{{.+}})
// ARC: define{{.*}} void @ParamPassing2(ptr byval(i128) align 4 %{{.+}}, i63 inreg %{{.+}})
// XCORE: define{{.*}} void @ParamPassing2(ptr byval(i128) align 4 %{{.+}}, i63 %{{.+}})
-// RISCV64: define{{.*}} void @ParamPassing2(i127 %{{.+}}, i63 signext %{{.+}})
-// RISCV32: define{{.*}} void @ParamPassing2(ptr dead_on_return %{{.+}}, i63 %{{.+}})
+// RISCV64: define{{.*}} void @ParamPassing2(i127 signext %{{.+}}, i63 signext %{{.+}})
+// RISCV32: define{{.*}} void @ParamPassing2(ptr dead_on_return %{{.+}}, i63 signext %{{.+}})
// WASM: define{{.*}} void @ParamPassing2(i127 %{{.+}}, i63 %{{.+}})
// SYSTEMZ: define{{.*}} void @ParamPassing2(ptr dead_on_return %{{.+}}, i63 signext %{{.+}})
// PPC64: define{{.*}} void @ParamPassing2(i127 %{{.+}}, i63 signext %{{.+}})
diff --git a/clang/test/CodeGen/inline-asm-systemz-flag-output.c b/clang/test/CodeGen/inline-asm-systemz-flag-output.c
new file mode 100644
index 0000000..041797b
--- /dev/null
+++ b/clang/test/CodeGen/inline-asm-systemz-flag-output.c
@@ -0,0 +1,57 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6
+// RUN: %clang_cc1 -O2 -triple s390x-linux -emit-llvm -o - %s | FileCheck %s
+
+// CHECK-LABEL: define dso_local signext range(i32 0, 4) i32 @test(
+// CHECK-SAME: i32 noundef signext [[X:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call { i32, i32 } asm "ahi $0,42\0A", "=d,={@cc},0"(i32 [[X]]) #[[ATTR2:[0-9]+]], !srcloc [[META2:![0-9]+]]
+// CHECK-NEXT: [[ASMRESULT1:%.*]] = extractvalue { i32, i32 } [[TMP0]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[ASMRESULT1]], 4
+// CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP1]])
+// CHECK-NEXT: ret i32 [[ASMRESULT1]]
+//
+int test(int x) {
+ int cc;
+ asm ("ahi %[x],42\n" : [x] "+d"(x), "=@cc" (cc));
+ return cc;
+}
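+// "={@cc}" surfaces the 2-bit condition code as an integer in [0, 4); the
+// llvm.assume of (cc u< 4) above is what enables the range-based folds below.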
+
+// CHECK-LABEL: define dso_local signext range(i32 0, 2) i32 @test_low_high_transformation(
+// CHECK-SAME: i32 noundef signext [[X:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call { i32, i32 } asm "ahi $0,42\0A", "=d,={@cc},0"(i32 [[X]]) #[[ATTR2]], !srcloc [[META3:![0-9]+]]
+// CHECK-NEXT: [[ASMRESULT1:%.*]] = extractvalue { i32, i32 } [[TMP0]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[ASMRESULT1]], 4
+// CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP1]])
+// CHECK-NEXT: [[TMP2:%.*]] = add nsw i32 [[ASMRESULT1]], -1
+// CHECK-NEXT: [[TMP3:%.*]] = icmp ult i32 [[TMP2]], 2
+// CHECK-NEXT: [[LOR_EXT:%.*]] = zext i1 [[TMP3]] to i32
+// CHECK-NEXT: ret i32 [[LOR_EXT]]
+//
+int test_low_high_transformation(int x) {
+ int cc;
+ asm ("ahi %[x],42\n" : [x] "+d"(x), "=@cc" (cc));
+ return cc == 1 || cc == 2;
+}
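+// cc == 1 || cc == 2 folds to a single unsigned range check, (cc - 1) u< 2,
+// visible as the add/icmp pair in the CHECK lines above.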
+
+// CHECK-LABEL: define dso_local signext range(i32 0, 2) i32 @test_equal_high_transformation(
+// CHECK-SAME: i32 noundef signext [[X:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call { i32, i32 } asm "ahi $0,42\0A", "=d,={@cc},0"(i32 [[X]]) #[[ATTR2]], !srcloc [[META4:![0-9]+]]
+// CHECK-NEXT: [[ASMRESULT1:%.*]] = extractvalue { i32, i32 } [[TMP0]], 1
+// CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[ASMRESULT1]], 4
+// CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP1]])
+// CHECK-NEXT: [[TMP2:%.*]] = and i32 [[ASMRESULT1]], 1
+// CHECK-NEXT: [[LOR_EXT:%.*]] = xor i32 [[TMP2]], 1
+// CHECK-NEXT: ret i32 [[LOR_EXT]]
+//
+int test_equal_high_transformation(int x) {
+ int cc;
+ asm ("ahi %[x],42\n" : [x] "+d"(x), "=@cc" (cc));
+ return cc == 0 || cc == 2;
+}
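+// cc == 0 || cc == 2 tests the even values, which folds to (cc & 1) ^ 1.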
+//.
+// CHECK: [[META2]] = !{i64 788}
+// CHECK: [[META3]] = !{i64 1670}
+// CHECK: [[META4]] = !{i64 2505}
+//.
diff --git a/clang/test/CodeGen/target-builtin-noerror.c b/clang/test/CodeGen/target-builtin-noerror.c
index 120f1a5..2c0d83c 100644
--- a/clang/test/CodeGen/target-builtin-noerror.c
+++ b/clang/test/CodeGen/target-builtin-noerror.c
@@ -178,6 +178,7 @@ void verifycpustrings(void) {
(void)__builtin_cpu_is("lunarlake");
(void)__builtin_cpu_is("clearwaterforest");
(void)__builtin_cpu_is("pantherlake");
+ (void)__builtin_cpu_is("wildcatlake");
(void)__builtin_cpu_is("haswell");
(void)__builtin_cpu_is("icelake-client");
(void)__builtin_cpu_is("icelake-server");
diff --git a/clang/test/CodeGen/unified-lto-module-flag.ll b/clang/test/CodeGen/unified-lto-module-flag.ll
new file mode 100644
index 0000000..deefe82
--- /dev/null
+++ b/clang/test/CodeGen/unified-lto-module-flag.ll
@@ -0,0 +1,11 @@
+; Test that we do not duplicate the UnifiedLTO module flag.
+;
+; RUN: %clang_cc1 -emit-llvm -flto=full -funified-lto -o - %s | FileCheck %s
+
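+; The input below already carries the UnifiedLTO flag, so the compile must
+; reuse it rather than appending a fifth module flag.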
+; CHECK: !llvm.module.flags = !{!0, !1, !2, !3}
+!llvm.module.flags = !{!0, !1, !2, !3}
+
+!0 = !{i32 1, !"wchar_size", i32 2}
+!1 = !{i32 7, !"frame-pointer", i32 2}
+!2 = !{i32 1, !"EnableSplitLTOUnit", i32 1}
+!3 = !{i32 1, !"UnifiedLTO", i32 1}