about | summary | refs | log | tree | commit | diff
path: root/clang/test/CodeGen/X86
diff options
context:
space:
mode:
Diffstat (limited to 'clang/test/CodeGen/X86')
-rw-r--r-- clang/test/CodeGen/X86/avx-builtins.c | 11
-rw-r--r-- clang/test/CodeGen/X86/avx2-builtins.c | 24
-rw-r--r-- clang/test/CodeGen/X86/avx512ifma-builtins.c | 199
-rw-r--r-- clang/test/CodeGen/X86/avx512ifmavl-builtins.c | 182
-rw-r--r-- clang/test/CodeGen/X86/avxifma-builtins.c | 182
-rw-r--r-- clang/test/CodeGen/X86/mmx-builtins.c | 6
-rw-r--r-- clang/test/CodeGen/X86/sse3-builtins.c | 4
-rw-r--r-- clang/test/CodeGen/X86/ssse3-builtins.c | 7
8 files changed, 586 insertions, 29 deletions
diff --git a/clang/test/CodeGen/X86/avx-builtins.c b/clang/test/CodeGen/X86/avx-builtins.c
index 8f3d459..bcffd861 100644
--- a/clang/test/CodeGen/X86/avx-builtins.c
+++ b/clang/test/CodeGen/X86/avx-builtins.c
@@ -1100,6 +1100,7 @@ __m256d test_mm256_hadd_pd(__m256d A, __m256d B) {
// CHECK: call {{.*}}<4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}})
return _mm256_hadd_pd(A, B);
}
+TEST_CONSTEXPR(match_m256d(_mm256_hadd_pd((__m256d){+1.0, +2.0, +3.0, +4.0}, (__m256d){+5.0, +6.0, +7.0, +8.0}), +3.0, +11.0, +7.0, +15.0));
__m256 test_mm256_hadd_ps(__m256 A, __m256 B) {
// CHECK-LABEL: test_mm256_hadd_ps
@@ -1107,17 +1108,27 @@ __m256 test_mm256_hadd_ps(__m256 A, __m256 B) {
return _mm256_hadd_ps(A, B);
}
+TEST_CONSTEXPR(match_m256(_mm256_hadd_ps(
+ (__m256){+1.0f, +2.0f, +3.0f, +4.0f, +5.0f, +6.0f, +7.0f, +8.0f},
+ (__m256){+9.0f, +10.0f, +11.0f, +12.0f, +13.0f, +14.0f, +15.0f, +16.0f}),
+ +3.0f, +7.0f, +19.0f, +23.0f, +11.0f, +15.0f, +27.0f, +31.0f));
+
__m256d test_mm256_hsub_pd(__m256d A, __m256d B) {
// CHECK-LABEL: test_mm256_hsub_pd
// CHECK: call {{.*}}<4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}})
return _mm256_hsub_pd(A, B);
}
+TEST_CONSTEXPR(match_m256d(_mm256_hsub_pd((__m256d){+1.0, +2.0, +4.0, +3.0}, (__m256d){+10.0, +6.0, +16.0, +8.0}), -1.0,+4.0,+1.0,+8.0));
__m256 test_mm256_hsub_ps(__m256 A, __m256 B) {
// CHECK-LABEL: test_mm256_hsub_ps
// CHECK: call {{.*}}<8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}})
return _mm256_hsub_ps(A, B);
}
+TEST_CONSTEXPR(match_m256(_mm256_hsub_ps(
+ (__m256){1.0f, 2.0f, 4.0f, 3.0f, 5.0f, 7.0f, 7.0f, 5.0f},
+ (__m256){6.0f, 9.0f, 11.0f, 8.0f, 13.0f, 17.0f, 15.0f, 11.0f}),
+ -1.0f, 1.0f, -3.0f, 3.0f, -2.0f, 2.0f, -4.0f, 4.0f));
__m256i test_mm256_insert_epi8(__m256i x, char b) {
// CHECK-LABEL: test_mm256_insert_epi8
diff --git a/clang/test/CodeGen/X86/avx2-builtins.c b/clang/test/CodeGen/X86/avx2-builtins.c
index 55f18f9..dc64f96 100644
--- a/clang/test/CodeGen/X86/avx2-builtins.c
+++ b/clang/test/CodeGen/X86/avx2-builtins.c
@@ -485,36 +485,60 @@ __m256i test_mm256_hadd_epi16(__m256i a, __m256i b) {
// CHECK: call <16 x i16> @llvm.x86.avx2.phadd.w(<16 x i16> %{{.*}}, <16 x i16> %{{.*}})
return _mm256_hadd_epi16(a, b);
}
+TEST_CONSTEXPR(match_v16hi(_mm256_hadd_epi16(
+ (__m256i)(__v16hi){1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16},
+ (__m256i)(__v16hi){17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32}),
+ 3,7,11,15,35,39,43,47,19,23,27,31,51,55,59,63));
__m256i test_mm256_hadd_epi32(__m256i a, __m256i b) {
// CHECK-LABEL: test_mm256_hadd_epi32
// CHECK: call <8 x i32> @llvm.x86.avx2.phadd.d(<8 x i32> %{{.*}}, <8 x i32> %{{.*}})
return _mm256_hadd_epi32(a, b);
}
+TEST_CONSTEXPR(match_v8si(_mm256_hadd_epi32(
+ (__m256i)(__v8si){10, 20, 30, 40, 50, 60, 70, 80},
+ (__m256i)(__v8si){5, 15, 25, 35, 45, 55, 65, 75}),
+ 30,70,20,60,110,150,100,140));
__m256i test_mm256_hadds_epi16(__m256i a, __m256i b) {
// CHECK-LABEL: test_mm256_hadds_epi16
// CHECK:call <16 x i16> @llvm.x86.avx2.phadd.sw(<16 x i16> %{{.*}}, <16 x i16> %{{.*}})
return _mm256_hadds_epi16(a, b);
}
+TEST_CONSTEXPR(match_v16hi( _mm256_hadds_epi16(
+ (__m256i)(__v16hi){32767, 32767, 1,2,3,4,5,6,7,8,9,10,11,12,13,14},
+ (__m256i)(__v16hi){19,20,21,22,23,24,25,26,27,28,29,30,31,32, 32767, 5}),
+ 32767, 3,7,11, 39,43,47,51,15,19,23,27, 55,59,63, 32767));
__m256i test_mm256_hsub_epi16(__m256i a, __m256i b) {
// CHECK-LABEL: test_mm256_hsub_epi16
// CHECK: call <16 x i16> @llvm.x86.avx2.phsub.w(<16 x i16> %{{.*}}, <16 x i16> %{{.*}})
return _mm256_hsub_epi16(a, b);
}
+TEST_CONSTEXPR(match_v16hi(_mm256_hsub_epi16(
+ (__m256i)(__v16hi){2,1,1,2,5,3,3,5,7,4,4,7,9,5,5,9},
+ (__m256i)(__v16hi){10,5,5,10,12,6,6,12,21,14,14,21,24,16,16,24}),
+ 1,-1,2,-2,5,-5,6,-6,3,-3,4,-4, 7,-7,8,-8));
__m256i test_mm256_hsub_epi32(__m256i a, __m256i b) {
// CHECK-LABEL: test_mm256_hsub_epi32
// CHECK: call <8 x i32> @llvm.x86.avx2.phsub.d(<8 x i32> %{{.*}}, <8 x i32> %{{.*}})
return _mm256_hsub_epi32(a, b);
}
+TEST_CONSTEXPR(match_v8si(_mm256_hsub_epi32(
+ (__m256i)(__v8si){10, 20, 30,50,60,90,100,140},
+ (__m256i)(__v8si){200,150,260,200,420,350,800,720}),
+ -10,-20,50,60, -30,-40, 70,80));
__m256i test_mm256_hsubs_epi16(__m256i a, __m256i b) {
// CHECK-LABEL: test_mm256_hsubs_epi16
// CHECK:call <16 x i16> @llvm.x86.avx2.phsub.sw(<16 x i16> %{{.*}}, <16 x i16> %{{.*}})
return _mm256_hsubs_epi16(a, b);
}
+TEST_CONSTEXPR(match_v16hi(_mm256_hsubs_epi16(
+ (__m256i)(__v16hi){32726, -100, 3, 2, 6, 4, 8, 5,15,10 ,21, 14, 27, 18, 100, 90},
+ (__m256i)(__v16hi){40, 20, 100, 70, 200,150, 100,40, 1000,900,300,150, 500,300, 1, 1}),
+ 32767, 1, 2, 3, 20, 30, 50, 60, 5, 7, 9, 10, 100, 150, 200, 0));
__m128i test_mm_i32gather_epi32(int const *b, __m128i c) {
// CHECK-LABEL: test_mm_i32gather_epi32
diff --git a/clang/test/CodeGen/X86/avx512ifma-builtins.c b/clang/test/CodeGen/X86/avx512ifma-builtins.c
index eebefb0..f90697e 100644
--- a/clang/test/CodeGen/X86/avx512ifma-builtins.c
+++ b/clang/test/CodeGen/X86/avx512ifma-builtins.c
@@ -8,45 +8,230 @@
// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512ifma -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s
// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +avx512ifma -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s
-
#include <immintrin.h>
+#include "builtin_test_helpers.h"
__m512i test_mm512_madd52hi_epu64(__m512i __X, __m512i __Y, __m512i __Z) {
// CHECK-LABEL: test_mm512_madd52hi_epu64
// CHECK: call {{.*}}<8 x i64> @llvm.x86.avx512.vpmadd52h.uq.512(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}})
- return _mm512_madd52hi_epu64(__X, __Y, __Z);
+ return _mm512_madd52hi_epu64(__X, __Y, __Z);
}
+TEST_CONSTEXPR(match_v8di(_mm512_madd52hi_epu64(
+ (__m512i)(__v8du){100, 0, 0, 0, 0, 0, 0, 0},
+ (__m512i)(__v8du){10, 0, 0, 0, 0, 0, 0, 0},
+ (__m512i)(__v8du){5, 0, 0, 0, 0, 0, 0, 0}),
+ 100, 0, 0, 0, 0, 0, 0, 0));
+
+TEST_CONSTEXPR(match_v8di(_mm512_madd52hi_epu64(
+ (__m512i)(__v8du){0, 0, 0, 0, 0, 0, 0, 0},
+ (__m512i)(__v8du){0xFFFFFFFFFFFFFull, 0, 0, 0,
+ 0, 0, 0, 0},
+ (__m512i)(__v8du){0xFFFFFFFFFFFFFull, 0, 0, 0,
+ 0, 0, 0, 0}),
+ 0xFFFFFFFFFFFFEull, 0, 0, 0, 0, 0, 0, 0));
+
+TEST_CONSTEXPR(match_v8di(_mm512_madd52hi_epu64(
+ (__m512i)(__v8du){1, 2, 3, 4, 5, 6, 7, 8},
+ (__m512i)(__v8du){0xFFFFFFFFFFFFFull,
+ 0xFFFFFFFFFFFFFull,
+ 0xFFFFFFFFFFFFFull,
+ 0xFFFFFFFFFFFFFull,
+ 0xFFFFFFFFFFFFFull,
+ 0xFFFFFFFFFFFFFull,
+ 0xFFFFFFFFFFFFFull,
+ 0xFFFFFFFFFFFFFull},
+ (__m512i)(__v8du){0xFFFFFFFFFFFFFull,
+ 0xFFFFFFFFFFFFFull,
+ 0xFFFFFFFFFFFFFull,
+ 0xFFFFFFFFFFFFFull,
+ 0xFFFFFFFFFFFFFull,
+ 0xFFFFFFFFFFFFFull,
+ 0xFFFFFFFFFFFFFull,
+ 0xFFFFFFFFFFFFFull}),
+ 4503599627370495ull, 4503599627370496ull,
+ 4503599627370497ull, 4503599627370498ull,
+ 4503599627370499ull, 4503599627370500ull,
+ 4503599627370501ull, 4503599627370502ull));
+
__m512i test_mm512_mask_madd52hi_epu64(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y) {
// CHECK-LABEL: test_mm512_mask_madd52hi_epu64
// CHECK: call {{.*}}<8 x i64> @llvm.x86.avx512.vpmadd52h.uq.512(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}})
// CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
- return _mm512_mask_madd52hi_epu64(__W, __M, __X, __Y);
+ return _mm512_mask_madd52hi_epu64(__W, __M, __X, __Y);
}
+TEST_CONSTEXPR(match_v8di(_mm512_mask_madd52hi_epu64(
+ (__m512i)(__v8du){111, 222, 333, 444, 555, 666,
+ 777, 888},
+ 0x00,
+ (__m512i)(__v8du){1, 2, 3, 4, 5, 6, 7, 8},
+ (__m512i)(__v8du){10, 20, 30, 40, 50, 60, 70,
+ 80}),
+ 111, 222, 333, 444, 555, 666, 777, 888));
+
+TEST_CONSTEXPR(match_v8di(_mm512_mask_madd52hi_epu64(
+ (__m512i)(__v8du){10, 20, 30, 40, 50, 60, 70,
+ 80},
+ 0xFF,
+ (__m512i)(__v8du){100, 200, 300, 400, 500, 600,
+ 700, 800},
+ (__m512i)(__v8du){10, 20, 30, 40, 50, 60, 70,
+ 80}),
+ 10, 20, 30, 40, 50, 60, 70, 80));
+
__m512i test_mm512_maskz_madd52hi_epu64(__mmask8 __M, __m512i __X, __m512i __Y, __m512i __Z) {
// CHECK-LABEL: test_mm512_maskz_madd52hi_epu64
// CHECK: call {{.*}}<8 x i64> @llvm.x86.avx512.vpmadd52h.uq.512(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}})
// CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
- return _mm512_maskz_madd52hi_epu64(__M, __X, __Y, __Z);
+ return _mm512_maskz_madd52hi_epu64(__M, __X, __Y, __Z);
}
+TEST_CONSTEXPR(match_v8di(_mm512_maskz_madd52hi_epu64(
+ 0x00,
+ (__m512i)(__v8du){1, 2, 3, 4, 5, 6, 7, 8},
+ (__m512i)(__v8du){10, 20, 30, 40, 50, 60, 70,
+ 80},
+ (__m512i)(__v8du){100, 200, 300, 400, 500, 600,
+ 700, 800}),
+ 0, 0, 0, 0, 0, 0, 0, 0));
+
+TEST_CONSTEXPR(match_v8di(_mm512_maskz_madd52hi_epu64(
+ 0xFF,
+ (__m512i)(__v8du){1, 2, 3, 4, 5, 6, 7, 8},
+ (__m512i)(__v8du){10, 20, 30, 40, 50, 60, 70,
+ 80},
+ (__m512i)(__v8du){100, 200, 300, 400, 500, 600,
+ 700, 800}),
+ 1, 2, 3, 4, 5, 6, 7, 8));
+
__m512i test_mm512_madd52lo_epu64(__m512i __X, __m512i __Y, __m512i __Z) {
// CHECK-LABEL: test_mm512_madd52lo_epu64
// CHECK: call {{.*}}<8 x i64> @llvm.x86.avx512.vpmadd52l.uq.512(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}})
- return _mm512_madd52lo_epu64(__X, __Y, __Z);
+ return _mm512_madd52lo_epu64(__X, __Y, __Z);
}
+TEST_CONSTEXPR(match_v8di(_mm512_madd52lo_epu64(
+ (__m512i)(__v8du){0, 0, 0, 0, 0, 0, 0, 0},
+ (__m512i)(__v8du){10, 0, 0, 0, 0, 0, 0, 0},
+ (__m512i)(__v8du){5, 0, 0, 0, 0, 0, 0, 0}),
+ 50, 0, 0, 0, 0, 0, 0, 0));
+
+TEST_CONSTEXPR(match_v8di(_mm512_madd52lo_epu64(
+ (__m512i)(__v8du){100, 0, 0, 0, 0, 0, 0, 0},
+ (__m512i)(__v8du){20, 0, 0, 0, 0, 0, 0, 0},
+ (__m512i)(__v8du){30, 0, 0, 0, 0, 0, 0, 0}),
+ 700, 0, 0, 0, 0, 0, 0, 0));
+
+TEST_CONSTEXPR(match_v8di(_mm512_madd52lo_epu64(
+ (__m512i)(__v8du){0, 0, 0, 0, 0, 0, 0, 0},
+ (__m512i)(__v8du){0xFFFFFFFFFFFFFull, 0, 0, 0,
+ 0, 0, 0, 0},
+ (__m512i)(__v8du){1, 0, 0, 0, 0, 0, 0, 0}),
+ 0xFFFFFFFFFFFFFull, 0, 0, 0, 0, 0, 0, 0));
+
+TEST_CONSTEXPR(match_v8di(_mm512_madd52lo_epu64(
+ (__m512i)(__v8du){0, 0, 0, 0, 0, 0, 0, 0},
+ (__m512i)(__v8du){0x1F000000000000ull, 0, 0, 0,
+ 0, 0, 0, 0},
+ (__m512i)(__v8du){2, 0, 0, 0, 0, 0, 0, 0}),
+ 0xE000000000000ull, 0, 0, 0, 0, 0, 0, 0));
+
+TEST_CONSTEXPR(match_v8di(_mm512_madd52lo_epu64(
+ (__m512i)(__v8du){1, 2, 3, 4, 5, 6, 7, 8},
+ (__m512i)(__v8du){10, 20, 30, 40, 50, 60, 70,
+ 80},
+ (__m512i)(__v8du){2, 3, 4, 5, 6, 7, 8, 9}),
+ 21, 62, 123, 204, 305, 426, 567, 728));
+
+TEST_CONSTEXPR(match_v8di(_mm512_madd52lo_epu64(
+ (__m512i)(__v8du){0xFFFFFFFFFFFFFull, 0, 0, 0,
+ 0, 0, 0, 0},
+ (__m512i)(__v8du){10, 0, 0, 0, 0, 0, 0, 0},
+ (__m512i)(__v8du){5, 0, 0, 0, 0, 0, 0, 0}),
+ 4503599627370545ull, 0, 0, 0, 0, 0, 0, 0));
+
+TEST_CONSTEXPR(match_v8di(_mm512_madd52lo_epu64(
+ (__m512i)(__v8du){10, 20, 30, 40, 50, 60, 70,
+ 80},
+ (__m512i)(__v8du){100, 200, 300, 400, 500, 600,
+ 700, 800},
+ (__m512i)(__v8du){2, 3, 4, 5, 6, 7, 8, 9}),
+ 210, 620, 1230, 2040, 3050, 4260, 5670, 7280));
+
+TEST_CONSTEXPR(match_v8di(_mm512_madd52lo_epu64(
+ (__m512i)(__v8du){0, 0, 0, 0, 0, 0, 0, 0},
+ (__m512i)(__v8du){0x1F000000000000ull,
+ 0x1F000000000000ull, 0, 0, 0,
+ 0, 0, 0},
+ (__m512i)(__v8du){2, 3, 0, 0, 0, 0, 0, 0}),
+ 0xE000000000000ull, 0xD000000000000ull, 0, 0, 0, 0,
+ 0, 0));
+
+TEST_CONSTEXPR(match_v8di(_mm512_madd52lo_epu64(
+ (__m512i)(__v8du){0, 0, 0, 0, 0, 0, 0, 0},
+ (__m512i)(__v8du){0xFFFFFFFFFFFFFull,
+ 0xFFFFFFFFFFFFFull,
+ 0xFFFFFFFFFFFFFull,
+ 0xFFFFFFFFFFFFFull,
+ 0xFFFFFFFFFFFFFull,
+ 0xFFFFFFFFFFFFFull,
+ 0xFFFFFFFFFFFFFull,
+ 0xFFFFFFFFFFFFFull},
+ (__m512i)(__v8du){1, 1, 1, 1, 1, 1, 1, 1}),
+ 0xFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFull,
+ 0xFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFull,
+ 0xFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFull,
+ 0xFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFull));
+
__m512i test_mm512_mask_madd52lo_epu64(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y) {
// CHECK-LABEL: test_mm512_mask_madd52lo_epu64
// CHECK: call {{.*}}<8 x i64> @llvm.x86.avx512.vpmadd52l.uq.512(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}})
// CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
- return _mm512_mask_madd52lo_epu64(__W, __M, __X, __Y);
+ return _mm512_mask_madd52lo_epu64(__W, __M, __X, __Y);
}
+TEST_CONSTEXPR(match_v8di(_mm512_mask_madd52lo_epu64(
+ (__m512i)(__v8du){111, 222, 333, 444, 555, 666,
+ 777, 888},
+ 0x00,
+ (__m512i)(__v8du){1, 2, 3, 4, 5, 6, 7, 8},
+ (__m512i)(__v8du){10, 20, 30, 40, 50, 60, 70,
+ 80}),
+ 111, 222, 333, 444, 555, 666, 777, 888));
+
+TEST_CONSTEXPR(match_v8di(_mm512_mask_madd52lo_epu64(
+ (__m512i)(__v8du){1000, 2000, 3000, 4000, 5000,
+ 6000, 7000, 8000},
+ 0xFF,
+ (__m512i)(__v8du){100, 200, 300, 400, 500, 600,
+ 700, 800},
+ (__m512i)(__v8du){20, 30, 40, 50, 60, 70, 80,
+ 90}),
+ 3000, 8000, 15000, 24000, 35000, 48000, 63000,
+ 80000));
+
__m512i test_mm512_maskz_madd52lo_epu64(__mmask8 __M, __m512i __X, __m512i __Y, __m512i __Z) {
// CHECK-LABEL: test_mm512_maskz_madd52lo_epu64
// CHECK: call {{.*}}<8 x i64> @llvm.x86.avx512.vpmadd52l.uq.512(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}})
// CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
- return _mm512_maskz_madd52lo_epu64(__M, __X, __Y, __Z);
+ return _mm512_maskz_madd52lo_epu64(__M, __X, __Y, __Z);
}
+
+TEST_CONSTEXPR(match_v8di(_mm512_maskz_madd52lo_epu64(
+ 0x00,
+ (__m512i)(__v8du){1, 2, 3, 4, 5, 6, 7, 8},
+ (__m512i)(__v8du){10, 20, 30, 40, 50, 60, 70,
+ 80},
+ (__m512i)(__v8du){2, 3, 4, 5, 6, 7, 8, 9}),
+ 0, 0, 0, 0, 0, 0, 0, 0));
+
+TEST_CONSTEXPR(match_v8di(_mm512_maskz_madd52lo_epu64(
+ 0xFF,
+ (__m512i)(__v8du){100, 200, 300, 400, 500, 600,
+ 700, 800},
+ (__m512i)(__v8du){20, 30, 40, 50, 60, 70, 80,
+ 90},
+ (__m512i)(__v8du){30, 40, 50, 60, 70, 80, 90,
+ 100}),
+ 700, 1400, 2300, 3400, 4700, 6200, 7900, 9800));
diff --git a/clang/test/CodeGen/X86/avx512ifmavl-builtins.c b/clang/test/CodeGen/X86/avx512ifmavl-builtins.c
index 89108fc..1cbb580 100644
--- a/clang/test/CodeGen/X86/avx512ifmavl-builtins.c
+++ b/clang/test/CodeGen/X86/avx512ifmavl-builtins.c
@@ -8,85 +8,241 @@
// RUN: %clang_cc1 -x c++ %s -flax-vector-conversions=none -ffreestanding -triple=x86_64-apple-darwin -target-feature +avx512ifma -target-feature +avx512vl -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s
// RUN: %clang_cc1 -x c++ %s -flax-vector-conversions=none -ffreestanding -triple=i386-apple-darwin -target-feature +avx512ifma -target-feature +avx512vl -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s
-
#include <immintrin.h>
+#include "builtin_test_helpers.h"
__m128i test_mm_madd52hi_epu64(__m128i __X, __m128i __Y, __m128i __Z) {
// CHECK-LABEL: test_mm_madd52hi_epu64
// CHECK: call {{.*}}<2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}})
- return _mm_madd52hi_epu64(__X, __Y, __Z);
+ return _mm_madd52hi_epu64(__X, __Y, __Z);
}
+TEST_CONSTEXPR(match_v2di(_mm_madd52hi_epu64(
+ (__m128i)((__v2du){100, 0}),
+ (__m128i)((__v2du){10, 0}),
+ (__m128i)((__v2du){5, 0})),
+ 100, 0));
+
+TEST_CONSTEXPR(match_v2di(_mm_madd52hi_epu64(
+ (__m128i)((__v2du){0, 0}),
+ (__m128i)((__v2du){0xFFFFFFFFFFFFFull, 0}),
+ (__m128i)((__v2du){0xFFFFFFFFFFFFFull, 0})),
+ 0xFFFFFFFFFFFFEull, 0));
+
__m128i test_mm_mask_madd52hi_epu64(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y) {
// CHECK-LABEL: test_mm_mask_madd52hi_epu64
// CHECK: call {{.*}}<2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}})
// CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}
- return _mm_mask_madd52hi_epu64(__W, __M, __X, __Y);
+ return _mm_mask_madd52hi_epu64(__W, __M, __X, __Y);
}
+TEST_CONSTEXPR(match_v2di(_mm_mask_madd52hi_epu64((__m128i)((__v2du){111, 222}),
+ 0x0,
+ (__m128i)((__v2du){1, 2}),
+ (__m128i)((__v2du){10, 20})),
+ 111, 222));
+
+TEST_CONSTEXPR(match_v2di(_mm_mask_madd52hi_epu64((__m128i)((__v2du){10, 20}),
+ 0x2,
+ (__m128i)((__v2du){0x1000000000000ULL, 0x1000000000000ULL}),
+ (__m128i)((__v2du){0x1000000000000ULL, 0x1000000000000ULL})),
+ 10, 0x100000000014ULL));
+
__m128i test_mm_maskz_madd52hi_epu64(__mmask8 __M, __m128i __X, __m128i __Y, __m128i __Z) {
// CHECK-LABEL: test_mm_maskz_madd52hi_epu64
// CHECK: call {{.*}}<2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}})
// CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}
- return _mm_maskz_madd52hi_epu64(__M, __X, __Y, __Z);
+ return _mm_maskz_madd52hi_epu64(__M, __X, __Y, __Z);
}
+TEST_CONSTEXPR(match_v2di(_mm_maskz_madd52hi_epu64(0x3,
+ (__m128i)((__v2du){1, 2}),
+ (__m128i)((__v2du){10, 20}),
+ (__m128i)((__v2du){100, 200})),
+ 1, 2));
+
+TEST_CONSTEXPR(match_v2di(_mm_maskz_madd52hi_epu64(0x1,
+ (__m128i)((__v2du){0x1000000000000ULL, 0x1000000000000ULL}),
+ (__m128i)((__v2du){0x1000000000000ULL, 0x1000000000000ULL}),
+ (__m128i)((__v2du){0, 0})),
+ 0x1000000000000ULL, 0));
+
__m256i test_mm256_madd52hi_epu64(__m256i __X, __m256i __Y, __m256i __Z) {
// CHECK-LABEL: test_mm256_madd52hi_epu64
// CHECK: call {{.*}}<4 x i64> @llvm.x86.avx512.vpmadd52h.uq.256(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}})
- return _mm256_madd52hi_epu64(__X, __Y, __Z);
+ return _mm256_madd52hi_epu64(__X, __Y, __Z);
}
+TEST_CONSTEXPR(match_v4di(_mm256_madd52hi_epu64(
+ (__m256i)((__v4du){100, 200, 300, 400}),
+ (__m256i)((__v4du){10, 20, 30, 40}),
+ (__m256i)((__v4du){5, 6, 7, 8})),
+ 100, 200, 300, 400));
+
+TEST_CONSTEXPR(match_v4di(_mm256_madd52hi_epu64(
+ (__m256i)((__v4du){0, 0, 0, 0}),
+ (__m256i)((__v4du){0xFFFFFFFFFFFFFull, 0, 0,
+ 0}),
+ (__m256i)((__v4du){0xFFFFFFFFFFFFFull, 0, 0,
+ 0})),
+ 0xFFFFFFFFFFFFEull, 0, 0, 0));
+
__m256i test_mm256_mask_madd52hi_epu64(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y) {
// CHECK-LABEL: test_mm256_mask_madd52hi_epu64
// CHECK: call {{.*}}<4 x i64> @llvm.x86.avx512.vpmadd52h.uq.256(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}})
// CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}
- return _mm256_mask_madd52hi_epu64(__W, __M, __X, __Y);
+ return _mm256_mask_madd52hi_epu64(__W, __M, __X, __Y);
}
+TEST_CONSTEXPR(match_v4di(_mm256_mask_madd52hi_epu64((__m256i)((__v4du){111, 222, 333, 444}),
+ 0x0,
+ (__m256i)((__v4du){1, 2, 3, 4}),
+ (__m256i)((__v4du){10, 20, 30, 40})),
+ 111, 222, 333, 444));
+
+TEST_CONSTEXPR(match_v4di(_mm256_mask_madd52hi_epu64((__m256i)((__v4du){10, 20, 30, 40}),
+ 0xA,
+ (__m256i)((__v4du){0x1000000000000ULL, 0x1000000000000ULL,
+ 0x1000000000000ULL, 0x1000000000000ULL}),
+ (__m256i)((__v4du){0x1000000000000ULL, 0x1000000000000ULL,
+ 0x1000000000000ULL, 0x1000000000000ULL})),
+ 10, 0x100000000014ULL, 30, 0x100000000028ULL));
+
__m256i test_mm256_maskz_madd52hi_epu64(__mmask8 __M, __m256i __X, __m256i __Y, __m256i __Z) {
// CHECK-LABEL: test_mm256_maskz_madd52hi_epu64
// CHECK: call {{.*}}<4 x i64> @llvm.x86.avx512.vpmadd52h.uq.256(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}})
// CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}
- return _mm256_maskz_madd52hi_epu64(__M, __X, __Y, __Z);
+ return _mm256_maskz_madd52hi_epu64(__M, __X, __Y, __Z);
}
+TEST_CONSTEXPR(match_v4di(_mm256_maskz_madd52hi_epu64(0xF,
+ (__m256i)((__v4du){1, 2, 3, 4}),
+ (__m256i)((__v4du){10, 20, 30, 40}),
+ (__m256i)((__v4du){100, 200, 300, 400})),
+ 1, 2, 3, 4));
+
+TEST_CONSTEXPR(match_v4di(_mm256_maskz_madd52hi_epu64(0x5,
+ (__m256i)((__v4du){0x1000000000000ULL, 0x1000000000000ULL,
+ 0x1000000000000ULL, 0x1000000000000ULL}),
+ (__m256i)((__v4du){0x1000000000000ULL, 0x1000000000000ULL,
+ 0x1000000000000ULL, 0x1000000000000ULL}),
+ (__m256i)((__v4du){0, 0, 0, 0})),
+ 0x1000000000000ULL, 0, 0x1000000000000ULL, 0));
+
__m128i test_mm_madd52lo_epu64(__m128i __X, __m128i __Y, __m128i __Z) {
// CHECK-LABEL: test_mm_madd52lo_epu64
// CHECK: call {{.*}}<2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}})
- return _mm_madd52lo_epu64(__X, __Y, __Z);
+ return _mm_madd52lo_epu64(__X, __Y, __Z);
}
+TEST_CONSTEXPR(match_v2di(_mm_madd52lo_epu64(
+ (__m128i)((__v2du){0, 0}),
+ (__m128i)((__v2du){10, 0}),
+ (__m128i)((__v2du){5, 0})),
+ 50, 0));
+
+TEST_CONSTEXPR(match_v2di(_mm_madd52lo_epu64(
+ (__m128i)((__v2du){100, 0}),
+ (__m128i)((__v2du){20, 0}),
+ (__m128i)((__v2du){30, 0})),
+ 700, 0));
+
+TEST_CONSTEXPR(match_v2di(_mm_madd52lo_epu64(
+ (__m128i)((__v2du){1, 2}),
+ (__m128i)((__v2du){10, 20}),
+ (__m128i)((__v2du){2, 3})),
+ 21, 62));
+
__m128i test_mm_mask_madd52lo_epu64(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y) {
// CHECK-LABEL: test_mm_mask_madd52lo_epu64
// CHECK: call {{.*}}<2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}})
// CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}
- return _mm_mask_madd52lo_epu64(__W, __M, __X, __Y);
+ return _mm_mask_madd52lo_epu64(__W, __M, __X, __Y);
}
+TEST_CONSTEXPR(match_v2di(_mm_mask_madd52lo_epu64((__m128i)((__v2du){1000, 2000}),
+ 0x3,
+ (__m128i)((__v2du){100, 200}),
+ (__m128i)((__v2du){20, 30})),
+ 3000, 8000));
+
+TEST_CONSTEXPR(match_v2di(_mm_mask_madd52lo_epu64((__m128i)((__v2du){111, 222}),
+ 0x0,
+ (__m128i)((__v2du){1, 2}),
+ (__m128i)((__v2du){10, 20})),
+ 111, 222));
+
__m128i test_mm_maskz_madd52lo_epu64(__mmask8 __M, __m128i __X, __m128i __Y, __m128i __Z) {
// CHECK-LABEL: test_mm_maskz_madd52lo_epu64
// CHECK: call {{.*}}<2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}})
// CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}
- return _mm_maskz_madd52lo_epu64(__M, __X, __Y, __Z);
+ return _mm_maskz_madd52lo_epu64(__M, __X, __Y, __Z);
}
+TEST_CONSTEXPR(match_v2di(_mm_maskz_madd52lo_epu64(0x3,
+ (__m128i)((__v2du){100, 200}),
+ (__m128i)((__v2du){20, 30}),
+ (__m128i)((__v2du){30, 40})),
+ 700, 1400));
+
+TEST_CONSTEXPR(match_v2di(_mm_maskz_madd52lo_epu64(0x1,
+ (__m128i)((__v2du){100, 0}),
+ (__m128i)((__v2du){20, 0}),
+ (__m128i)((__v2du){30, 0})),
+ 700, 0));
+
__m256i test_mm256_madd52lo_epu64(__m256i __X, __m256i __Y, __m256i __Z) {
// CHECK-LABEL: test_mm256_madd52lo_epu64
// CHECK: call {{.*}}<4 x i64> @llvm.x86.avx512.vpmadd52l.uq.256(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}})
- return _mm256_madd52lo_epu64(__X, __Y, __Z);
+ return _mm256_madd52lo_epu64(__X, __Y, __Z);
}
+TEST_CONSTEXPR(match_v4di(_mm256_madd52lo_epu64(
+ (__m256i)((__v4du){1, 2, 3, 4}),
+ (__m256i)((__v4du){10, 20, 30, 40}),
+ (__m256i)((__v4du){2, 3, 4, 5})),
+ 21, 62, 123, 204));
+
__m256i test_mm256_mask_madd52lo_epu64(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y) {
// CHECK-LABEL: test_mm256_mask_madd52lo_epu64
// CHECK: call {{.*}}<4 x i64> @llvm.x86.avx512.vpmadd52l.uq.256(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}})
// CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}
- return _mm256_mask_madd52lo_epu64(__W, __M, __X, __Y);
+ return _mm256_mask_madd52lo_epu64(__W, __M, __X, __Y);
}
+TEST_CONSTEXPR(match_v4di(_mm256_mask_madd52lo_epu64((__m256i)((__v4du){1000, 2000, 3000, 4000}),
+ 0xF,
+ (__m256i)((__v4du){100, 200, 300, 400}),
+ (__m256i)((__v4du){20, 30, 40, 50})),
+ 3000, 8000, 15000, 24000));
+
+TEST_CONSTEXPR(match_v4di(_mm256_mask_madd52lo_epu64((__m256i)((__v4du){111, 222, 333, 444}),
+ 0x0,
+ (__m256i)((__v4du){1, 2, 3, 4}),
+ (__m256i)((__v4du){10, 20, 30, 40})),
+ 111, 222, 333, 444));
+
+TEST_CONSTEXPR(match_v4di(_mm256_mask_madd52lo_epu64((__m256i)((__v4du){11, 22, 33, 44}),
+ 0x5,
+ (__m256i)((__v4du){100, 200, 300, 400}),
+ (__m256i)((__v4du){10, 20, 30, 40})),
+ 1011, 22, 9033, 44));
+
__m256i test_mm256_maskz_madd52lo_epu64(__mmask8 __M, __m256i __X, __m256i __Y, __m256i __Z) {
// CHECK-LABEL: test_mm256_maskz_madd52lo_epu64
// CHECK: call {{.*}}<4 x i64> @llvm.x86.avx512.vpmadd52l.uq.256(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}})
// CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}
- return _mm256_maskz_madd52lo_epu64(__M, __X, __Y, __Z);
+ return _mm256_maskz_madd52lo_epu64(__M, __X, __Y, __Z);
}
+
+TEST_CONSTEXPR(match_v4di(_mm256_maskz_madd52lo_epu64(0xF,
+ (__m256i)((__v4du){100, 200, 300, 400}),
+ (__m256i)((__v4du){20, 30, 40, 50}),
+ (__m256i)((__v4du){30, 40, 50, 60})),
+ 700, 1400, 2300, 3400));
+
+TEST_CONSTEXPR(match_v4di(_mm256_maskz_madd52lo_epu64(0x9,
+ (__m256i)((__v4du){100, 200, 300, 400}),
+ (__m256i)((__v4du){10, 20, 30, 40}),
+ (__m256i)((__v4du){5, 10, 15, 20})),
+ 150, 0, 0, 1200));
diff --git a/clang/test/CodeGen/X86/avxifma-builtins.c b/clang/test/CodeGen/X86/avxifma-builtins.c
index aa15159..70531da 100644
--- a/clang/test/CodeGen/X86/avxifma-builtins.c
+++ b/clang/test/CodeGen/X86/avxifma-builtins.c
@@ -8,8 +8,9 @@
// RUN: %clang_cc1 -x c++ -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avxifma -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s
// RUN: %clang_cc1 -x c++ -ffreestanding %s -triple=i386-apple-darwin -target-feature +avxifma -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s
-
#include <immintrin.h>
+#include "builtin_test_helpers.h"
+
__m128i test_mm_madd52hi_epu64(__m128i __X, __m128i __Y, __m128i __Z) {
// CHECK-LABEL: test_mm_madd52hi_epu64
@@ -17,44 +18,207 @@ __m128i test_mm_madd52hi_epu64(__m128i __X, __m128i __Y, __m128i __Z) {
return _mm_madd52hi_epu64(__X, __Y, __Z);
}
+TEST_CONSTEXPR(match_v2di(_mm_madd52hi_epu64(
+ (__m128i)((__v2du){50, 100}),
+ (__m128i)((__v2du){10, 20}),
+ (__m128i)((__v2du){5, 6})),
+ 50, 100));
+
+TEST_CONSTEXPR(match_v2di(_mm_madd52hi_epu64(
+ (__m128i)((__v2du){0, 0}),
+ (__m128i)((__v2du){0xFFFFFFFFFFFFFull, 0}),
+ (__m128i)((__v2du){0xFFFFFFFFFFFFFull, 0})),
+ 0xFFFFFFFFFFFFEull, 0));
+
__m256i test_mm256_madd52hi_epu64(__m256i __X, __m256i __Y, __m256i __Z) {
// CHECK-LABEL: test_mm256_madd52hi_epu64
// CHECK: call {{.*}}<4 x i64> @llvm.x86.avx512.vpmadd52h.uq.256(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}})
return _mm256_madd52hi_epu64(__X, __Y, __Z);
}
+TEST_CONSTEXPR(match_v4di(_mm256_madd52hi_epu64(
+ (__m256i)((__v4du){100, 200, 300, 400}),
+ (__m256i)((__v4du){10, 20, 30, 40}),
+ (__m256i)((__v4du){5, 6, 7, 8})),
+ 100, 200, 300, 400));
+
+TEST_CONSTEXPR(match_v4di(_mm256_madd52hi_epu64(
+ (__m256i)((__v4du){0, 0, 0, 0}),
+ (__m256i)((__v4du){0xFFFFFFFFFFFFFull,
+ 0xFFFFFFFFFFFFFull, 0, 0}),
+ (__m256i)((__v4du){0xFFFFFFFFFFFFFull,
+ 0xFFFFFFFFFFFFFull, 0, 0})),
+ 0xFFFFFFFFFFFFEull, 0xFFFFFFFFFFFFEull, 0, 0));
+
__m128i test_mm_madd52lo_epu64(__m128i __X, __m128i __Y, __m128i __Z) {
// CHECK-LABEL: test_mm_madd52lo_epu64
// CHECK: call {{.*}}<2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}})
return _mm_madd52lo_epu64(__X, __Y, __Z);
}
+TEST_CONSTEXPR(match_v2di(_mm_madd52lo_epu64(
+ (__m128i)((__v2du){0, 0}),
+ (__m128i)((__v2du){10, 0}),
+ (__m128i)((__v2du){5, 0})),
+ 50, 0));
+
+TEST_CONSTEXPR(match_v2di(_mm_madd52lo_epu64(
+ (__m128i)((__v2du){1, 2}),
+ (__m128i)((__v2du){10, 20}),
+ (__m128i)((__v2du){2, 3})),
+ 21, 62));
+
+TEST_CONSTEXPR(match_v2di(_mm_madd52lo_epu64(
+ (__m128i)((__v2du){0, 0}),
+ (__m128i)((__v2du){0xFFFFFFFFFFFFFull, 0}),
+ (__m128i)((__v2du){1, 0})),
+ 0xFFFFFFFFFFFFFull, 0));
+
__m256i test_mm256_madd52lo_epu64(__m256i __X, __m256i __Y, __m256i __Z) {
// CHECK-LABEL: test_mm256_madd52lo_epu64
// CHECK: call {{.*}}<4 x i64> @llvm.x86.avx512.vpmadd52l.uq.256(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}})
return _mm256_madd52lo_epu64(__X, __Y, __Z);
}
+TEST_CONSTEXPR(match_v4di(_mm256_madd52lo_epu64(
+ (__m256i)((__v4du){1, 2, 3, 4}),
+ (__m256i)((__v4du){10, 20, 30, 40}),
+ (__m256i)((__v4du){2, 3, 4, 5})),
+ 21, 62, 123, 204));
+
+TEST_CONSTEXPR(match_v4di(_mm256_madd52lo_epu64(
+ (__m256i)((__v4du){0, 0, 0, 0}),
+ (__m256i)((__v4du){0xFFFFFFFFFFFFFull, 0, 0,
+ 0}),
+ (__m256i)((__v4du){1, 0, 0, 0})),
+ 0xFFFFFFFFFFFFFull, 0, 0, 0));
+
+TEST_CONSTEXPR(match_v4di(_mm256_madd52lo_epu64(
+ (__m256i)((__v4du){0, 0, 0, 0}),
+ (__m256i)((__v4du){0x1F000000000000ull, 0, 0,
+ 0}),
+ (__m256i)((__v4du){2, 0, 0, 0})),
+ 0xE000000000000ull, 0, 0, 0));
+
__m128i test_mm_madd52hi_avx_epu64(__m128i __X, __m128i __Y, __m128i __Z) {
-// CHECK-LABEL: test_mm_madd52hi_avx_epu64
-// CHECK: call {{.*}}<2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+ // CHECK-LABEL: test_mm_madd52hi_avx_epu64
+ // CHECK: call {{.*}}<2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}})
return _mm_madd52hi_avx_epu64(__X, __Y, __Z);
}
+TEST_CONSTEXPR(match_v2di(_mm_madd52hi_avx_epu64(
+ (__m128i)((__v2du){50, 100}),
+ (__m128i)((__v2du){10, 20}),
+ (__m128i)((__v2du){5, 6})),
+ 50, 100));
+
+TEST_CONSTEXPR(match_v2di(_mm_madd52hi_avx_epu64(
+ (__m128i)((__v2du){100, 0}),
+ (__m128i)((__v2du){10, 0}),
+ (__m128i)((__v2du){5, 0})),
+ 100, 0));
+
+TEST_CONSTEXPR(match_v2di(_mm_madd52hi_avx_epu64(
+ (__m128i)((__v2du){0, 0}),
+ (__m128i)((__v2du){0xFFFFFFFFFFFFFull, 0}),
+ (__m128i)((__v2du){0xFFFFFFFFFFFFFull, 0})),
+ 0xFFFFFFFFFFFFEull, 0));
+
__m256i test_mm256_madd52hi_avx_epu64(__m256i __X, __m256i __Y, __m256i __Z) {
-// CHECK-LABEL: test_mm256_madd52hi_avx_epu64
-// CHECK: call {{.*}}<4 x i64> @llvm.x86.avx512.vpmadd52h.uq.256(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}})
+ // CHECK-LABEL: test_mm256_madd52hi_avx_epu64
+ // CHECK: call {{.*}}<4 x i64> @llvm.x86.avx512.vpmadd52h.uq.256(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}})
return _mm256_madd52hi_avx_epu64(__X, __Y, __Z);
}
+TEST_CONSTEXPR(match_v4di(_mm256_madd52hi_avx_epu64(
+ (__m256i)((__v4du){0, 0, 0, 0}),
+ (__m256i)((__v4du){0xFFFFFFFFFFFFFull,
+ 0xFFFFFFFFFFFFFull, 0, 0}),
+ (__m256i)((__v4du){0xFFFFFFFFFFFFFull,
+ 0xFFFFFFFFFFFFFull, 0, 0})),
+ 0xFFFFFFFFFFFFEull, 0xFFFFFFFFFFFFEull, 0, 0));
+
+TEST_CONSTEXPR(match_v4di(_mm256_madd52hi_avx_epu64(
+ (__m256i)((__v4du){100, 200, 300, 400}),
+ (__m256i)((__v4du){10, 20, 30, 40}),
+ (__m256i)((__v4du){5, 6, 7, 8})),
+ 100, 200, 300, 400));
+
+TEST_CONSTEXPR(match_v4di(_mm256_madd52hi_avx_epu64(
+ (__m256i)((__v4du){0, 0, 0, 0}),
+ (__m256i)((__v4du){0xFFFFFFFFFFFFFull, 0, 0,
+ 0}),
+ (__m256i)((__v4du){0xFFFFFFFFFFFFFull, 0, 0,
+ 0})),
+ 0xFFFFFFFFFFFFEull, 0, 0, 0));
+
__m128i test_mm_madd52lo_avx_epu64(__m128i __X, __m128i __Y, __m128i __Z) {
-// CHECK-LABEL: test_mm_madd52lo_avx_epu64
-// CHECK: call {{.*}}<2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+ // CHECK-LABEL: test_mm_madd52lo_avx_epu64
+ // CHECK: call {{.*}}<2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}})
return _mm_madd52lo_avx_epu64(__X, __Y, __Z);
}
+TEST_CONSTEXPR(match_v2di(_mm_madd52lo_avx_epu64(
+ (__m128i)((__v2du){0, 0}),
+ (__m128i)((__v2du){10, 0}),
+ (__m128i)((__v2du){5, 0})),
+ 50, 0));
+
+TEST_CONSTEXPR(match_v2di(_mm_madd52lo_avx_epu64(
+ (__m128i)((__v2du){100, 0}),
+ (__m128i)((__v2du){20, 0}),
+ (__m128i)((__v2du){30, 0})),
+ 700, 0));
+
+TEST_CONSTEXPR(match_v2di(_mm_madd52lo_avx_epu64(
+ (__m128i)((__v2du){1, 2}),
+ (__m128i)((__v2du){10, 20}),
+ (__m128i)((__v2du){2, 3})),
+ 21, 62));
+
+TEST_CONSTEXPR(match_v2di(_mm_madd52lo_avx_epu64(
+ (__m128i)((__v2du){0, 0}),
+ (__m128i)((__v2du){0xFFFFFFFFFFFFFull, 0}),
+ (__m128i)((__v2du){1, 0})),
+ 0xFFFFFFFFFFFFFull, 0));
+
__m256i test_mm256_madd52lo_avx_epu64(__m256i __X, __m256i __Y, __m256i __Z) {
-// CHECK-LABEL: test_mm256_madd52lo_avx_epu64
-// CHECK: call {{.*}}<4 x i64> @llvm.x86.avx512.vpmadd52l.uq.256(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}})
+ // CHECK-LABEL: test_mm256_madd52lo_avx_epu64
+ // CHECK: call {{.*}}<4 x i64> @llvm.x86.avx512.vpmadd52l.uq.256(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}})
return _mm256_madd52lo_avx_epu64(__X, __Y, __Z);
}
+
+TEST_CONSTEXPR(match_v4di(_mm256_madd52lo_avx_epu64(
+ (__m256i)((__v4du){1, 2, 3, 4}),
+ (__m256i)((__v4du){10, 20, 30, 40}),
+ (__m256i)((__v4du){2, 3, 4, 5})),
+ 21, 62, 123, 204));
+
+
+
+TEST_CONSTEXPR(match_v4di(_mm256_madd52lo_avx_epu64(
+ (__m256i)((__v4du){0, 0, 0, 0}),
+ (__m256i)((__v4du){0xFFFFFFFFFFFFFull, 0, 0,
+ 0}),
+ (__m256i)((__v4du){1, 0, 0, 0})),
+ 0xFFFFFFFFFFFFFull, 0, 0, 0));
+
+TEST_CONSTEXPR(match_v4di(_mm256_madd52lo_avx_epu64(
+ (__m256i)((__v4du){0, 0, 0, 0}),
+ (__m256i)((__v4du){0x1F000000000000ull, 0, 0,
+ 0}),
+ (__m256i)((__v4du){2, 0, 0, 0})),
+ 0xE000000000000ull, 0, 0, 0));
+
+TEST_CONSTEXPR(match_v2di(_mm_madd52lo_avx_epu64(
+ (__m128i)((__v2du){5, 10}),
+ (__m128i)((__v2du){100, 200}),
+ (__m128i)((__v2du){7, 8})),
+ 705, 1610));
+
+TEST_CONSTEXPR(match_v4di(_mm256_madd52lo_avx_epu64(
+ (__m256i)((__v4du){1, 2, 3, 4}),
+ (__m256i)((__v4du){10, 20, 30, 40}),
+ (__m256i)((__v4du){2, 3, 4, 5})),
+ 21, 62, 123, 204));
+
diff --git a/clang/test/CodeGen/X86/mmx-builtins.c b/clang/test/CodeGen/X86/mmx-builtins.c
index 2b45b92..a1e05a1 100644
--- a/clang/test/CodeGen/X86/mmx-builtins.c
+++ b/clang/test/CodeGen/X86/mmx-builtins.c
@@ -312,36 +312,42 @@ __m64 test_mm_hadd_pi16(__m64 a, __m64 b) {
// CHECK: call <8 x i16> @llvm.x86.ssse3.phadd.w.128(
return _mm_hadd_pi16(a, b);
}
+TEST_CONSTEXPR(match_v4hi(_mm_hadd_pi16((__m64)(__v4hi){1,2,3,4},(__m64)(__v4hi){5,6,7,8}),3,7,11,15)); // 1+2, 3+4, 5+6, 7+8
__m64 test_mm_hadd_pi32(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_hadd_pi32
// CHECK: call <4 x i32> @llvm.x86.ssse3.phadd.d.128(
return _mm_hadd_pi32(a, b);
}
+TEST_CONSTEXPR(match_v2si(_mm_hadd_pi32((__m64)(__v2si){1,2},(__m64)(__v2si){3,4}),3,7)); // 1+2, 3+4
__m64 test_mm_hadds_pi16(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_hadds_pi16
// CHECK: call <8 x i16> @llvm.x86.ssse3.phadd.sw.128(
return _mm_hadds_pi16(a, b);
}
+TEST_CONSTEXPR(match_v4hi(_mm_hadds_pi16((__m64)(__v4hi){32767, 32767, 1,3},(__m64)(__v4hi){-1,3, 40, 60}),32767, 4, 2,100)); // 32767+32767 clamps to 32767; then 1+3, -1+3, 40+60
__m64 test_mm_hsub_pi16(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_hsub_pi16
// CHECK: call <8 x i16> @llvm.x86.ssse3.phsub.w.128(
return _mm_hsub_pi16(a, b);
}
+TEST_CONSTEXPR(match_v4hi(_mm_hsub_pi16((__m64)(__v4hi){1,2,4,3},(__m64)(__v4hi){10,5,0,-10}),-1,1,5,10)); // 1-2, 4-3, 10-5, 0-(-10)
__m64 test_mm_hsub_pi32(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_hsub_pi32
// CHECK: call <4 x i32> @llvm.x86.ssse3.phsub.d.128(
return _mm_hsub_pi32(a, b);
}
+TEST_CONSTEXPR(match_v2si(_mm_hsub_pi32((__m64)(__v2si){1,2},(__m64)(__v2si){4,3}),-1,1)); // 1-2, 4-3
__m64 test_mm_hsubs_pi16(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_hsubs_pi16
// CHECK: call <8 x i16> @llvm.x86.ssse3.phsub.sw.128(
return _mm_hsubs_pi16(a, b);
}
+TEST_CONSTEXPR(match_v4hi(_mm_hsubs_pi16((__m64)(__v4hi){32767, 32767, 5, -32767},(__m64)(__v4hi){4,5,10,5}),0,32767,-1,5)); // 32767-32767; 5-(-32767) clamps to 32767; then 4-5, 10-5
__m64 test_mm_insert_pi16(__m64 a, int d) {
// CHECK-LABEL: test_mm_insert_pi16
diff --git a/clang/test/CodeGen/X86/sse3-builtins.c b/clang/test/CodeGen/X86/sse3-builtins.c
index c53afc5..a82dd40 100644
--- a/clang/test/CodeGen/X86/sse3-builtins.c
+++ b/clang/test/CodeGen/X86/sse3-builtins.c
@@ -31,24 +31,28 @@ __m128d test_mm_hadd_pd(__m128d A, __m128d B) {
// CHECK: call {{.*}}<2 x double> @llvm.x86.sse3.hadd.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
return _mm_hadd_pd(A, B);
}
+TEST_CONSTEXPR(match_m128d(_mm_hadd_pd((__m128d){+1.0, +2.0}, (__m128d){+3.0, +4.0}), +3.0, +7.0)); // 1+2, 3+4
__m128 test_mm_hadd_ps(__m128 A, __m128 B) {
// CHECK-LABEL: test_mm_hadd_ps
// CHECK: call {{.*}}<4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}})
return _mm_hadd_ps(A, B);
}
+TEST_CONSTEXPR(match_m128(_mm_hadd_ps((__m128){+1.0f, +2.0f, +3.0f, +4.0f}, (__m128){+5.0f,+6.0f,+7.0f,+8.0f}), +3.0f, +7.0f, +11.0f, +15.0f)); // 1+2, 3+4, 5+6, 7+8
__m128d test_mm_hsub_pd(__m128d A, __m128d B) {
// CHECK-LABEL: test_mm_hsub_pd
// CHECK: call {{.*}}<2 x double> @llvm.x86.sse3.hsub.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
return _mm_hsub_pd(A, B);
}
+TEST_CONSTEXPR(match_m128d(_mm_hsub_pd((__m128d){+1.0, +2.0}, (__m128d){+4.0, +3.0}), -1.0, +1.0)); // 1-2, 4-3
__m128 test_mm_hsub_ps(__m128 A, __m128 B) {
// CHECK-LABEL: test_mm_hsub_ps
// CHECK: call {{.*}}<4 x float> @llvm.x86.sse3.hsub.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}})
return _mm_hsub_ps(A, B);
}
+TEST_CONSTEXPR(match_m128(_mm_hsub_ps((__m128){+1.0f, +2.0f, +4.0f, +3.0f}, (__m128){+5.0f,+7.0f,+10.0f,+8.0f}), -1.0f, +1.0f, -2.0f, +2.0f)); // 1-2, 4-3, 5-7, 10-8
__m128i test_mm_lddqu_si128(__m128i const* P) {
// CHECK-LABEL: test_mm_lddqu_si128
diff --git a/clang/test/CodeGen/X86/ssse3-builtins.c b/clang/test/CodeGen/X86/ssse3-builtins.c
index 5885768..e623599 100644
--- a/clang/test/CodeGen/X86/ssse3-builtins.c
+++ b/clang/test/CodeGen/X86/ssse3-builtins.c
@@ -60,36 +60,43 @@ __m128i test_mm_hadd_epi16(__m128i a, __m128i b) {
// CHECK: call <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
return _mm_hadd_epi16(a, b);
}
+TEST_CONSTEXPR(match_v8hi(_mm_hadd_epi16((__m128i)(__v8hi){1,2,3,4,5,6,7,8}, (__m128i)(__v8hi){17,18,19,20,21,22,23,24}), 3,7,11,15,35,39,43,47)); // 1+2, 3+4, 5+6, 7+8, 17+18, 19+20, 21+22, 23+24
__m128i test_mm_hadd_epi32(__m128i a, __m128i b) {
// CHECK-LABEL: test_mm_hadd_epi32
// CHECK: call <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
return _mm_hadd_epi32(a, b);
}
+TEST_CONSTEXPR(match_v4si(_mm_hadd_epi32((__m128i)(__v4si){1,2,3,4}, (__m128i)(__v4si){5,6,7,8}), 3,7,11,15)); // 1+2, 3+4, 5+6, 7+8
__m128i test_mm_hadds_epi16(__m128i a, __m128i b) {
// CHECK-LABEL: test_mm_hadds_epi16
// CHECK: call <8 x i16> @llvm.x86.ssse3.phadd.sw.128(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
return _mm_hadds_epi16(a, b);
}
+TEST_CONSTEXPR(match_v8hi(_mm_hadds_epi16((__m128i)(__v8hi){30000,30000,-1,2,-3,3,1,4}, (__m128i)(__v8hi){2,6,1,9,-4,16,7,8}), 32767, 1,0,5,8,10,12,15)); // 30000+30000 clamps to 32767; then -1+2, -3+3, 1+4, 2+6, 1+9, -4+16, 7+8
+
__m128i test_mm_hsub_epi16(__m128i a, __m128i b) {
// CHECK-LABEL: test_mm_hsub_epi16
// CHECK: call <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
return _mm_hsub_epi16(a, b);
}
+TEST_CONSTEXPR(match_v8hi(_mm_hsub_epi16((__m128i)(__v8hi){20,15,16,12,9,6,4,2}, (__m128i)(__v8hi){3,2,1,1,4,5,0,2}), 5,4,3,2,1,0,-1,-2)); // 20-15, 16-12, 9-6, 4-2, 3-2, 1-1, 4-5, 0-2
__m128i test_mm_hsub_epi32(__m128i a, __m128i b) {
// CHECK-LABEL: test_mm_hsub_epi32
// CHECK: call <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
return _mm_hsub_epi32(a, b);
}
+TEST_CONSTEXPR(match_v4si(_mm_hsub_epi32((__m128i)(__v4si){4,3,1,1}, (__m128i)(__v4si){7,5,10,5}), 1,0,2,5)); // 4-3, 1-1, 7-5, 10-5
__m128i test_mm_hsubs_epi16(__m128i a, __m128i b) {
// CHECK-LABEL: test_mm_hsubs_epi16
// CHECK: call <8 x i16> @llvm.x86.ssse3.phsub.sw.128(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
return _mm_hsubs_epi16(a, b);
}
+TEST_CONSTEXPR(match_v8hi(_mm_hsubs_epi16((__m128i)(__v8hi){32767, -15,16,12,9,6,4,2},(__m128i)(__v8hi){3,2,1,1,4,5,0,2}), 32767,4,3,2,1,0,-1,-2)); // 32767-(-15) clamps to 32767; then 16-12, 9-6, 4-2, 3-2, 1-1, 4-5, 0-2
__m128i test_mm_maddubs_epi16(__m128i a, __m128i b) {
// CHECK-LABEL: test_mm_maddubs_epi16