aboutsummaryrefslogtreecommitdiff
path: root/clang/test
diff options
context:
space:
mode:
authorYi-Chi Lee <55395582+yichi170@users.noreply.github.com>2025-08-07 05:21:58 -0500
committerGitHub <noreply@github.com>2025-08-07 11:21:58 +0100
commite1d67530065efb64dba2f716a355a40535f4a19d (patch)
tree76d13233956bec85433ea4e226dcb21c4f1b1d6c /clang/test
parent246990dc029620619f41b6bd3bd7ba67ada1a384 (diff)
downloadllvm-e1d67530065efb64dba2f716a355a40535f4a19d.zip
llvm-e1d67530065efb64dba2f716a355a40535f4a19d.tar.gz
llvm-e1d67530065efb64dba2f716a355a40535f4a19d.tar.bz2
[Headers][X86] Update AVX/AVX512 float/double add/sub/mul/div/unpck intrinsics to be used in constexpr (#152435)
Fixed #152313 --------- Co-authored-by: Simon Pilgrim <llvm-dev@redking.me.uk>
Diffstat (limited to 'clang/test')
-rw-r--r--clang/test/CodeGen/X86/avx-builtins.c12
-rw-r--r--clang/test/CodeGen/X86/avx512f-builtins.c10
2 files changed, 22 insertions, 0 deletions
diff --git a/clang/test/CodeGen/X86/avx-builtins.c b/clang/test/CodeGen/X86/avx-builtins.c
index 2d43764..e2c9f96 100644
--- a/clang/test/CodeGen/X86/avx-builtins.c
+++ b/clang/test/CodeGen/X86/avx-builtins.c
@@ -20,12 +20,14 @@ __m256d test_mm256_add_pd(__m256d A, __m256d B) {
// CHECK: fadd <4 x double>
return _mm256_add_pd(A, B);
}
+TEST_CONSTEXPR(match_m256d( _mm256_add_pd((__m256d){-4.0, -5.0, +6.0, +7.0}, (__m256d){-4.0, -5.0, +6.0, +7.0}), -8.0, -10.0, +12.0, +14.0));
__m256 test_mm256_add_ps(__m256 A, __m256 B) {
// CHECK-LABEL: test_mm256_add_ps
// CHECK: fadd <8 x float>
return _mm256_add_ps(A, B);
}
+TEST_CONSTEXPR(match_m256(_mm256_add_ps((__m256){-4.0f, -5.0f, +6.0f, +7.0f, +7.0f, +6.0f, -5.0f, -4.0f}, (__m256){-4.0f, -5.0f, +6.0f, +7.0f, +7.0f, +6.0f, -5.0f, -4.0f}), -8.0f, -10.0f, +12.0f, +14.0f, +14.0f, +12.0f, -10.0f, -8.0f));
__m256d test_mm256_addsub_pd(__m256d A, __m256d B) {
// CHECK-LABEL: test_mm256_addsub_pd
@@ -977,12 +979,14 @@ __m256d test_mm256_div_pd(__m256d A, __m256d B) {
// CHECK: fdiv <4 x double>
return _mm256_div_pd(A, B);
}
+TEST_CONSTEXPR(match_m256d( _mm256_div_pd((__m256d){-4.0, -5.0, +6.0, +7.0}, (__m256d){-1.0, +1.0, -1.0, +1.0}), +4.0, -5.0, -6.0, +7.0));
__m256 test_mm256_div_ps(__m256 A, __m256 B) {
// CHECK-LABEL: test_mm256_div_ps
// CHECK: fdiv <8 x float>
return _mm256_div_ps(A, B);
}
+TEST_CONSTEXPR(match_m256( _mm256_div_ps((__m256){-4.0f, -5.0f, +6.0f, +7.0f, +7.0f, +6.0f, -5.0f, -4.0f}, (__m256){-1.0f, +1.0f, -1.0f, +1.0f, +1.0f, -1.0f, +1.0f, -1.0f}), +4.0f, -5.0f, -6.0f, +7.0f, +7.0f, -6.0f, -5.0f, +4.0f));
__m256 test_mm256_dp_ps(__m256 A, __m256 B) {
// CHECK-LABEL: test_mm256_dp_ps
@@ -1295,12 +1299,14 @@ __m256d test_mm256_mul_pd(__m256d A, __m256d B) {
// CHECK: fmul <4 x double>
return _mm256_mul_pd(A, B);
}
+TEST_CONSTEXPR(match_m256d( _mm256_mul_pd((__m256d){-4.0, -5.0, +6.0, +7.0}, (__m256d){-4.0, -5.0, +6.0, +7.0}), +16.0, +25.0, +36.0, +49.0));
__m256 test_mm256_mul_ps(__m256 A, __m256 B) {
// CHECK-LABEL: test_mm256_mul_ps
// CHECK: fmul <8 x float>
return _mm256_mul_ps(A, B);
}
+TEST_CONSTEXPR(match_m256( _mm256_mul_ps((__m256){-4.0f, -5.0f, +6.0f, +7.0f, +7.0f, +6.0f, -5.0f, -4.0f}, (__m256){-4.0f, -5.0f, +6.0f, +7.0f, +7.0f, +6.0f, -5.0f, -4.0f}), +16.0f, +25.0f, +36.0f, +49.0f, +49.0f, +36.0f, +25.0f, +16.0f));
__m256d test_mm256_or_pd(__m256d A, __m256d B) {
// CHECK-LABEL: test_mm256_or_pd
@@ -1933,12 +1939,14 @@ __m256d test_mm256_sub_pd(__m256d A, __m256d B) {
// CHECK: fsub <4 x double>
return _mm256_sub_pd(A, B);
}
+TEST_CONSTEXPR(match_m256d( _mm256_sub_pd((__m256d){-4.0, -5.0, +6.0, +7.0}, (__m256d){-0.0, +0.0, +2.0, -1.0}), -4.0, -5.0, 4.0, 8.0));
__m256 test_mm256_sub_ps(__m256 A, __m256 B) {
// CHECK-LABEL: test_mm256_sub_ps
// CHECK: fsub <8 x float>
return _mm256_sub_ps(A, B);
}
+TEST_CONSTEXPR(match_m256( _mm256_sub_ps((__m256){-4.0f, -5.0f, +6.0f, +7.0f, +7.0f, +6.0f, -5.0f, -4.0f}, (__m256){-0.0f, +0.0f, +2.0f, -1.0f, -1.0f, +2.0f, +0.0f, -0.0f}), -4.0f, -5.0f, 4.0f, 8.0f, 8.0f, 4.0f, -5.0f, -4.0f));
int test_mm_testc_pd(__m128d A, __m128d B) {
// CHECK-LABEL: test_mm_testc_pd
@@ -2062,24 +2070,28 @@ __m256d test_mm256_unpackhi_pd(__m256d A, __m256d B) {
// CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
return _mm256_unpackhi_pd(A, B);
}
+TEST_CONSTEXPR(match_m256d(_mm256_unpackhi_pd((__m256d){+1.0, +2.0, +3.0, +4.0}, (__m256d){+5.0, +6.0, +7.0, +8.0}), +2.0, +6.0, +4.0, +8.0));
__m256 test_mm256_unpackhi_ps(__m256 A, __m256 B) {
// CHECK-LABEL: test_mm256_unpackhi_ps
// CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
return _mm256_unpackhi_ps(A, B);
}
+TEST_CONSTEXPR(match_m256(_mm256_unpackhi_ps((__m256){+0.0f, +1.0f, +2.0f, +3.0f, +4.0f, +5.0f, +6.0f, +7.0f}, (__m256){+10.0f, +11.0f, +12.0f, +13.0f, +14.0f, +15.0f, +16.0f, +17.0f}), +2.0f, +12.0f, +3.0f, +13.0f, +6.0f, +16.0f, +7.0f, +17.0f));
__m256d test_mm256_unpacklo_pd(__m256d A, __m256d B) {
// CHECK-LABEL: test_mm256_unpacklo_pd
// CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
return _mm256_unpacklo_pd(A, B);
}
+TEST_CONSTEXPR(match_m256d(_mm256_unpacklo_pd((__m256d){+1.0, +2.0, +3.0, +4.0}, (__m256d){+5.0, +6.0, +7.0, +8.0}), +1.0, +5.0, +3.0, +7.0));
__m256 test_mm256_unpacklo_ps(__m256 A, __m256 B) {
// CHECK-LABEL: test_mm256_unpacklo_ps
// CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
return _mm256_unpacklo_ps(A, B);
}
+TEST_CONSTEXPR(match_m256(_mm256_unpacklo_ps((__m256){+0.0f, +1.0f, +2.0f, +3.0f, +4.0f, +5.0f, +6.0f, +7.0f}, (__m256){+10.0f, +11.0f, +12.0f, +13.0f, +14.0f, +15.0f, +16.0f, +17.0f}), +0.0f, +10.0f, +1.0f, +11.0f, +4.0f, +14.0f, +5.0f, +15.0f));
__m256d test_mm256_xor_pd(__m256d A, __m256d B) {
// CHECK-LABEL: test_mm256_xor_pd
diff --git a/clang/test/CodeGen/X86/avx512f-builtins.c b/clang/test/CodeGen/X86/avx512f-builtins.c
index 5447035..8c14c57 100644
--- a/clang/test/CodeGen/X86/avx512f-builtins.c
+++ b/clang/test/CodeGen/X86/avx512f-builtins.c
@@ -154,6 +154,7 @@ __m512 test_mm512_add_ps(__m512 a, __m512 b)
// CHECK: fadd <16 x float>
return _mm512_add_ps(a, b);
}
+TEST_CONSTEXPR(match_m512(_mm512_add_ps((__m512){-1.0f, -2.0f, -3.0f, -4.0f, -5.0f, -6.0f, -7.0f, -8.0f, +1.0f, +2.0f, +3.0f, +4.0f, +5.0f, +6.0f, +7.0f, +8.0f}, (__m512){-1.0f, -2.0f, -3.0f, -4.0f, -5.0f, -6.0f, -7.0f, -8.0f, +1.0f, +2.0f, +3.0f, +4.0f, +5.0f, +6.0f, +7.0f, +8.0f}), -2.0f, -4.0f, -6.0f, -8.0f, -10.0f, -12.0f, -14.0f, -16.0f, +2.0f, +4.0f, +6.0f, +8.0f, +10.0f, +12.0f, +14.0f, +16.0f));
__m512d test_mm512_add_pd(__m512d a, __m512d b)
{
@@ -161,6 +162,7 @@ __m512d test_mm512_add_pd(__m512d a, __m512d b)
// CHECK: fadd <8 x double>
return _mm512_add_pd(a, b);
}
+TEST_CONSTEXPR(match_m512d(_mm512_add_pd((__m512d){-1.0, -2.0, -3.0, -4.0, +1.0, +2.0, +3.0, +4.0}, (__m512d){-1.0, -2.0, -3.0, -4.0, +1.0, +2.0, +3.0, +4.0}), -2.0, -4.0, -6.0, -8.0, +2.0, +4.0, +6.0, +8.0));
__m512 test_mm512_mul_ps(__m512 a, __m512 b)
{
@@ -168,6 +170,7 @@ __m512 test_mm512_mul_ps(__m512 a, __m512 b)
// CHECK: fmul <16 x float>
return _mm512_mul_ps(a, b);
}
+TEST_CONSTEXPR(match_m512(_mm512_mul_ps((__m512){-1.0f, -2.0f, -3.0f, -4.0f, -5.0f, -6.0f, -7.0f, -8.0f, +1.0f, +2.0f, +3.0f, +4.0f, +5.0f, +6.0f, +7.0f, +8.0f}, (__m512){-1.0f, -2.0f, -3.0f, -4.0f, -5.0f, -6.0f, -7.0f, -8.0f, +1.0f, +2.0f, +3.0f, +4.0f, +5.0f, +6.0f, +7.0f, +8.0f}), +1.0f, +4.0f, +9.0f, +16.0f, +25.0f, +36.0f, +49.0f, +64.0f, +1.0f, +4.0f, +9.0f, +16.0f, +25.0f, +36.0f, +49.0f, +64.0f));
__m512d test_mm512_mul_pd(__m512d a, __m512d b)
{
@@ -175,6 +178,7 @@ __m512d test_mm512_mul_pd(__m512d a, __m512d b)
// CHECK: fmul <8 x double>
return _mm512_mul_pd(a, b);
}
+TEST_CONSTEXPR(match_m512d(_mm512_mul_pd((__m512d){-1.0, -2.0, -3.0, -4.0, +1.0, +2.0, +3.0, +4.0}, (__m512d){-1.0, -2.0, -3.0, -4.0, +1.0, +2.0, +3.0, +4.0}), +1.0, +4.0, +9.0, +16.0, +1.0, +4.0, +9.0, +16.0));
void test_mm512_storeu_si512 (void *__P, __m512i __A)
{
@@ -1261,6 +1265,7 @@ __m512d test_mm512_unpackhi_pd(__m512d a, __m512d b)
// CHECK: shufflevector <8 x double> {{.*}} <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
return _mm512_unpackhi_pd(a, b);
}
+TEST_CONSTEXPR(match_m512d(_mm512_unpackhi_pd((__m512d){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}, (__m512d){9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}), +2.0, +10.0, +4.0, +12.0, +6.0, +14.0, +8.0, +16.0));
__m512d test_mm512_unpacklo_pd(__m512d a, __m512d b)
{
@@ -1268,6 +1273,7 @@ __m512d test_mm512_unpacklo_pd(__m512d a, __m512d b)
// CHECK: shufflevector <8 x double> {{.*}} <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
return _mm512_unpacklo_pd(a, b);
}
+TEST_CONSTEXPR(match_m512d(_mm512_unpacklo_pd((__m512d){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}, (__m512d){9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}), +1.0, +9.0, +3.0, +11.0, +5.0, +13.0, +7.0, +15.0));
__m512 test_mm512_unpackhi_ps(__m512 a, __m512 b)
{
@@ -1275,6 +1281,7 @@ __m512 test_mm512_unpackhi_ps(__m512 a, __m512 b)
// CHECK: shufflevector <16 x float> {{.*}} <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31>
return _mm512_unpackhi_ps(a, b);
}
+TEST_CONSTEXPR(match_m512(_mm512_unpackhi_ps((__m512){0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f}, (__m512){16.0f, 17.0f, 18.0f, 19.0f, 20.0f, 21.0f, 22.0f, 23.0f, 24.0f, 25.0f, 26.0f, 27.0f, 28.0f, 29.0f, 30.0f, 31.0f}), +2.0f, +18.0f, +3.0f, +19.0f, +6.0f, +22.0f, +7.0f, +23.0f, +10.0f, +26.0f, +11.0f, +27.0f, +14.0f, +30.0f, +15.0f, +31.0f));
__m512 test_mm512_unpacklo_ps(__m512 a, __m512 b)
{
@@ -1282,6 +1289,7 @@ __m512 test_mm512_unpacklo_ps(__m512 a, __m512 b)
// CHECK: shufflevector <16 x float> {{.*}} <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29>
return _mm512_unpacklo_ps(a, b);
}
+TEST_CONSTEXPR(match_m512(_mm512_unpacklo_ps((__m512){0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f}, (__m512){16.0f, 17.0f, 18.0f, 19.0f, 20.0f, 21.0f, 22.0f, 23.0f, 24.0f, 25.0f, 26.0f, 27.0f, 28.0f, 29.0f, 30.0f, 31.0f}), +0.0f, +16.0f, +1.0f, +17.0f, +4.0f, +20.0f, +5.0f, +21.0f, +8.0f, +24.0f, +9.0f, +25.0f, +12.0f, +28.0f, +13.0f, +29.0f));
__mmask16 test_mm512_cmp_round_ps_mask(__m512 a, __m512 b) {
// CHECK-LABEL: test_mm512_cmp_round_ps_mask
@@ -3551,6 +3559,7 @@ __m512d test_mm512_div_pd(__m512d __a, __m512d __b) {
// CHECK: fdiv <8 x double>
return _mm512_div_pd(__a,__b);
}
+TEST_CONSTEXPR(match_m512d(_mm512_div_pd((__m512d){+8.0, +6.0, +4.0, +2.0, -8.0, -6.0, -4.0, -2.0}, (__m512d){+2.0, +2.0, +2.0, +2.0, -2.0, -2.0, -2.0, -2.0}), +4.0, +3.0, +2.0, +1.0, +4.0, +3.0, +2.0, +1.0));
__m512d test_mm512_mask_div_pd(__m512d __w, __mmask8 __u, __m512d __a, __m512d __b) {
// CHECK-LABEL: test_mm512_mask_div_pd
// CHECK: fdiv <8 x double> %{{.*}}, %{{.*}}
@@ -3585,6 +3594,7 @@ __m512 test_mm512_div_ps(__m512 __A, __m512 __B) {
// CHECK: fdiv <16 x float>
return _mm512_div_ps(__A,__B);
}
+TEST_CONSTEXPR(match_m512(_mm512_div_ps((__m512){+16.0f, +14.0f, +12.0f, +10.0f, +8.0f, +6.0f, +4.0f, +2.0f, -16.0f, -14.0f, -12.0f, -10.0f, -8.0f, -6.0f, -4.0f, -2.0f}, (__m512){+2.0f, +2.0f, +2.0f, +2.0f, +2.0f, +2.0f, +2.0f, +2.0f, -2.0f, -2.0f, -2.0f, -2.0f, -2.0f, -2.0f, -2.0f, -2.0f}), +8.0f, +7.0f, +6.0f, +5.0f, +4.0f, +3.0f, +2.0f, +1.0f, +8.0f, +7.0f, +6.0f, +5.0f, +4.0f, +3.0f, +2.0f, +1.0f));
__m512 test_mm512_mask_div_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
// CHECK-LABEL: test_mm512_mask_div_ps
// CHECK: fdiv <16 x float> %{{.*}}, %{{.*}}