From e1d67530065efb64dba2f716a355a40535f4a19d Mon Sep 17 00:00:00 2001 From: Yi-Chi Lee <55395582+yichi170@users.noreply.github.com> Date: Thu, 7 Aug 2025 05:21:58 -0500 Subject: [Headers][X86] Update AVX/AVX512 float/double add/sub/mul/div/unpck intrinsics to be used in constexpr (#152435) Fixed #152313 --------- Co-authored-by: Simon Pilgrim --- clang/test/CodeGen/X86/avx-builtins.c | 12 ++++++++++++ clang/test/CodeGen/X86/avx512f-builtins.c | 10 ++++++++++ 2 files changed, 22 insertions(+) (limited to 'clang/test') diff --git a/clang/test/CodeGen/X86/avx-builtins.c b/clang/test/CodeGen/X86/avx-builtins.c index 2d43764..e2c9f96 100644 --- a/clang/test/CodeGen/X86/avx-builtins.c +++ b/clang/test/CodeGen/X86/avx-builtins.c @@ -20,12 +20,14 @@ __m256d test_mm256_add_pd(__m256d A, __m256d B) { // CHECK: fadd <4 x double> return _mm256_add_pd(A, B); } +TEST_CONSTEXPR(match_m256d( _mm256_add_pd((__m256d){-4.0, -5.0, +6.0, +7.0}, (__m256d){-4.0, -5.0, +6.0, +7.0}), -8.0, -10.0, +12.0, +14.0)); __m256 test_mm256_add_ps(__m256 A, __m256 B) { // CHECK-LABEL: test_mm256_add_ps // CHECK: fadd <8 x float> return _mm256_add_ps(A, B); } +TEST_CONSTEXPR(match_m256(_mm256_add_ps((__m256){-4.0f, -5.0f, +6.0f, +7.0f, +7.0f, +6.0f, -5.0f, -4.0f}, (__m256){-4.0f, -5.0f, +6.0f, +7.0f, +7.0f, +6.0f, -5.0f, -4.0f}), -8.0f, -10.0f, +12.0f, +14.0f, +14.0f, +12.0f, -10.0f, -8.0f)); __m256d test_mm256_addsub_pd(__m256d A, __m256d B) { // CHECK-LABEL: test_mm256_addsub_pd @@ -977,12 +979,14 @@ __m256d test_mm256_div_pd(__m256d A, __m256d B) { // CHECK: fdiv <4 x double> return _mm256_div_pd(A, B); } +TEST_CONSTEXPR(match_m256d( _mm256_div_pd((__m256d){-4.0, -5.0, +6.0, +7.0}, (__m256d){-1.0, +1.0, -1.0, +1.0}), +4.0, -5.0, -6.0, +7.0)); __m256 test_mm256_div_ps(__m256 A, __m256 B) { // CHECK-LABEL: test_mm256_div_ps // CHECK: fdiv <8 x float> return _mm256_div_ps(A, B); } +TEST_CONSTEXPR(match_m256( _mm256_div_ps((__m256){-4.0f, -5.0f, +6.0f, +7.0f, +7.0f, +6.0f, -5.0f, -4.0f}, (__m256){-1.0f, +1.0f, -1.0f, +1.0f, +1.0f, -1.0f, +1.0f, -1.0f}), +4.0f, -5.0f, -6.0f, +7.0f, +7.0f, -6.0f, -5.0f, +4.0f)); __m256 test_mm256_dp_ps(__m256 A, __m256 B) { // CHECK-LABEL: test_mm256_dp_ps @@ -1295,12 +1299,14 @@ __m256d test_mm256_mul_pd(__m256d A, __m256d B) { // CHECK: fmul <4 x double> return _mm256_mul_pd(A, B); } +TEST_CONSTEXPR(match_m256d( _mm256_mul_pd((__m256d){-4.0, -5.0, +6.0, +7.0}, (__m256d){-4.0, -5.0, +6.0, +7.0}), +16.0, +25.0, +36.0, +49.0)); __m256 test_mm256_mul_ps(__m256 A, __m256 B) { // CHECK-LABEL: test_mm256_mul_ps // CHECK: fmul <8 x float> return _mm256_mul_ps(A, B); } +TEST_CONSTEXPR(match_m256( _mm256_mul_ps((__m256){-4.0f, -5.0f, +6.0f, +7.0f, +7.0f, +6.0f, -5.0f, -4.0f}, (__m256){-4.0f, -5.0f, +6.0f, +7.0f, +7.0f, +6.0f, -5.0f, -4.0f}), +16.0f, +25.0f, +36.0f, +49.0f, +49.0f, +36.0f, +25.0f, +16.0f)); __m256d test_mm256_or_pd(__m256d A, __m256d B) { // CHECK-LABEL: test_mm256_or_pd @@ -1933,12 +1939,14 @@ __m256d test_mm256_sub_pd(__m256d A, __m256d B) { // CHECK: fsub <4 x double> return _mm256_sub_pd(A, B); } +TEST_CONSTEXPR(match_m256d( _mm256_sub_pd((__m256d){-4.0, -5.0, +6.0, +7.0}, (__m256d){-0.0, +0.0, +2.0, -1.0}), -4.0, -5.0, 4.0, 8.0)); __m256 test_mm256_sub_ps(__m256 A, __m256 B) { // CHECK-LABEL: test_mm256_sub_ps // CHECK: fsub <8 x float> return _mm256_sub_ps(A, B); } +TEST_CONSTEXPR(match_m256( _mm256_sub_ps((__m256){-4.0f, -5.0f, +6.0f, +7.0f, +7.0f, +6.0f, -5.0f, -4.0f}, (__m256){-0.0f, +0.0f, +2.0f, -1.0f, -1.0f, +2.0f, +0.0f, -0.0f}), -4.0f, -5.0f, 4.0f, 8.0f, 8.0f, 4.0f, -5.0f, -4.0f)); int test_mm_testc_pd(__m128d A, __m128d B) { // CHECK-LABEL: test_mm_testc_pd @@ -2062,24 +2070,28 @@ __m256d test_mm256_unpackhi_pd(__m256d A, __m256d B) { // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x i32> return _mm256_unpackhi_pd(A, B); } +TEST_CONSTEXPR(match_m256d(_mm256_unpackhi_pd((__m256d){+1.0, +2.0, +3.0, +4.0}, (__m256d){+5.0, +6.0, +7.0, +8.0}), +2.0, +6.0, +4.0, +8.0)); __m256 test_mm256_unpackhi_ps(__m256 A, __m256 B) { // CHECK-LABEL: test_mm256_unpackhi_ps // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> return _mm256_unpackhi_ps(A, B); } +TEST_CONSTEXPR(match_m256(_mm256_unpackhi_ps((__m256){+0.0f, +1.0f, +2.0f, +3.0f, +4.0f, +5.0f, +6.0f, +7.0f}, (__m256){+10.0f, +11.0f, +12.0f, +13.0f, +14.0f, +15.0f, +16.0f, +17.0f}), +2.0f, +12.0f, +3.0f, +13.0f, +6.0f, +16.0f, +7.0f, +17.0f)); __m256d test_mm256_unpacklo_pd(__m256d A, __m256d B) { // CHECK-LABEL: test_mm256_unpacklo_pd // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x i32> return _mm256_unpacklo_pd(A, B); } +TEST_CONSTEXPR(match_m256d(_mm256_unpacklo_pd((__m256d){+1.0, +2.0, +3.0, +4.0}, (__m256d){+5.0, +6.0, +7.0, +8.0}), +1.0, +5.0, +3.0, +7.0)); __m256 test_mm256_unpacklo_ps(__m256 A, __m256 B) { // CHECK-LABEL: test_mm256_unpacklo_ps // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> return _mm256_unpacklo_ps(A, B); } +TEST_CONSTEXPR(match_m256(_mm256_unpacklo_ps((__m256){+0.0f, +1.0f, +2.0f, +3.0f, +4.0f, +5.0f, +6.0f, +7.0f}, (__m256){+10.0f, +11.0f, +12.0f, +13.0f, +14.0f, +15.0f, +16.0f, +17.0f}), +0.0f, +10.0f, +1.0f, +11.0f, +4.0f, +14.0f, +5.0f, +15.0f)); __m256d test_mm256_xor_pd(__m256d A, __m256d B) { // CHECK-LABEL: test_mm256_xor_pd diff --git a/clang/test/CodeGen/X86/avx512f-builtins.c b/clang/test/CodeGen/X86/avx512f-builtins.c index 5447035..8c14c57 100644 --- a/clang/test/CodeGen/X86/avx512f-builtins.c +++ b/clang/test/CodeGen/X86/avx512f-builtins.c @@ -154,6 +154,7 @@ __m512 test_mm512_add_ps(__m512 a, __m512 b) // CHECK: fadd <16 x float> return _mm512_add_ps(a, b); } +TEST_CONSTEXPR(match_m512(_mm512_add_ps((__m512){-1.0f, -2.0f, -3.0f, -4.0f, -5.0f, -6.0f, -7.0f, -8.0f, +1.0f, +2.0f, +3.0f, +4.0f, +5.0f, +6.0f, +7.0f, +8.0f}, (__m512){-1.0f, -2.0f, -3.0f, -4.0f, -5.0f, -6.0f, -7.0f, -8.0f, +1.0f, +2.0f, +3.0f, +4.0f, +5.0f, +6.0f, +7.0f, +8.0f}), -2.0f, -4.0f, -6.0f, -8.0f, -10.0f, -12.0f, -14.0f, -16.0f, +2.0f, +4.0f, +6.0f, +8.0f, +10.0f, +12.0f, +14.0f, +16.0f)); __m512d test_mm512_add_pd(__m512d a, __m512d b) { @@ -161,6 +162,7 @@ __m512d test_mm512_add_pd(__m512d a, __m512d b) // CHECK: fadd <8 x double> return _mm512_add_pd(a, b); } +TEST_CONSTEXPR(match_m512d(_mm512_add_pd((__m512d){-1.0, -2.0, -3.0, -4.0, +1.0, +2.0, +3.0, +4.0}, (__m512d){-1.0, -2.0, -3.0, -4.0, +1.0, +2.0, +3.0, +4.0}), -2.0, -4.0, -6.0, -8.0, +2.0, +4.0, +6.0, +8.0)); __m512 test_mm512_mul_ps(__m512 a, __m512 b) { @@ -168,6 +170,7 @@ __m512 test_mm512_mul_ps(__m512 a, __m512 b) // CHECK: fmul <16 x float> return _mm512_mul_ps(a, b); } +TEST_CONSTEXPR(match_m512(_mm512_mul_ps((__m512){-1.0f, -2.0f, -3.0f, -4.0f, -5.0f, -6.0f, -7.0f, -8.0f, +1.0f, +2.0f, +3.0f, +4.0f, +5.0f, +6.0f, +7.0f, +8.0f}, (__m512){-1.0f, -2.0f, -3.0f, -4.0f, -5.0f, -6.0f, -7.0f, -8.0f, +1.0f, +2.0f, +3.0f, +4.0f, +5.0f, +6.0f, +7.0f, +8.0f}), +1.0f, +4.0f, +9.0f, +16.0f, +25.0f, +36.0f, +49.0f, +64.0f, +1.0f, +4.0f, +9.0f, +16.0f, +25.0f, +36.0f, +49.0f, +64.0f)); __m512d test_mm512_mul_pd(__m512d a, __m512d b) { @@ -175,6 +178,7 @@ __m512d test_mm512_mul_pd(__m512d a, __m512d b) // CHECK: fmul <8 x double> return _mm512_mul_pd(a, b); } +TEST_CONSTEXPR(match_m512d(_mm512_mul_pd((__m512d){-1.0, -2.0, -3.0, -4.0, +1.0, +2.0, +3.0, +4.0}, (__m512d){-1.0, -2.0, -3.0, -4.0, +1.0, +2.0, +3.0, +4.0}), +1.0, +4.0, +9.0, +16.0, +1.0, +4.0, +9.0, +16.0)); void test_mm512_storeu_si512 (void *__P, __m512i __A) { @@ -1261,6 +1265,7 @@ __m512d test_mm512_unpackhi_pd(__m512d a, __m512d b) // CHECK: shufflevector <8 x double> {{.*}} return _mm512_unpackhi_pd(a, b); } +TEST_CONSTEXPR(match_m512d(_mm512_unpackhi_pd((__m512d){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}, (__m512d){9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}), +2.0, +10.0, +4.0, +12.0, +6.0, +14.0, +8.0, +16.0)); __m512d test_mm512_unpacklo_pd(__m512d a, __m512d b) { @@ -1268,6 +1273,7 @@ __m512d test_mm512_unpacklo_pd(__m512d a, __m512d b) // CHECK: shufflevector <8 x double> {{.*}} return _mm512_unpacklo_pd(a, b); } +TEST_CONSTEXPR(match_m512d(_mm512_unpacklo_pd((__m512d){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}, (__m512d){9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}), +1.0, +9.0, +3.0, +11.0, +5.0, +13.0, +7.0, +15.0)); __m512 test_mm512_unpackhi_ps(__m512 a, __m512 b) { @@ -1275,6 +1281,7 @@ __m512 test_mm512_unpackhi_ps(__m512 a, __m512 b) // CHECK: shufflevector <16 x float> {{.*}} return _mm512_unpackhi_ps(a, b); } +TEST_CONSTEXPR(match_m512(_mm512_unpackhi_ps((__m512){0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f}, (__m512){16.0f, 17.0f, 18.0f, 19.0f, 20.0f, 21.0f, 22.0f, 23.0f, 24.0f, 25.0f, 26.0f, 27.0f, 28.0f, 29.0f, 30.0f, 31.0f}), +2.0f, +18.0f, +3.0f, +19.0f, +6.0f, +22.0f, +7.0f, +23.0f, +10.0f, +26.0f, +11.0f, +27.0f, +14.0f, +30.0f, +15.0f, +31.0f)); __m512 test_mm512_unpacklo_ps(__m512 a, __m512 b) { @@ -1282,6 +1289,7 @@ __m512 test_mm512_unpacklo_ps(__m512 a, __m512 b) // CHECK: shufflevector <16 x float> {{.*}} return _mm512_unpacklo_ps(a, b); } +TEST_CONSTEXPR(match_m512(_mm512_unpacklo_ps((__m512){0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f}, (__m512){16.0f, 17.0f, 18.0f, 19.0f, 20.0f, 21.0f, 22.0f, 23.0f, 24.0f, 25.0f, 26.0f, 27.0f, 28.0f, 29.0f, 30.0f, 31.0f}), +0.0f, +16.0f, +1.0f, +17.0f, +4.0f, +20.0f, +5.0f, +21.0f, +8.0f, +24.0f, +9.0f, +25.0f, +12.0f, +28.0f, +13.0f, +29.0f)); __mmask16 test_mm512_cmp_round_ps_mask(__m512 a, __m512 b) { // CHECK-LABEL: test_mm512_cmp_round_ps_mask @@ -3551,6 +3559,7 @@ __m512d test_mm512_div_pd(__m512d __a, __m512d __b) { // CHECK: fdiv <8 x double> return _mm512_div_pd(__a,__b); } +TEST_CONSTEXPR(match_m512d(_mm512_div_pd((__m512d){+8.0, +6.0, +4.0, +2.0, -8.0, -6.0, -4.0, -2.0}, (__m512d){+2.0, +2.0, +2.0, +2.0, -2.0, -2.0, -2.0, -2.0}), +4.0, +3.0, +2.0, +1.0, +4.0, +3.0, +2.0, +1.0)); __m512d test_mm512_mask_div_pd(__m512d __w, __mmask8 __u, __m512d __a, __m512d __b) { // CHECK-LABEL: test_mm512_mask_div_pd // CHECK: fdiv <8 x double> %{{.*}}, %{{.*}} @@ -3585,6 +3594,7 @@ __m512 test_mm512_div_ps(__m512 __A, __m512 __B) { // CHECK: fdiv <16 x float> return _mm512_div_ps(__A,__B); } +TEST_CONSTEXPR(match_m512(_mm512_div_ps((__m512){+16.0f, +14.0f, +12.0f, +10.0f, +8.0f, +6.0f, +4.0f, +2.0f, -16.0f, -14.0f, -12.0f, -10.0f, -8.0f, -6.0f, -4.0f, -2.0f}, (__m512){+2.0f, +2.0f, +2.0f, +2.0f, +2.0f, +2.0f, +2.0f, +2.0f, -2.0f, -2.0f, -2.0f, -2.0f, -2.0f, -2.0f, -2.0f, -2.0f}), +8.0f, +7.0f, +6.0f, +5.0f, +4.0f, +3.0f, +2.0f, +1.0f, +8.0f, +7.0f, +6.0f, +5.0f, +4.0f, +3.0f, +2.0f, +1.0f)); __m512 test_mm512_mask_div_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { // CHECK-LABEL: test_mm512_mask_div_ps // CHECK: fdiv <16 x float> %{{.*}}, %{{.*}} -- cgit v1.1