diff options
author | liuhongt <hongtao.liu@intel.com> | 2021-12-22 16:48:54 +0800 |
---|---|---|
committer | liuhongt <hongtao.liu@intel.com> | 2021-12-23 13:42:55 +0800 |
commit | 1a7ce8570997eb1596c803443d20687b43fa2e47 (patch) | |
tree | 56edd779c97e42887a8a67b7163bd96341dcb22a /gcc/testsuite | |
parent | 9f9bc0bf0d6b043192df5fc9d03b71ff2a36ddc5 (diff) | |
download | gcc-1a7ce8570997eb1596c803443d20687b43fa2e47.zip gcc-1a7ce8570997eb1596c803443d20687b43fa2e47.tar.gz gcc-1a7ce8570997eb1596c803443d20687b43fa2e47.tar.bz2 |
Combine vpcmpuw + zero_extend to vpcmpuw.
vcmp{ps,ph,pd} and vpcmp{,u}{b,w,d,q} implicitly clear the upper bits
of dest.
gcc/ChangeLog:
PR target/103750
* config/i386/sse.md
(*<avx512>_cmp<V48H_AVX512VL:mode>3_zero_extend<SWI248x:mode>):
New pre_reload define_insn_and_split.
(*<avx512>_cmp<VI12_AVX512VL:mode>3_zero_extend<SWI248x:mode>):
Ditto.
(*<avx512>_ucmp<VI12_AVX512VL:mode>3_zero_extend<SWI248x:mode>):
Ditto.
(*<avx512>_ucmp<VI48_AVX512VL:mode>3_zero_extend<SWI248x:mode>):
Ditto.
(*<avx512>_cmp<V48H_AVX512VL:mode>3_zero_extend<SWI248x:mode>_2):
Ditto.
(*<avx512>_cmp<VI12_AVX512VL:mode>3_zero_extend<SWI248x:mode>_2):
Ditto.
(*<avx512>_ucmp<VI12_AVX512VL:mode>3_zero_extend<SWI248x:mode>_2):
Ditto.
(*<avx512>_ucmp<VI48_AVX512VL:mode>3_zero_extend<SWI248x:mode>_2):
Ditto.
gcc/testsuite/ChangeLog:
* gcc.target/i386/avx512bw-pr103750-1.c: New test.
* gcc.target/i386/avx512bw-pr103750-2.c: New test.
* gcc.target/i386/avx512f-pr103750-1.c: New test.
* gcc.target/i386/avx512f-pr103750-2.c: New test.
* gcc.target/i386/avx512fp16-pr103750-1.c: New test.
* gcc.target/i386/avx512fp16-pr103750-2.c: New test.
Diffstat (limited to 'gcc/testsuite')
-rw-r--r-- | gcc/testsuite/gcc.target/i386/avx512bw-pr103750-1.c | 154 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/avx512bw-pr103750-2.c | 173 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/avx512f-pr103750-1.c | 426 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/avx512f-pr103750-2.c | 478 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/avx512fp16-pr103750-1.c | 58 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/avx512fp16-pr103750-2.c | 71 |
6 files changed, 1360 insertions, 0 deletions
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-pr103750-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-pr103750-1.c new file mode 100644 index 0000000..b1165f0 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512bw-pr103750-1.c @@ -0,0 +1,154 @@ +/* PR target/103750 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -mavx512bw -mavx512vl" } */ +/* { dg-final { scan-assembler-not "kmov" { xfail ia32 } } } */ +/* xfail need to be fixed. */ + +#include <immintrin.h> +extern __m128i* pi128; +extern __m256i* pi256; +extern __m512i* pi512; + +unsigned char +foo () +{ + __mmask16 mask1 = _mm_cmpeq_epu8_mask (pi128[0], pi128[1]); + __mmask16 mask2 = _mm_cmpeq_epu8_mask (pi128[2], pi128[3]); + return _kortestz_mask32_u8 (mask1, mask2); +} + +unsigned char +foo1 () +{ + __mmask16 mask1 = _mm_cmpeq_epu8_mask (pi128[0], pi128[1]); + __mmask16 mask2 = _mm_cmpeq_epu8_mask (pi128[2], pi128[3]); + return _kortestz_mask64_u8 (mask1, mask2); +} + +unsigned char +foo2 () +{ + __mmask32 mask1 = _mm256_cmpeq_epu8_mask (pi256[0], pi256[1]); + __mmask32 mask2 = _mm256_cmpeq_epu8_mask (pi256[2], pi256[3]); + return _kortestz_mask64_u8 (mask1, mask2); +} + +unsigned char +foo3 () +{ + __mmask8 mask1 = _mm_cmpeq_epu16_mask (pi128[0], pi128[1]); + __mmask8 mask2 = _mm_cmpeq_epu16_mask (pi128[1], pi128[2]); + return _kortestz_mask16_u8 (mask1, mask2); +} + +unsigned char +foo4 () +{ + __mmask8 mask1 = _mm_cmpeq_epu16_mask (pi128[0], pi128[1]); + __mmask8 mask2 = _mm_cmpeq_epu16_mask (pi128[1], pi128[2]); + return _kortestz_mask32_u8 (mask1, mask2); +} + +unsigned char +foo5 () +{ + __mmask8 mask1 = _mm_cmpeq_epu16_mask (pi128[0], pi128[1]); + __mmask8 mask2 = _mm_cmpeq_epu16_mask (pi128[1], pi128[2]); + return _kortestz_mask64_u8 (mask1, mask2); +} + +unsigned char +foo6 () +{ + __mmask16 mask1 = _mm256_cmpeq_epu16_mask (pi256[0], pi256[1]); + __mmask16 mask2 = _mm256_cmpeq_epu16_mask (pi256[2], pi256[3]); + return _kortestz_mask32_u8 (mask1, mask2); +} + +unsigned char +foo7 () +{ + __mmask16 mask1 = _mm256_cmpeq_epu16_mask (pi256[0], pi256[1]); + __mmask16 mask2 = _mm256_cmpeq_epu16_mask (pi256[2], pi256[3]); + return _kortestz_mask64_u8 (mask1, mask2); +} + +unsigned char +foo8 () +{ + __mmask32 mask1 = _mm512_cmpeq_epu16_mask (pi512[0], pi512[1]); + __mmask32 mask2 = _mm512_cmpeq_epu16_mask (pi512[2], pi512[3]); + return _kortestz_mask64_u8 (mask1, mask2); +} + +unsigned char +sign_foo () +{ + __mmask16 mask1 = _mm_cmpeq_epi8_mask (pi128[0], pi128[1]); + __mmask16 mask2 = _mm_cmpeq_epi8_mask (pi128[2], pi128[3]); + return _kortestz_mask32_u8 (mask1, mask2); +} + +unsigned char +sign_foo1 () +{ + __mmask16 mask1 = _mm_cmpeq_epi8_mask (pi128[0], pi128[1]); + __mmask16 mask2 = _mm_cmpeq_epi8_mask (pi128[2], pi128[3]); + return _kortestz_mask64_u8 (mask1, mask2); +} + +unsigned char +sign_foo2 () +{ + __mmask32 mask1 = _mm256_cmpeq_epi8_mask (pi256[0], pi256[1]); + __mmask32 mask2 = _mm256_cmpeq_epi8_mask (pi256[2], pi256[3]); + return _kortestz_mask64_u8 (mask1, mask2); +} + +unsigned char +sign_foo3 () +{ + __mmask8 mask1 = _mm_cmpeq_epi16_mask (pi128[0], pi128[1]); + __mmask8 mask2 = _mm_cmpeq_epi16_mask (pi128[1], pi128[2]); + return _kortestz_mask16_u8 (mask1, mask2); +} + +unsigned char +sign_foo4 () +{ + __mmask8 mask1 = _mm_cmpeq_epi16_mask (pi128[0], pi128[1]); + __mmask8 mask2 = _mm_cmpeq_epi16_mask (pi128[1], pi128[2]); + return _kortestz_mask32_u8 (mask1, mask2); +} + +unsigned char +sign_foo5 () +{ + __mmask8 mask1 = _mm_cmpeq_epi16_mask (pi128[0], pi128[1]); + __mmask8 mask2 = _mm_cmpeq_epi16_mask (pi128[1], pi128[2]); + return _kortestz_mask64_u8 (mask1, mask2); +} + +unsigned char +sign_foo6 () +{ + __mmask16 mask1 = _mm256_cmpeq_epi16_mask (pi256[0], pi256[1]); + __mmask16 mask2 = _mm256_cmpeq_epi16_mask (pi256[2], pi256[3]); + return _kortestz_mask32_u8 (mask1, mask2); +} + +unsigned char +sign_foo7 () +{ + __mmask16 mask1 = _mm256_cmpeq_epi16_mask (pi256[0], pi256[1]); + __mmask16 mask2 = _mm256_cmpeq_epi16_mask (pi256[2], pi256[3]); + return _kortestz_mask64_u8 (mask1, mask2); +} + +unsigned char +sign_foo8 () +{ + __mmask32 mask1 = _mm512_cmpeq_epi16_mask (pi512[0], pi512[1]); + __mmask32 mask2 = _mm512_cmpeq_epi16_mask (pi512[2], pi512[3]); + return _kortestz_mask64_u8 (mask1, mask2); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-pr103750-2.c b/gcc/testsuite/gcc.target/i386/avx512bw-pr103750-2.c new file mode 100644 index 0000000..7303f54 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512bw-pr103750-2.c @@ -0,0 +1,173 @@ +/* PR target/103750 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -mavx512dq -mavx512bw -mavx512vl" } */ +/* { dg-final { scan-assembler-not "kmov" { xfail ia32 } } } */ +/* xfail need to be fixed. */ + +#include <immintrin.h> +extern __m128i* pi128; +extern __m256i* pi256; +extern __m512i* pi512; + +extern char a, b; +void +foo () +{ + __mmask16 mask1 = _mm_cmpeq_epu8_mask (pi128[0], pi128[1]); + __mmask16 mask2 = _mm_cmpeq_epu8_mask (pi128[2], pi128[3]); + a = _kortestz_mask32_u8 (mask1, mask2); + b = _kortestz_mask16_u8 (mask1, mask2); +} + +void +foo1 () +{ + __mmask16 mask1 = _mm_cmpeq_epu8_mask (pi128[0], pi128[1]); + __mmask16 mask2 = _mm_cmpeq_epu8_mask (pi128[2], pi128[3]); + a = _kortestz_mask64_u8 (mask1, mask2); + b = _kortestz_mask16_u8 (mask1, mask2); +} + +void +foo2 () +{ + __mmask32 mask1 = _mm256_cmpeq_epu8_mask (pi256[0], pi256[1]); + __mmask32 mask2 = _mm256_cmpeq_epu8_mask (pi256[2], pi256[3]); + a = _kortestz_mask64_u8 (mask1, mask2); + b = _kortestz_mask32_u8 (mask1, mask2); +} + +void +foo3 () +{ + __mmask8 mask1 = _mm_cmpeq_epu16_mask (pi128[0], pi128[1]); + __mmask8 mask2 = _mm_cmpeq_epu16_mask (pi128[1], pi128[2]); + a = _kortestz_mask16_u8 (mask1, mask2); + b = _kortestz_mask8_u8 (mask1, mask2); +} + +void +foo4 () +{ + __mmask8 mask1 = _mm_cmpeq_epu16_mask (pi128[0], pi128[1]); + __mmask8 mask2 = _mm_cmpeq_epu16_mask (pi128[1], pi128[2]); + a = _kortestz_mask32_u8 (mask1, mask2); + b = _kortestz_mask8_u8 (mask1, mask2); +} + +void +foo5 () +{ + __mmask8 mask1 = _mm_cmpeq_epu16_mask (pi128[0], pi128[1]); + __mmask8 mask2 = _mm_cmpeq_epu16_mask (pi128[1], pi128[2]); + a = _kortestz_mask64_u8 (mask1, mask2); + b = _kortestz_mask8_u8 (mask1, mask2); +} + +void +foo6 () +{ + __mmask16 mask1 = _mm256_cmpeq_epu16_mask (pi256[0], pi256[1]); + __mmask16 mask2 = _mm256_cmpeq_epu16_mask (pi256[2], pi256[3]); + a = _kortestz_mask32_u8 (mask1, mask2); + b = _kortestz_mask16_u8 (mask1, mask2); +} + +void +foo7 () +{ + __mmask16 mask1 = _mm256_cmpeq_epu16_mask (pi256[0], pi256[1]); + __mmask16 mask2 = _mm256_cmpeq_epu16_mask (pi256[2], pi256[3]); + a = _kortestz_mask64_u8 (mask1, mask2); + b = _kortestz_mask16_u8 (mask1, mask2); +} + +void +foo8 () +{ + __mmask32 mask1 = _mm512_cmpeq_epu16_mask (pi512[0], pi512[1]); + __mmask32 mask2 = _mm512_cmpeq_epu16_mask (pi512[2], pi512[3]); + a = _kortestz_mask64_u8 (mask1, mask2); + b = _kortestz_mask32_u8 (mask1, mask2); +} + +void +sign_foo () +{ + __mmask16 mask1 = _mm_cmpeq_epi8_mask (pi128[0], pi128[1]); + __mmask16 mask2 = _mm_cmpeq_epi8_mask (pi128[2], pi128[3]); + a = _kortestz_mask32_u8 (mask1, mask2); + b = _kortestz_mask16_u8 (mask1, mask2); +} + +void +sign_foo1 () +{ + __mmask16 mask1 = _mm_cmpeq_epi8_mask (pi128[0], pi128[1]); + __mmask16 mask2 = _mm_cmpeq_epi8_mask (pi128[2], pi128[3]); + a = _kortestz_mask64_u8 (mask1, mask2); + b = _kortestz_mask16_u8 (mask1, mask2); +} + +void +sign_foo2 () +{ + __mmask32 mask1 = _mm256_cmpeq_epi8_mask (pi256[0], pi256[1]); + __mmask32 mask2 = _mm256_cmpeq_epi8_mask (pi256[2], pi256[3]); + a = _kortestz_mask64_u8 (mask1, mask2); + b = _kortestz_mask32_u8 (mask1, mask2); +} + +void +sign_foo3 () +{ + __mmask8 mask1 = _mm_cmpeq_epi16_mask (pi128[0], pi128[1]); + __mmask8 mask2 = _mm_cmpeq_epi16_mask (pi128[1], pi128[2]); + a = _kortestz_mask16_u8 (mask1, mask2); + b = _kortestz_mask8_u8 (mask1, mask2); +} + +void +sign_foo4 () +{ + __mmask8 mask1 = _mm_cmpeq_epi16_mask (pi128[0], pi128[1]); + __mmask8 mask2 = _mm_cmpeq_epi16_mask (pi128[1], pi128[2]); + a = _kortestz_mask32_u8 (mask1, mask2); + b = _kortestz_mask8_u8 (mask1, mask2); +} + +void +sign_foo5 () +{ + __mmask8 mask1 = _mm_cmpeq_epi16_mask (pi128[0], pi128[1]); + __mmask8 mask2 = _mm_cmpeq_epi16_mask (pi128[1], pi128[2]); + a = _kortestz_mask64_u8 (mask1, mask2); + b = _kortestz_mask8_u8 (mask1, mask2); +} + +void +sign_foo6 () +{ + __mmask16 mask1 = _mm256_cmpeq_epi16_mask (pi256[0], pi256[1]); + __mmask16 mask2 = _mm256_cmpeq_epi16_mask (pi256[2], pi256[3]); + a = _kortestz_mask32_u8 (mask1, mask2); + b = _kortestz_mask16_u8 (mask1, mask2); +} + +void +sign_foo7 () +{ + __mmask16 mask1 = _mm256_cmpeq_epi16_mask (pi256[0], pi256[1]); + __mmask16 mask2 = _mm256_cmpeq_epi16_mask (pi256[2], pi256[3]); + a = _kortestz_mask64_u8 (mask1, mask2); + b = _kortestz_mask16_u8 (mask1, mask2); +} + +void +sign_foo8 () +{ + __mmask32 mask1 = _mm512_cmpeq_epi16_mask (pi512[0], pi512[1]); + __mmask32 mask2 = _mm512_cmpeq_epi16_mask (pi512[2], pi512[3]); + a = _kortestz_mask64_u8 (mask1, mask2); + b = _kortestz_mask32_u8 (mask1, mask2); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512f-pr103750-1.c b/gcc/testsuite/gcc.target/i386/avx512f-pr103750-1.c new file mode 100644 index 0000000..613efe0 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512f-pr103750-1.c @@ -0,0 +1,426 @@ +/* PR target/103750 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -mavx512f -mavx512vl -mavx512bw" } */ +/* { dg-final { scan-assembler-not "kmov" { xfail ia32 } } } */ +/* xfail need to be fixed. */ + +#include <immintrin.h> +extern __m128i* pi128; +extern __m256i* pi256; +extern __m512i* pi512; + +extern __m128* ps128; +extern __m256* ps256; +extern __m512* ps512; + +extern __m128d* pd128; +extern __m256d* pd256; +extern __m512d* pd512; + +unsigned char +foo () +{ + __mmask8 mask1 = _mm_cmpeq_epu32_mask (pi128[0], pi128[1]); + __mmask8 mask2 = _mm_cmpeq_epu32_mask (pi128[2], pi128[3]); + return _kortestz_mask16_u8 (mask1, mask2); +} + +unsigned char +foo1 () +{ + __mmask8 mask1 = _mm_cmpeq_epu32_mask (pi128[0], pi128[1]); + __mmask8 mask2 = _mm_cmpeq_epu32_mask (pi128[2], pi128[3]); + return _kortestz_mask32_u8 (mask1, mask2); +} + +unsigned char +foo2 () +{ + __mmask8 mask1 = _mm_cmpeq_epu32_mask (pi128[0], pi128[1]); + __mmask8 mask2 = _mm_cmpeq_epu32_mask (pi128[2], pi128[3]); + return _kortestz_mask64_u8 (mask1, mask2); +} + +unsigned char +foo3 () +{ + __mmask8 mask1 = _mm256_cmpeq_epu32_mask (pi256[0], pi256[1]); + __mmask8 mask2 = _mm256_cmpeq_epu32_mask (pi256[2], pi256[3]); + return _kortestz_mask16_u8 (mask1, mask2); +} + +unsigned char +foo4 () +{ + __mmask8 mask1 = _mm256_cmpeq_epu32_mask (pi256[0], pi256[1]); + __mmask8 mask2 = _mm256_cmpeq_epu32_mask (pi256[2], pi256[3]); + return _kortestz_mask32_u8 (mask1, mask2); +} + +unsigned char +foo5 () +{ + __mmask8 mask1 = _mm256_cmpeq_epu32_mask (pi256[0], pi256[1]); + __mmask8 mask2 = _mm256_cmpeq_epu32_mask (pi256[2], pi256[3]); + return _kortestz_mask64_u8 (mask1, mask2); +} + +unsigned char +foo6 () +{ + __mmask16 mask1 = _mm512_cmpeq_epu32_mask (pi512[0], pi512[1]); + __mmask16 mask2 = _mm512_cmpeq_epu32_mask (pi512[2], pi512[3]); + return _kortestz_mask32_u8 (mask1, mask2); +} + +unsigned char +foo7 () +{ + __mmask16 mask1 = _mm512_cmpeq_epu32_mask (pi512[0], pi512[1]); + __mmask16 mask2 = _mm512_cmpeq_epu32_mask (pi512[2], pi512[3]); + return _kortestz_mask64_u8 (mask1, mask2); +} + +unsigned char +foo8 () +{ + __mmask8 mask1 = _mm_cmpeq_epu64_mask (pi128[0], pi128[1]); + __mmask8 mask2 = _mm_cmpeq_epu64_mask (pi128[2], pi128[3]); + return _kortestz_mask16_u8 (mask1, mask2); +} + +unsigned char +foo9 () +{ + __mmask8 mask1 = _mm_cmpeq_epu64_mask (pi128[0], pi128[1]); + __mmask8 mask2 = _mm_cmpeq_epu64_mask (pi128[2], pi128[3]); + return _kortestz_mask32_u8 (mask1, mask2); +} + +unsigned char +foo10 () +{ + __mmask8 mask1 = _mm_cmpeq_epu64_mask (pi128[0], pi128[1]); + __mmask8 mask2 = _mm_cmpeq_epu64_mask (pi128[2], pi128[3]); + return _kortestz_mask64_u8 (mask1, mask2); +} + +unsigned char +foo11 () +{ + __mmask8 mask1 = _mm256_cmpeq_epu64_mask (pi256[0], pi256[1]); + __mmask8 mask2 = _mm256_cmpeq_epu64_mask (pi256[2], pi256[3]); + return _kortestz_mask16_u8 (mask1, mask2); +} + +unsigned char +foo12 () +{ + __mmask8 mask1 = _mm256_cmpeq_epu64_mask (pi256[0], pi256[1]); + __mmask8 mask2 = _mm256_cmpeq_epu64_mask (pi256[2], pi256[3]); + return _kortestz_mask32_u8 (mask1, mask2); +} + +unsigned char +foo13 () +{ + __mmask8 mask1 = _mm256_cmpeq_epu64_mask (pi256[0], pi256[1]); + __mmask8 mask2 = _mm256_cmpeq_epu64_mask (pi256[2], pi256[3]); + return _kortestz_mask64_u8 (mask1, mask2); +} + +unsigned char +foo14 () +{ + __mmask8 mask1 = _mm512_cmpeq_epu64_mask (pi512[0], pi512[1]); + __mmask8 mask2 = _mm512_cmpeq_epu64_mask (pi512[2], pi512[3]); + return _kortestz_mask16_u8 (mask1, mask2); +} + +unsigned char +foo15 () +{ + __mmask8 mask1 = _mm512_cmpeq_epu64_mask (pi512[0], pi512[1]); + __mmask8 mask2 = _mm512_cmpeq_epu64_mask (pi512[2], pi512[3]); + return _kortestz_mask32_u8 (mask1, mask2); +} + +unsigned char +foo16 () +{ + __mmask8 mask1 = _mm512_cmpeq_epu64_mask (pi512[0], pi512[1]); + __mmask8 mask2 = _mm512_cmpeq_epu64_mask (pi512[2], pi512[3]); + return _kortestz_mask64_u8 (mask1, mask2); +} + +unsigned char +sign_foo () +{ + __mmask8 mask1 = _mm_cmpeq_epi32_mask (pi128[0], pi128[1]); + __mmask8 mask2 = _mm_cmpeq_epi32_mask (pi128[2], pi128[3]); + return _kortestz_mask16_u8 (mask1, mask2); +} + +unsigned char +sign_foo1 () +{ + __mmask8 mask1 = _mm_cmpeq_epi32_mask (pi128[0], pi128[1]); + __mmask8 mask2 = _mm_cmpeq_epi32_mask (pi128[2], pi128[3]); + return _kortestz_mask32_u8 (mask1, mask2); +} + +unsigned char +sign_foo2 () +{ + __mmask8 mask1 = _mm_cmpeq_epi32_mask (pi128[0], pi128[1]); + __mmask8 mask2 = _mm_cmpeq_epi32_mask (pi128[2], pi128[3]); + return _kortestz_mask64_u8 (mask1, mask2); +} + +unsigned char +sign_foo3 () +{ + __mmask8 mask1 = _mm256_cmpeq_epi32_mask (pi256[0], pi256[1]); + __mmask8 mask2 = _mm256_cmpeq_epi32_mask (pi256[2], pi256[3]); + return _kortestz_mask16_u8 (mask1, mask2); +} + +unsigned char +sign_foo4 () +{ + __mmask8 mask1 = _mm256_cmpeq_epi32_mask (pi256[0], pi256[1]); + __mmask8 mask2 = _mm256_cmpeq_epi32_mask (pi256[2], pi256[3]); + return _kortestz_mask32_u8 (mask1, mask2); +} + +unsigned char +sign_foo5 () +{ + __mmask8 mask1 = _mm256_cmpeq_epi32_mask (pi256[0], pi256[1]); + __mmask8 mask2 = _mm256_cmpeq_epi32_mask (pi256[2], pi256[3]); + return _kortestz_mask64_u8 (mask1, mask2); +} + +unsigned char +sign_foo6 () +{ + __mmask16 mask1 = _mm512_cmpeq_epi32_mask (pi512[0], pi512[1]); + __mmask16 mask2 = _mm512_cmpeq_epi32_mask (pi512[2], pi512[3]); + return _kortestz_mask32_u8 (mask1, mask2); +} + +unsigned char +sign_foo7 () +{ + __mmask16 mask1 = _mm512_cmpeq_epi32_mask (pi512[0], pi512[1]); + __mmask16 mask2 = _mm512_cmpeq_epi32_mask (pi512[2], pi512[3]); + return _kortestz_mask64_u8 (mask1, mask2); +} + +unsigned char +sign_foo8 () +{ + __mmask8 mask1 = _mm_cmpeq_epi64_mask (pi128[0], pi128[1]); + __mmask8 mask2 = _mm_cmpeq_epi64_mask (pi128[2], pi128[3]); + return _kortestz_mask16_u8 (mask1, mask2); +} + +unsigned char +sign_foo9 () +{ + __mmask8 mask1 = _mm_cmpeq_epi64_mask (pi128[0], pi128[1]); + __mmask8 mask2 = _mm_cmpeq_epi64_mask (pi128[2], pi128[3]); + return _kortestz_mask32_u8 (mask1, mask2); +} + +unsigned char +sign_foo10 () +{ + __mmask8 mask1 = _mm_cmpeq_epi64_mask (pi128[0], pi128[1]); + __mmask8 mask2 = _mm_cmpeq_epi64_mask (pi128[2], pi128[3]); + return _kortestz_mask64_u8 (mask1, mask2); +} + +unsigned char +sign_foo11 () +{ + __mmask8 mask1 = _mm256_cmpeq_epi64_mask (pi256[0], pi256[1]); + __mmask8 mask2 = _mm256_cmpeq_epi64_mask (pi256[2], pi256[3]); + return _kortestz_mask16_u8 (mask1, mask2); +} + +unsigned char +sign_foo12 () +{ + __mmask8 mask1 = _mm256_cmpeq_epi64_mask (pi256[0], pi256[1]); + __mmask8 mask2 = _mm256_cmpeq_epi64_mask (pi256[2], pi256[3]); + return _kortestz_mask32_u8 (mask1, mask2); +} + +unsigned char +sign_foo13 () +{ + __mmask8 mask1 = _mm256_cmpeq_epi64_mask (pi256[0], pi256[1]); + __mmask8 mask2 = _mm256_cmpeq_epi64_mask (pi256[2], pi256[3]); + return _kortestz_mask64_u8 (mask1, mask2); +} + +unsigned char +sign_foo14 () +{ + __mmask8 mask1 = _mm512_cmpeq_epi64_mask (pi512[0], pi512[1]); + __mmask8 mask2 = _mm512_cmpeq_epi64_mask (pi512[2], pi512[3]); + return _kortestz_mask16_u8 (mask1, mask2); +} + +unsigned char +sign_foo15 () +{ + __mmask8 mask1 = _mm512_cmpeq_epi64_mask (pi512[0], pi512[1]); + __mmask8 mask2 = _mm512_cmpeq_epi64_mask (pi512[2], pi512[3]); + return _kortestz_mask32_u8 (mask1, mask2); +} + +unsigned char +sign_foo16 () +{ + __mmask8 mask1 = _mm512_cmpeq_epi64_mask (pi512[0], pi512[1]); + __mmask8 mask2 = _mm512_cmpeq_epi64_mask (pi512[2], pi512[3]); + return _kortestz_mask64_u8 (mask1, mask2); +} + +unsigned char +float_foo () +{ + __mmask8 mask1 = _mm_cmp_ps_mask (ps128[0], ps128[1], 1); + __mmask8 mask2 = _mm_cmp_ps_mask (ps128[2], ps128[3], 1); + return _kortestz_mask16_u8 (mask1, mask2); +} + +unsigned char +float_foo1 () +{ + __mmask8 mask1 = _mm_cmp_ps_mask (ps128[0], ps128[1], 1); + __mmask8 mask2 = _mm_cmp_ps_mask (ps128[2], ps128[3], 1); + return _kortestz_mask32_u8 (mask1, mask2); +} + +unsigned char +float_foo2 () +{ + __mmask8 mask1 = _mm_cmp_ps_mask (ps128[0], ps128[1], 1); + __mmask8 mask2 = _mm_cmp_ps_mask (ps128[2], ps128[3], 1); + return _kortestz_mask64_u8 (mask1, mask2); +} + +unsigned char +float_foo3 () +{ + __mmask8 mask1 = _mm256_cmp_ps_mask (ps256[0], ps256[1], 1); + __mmask8 mask2 = _mm256_cmp_ps_mask (ps256[2], ps256[3], 1); + return _kortestz_mask16_u8 (mask1, mask2); +} + +unsigned char +float_foo4 () +{ + __mmask8 mask1 = _mm256_cmp_ps_mask (ps256[0], ps256[1], 1); + __mmask8 mask2 = _mm256_cmp_ps_mask (ps256[2], ps256[3], 1); + return _kortestz_mask32_u8 (mask1, mask2); +} + +unsigned char +float_foo5 () +{ + __mmask8 mask1 = _mm256_cmp_ps_mask (ps256[0], ps256[1], 1); + __mmask8 mask2 = _mm256_cmp_ps_mask (ps256[2], ps256[3], 1); + return _kortestz_mask64_u8 (mask1, mask2); +} + +unsigned char +float_foo6 () +{ + __mmask16 mask1 = _mm512_cmp_ps_mask (ps512[0], ps512[1], 1); + __mmask16 mask2 = _mm512_cmp_ps_mask (ps512[2], ps512[3], 1); + return _kortestz_mask32_u8 (mask1, mask2); +} + +unsigned char +float_foo7 () +{ + __mmask16 mask1 = _mm512_cmp_ps_mask (ps512[0], ps512[1], 1); + __mmask16 mask2 = _mm512_cmp_ps_mask (ps512[2], ps512[3], 1); + return _kortestz_mask64_u8 (mask1, mask2); +} + +unsigned char +float_foo8 () +{ + __mmask8 mask1 = _mm_cmp_pd_mask (pd128[0], pd128[1], 1); + __mmask8 mask2 = _mm_cmp_pd_mask (pd128[2], pd128[3], 1); + return _kortestz_mask16_u8 (mask1, mask2); +} + +unsigned char +float_foo9 () +{ + __mmask8 mask1 = _mm_cmp_pd_mask (pd128[0], pd128[1], 1); + __mmask8 mask2 = _mm_cmp_pd_mask (pd128[2], pd128[3], 1); + return _kortestz_mask32_u8 (mask1, mask2); +} + +unsigned char +float_foo10 () +{ + __mmask8 mask1 = _mm_cmp_pd_mask (pd128[0], pd128[1], 1); + __mmask8 mask2 = _mm_cmp_pd_mask (pd128[2], pd128[3], 1); + return _kortestz_mask64_u8 (mask1, mask2); +} + +unsigned char +float_foo11 () +{ + __mmask8 mask1 = _mm256_cmp_pd_mask (pd256[0], pd256[1], 1); + __mmask8 mask2 = _mm256_cmp_pd_mask (pd256[2], pd256[3], 1); + return _kortestz_mask16_u8 (mask1, mask2); +} + +unsigned char +float_foo12 () +{ + __mmask8 mask1 = _mm256_cmp_pd_mask (pd256[0], pd256[1], 1); + __mmask8 mask2 = _mm256_cmp_pd_mask (pd256[2], pd256[3], 1); + return _kortestz_mask32_u8 (mask1, mask2); +} + +unsigned char +float_foo13 () +{ + __mmask8 mask1 = _mm256_cmp_pd_mask (pd256[0], pd256[1], 1); + __mmask8 mask2 = _mm256_cmp_pd_mask (pd256[2], pd256[3], 1); + return _kortestz_mask64_u8 (mask1, mask2); +} + +unsigned char +float_foo14 () +{ + __mmask8 mask1 = _mm512_cmp_pd_mask (pd512[0], pd512[1], 1); + __mmask8 mask2 = _mm512_cmp_pd_mask (pd512[2], pd512[3], 1); + return _kortestz_mask16_u8 (mask1, mask2); +} + +unsigned char +float_foo15 () +{ + __mmask8 mask1 = _mm512_cmp_pd_mask (pd512[0], pd512[1], 1); + __mmask8 mask2 = _mm512_cmp_pd_mask (pd512[2], pd512[3], 1); + return _kortestz_mask32_u8 (mask1, mask2); +} + +unsigned char +float_foo16 () +{ + __mmask8 mask1 = _mm512_cmp_pd_mask (pd512[0], pd512[1], 1); + __mmask8 mask2 = _mm512_cmp_pd_mask (pd512[2], pd512[3], 1); + return _kortestz_mask64_u8 (mask1, mask2); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512f-pr103750-2.c b/gcc/testsuite/gcc.target/i386/avx512f-pr103750-2.c new file mode 100644 index 0000000..a6c2b06 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512f-pr103750-2.c @@ -0,0 +1,478 @@ +/* PR target/103750 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -mavx512dq -mavx512vl -mavx512bw" } */ +/* { dg-final { scan-assembler-not "kmov" { xfail ia32 } } } */ +/* xfail need to be fixed. */ + +#include <immintrin.h> +extern __m128i* pi128; +extern __m256i* pi256; +extern __m512i* pi512; + +extern __m128* ps128; +extern __m256* ps256; +extern __m512* ps512; + +extern __m128d* pd128; +extern __m256d* pd256; +extern __m512d* pd512; + +extern char a, b; +void +foo () +{ + __mmask8 mask1 = _mm_cmpeq_epu32_mask (pi128[0], pi128[1]); + __mmask8 mask2 = _mm_cmpeq_epu32_mask (pi128[2], pi128[3]); + a = _kortestz_mask16_u8 (mask1, mask2); + b = _kortestz_mask8_u8 (mask1, mask2); +} + +void +foo1 () +{ + __mmask8 mask1 = _mm_cmpeq_epu32_mask (pi128[0], pi128[1]); + __mmask8 mask2 = _mm_cmpeq_epu32_mask (pi128[2], pi128[3]); + a = _kortestz_mask32_u8 (mask1, mask2); + b = _kortestz_mask8_u8 (mask1, mask2); +} + +void +foo2 () +{ + __mmask8 mask1 = _mm_cmpeq_epu32_mask (pi128[0], pi128[1]); + __mmask8 mask2 = _mm_cmpeq_epu32_mask (pi128[2], pi128[3]); + a = _kortestz_mask64_u8 (mask1, mask2); + b = _kortestz_mask8_u8 (mask1, mask2); +} + +void +foo3 () +{ + __mmask8 mask1 = _mm256_cmpeq_epu32_mask (pi256[0], pi256[1]); + __mmask8 mask2 = _mm256_cmpeq_epu32_mask (pi256[2], pi256[3]); + a = _kortestz_mask16_u8 (mask1, mask2); + b = _kortestz_mask8_u8 (mask1, mask2); +} + +void +foo4 () +{ + __mmask8 mask1 = _mm256_cmpeq_epu32_mask (pi256[0], pi256[1]); + __mmask8 mask2 = _mm256_cmpeq_epu32_mask (pi256[2], pi256[3]); + a = _kortestz_mask32_u8 (mask1, mask2); + b = _kortestz_mask8_u8 (mask1, mask2); +} + +void +foo5 () +{ + __mmask8 mask1 = _mm256_cmpeq_epu32_mask (pi256[0], pi256[1]); + __mmask8 mask2 = _mm256_cmpeq_epu32_mask (pi256[2], pi256[3]); + a = _kortestz_mask64_u8 (mask1, mask2); + b = _kortestz_mask8_u8 (mask1, mask2); +} + +void +foo6 () +{ + __mmask16 mask1 = _mm512_cmpeq_epu32_mask (pi512[0], pi512[1]); + __mmask16 mask2 = _mm512_cmpeq_epu32_mask (pi512[2], pi512[3]); + a = _kortestz_mask32_u8 (mask1, mask2); + b = _kortestz_mask16_u8 (mask1, mask2); +} + +void +foo7 () +{ + __mmask16 mask1 = _mm512_cmpeq_epu32_mask (pi512[0], pi512[1]); + __mmask16 mask2 = _mm512_cmpeq_epu32_mask (pi512[2], pi512[3]); + a = _kortestz_mask64_u8 (mask1, mask2); + b = _kortestz_mask16_u8 (mask1, mask2); +} + +void +foo8 () +{ + __mmask8 mask1 = _mm_cmpeq_epu64_mask (pi128[0], pi128[1]); + __mmask8 mask2 = _mm_cmpeq_epu64_mask (pi128[2], pi128[3]); + a = _kortestz_mask16_u8 (mask1, mask2); + b = _kortestz_mask8_u8 (mask1, mask2); +} + +void +foo9 () +{ + __mmask8 mask1 = _mm_cmpeq_epu64_mask (pi128[0], pi128[1]); + __mmask8 mask2 = _mm_cmpeq_epu64_mask (pi128[2], pi128[3]); + a = _kortestz_mask32_u8 (mask1, mask2); + b = _kortestz_mask8_u8 (mask1, mask2); +} + +void +foo10 () +{ + __mmask8 mask1 = _mm_cmpeq_epu64_mask (pi128[0], pi128[1]); + __mmask8 mask2 = _mm_cmpeq_epu64_mask (pi128[2], pi128[3]); + a = _kortestz_mask64_u8 (mask1, mask2); + b = _kortestz_mask8_u8 (mask1, mask2); +} + +void +foo11 () +{ + __mmask8 mask1 = _mm256_cmpeq_epu64_mask (pi256[0], pi256[1]); + __mmask8 mask2 = _mm256_cmpeq_epu64_mask (pi256[2], pi256[3]); + a = _kortestz_mask16_u8 (mask1, mask2); + b = _kortestz_mask8_u8 (mask1, mask2); +} + +void +foo12 () +{ + __mmask8 mask1 = _mm256_cmpeq_epu64_mask (pi256[0], pi256[1]); + __mmask8 mask2 = _mm256_cmpeq_epu64_mask (pi256[2], pi256[3]); + a = _kortestz_mask32_u8 (mask1, mask2); + b = _kortestz_mask8_u8 (mask1, mask2); +} + +void +foo13 () +{ + __mmask8 mask1 = _mm256_cmpeq_epu64_mask (pi256[0], pi256[1]); + __mmask8 mask2 = _mm256_cmpeq_epu64_mask (pi256[2], pi256[3]); + a = _kortestz_mask64_u8 (mask1, mask2); + b = _kortestz_mask8_u8 (mask1, mask2); +} + +void +foo14 () +{ + __mmask8 mask1 = _mm512_cmpeq_epu64_mask (pi512[0], pi512[1]); + __mmask8 mask2 = _mm512_cmpeq_epu64_mask (pi512[2], pi512[3]); + a = _kortestz_mask16_u8 (mask1, mask2); + b = _kortestz_mask8_u8 (mask1, mask2); +} + +void +foo15 () +{ + __mmask8 mask1 = _mm512_cmpeq_epu64_mask (pi512[0], pi512[1]); + __mmask8 mask2 = _mm512_cmpeq_epu64_mask (pi512[2], pi512[3]); + a = _kortestz_mask32_u8 (mask1, mask2); + b = _kortestz_mask8_u8 (mask1, mask2); +} + +void +foo16 () +{ + __mmask8 mask1 = _mm512_cmpeq_epu64_mask (pi512[0], pi512[1]); + __mmask8 mask2 = _mm512_cmpeq_epu64_mask (pi512[2], pi512[3]); + a = _kortestz_mask64_u8 (mask1, mask2); + b = _kortestz_mask8_u8 (mask1, mask2); +} + +void +sign_foo () +{ + __mmask8 mask1 = _mm_cmpeq_epi32_mask (pi128[0], pi128[1]); + __mmask8 mask2 = _mm_cmpeq_epi32_mask (pi128[2], pi128[3]); + a = _kortestz_mask16_u8 (mask1, mask2); + b = _kortestz_mask8_u8 (mask1, mask2); +} + +void +sign_foo1 () +{ + __mmask8 mask1 = _mm_cmpeq_epi32_mask (pi128[0], pi128[1]); + __mmask8 mask2 = _mm_cmpeq_epi32_mask (pi128[2], pi128[3]); + a = _kortestz_mask32_u8 (mask1, mask2); + b = _kortestz_mask8_u8 (mask1, mask2); +} + +void +sign_foo2 () +{ + __mmask8 mask1 = _mm_cmpeq_epi32_mask (pi128[0], pi128[1]); + __mmask8 mask2 = _mm_cmpeq_epi32_mask (pi128[2], pi128[3]); + a = _kortestz_mask64_u8 (mask1, mask2); + b = _kortestz_mask8_u8 (mask1, mask2); +} + +void +sign_foo3 () +{ + __mmask8 mask1 = _mm256_cmpeq_epi32_mask (pi256[0], pi256[1]); + __mmask8 mask2 = _mm256_cmpeq_epi32_mask (pi256[2], pi256[3]); + a = _kortestz_mask16_u8 (mask1, mask2); + b = _kortestz_mask8_u8 (mask1, mask2); +} + +void +sign_foo4 () +{ + __mmask8 mask1 = _mm256_cmpeq_epi32_mask (pi256[0], pi256[1]); + __mmask8 mask2 = _mm256_cmpeq_epi32_mask (pi256[2], pi256[3]); + a = _kortestz_mask32_u8 (mask1, mask2); + b = _kortestz_mask8_u8 (mask1, mask2); +} + +void +sign_foo5 () +{ + __mmask8 mask1 = _mm256_cmpeq_epi32_mask (pi256[0], pi256[1]); + __mmask8 mask2 = _mm256_cmpeq_epi32_mask (pi256[2], pi256[3]); + a = _kortestz_mask64_u8 (mask1, mask2); + b = _kortestz_mask8_u8 (mask1, mask2); +} + +void +sign_foo6 () +{ + __mmask16 mask1 = _mm512_cmpeq_epi32_mask (pi512[0], pi512[1]); + __mmask16 mask2 = _mm512_cmpeq_epi32_mask (pi512[2], pi512[3]); + a = _kortestz_mask32_u8 (mask1, mask2); + b = _kortestz_mask16_u8 (mask1, mask2); +} + +void +sign_foo7 () +{ + __mmask16 mask1 = _mm512_cmpeq_epi32_mask (pi512[0], pi512[1]); + __mmask16 mask2 = _mm512_cmpeq_epi32_mask (pi512[2], pi512[3]); + a = _kortestz_mask64_u8 (mask1, mask2); + b = _kortestz_mask16_u8 (mask1, mask2); +} + +void +sign_foo8 () +{ + __mmask8 mask1 = _mm_cmpeq_epi64_mask (pi128[0], pi128[1]); + __mmask8 mask2 = _mm_cmpeq_epi64_mask (pi128[2], pi128[3]); + a = _kortestz_mask16_u8 (mask1, mask2); + b = _kortestz_mask8_u8 (mask1, mask2); +} + +void +sign_foo9 () +{ + __mmask8 mask1 = _mm_cmpeq_epi64_mask (pi128[0], pi128[1]); + __mmask8 mask2 = _mm_cmpeq_epi64_mask (pi128[2], pi128[3]); + a = _kortestz_mask32_u8 (mask1, mask2); + b = _kortestz_mask8_u8 (mask1, mask2); +} + +void +sign_foo10 () +{ + __mmask8 mask1 = _mm_cmpeq_epi64_mask (pi128[0], pi128[1]); + __mmask8 mask2 = _mm_cmpeq_epi64_mask (pi128[2], pi128[3]); + a = _kortestz_mask64_u8 (mask1, mask2); + b = _kortestz_mask8_u8 (mask1, mask2); +} + +void +sign_foo11 () +{ + __mmask8 mask1 = _mm256_cmpeq_epi64_mask (pi256[0], pi256[1]); + __mmask8 mask2 = _mm256_cmpeq_epi64_mask (pi256[2], pi256[3]); + a = _kortestz_mask16_u8 (mask1, mask2); + b = _kortestz_mask8_u8 (mask1, mask2); +} + +void +sign_foo12 () +{ + __mmask8 mask1 = _mm256_cmpeq_epi64_mask (pi256[0], pi256[1]); + __mmask8 mask2 = _mm256_cmpeq_epi64_mask (pi256[2], pi256[3]); + a = _kortestz_mask32_u8 (mask1, mask2); + b = _kortestz_mask8_u8 (mask1, mask2); +} + +void +sign_foo13 () +{ + __mmask8 mask1 = _mm256_cmpeq_epi64_mask (pi256[0], pi256[1]); + __mmask8 mask2 = _mm256_cmpeq_epi64_mask (pi256[2], pi256[3]); + a = _kortestz_mask64_u8 (mask1, mask2); + b = _kortestz_mask8_u8 (mask1, mask2); +} + +void +sign_foo14 () +{ + __mmask8 mask1 = _mm512_cmpeq_epi64_mask (pi512[0], pi512[1]); + __mmask8 mask2 = _mm512_cmpeq_epi64_mask (pi512[2], pi512[3]); + a = _kortestz_mask16_u8 (mask1, mask2); + b = _kortestz_mask8_u8 (mask1, mask2); +} + +void +sign_foo15 () +{ + __mmask8 mask1 = _mm512_cmpeq_epi64_mask (pi512[0], pi512[1]); + __mmask8 mask2 = _mm512_cmpeq_epi64_mask (pi512[2], pi512[3]); + a = _kortestz_mask32_u8 (mask1, mask2); + b = _kortestz_mask8_u8 (mask1, mask2); +} + +void +sign_foo16 () +{ + __mmask8 mask1 = _mm512_cmpeq_epi64_mask (pi512[0], pi512[1]); + __mmask8 mask2 = _mm512_cmpeq_epi64_mask (pi512[2], pi512[3]); + a = _kortestz_mask64_u8 (mask1, mask2); + b = _kortestz_mask8_u8 (mask1, mask2); +} + +void +float_foo () +{ + __mmask8 mask1 = _mm_cmp_ps_mask (ps128[0], ps128[1], 1); + __mmask8 mask2 = _mm_cmp_ps_mask (ps128[2], ps128[3], 1); + a = _kortestz_mask16_u8 (mask1, mask2); + b = _kortestz_mask8_u8 (mask1, mask2); +} + +void +float_foo1 () +{ + __mmask8 mask1 = _mm_cmp_ps_mask (ps128[0], ps128[1], 1); + __mmask8 mask2 = _mm_cmp_ps_mask (ps128[2], ps128[3], 1); + a = _kortestz_mask32_u8 (mask1, mask2); + b = _kortestz_mask8_u8 (mask1, mask2); +} + +void +float_foo2 () +{ + __mmask8 mask1 = _mm_cmp_ps_mask (ps128[0], ps128[1], 1); + __mmask8 mask2 = _mm_cmp_ps_mask (ps128[2], ps128[3], 1); + a = _kortestz_mask64_u8 (mask1, mask2); + b = _kortestz_mask8_u8 (mask1, mask2); +} + +void +float_foo3 () +{ + __mmask8 mask1 = _mm256_cmp_ps_mask (ps256[0], ps256[1], 1); + __mmask8 mask2 = _mm256_cmp_ps_mask (ps256[2], ps256[3], 1); + a = _kortestz_mask16_u8 (mask1, mask2); + b = _kortestz_mask8_u8 (mask1, mask2); +} + +void +float_foo4 () +{ + __mmask8 mask1 = _mm256_cmp_ps_mask (ps256[0], ps256[1], 1); + __mmask8 mask2 = _mm256_cmp_ps_mask (ps256[2], ps256[3], 1); + a = _kortestz_mask32_u8 (mask1, mask2); + b = _kortestz_mask8_u8 (mask1, mask2); +} + +void +float_foo5 () +{ + __mmask8 mask1 = _mm256_cmp_ps_mask (ps256[0], ps256[1], 1); + __mmask8 mask2 = _mm256_cmp_ps_mask (ps256[2], ps256[3], 1); + a = _kortestz_mask64_u8 (mask1, mask2); + b = _kortestz_mask8_u8 (mask1, mask2); +} + +void +float_foo6 () +{ + __mmask16 mask1 = _mm512_cmp_ps_mask (ps512[0], ps512[1], 1); + __mmask16 mask2 = _mm512_cmp_ps_mask (ps512[2], ps512[3], 1); + a = _kortestz_mask32_u8 (mask1, mask2); + b = _kortestz_mask16_u8 (mask1, mask2); +} + +void +float_foo7 () +{ + __mmask16 mask1 = _mm512_cmp_ps_mask (ps512[0], ps512[1], 1); + __mmask16 mask2 = _mm512_cmp_ps_mask (ps512[2], ps512[3], 1); + a = _kortestz_mask64_u8 (mask1, mask2); + b = _kortestz_mask16_u8 (mask1, mask2); +} + +void +float_foo8 () +{ + __mmask8 mask1 = _mm_cmp_pd_mask (pd128[0], pd128[1], 1); + __mmask8 mask2 = _mm_cmp_pd_mask (pd128[2], pd128[3], 1); + a = _kortestz_mask16_u8 (mask1, mask2); + b = _kortestz_mask8_u8 (mask1, mask2); +} + +void +float_foo9 () +{ + __mmask8 mask1 = _mm_cmp_pd_mask (pd128[0], pd128[1], 1); + __mmask8 mask2 = _mm_cmp_pd_mask (pd128[2], pd128[3], 1); + a = _kortestz_mask32_u8 (mask1, mask2); + b = _kortestz_mask8_u8 (mask1, mask2); +} + +void +float_foo10 () +{ + __mmask8 mask1 = _mm_cmp_pd_mask (pd128[0], pd128[1], 1); + __mmask8 mask2 = _mm_cmp_pd_mask (pd128[2], pd128[3], 1); + a = _kortestz_mask64_u8 (mask1, mask2); + b = _kortestz_mask8_u8 (mask1, mask2); +} + +void +float_foo11 () +{ + __mmask8 mask1 = _mm256_cmp_pd_mask (pd256[0], pd256[1], 1); + __mmask8 mask2 = _mm256_cmp_pd_mask (pd256[2], pd256[3], 1); + a = _kortestz_mask16_u8 (mask1, mask2); + b = _kortestz_mask8_u8 (mask1, mask2); +} + +void +float_foo12 () +{ + __mmask8 mask1 = _mm256_cmp_pd_mask (pd256[0], pd256[1], 1); + __mmask8 mask2 = _mm256_cmp_pd_mask (pd256[2], pd256[3], 1); + a = _kortestz_mask32_u8 (mask1, mask2); + b = _kortestz_mask8_u8 (mask1, mask2); +} + +void +float_foo13 () +{ + __mmask8 mask1 = _mm256_cmp_pd_mask (pd256[0], pd256[1], 1); + __mmask8 mask2 = _mm256_cmp_pd_mask (pd256[2], pd256[3], 1); + a = _kortestz_mask64_u8 (mask1, mask2); + b = _kortestz_mask8_u8 (mask1, mask2); +} + +void +float_foo14 () +{ + __mmask8 mask1 = _mm512_cmp_pd_mask (pd512[0], pd512[1], 1); + __mmask8 mask2 = _mm512_cmp_pd_mask (pd512[2], pd512[3], 1); + a = _kortestz_mask16_u8 (mask1, mask2); + b = _kortestz_mask8_u8 (mask1, mask2); +} + +void +float_foo15 () +{ + __mmask8 mask1 = _mm512_cmp_pd_mask (pd512[0], pd512[1], 1); + __mmask8 mask2 = _mm512_cmp_pd_mask (pd512[2], pd512[3], 1); + a = _kortestz_mask32_u8 (mask1, mask2); + b = _kortestz_mask8_u8 (mask1, mask2); +} + +void +float_foo16 () +{ + __mmask8 mask1 = _mm512_cmp_pd_mask (pd512[0], pd512[1], 1); + __mmask8 mask2 = _mm512_cmp_pd_mask (pd512[2], pd512[3], 1); + a = _kortestz_mask64_u8 (mask1, mask2); + b = _kortestz_mask8_u8 (mask1, mask2); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-pr103750-1.c b/gcc/testsuite/gcc.target/i386/avx512fp16-pr103750-1.c new file mode 100644 index 0000000..eaf6d1e --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-pr103750-1.c @@ -0,0 +1,58 @@ +/* PR target/103750 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -mavx512fp16 -mavx512vl" } */ +/* { dg-final { scan-assembler-not "kmov" { xfail ia32 } } } */ +/* xfail need to be fixed. */ + +#include <immintrin.h> +extern __m128h* ph128; +extern __m256h* ph256; +extern __m512h* ph512; + +unsigned char +sign_foo3 () +{ + __mmask8 mask1 = _mm_cmp_ph_mask (ph128[0], ph128[1], 1); + __mmask8 mask2 = _mm_cmp_ph_mask (ph128[1], ph128[2], 1); + return _kortestz_mask16_u8 (mask1, mask2); +} + +unsigned char +sign_foo4 () +{ + __mmask8 mask1 = _mm_cmp_ph_mask (ph128[0], ph128[1], 1); + __mmask8 mask2 = _mm_cmp_ph_mask (ph128[1], ph128[2], 1); + return _kortestz_mask32_u8 (mask1, mask2); +} + +unsigned char +sign_foo5 () +{ + __mmask8 mask1 = _mm_cmp_ph_mask (ph128[0], ph128[1], 1); + __mmask8 mask2 = _mm_cmp_ph_mask (ph128[1], ph128[2], 1); + return _kortestz_mask64_u8 (mask1, mask2); +} + +unsigned char +sign_foo6 () +{ + __mmask16 mask1 = _mm256_cmp_ph_mask (ph256[0], ph256[1], 1); + __mmask16 mask2 = _mm256_cmp_ph_mask (ph256[2], ph256[3], 1); + return _kortestz_mask32_u8 (mask1, mask2); +} + +unsigned char +sign_foo7 () +{ + __mmask16 mask1 = _mm256_cmp_ph_mask (ph256[0], ph256[1], 1); + __mmask16 mask2 = _mm256_cmp_ph_mask (ph256[2], ph256[3], 1); + return _kortestz_mask64_u8 (mask1, mask2); +} + +unsigned char +sign_foo8 () +{ + __mmask32 mask1 = _mm512_cmp_ph_mask (ph512[0], ph512[1], 1); + __mmask32 mask2 = _mm512_cmp_ph_mask (ph512[2], ph512[3], 1); + return _kortestz_mask64_u8 (mask1, mask2); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-pr103750-2.c b/gcc/testsuite/gcc.target/i386/avx512fp16-pr103750-2.c new file mode 100644 index 0000000..3d3a033 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-pr103750-2.c @@ -0,0 +1,71 @@ +/* PR target/103750 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -mavx512fp16 -mavx512vl -mavx512dq" } */ +/* { dg-final { scan-assembler-not "kmov" { xfail ia32 } } } */ +/* xfail need to be fixed. */ + +#include <immintrin.h> +extern __m128h* ph128; +extern __m256h* ph256; +extern __m512h* ph512; + +extern char a, b; +void +sign_foo3 () +{ + __mmask8 mask1 = _mm_cmp_ph_mask (ph128[0], ph128[1], 1); + __mmask8 mask2 = _mm_cmp_ph_mask (ph128[1], ph128[2], 1); + + a = _kortestz_mask16_u8 (mask1, mask2); + b = _kortestz_mask8_u8 (mask1, mask2); +} + +void +sign_foo4 () +{ + __mmask8 mask1 = _mm_cmp_ph_mask (ph128[0], ph128[1], 1); + __mmask8 mask2 = _mm_cmp_ph_mask (ph128[1], ph128[2], 1); + + a = _kortestz_mask32_u8 (mask1, mask2); + b = _kortestz_mask8_u8 (mask1, mask2); +} + +void +sign_foo5 () +{ + __mmask8 mask1 = _mm_cmp_ph_mask (ph128[0], ph128[1], 1); + __mmask8 mask2 = _mm_cmp_ph_mask (ph128[1], ph128[2], 1); + + a = _kortestz_mask64_u8 (mask1, mask2); + b = _kortestz_mask8_u8 (mask1, mask2); +} + +void +sign_foo6 () +{ + __mmask16 mask1 = _mm256_cmp_ph_mask (ph256[0], ph256[1], 1); + __mmask16 mask2 = _mm256_cmp_ph_mask (ph256[2], ph256[3], 1); + + a = _kortestz_mask32_u8 (mask1, mask2); + b = _kortestz_mask16_u8 (mask1, mask2); +} + +void +sign_foo7 () +{ + __mmask16 mask1 = _mm256_cmp_ph_mask (ph256[0], ph256[1], 1); + __mmask16 mask2 = _mm256_cmp_ph_mask (ph256[2], ph256[3], 1); + + a = _kortestz_mask64_u8 (mask1, mask2); + b = _kortestz_mask16_u8 (mask1, mask2); +} + +void +sign_foo8 () +{ + __mmask32 mask1 = _mm512_cmp_ph_mask (ph512[0], ph512[1], 1); + __mmask32 mask2 = _mm512_cmp_ph_mask (ph512[2], ph512[3], 1); + + a = _kortestz_mask64_u8 (mask1, mask2); + b = _kortestz_mask32_u8 (mask1, mask2); +} |