diff options
author | liuhongt <hongtao.liu@intel.com> | 2020-05-29 13:38:49 +0800 |
---|---|---|
committer | liuhongt <hongtao.liu@intel.com> | 2020-06-04 20:04:40 +0800 |
commit | 43088bb4dadd3d14b6b594c5f9363fe879f3d7f7 (patch) | |
tree | 79ff52c44967f5e91a11e20c3269f538aac102b6 /gcc | |
parent | 53ec04cbdd192468d1975640759084f4e37fe4e8 (diff) | |
download | gcc-43088bb4dadd3d14b6b594c5f9363fe879f3d7f7.zip gcc-43088bb4dadd3d14b6b594c5f9363fe879f3d7f7.tar.gz gcc-43088bb4dadd3d14b6b594c5f9363fe879f3d7f7.tar.bz2 |
Fix zero-masking for vcvtps2ph when dest operand is memory.
When dest is memory, zero-masking is not valid; only merging-masking is available.
2020-06-24 Hongtao Liu <hongtao.liu@intel.com>
gcc/ChangeLog:
PR target/95254
* config/i386/sse.md (*vcvtps2ph_store<merge_mask_name>):
Refine from *vcvtps2ph_store<mask_name>.
(vcvtps2ph256<mask_name>): Refine constraint from vm to v.
(<mask_codefor>avx512f_vcvtps2ph512<mask_name>): Ditto.
(*vcvtps2ph256<merge_mask_name>): New define_insn.
(*avx512f_vcvtps2ph512<merge_mask_name>): Ditto.
* config/i386/subst.md (merge_mask): New define_subst.
(merge_mask_name): New define_subst_attr.
(merge_mask_operand3): Ditto.
gcc/testsuite/ChangeLog:
* gcc.target/i386/avx512f-vcvtps2ph-pr95254.c: New test.
* gcc.target/i386/avx512vl-vcvtps2ph-pr95254.c: Ditto.
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/config/i386/sse.md | 32 | ||||
-rw-r--r-- | gcc/config/i386/subst.md | 12 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/avx512f-vcvtps2ph-pr95254.c | 12 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-pr95254.c | 18 |
4 files changed, 70 insertions, 4 deletions
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 8735445..7815d77 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -21775,19 +21775,19 @@ (set_attr "prefix" "maybe_evex") (set_attr "mode" "V4SF")]) -(define_insn "*vcvtps2ph_store<mask_name>" +(define_insn "*vcvtps2ph_store<merge_mask_name>" [(set (match_operand:V4HI 0 "memory_operand" "=m") (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "v") (match_operand:SI 2 "const_0_to_255_operand" "N")] UNSPEC_VCVTPS2PH))] "TARGET_F16C || TARGET_AVX512VL" - "vcvtps2ph\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" + "vcvtps2ph\t{%2, %1, %0<merge_mask_operand3>|%0<merge_mask_operand3>, %1, %2}" [(set_attr "type" "ssecvt") (set_attr "prefix" "maybe_evex") (set_attr "mode" "V4SF")]) (define_insn "vcvtps2ph256<mask_name>" - [(set (match_operand:V8HI 0 "nonimmediate_operand" "=vm") + [(set (match_operand:V8HI 0 "register_operand" "=v") (unspec:V8HI [(match_operand:V8SF 1 "register_operand" "v") (match_operand:SI 2 "const_0_to_255_operand" "N")] UNSPEC_VCVTPS2PH))] @@ -21798,8 +21798,20 @@ (set_attr "btver2_decode" "vector") (set_attr "mode" "V8SF")]) +(define_insn "*vcvtps2ph256<merge_mask_name>" + [(set (match_operand:V8HI 0 "memory_operand" "=m") + (unspec:V8HI [(match_operand:V8SF 1 "register_operand" "v") + (match_operand:SI 2 "const_0_to_255_operand" "N")] + UNSPEC_VCVTPS2PH))] + "TARGET_F16C || TARGET_AVX512VL" + "vcvtps2ph\t{%2, %1, %0<merge_mask_operand3>|%0<merge_mask_operand3>, %1, %2}" + [(set_attr "type" "ssecvt") + (set_attr "prefix" "maybe_evex") + (set_attr "btver2_decode" "vector") + (set_attr "mode" "V8SF")]) + (define_insn "<mask_codefor>avx512f_vcvtps2ph512<mask_name>" - [(set (match_operand:V16HI 0 "nonimmediate_operand" "=vm") + [(set (match_operand:V16HI 0 "register_operand" "=v") (unspec:V16HI [(match_operand:V16SF 1 "register_operand" "v") (match_operand:SI 2 "const_0_to_255_operand" "N")] @@ -21810,6 +21822,18 @@ (set_attr "prefix" "evex") (set_attr 
"mode" "V16SF")]) +(define_insn "*avx512f_vcvtps2ph512<merge_mask_name>" + [(set (match_operand:V16HI 0 "memory_operand" "=m") + (unspec:V16HI + [(match_operand:V16SF 1 "register_operand" "v") + (match_operand:SI 2 "const_0_to_255_operand" "N")] + UNSPEC_VCVTPS2PH))] + "TARGET_AVX512F" + "vcvtps2ph\t{%2, %1, %0<merge_mask_operand3>|%0<merge_mask_operand3>, %1, %2}" + [(set_attr "type" "ssecvt") + (set_attr "prefix" "evex") + (set_attr "mode" "V16SF")]) + ;; For gather* insn patterns (define_mode_iterator VEC_GATHER_MODE [V2DI V2DF V4DI V4DF V4SI V4SF V8SI V8SF]) diff --git a/gcc/config/i386/subst.md b/gcc/config/i386/subst.md index a5ca144..58ea9dc 100644 --- a/gcc/config/i386/subst.md +++ b/gcc/config/i386/subst.md @@ -73,6 +73,18 @@ (match_operand:SUBST_V 2 "nonimm_or_0_operand" "0C") (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]) +(define_subst_attr "merge_mask_name" "merge_mask" "" "_merge_mask") +(define_subst_attr "merge_mask_operand3" "merge_mask" "" "%{%3%}") +(define_subst "merge_mask" + [(set (match_operand:SUBST_V 0) + (match_operand:SUBST_V 1))] + "TARGET_AVX512F" + [(set (match_dup 0) + (vec_merge:SUBST_V + (match_dup 1) + (match_dup 0) + (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))]) + (define_subst_attr "mask_scalar_merge_name" "mask_scalar_merge" "" "_mask") (define_subst_attr "mask_scalar_merge_operand3" "mask_scalar_merge" "" "%{%3%}") (define_subst_attr "mask_scalar_merge_operand4" "mask_scalar_merge" "" "%{%4%}") diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vcvtps2ph-pr95254.c b/gcc/testsuite/gcc.target/i386/avx512f-vcvtps2ph-pr95254.c new file mode 100644 index 0000000..9e0da94 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512f-vcvtps2ph-pr95254.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mavx512f" } */ + +#include<immintrin.h> +extern __m256i res; +void +foo (__m512 a, __mmask16 m) +{ + res = _mm512_maskz_cvtps_ph (m, a, 10); +} + +/* { dg-final { scan-assembler-not 
"vcvtps2ph\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]\[^\n\]*res\[^\n\]*\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)"} } */ diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-pr95254.c b/gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-pr95254.c new file mode 100644 index 0000000..0c685ea --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-pr95254.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mavx512vl -mavx512f" } */ + +#include<immintrin.h> +extern __m128i res; +void +foo (__m256 a, __mmask8 m) +{ + res = _mm256_maskz_cvtps_ph (m, a, 10); +} + +void +foo1 (__m128 a, __mmask8 m) +{ + res = _mm_maskz_cvtps_ph (m, a, 10); +} + +/* { dg-final { scan-assembler-not "vcvtps2ph\[ \\t\]+\[^\{\n\]*%\[xy\]mm\[0-9\]\[^\n\]*res\[^\n\]*\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)"} } */ |