diff options
author | liuhongt <hongtao.liu@intel.com> | 2021-11-29 10:01:42 +0800 |
---|---|---|
committer | liuhongt <hongtao.liu@intel.com> | 2021-11-29 17:46:00 +0800 |
commit | 11d0a2af33910c6d243e7265fb7ea04d2bc89b25 (patch) | |
tree | 7440f7df96b33f56d7fec5987da2a6e0b1e3899c | |
parent | 9519b694afbf9a35c36cf9f14d35d1c0e9e8cacc (diff) | |
download | gcc-11d0a2af33910c6d243e7265fb7ea04d2bc89b25.zip gcc-11d0a2af33910c6d243e7265fb7ea04d2bc89b25.tar.gz gcc-11d0a2af33910c6d243e7265fb7ea04d2bc89b25.tar.bz2 |
Optimize _Float16 usage for non AVX512FP16.
1. No memory is needed to move HI/HFmode between GPR and SSE registers
under TARGET_SSE2 and above; pinsrw/pextrw are used for them without
AVX512FP16.
2. Use gen_sse2_pinsrph/gen_vec_setv4sf_0 to replace
ix86_expand_vector_set in extendhfsf2/truncsfhf2 so that redundant
initialization could be eliminated.
gcc/ChangeLog:
PR target/102811
* config/i386/i386.c (inline_secondary_memory_needed): HImode
move between GPR and SSE registers is supported under
TARGET_SSE2 and above.
* config/i386/i386.md (extendhfsf2): Optimize expander.
(truncsfhf2): Ditto.
* config/i386/sse.md (sse2p4_1): Adjust attr for V8HFmode to
align with V8HImode.
gcc/testsuite/ChangeLog:
* gcc.target/i386/pr102811-2.c: New test.
* gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c: Add new
scan-assembler-times.
-rw-r--r-- | gcc/config/i386/i386.c | 5 | ||||
-rw-r--r-- | gcc/config/i386/i386.md | 18 | ||||
-rw-r--r-- | gcc/config/i386/sse.md | 2 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c | 2 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/pr102811-2.c | 22 |
5 files changed, 41 insertions, 8 deletions
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 7cf599f..2657e78 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -19437,8 +19437,9 @@ inline_secondary_memory_needed (machine_mode mode, reg_class_t class1, if (msize > UNITS_PER_WORD) return true; - /* In addition to SImode moves, AVX512FP16 also enables HImode moves. */ - int minsize = GET_MODE_SIZE (TARGET_AVX512FP16 ? HImode : SImode); + /* In addition to SImode moves, HImode moves are supported for SSE2 and above, + Use vmovw with AVX512FP16, or pinsrw/pextrw without AVX512FP16. */ + int minsize = GET_MODE_SIZE (TARGET_SSE2 ? HImode : SImode); if (msize < minsize) return true; diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 12ea513..a384dae 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -4608,9 +4608,18 @@ if (!TARGET_AVX512FP16) { rtx res = gen_reg_rtx (V4SFmode); - rtx tmp = force_reg (V8HFmode, CONST0_RTX (V8HFmode)); + rtx tmp = gen_reg_rtx (V8HFmode); + rtx zero = force_reg (V8HFmode, CONST0_RTX (V8HFmode)); - ix86_expand_vector_set (false, tmp, operands[1], 0); + if (TARGET_AVX2) + { + rtx dup = gen_reg_rtx (V8HFmode); + emit_move_insn (dup, gen_rtx_VEC_DUPLICATE (V8HFmode, operands[1])); + emit_move_insn (tmp, gen_rtx_VEC_MERGE (V8HFmode, dup, + zero, const1_rtx)); + } + else + emit_insn (gen_sse2_pinsrph (tmp, zero, operands[1], const1_rtx)); emit_insn (gen_vcvtph2ps (res, gen_lowpart (V8HImode, tmp))); emit_move_insn (operands[0], gen_lowpart (SFmode, res)); DONE; @@ -4824,9 +4833,10 @@ if (!TARGET_AVX512FP16) { rtx res = gen_reg_rtx (V8HFmode); - rtx tmp = force_reg (V4SFmode, CONST0_RTX (V4SFmode)); + rtx tmp = gen_reg_rtx (V4SFmode); + rtx zero = force_reg (V4SFmode, CONST0_RTX (V4SFmode)); - ix86_expand_vector_set (false, tmp, operands[1], 0); + emit_insn (gen_vec_setv4sf_0 (tmp, zero, operands[1])); emit_insn (gen_vcvtps2ph (gen_lowpart (V8HImode, res), tmp, GEN_INT (4))); emit_move_insn (operands[0], gen_lowpart 
(HFmode, res)); DONE; diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 5229b23..b371b14 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -17272,7 +17272,7 @@ (V2DI "TARGET_SSE4_1 && TARGET_64BIT")]) (define_mode_attr sse2p4_1 - [(V16QI "sse4_1") (V8HI "sse2") (V8HF "sse4_1") + [(V16QI "sse4_1") (V8HI "sse2") (V8HF "sse2") (V4SI "sse4_1") (V2DI "sse4_1")]) (define_mode_attr pinsr_evex_isa diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c b/gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c index dfbfb16..9a6c432 100644 --- a/gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c +++ b/gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c @@ -1,6 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-O2 -mf16c -mno-avx512fp16" } */ -/* { dg-final { scan-assembler-times "vpxor\[ \\t\]" 2 } } */ +/* { dg-final { scan-assembler-times "vpxor\[ \\t\]" 1 } } */ /* { dg-final { scan-assembler-times "vcvtph2ps\[ \\t\]" 2 } } */ /* { dg-final { scan-assembler-times "vcvtps2ph\[ \\t\]" 1 } } */ /* { dg-final { scan-assembler-not "__truncsfhf2\[ \\t\]"} } */ diff --git a/gcc/testsuite/gcc.target/i386/pr102811-2.c b/gcc/testsuite/gcc.target/i386/pr102811-2.c new file mode 100644 index 0000000..e511c66 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr102811-2.c @@ -0,0 +1,22 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-options "-O2 -mf16c -mno-avx512fp16" } */ +/* { dg-final { scan-assembler-times "pextrw" 1 } } */ +/* { dg-final { scan-assembler-times "pinsrw" 1 } } */ +/* { dg-final { scan-assembler-not "\\\(%rsp\\\)"} } */ +short test (_Float16 a) +{ + union{ + short b; + _Float16 a;}u; + u.a = a; + return u.b; +} + +_Float16 test1 (short a) +{ + union{ + _Float16 b; + short a;}u; + u.a = a; + return u.b; +} |