aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorliuhongt <hongtao.liu@intel.com>2021-11-29 10:01:42 +0800
committerliuhongt <hongtao.liu@intel.com>2021-11-29 17:46:00 +0800
commit11d0a2af33910c6d243e7265fb7ea04d2bc89b25 (patch)
tree7440f7df96b33f56d7fec5987da2a6e0b1e3899c
parent9519b694afbf9a35c36cf9f14d35d1c0e9e8cacc (diff)
downloadgcc-11d0a2af33910c6d243e7265fb7ea04d2bc89b25.zip
gcc-11d0a2af33910c6d243e7265fb7ea04d2bc89b25.tar.gz
gcc-11d0a2af33910c6d243e7265fb7ea04d2bc89b25.tar.bz2
Optimize _Float16 usage for non AVX512FP16.
1. No memory is needed to move HI/HFmode between GPR and SSE registers under TARGET_SSE2 and above, pinsrw/pextrw are used for them w/o AVX512FP16. 2. Use gen_sse2_pinsrph/gen_vec_setv4sf_0 to replace ix86_expand_vector_set in extendhfsf2/truncsfhf2 so that redundant initialization could be eliminated. gcc/ChangeLog: PR target/102811 * config/i386/i386.c (inline_secondary_memory_needed): HImode move between GPR and SSE registers is supported under TARGET_SSE2 and above. * config/i386/i386.md (extendhfsf2): Optimize expander. (truncsfhf2): Ditto. * config/i386/sse.md (sse2p4_1): Adjust attr for V8HFmode to align with V8HImode. gcc/testsuite/ChangeLog: * gcc.target/i386/pr102811-2.c: New test. * gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c: Add new scan-assembler-times.
-rw-r--r--gcc/config/i386/i386.c5
-rw-r--r--gcc/config/i386/i386.md18
-rw-r--r--gcc/config/i386/sse.md2
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c2
-rw-r--r--gcc/testsuite/gcc.target/i386/pr102811-2.c22
5 files changed, 41 insertions, 8 deletions
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 7cf599f..2657e78 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -19437,8 +19437,9 @@ inline_secondary_memory_needed (machine_mode mode, reg_class_t class1,
if (msize > UNITS_PER_WORD)
return true;
- /* In addition to SImode moves, AVX512FP16 also enables HImode moves. */
- int minsize = GET_MODE_SIZE (TARGET_AVX512FP16 ? HImode : SImode);
+ /* In addition to SImode moves, HImode moves are supported for SSE2 and above,
+ Use vmovw with AVX512FP16, or pinsrw/pextrw without AVX512FP16. */
+ int minsize = GET_MODE_SIZE (TARGET_SSE2 ? HImode : SImode);
if (msize < minsize)
return true;
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 12ea513..a384dae 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -4608,9 +4608,18 @@
if (!TARGET_AVX512FP16)
{
rtx res = gen_reg_rtx (V4SFmode);
- rtx tmp = force_reg (V8HFmode, CONST0_RTX (V8HFmode));
+ rtx tmp = gen_reg_rtx (V8HFmode);
+ rtx zero = force_reg (V8HFmode, CONST0_RTX (V8HFmode));
- ix86_expand_vector_set (false, tmp, operands[1], 0);
+ if (TARGET_AVX2)
+ {
+ rtx dup = gen_reg_rtx (V8HFmode);
+ emit_move_insn (dup, gen_rtx_VEC_DUPLICATE (V8HFmode, operands[1]));
+ emit_move_insn (tmp, gen_rtx_VEC_MERGE (V8HFmode, dup,
+ zero, const1_rtx));
+ }
+ else
+ emit_insn (gen_sse2_pinsrph (tmp, zero, operands[1], const1_rtx));
emit_insn (gen_vcvtph2ps (res, gen_lowpart (V8HImode, tmp)));
emit_move_insn (operands[0], gen_lowpart (SFmode, res));
DONE;
@@ -4824,9 +4833,10 @@
if (!TARGET_AVX512FP16)
{
rtx res = gen_reg_rtx (V8HFmode);
- rtx tmp = force_reg (V4SFmode, CONST0_RTX (V4SFmode));
+ rtx tmp = gen_reg_rtx (V4SFmode);
+ rtx zero = force_reg (V4SFmode, CONST0_RTX (V4SFmode));
- ix86_expand_vector_set (false, tmp, operands[1], 0);
+ emit_insn (gen_vec_setv4sf_0 (tmp, zero, operands[1]));
emit_insn (gen_vcvtps2ph (gen_lowpart (V8HImode, res), tmp, GEN_INT (4)));
emit_move_insn (operands[0], gen_lowpart (HFmode, res));
DONE;
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 5229b23..b371b14 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -17272,7 +17272,7 @@
(V2DI "TARGET_SSE4_1 && TARGET_64BIT")])
(define_mode_attr sse2p4_1
- [(V16QI "sse4_1") (V8HI "sse2") (V8HF "sse4_1")
+ [(V16QI "sse4_1") (V8HI "sse2") (V8HF "sse2")
(V4SI "sse4_1") (V2DI "sse4_1")])
(define_mode_attr pinsr_evex_isa
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c b/gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c
index dfbfb16..9a6c432 100644
--- a/gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c
@@ -1,6 +1,6 @@
/* { dg-do compile } */
/* { dg-options "-O2 -mf16c -mno-avx512fp16" } */
-/* { dg-final { scan-assembler-times "vpxor\[ \\t\]" 2 } } */
+/* { dg-final { scan-assembler-times "vpxor\[ \\t\]" 1 } } */
/* { dg-final { scan-assembler-times "vcvtph2ps\[ \\t\]" 2 } } */
/* { dg-final { scan-assembler-times "vcvtps2ph\[ \\t\]" 1 } } */
/* { dg-final { scan-assembler-not "__truncsfhf2\[ \\t\]"} } */
diff --git a/gcc/testsuite/gcc.target/i386/pr102811-2.c b/gcc/testsuite/gcc.target/i386/pr102811-2.c
new file mode 100644
index 0000000..e511c66
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr102811-2.c
@@ -0,0 +1,22 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mf16c -mno-avx512fp16" } */
+/* { dg-final { scan-assembler-times "pextrw" 1 } } */
+/* { dg-final { scan-assembler-times "pinsrw" 1 } } */
+/* { dg-final { scan-assembler-not "\\\(%rsp\\\)"} } */
+short test (_Float16 a)
+{
+ union{
+ short b;
+ _Float16 a;}u;
+ u.a = a;
+ return u.b;
+}
+
+_Float16 test1 (short a)
+{
+ union{
+ _Float16 b;
+ short a;}u;
+ u.a = a;
+ return u.b;
+}