aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorliuhongt <hongtao.liu@intel.com>2024-07-22 11:36:59 +0800
committerliuhongt <hongtao.liu@intel.com>2024-07-23 13:53:05 +0800
commita3f03891065cb9691f6e9cebce4d4542deb92a35 (patch)
treef912b073080afbb0585a2f1626750331229b4693
parentf4062e361cccc5a32597afdb6c8066c87f680276 (diff)
downloadgcc-a3f03891065cb9691f6e9cebce4d4542deb92a35.zip
gcc-a3f03891065cb9691f6e9cebce4d4542deb92a35.tar.gz
gcc-a3f03891065cb9691f6e9cebce4d4542deb92a35.tar.bz2
Relax ix86_hardreg_mov_ok after split1.
ix86_hardreg_mov_ok is added by r11-5066-gbe39636d9f68c4 > The solution proposed here is to have the x86 backend/recog prevent > early RTL passes composing instructions (that set likely_spilled hard > registers) that they (combine) can't simplify, until after reload. > We allow sets from pseudo registers, immediate constants and memory > accesses, but anything more complicated is performed via a temporary > pseudo. Not only does this simplify things for the register allocator, > but any remaining register-to-register moves are easily cleaned up > by the late optimization passes after reload, such as peephole2 and > cprop_hardreg. The restriction is mainly for rtl optimization passes before pass_combine. But split1 splits ``` (insn 17 13 18 2 (set (reg/i:V4SI 20 xmm0) (vec_merge:V4SI (const_vector:V4SI [ (const_int -1 [0xffffffffffffffff]) repeated x4 ]) (const_vector:V4SI [ (const_int 0 [0]) repeated x4 ]) (unspec:QI [ (reg:V4SF 106) (reg:V4SF 102) (const_int 0 [0]) ] UNSPEC_PCMP))) "/app/example.cpp":20:1 2929 {*avx_cmpv4sf3_1} (expr_list:REG_DEAD (reg:V4SF 102) (expr_list:REG_DEAD (reg:V4SF 106) (nil)))) ``` into: ``` (insn 23 13 24 2 (set (reg:V4SF 107) (unspec:V4SF [ (reg:V4SF 106) (reg:V4SF 102) (const_int 0 [0]) ] UNSPEC_PCMP)) "/app/example.cpp":20:1 -1 (nil)) (insn 24 23 18 2 (set (reg/i:V4SI 20 xmm0) (subreg:V4SI (reg:V4SF 107) 0)) "/app/example.cpp":20:1 -1 (nil)) ``` There're many splitters generating MOV insn with SUBREG and would have same problem. Instead of changing those splitters one by one, the patch relaxes ix86_hard_mov_ok to allow mov subreg to hard register after split1. ix86_pre_reload_split () is used to replace !reload_completed && ira_in_progress. gcc/ChangeLog: * config/i386/i386.cc (ix86_hardreg_mov_ok): Relax mov subreg to hard register after split1. gcc/testsuite/ChangeLog: * g++.target/i386/pr115982.C: New test.
-rw-r--r--gcc/config/i386/i386.cc5
-rw-r--r--gcc/testsuite/g++.target/i386/pr115982.C11
2 files changed, 13 insertions, 3 deletions
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index 9c2ebe7..77c4418 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -20212,7 +20212,7 @@ ix86_class_likely_spilled_p (reg_class_t rclass)
}
/* Return true if a set of DST by the expression SRC should be allowed.
- This prevents complex sets of likely_spilled hard regs before reload. */
+ This prevents complex sets of likely_spilled hard regs before split1. */
bool
ix86_hardreg_mov_ok (rtx dst, rtx src)
@@ -20224,8 +20224,7 @@ ix86_hardreg_mov_ok (rtx dst, rtx src)
? standard_sse_constant_p (src, GET_MODE (dst))
: x86_64_immediate_operand (src, GET_MODE (dst)))
&& ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst)))
- && !reload_completed
- && !lra_in_progress)
+ && ix86_pre_reload_split ())
return false;
return true;
}
diff --git a/gcc/testsuite/g++.target/i386/pr115982.C b/gcc/testsuite/g++.target/i386/pr115982.C
new file mode 100644
index 0000000..4b91618
--- /dev/null
+++ b/gcc/testsuite/g++.target/i386/pr115982.C
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+
+typedef float VF __attribute__((__vector_size__(16)));
+typedef int VI __attribute__((__vector_size__(16)));
+
+VI
+foo (VF x)
+{
+ return !x;
+}