aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
authorRoger Sayle <roger@nextmovesoftware.com>2021-10-13 19:49:47 +0100
committerRoger Sayle <roger@nextmovesoftware.com>2021-10-13 19:49:47 +0100
commit97c320016642a40a347d558abc952cc487ad4ff6 (patch)
treee89dbe617cd88bbd6b9b683172b149d1c9ceab1c /gcc
parent90582eb6c565afb9cef89c1781af6d9a96b086e9 (diff)
downloadgcc-97c320016642a40a347d558abc952cc487ad4ff6.zip
gcc-97c320016642a40a347d558abc952cc487ad4ff6.tar.gz
gcc-97c320016642a40a347d558abc952cc487ad4ff6.tar.bz2
x86_64: Some SUBREG related optimization tweaks to i386 backend.
This patch contains two SUBREG-related optimization enabling tweaks to the x86 backend. The first change, to ix86_expand_vector_extract, cures the strange -march=cascadelake related non-determinism that affected my new test cases last week. Extracting a QImode or HImode element from an SSE vector performs a zero-extension to SImode, which is currently represented as: (set (subreg:SI (reg:QI target)) (zero_extend:SI (...)) Unfortunately, the semantics of this RTL doesn't quite match what was intended. A set of a paradoxical subreg allows the high-bits to take an arbitrary value (hence the non-determinism). A more correct representation should be: (set (reg:SI temp) (zero_extend:SI (...)) (set (reg:QI target) (subreg:QI (reg:SI temp)) Optionally with the SUBREG rtx annotated as SUBREG_PROMOTED_VAR_P to indicate that value is already zero-extended in the SUBREG_REG. The second change is very similar, which is why I've included it in this patch, where currently the early RTL optimizers can produce: (set (reg:V?? hardreg) (subreg ...)) where this instruction may require a spill/reload from memory when the modes aren't tieable. Alas the presence of the hard register prevents combine/gcse etc. optimizing this away, or reusing the result which would increase the lifetime of the hard register before reload. The solution is to treat vector hard registers the same way as the x86 backend handles scalar hard registers, and only allow sets from pseudos before register allocation, which is achieved by checking ix86_hardreg_mov_ok. Hence the above instruction is expanded and maintained as: (set (reg:V?? pseudo) (subreg ...)) (set (reg:V?? hardreg) (reg:V?? pseudo)) which allows the RTL optimizers freedom to optimize the SUBREG. 2021-10-13 Roger Sayle <roger@nextmovesoftware.com> gcc/ChangeLog * config/i386/i386-expand.c (ix86_expand_vector_move): Use a pseudo intermediate when moving a SUBREG into a hard register, by checking ix86_hardreg_mov_ok. (ix86_expand_vector_extract): Store zero-extended SImode intermediate in a pseudo, then set target using a SUBREG_PROMOTED annotated subreg. * config/i386/sse.md (mov<VMOVE>_internal): Prevent CSE creating complex (SUBREG) sets of (vector) hard registers before reload, by checking ix86_hardreg_mov_ok.
Diffstat (limited to 'gcc')
-rw-r--r--gcc/config/i386/i386-expand.c18
-rw-r--r--gcc/config/i386/sse.md3
2 files changed, 18 insertions, 3 deletions
diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c
index 6e2b792..707e146 100644
--- a/gcc/config/i386/i386-expand.c
+++ b/gcc/config/i386/i386-expand.c
@@ -615,6 +615,16 @@ ix86_expand_vector_move (machine_mode mode, rtx operands[])
return;
}
+ /* If operand0 is a hard register, make operand1 a pseudo. */
+ if (can_create_pseudo_p ()
+ && !ix86_hardreg_mov_ok (op0, op1))
+ {
+ rtx tmp = gen_reg_rtx (GET_MODE (op0));
+ emit_move_insn (tmp, op1);
+ emit_move_insn (op0, tmp);
+ return;
+ }
+
/* Make operand1 a register if it isn't already. */
if (can_create_pseudo_p ()
&& !register_operand (op0, mode)
@@ -16016,11 +16026,15 @@ ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
/* Let the rtl optimizers know about the zero extension performed. */
if (inner_mode == QImode || inner_mode == HImode)
{
+ rtx reg = gen_reg_rtx (SImode);
tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
- target = gen_lowpart (SImode, target);
+ emit_move_insn (reg, tmp);
+ tmp = gen_lowpart (inner_mode, reg);
+ SUBREG_PROMOTED_VAR_P (tmp) = 1;
+ SUBREG_PROMOTED_SET (tmp, 1);
}
- emit_insn (gen_rtx_SET (target, tmp));
+ emit_move_insn (target, tmp);
}
else
{
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index a3c4a3f..8a4ab9b 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -1269,7 +1269,8 @@
" C,<sseconstm1>,vm,v"))]
"TARGET_SSE
&& (register_operand (operands[0], <MODE>mode)
- || register_operand (operands[1], <MODE>mode))"
+ || register_operand (operands[1], <MODE>mode))
+ && ix86_hardreg_mov_ok (operands[0], operands[1])"
{
switch (get_attr_type (insn))
{