aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
authorJan Hubicka <hubicka@ucw.cz>2025-05-02 15:53:35 +0200
committerJan Hubicka <hubicka@ucw.cz>2025-05-02 15:54:23 +0200
commitc85148d036d17295bb2560e10020c924c83a5d13 (patch)
tree8eb00da0d387a38582f8443c2d564d82615f1823 /gcc
parent2d7a0d38e2f8e281ab2269cfe6c048410fa3c886 (diff)
downloadgcc-c85148d036d17295bb2560e10020c924c83a5d13.zip
gcc-c85148d036d17295bb2560e10020c924c83a5d13.tar.gz
gcc-c85148d036d17295bb2560e10020c924c83a5d13.tar.bz2
Make ix86 cost of VEC_SELECT equivalent to SUBREG cost 1
This patch fixes regression of imagick with PGO and AVX512 where correcting size cost of SSE operations (to be 4 instead of 2 originally cut&pasted from x87) made late combine to eliminate zero registers introduced by rapd. The problem is that cost-model mistakely accounts VEC_SELECT as real instruction while it is optimized to nothing if src==dest (which is the case of these testcases). This register is used to eliminate false dependency between source and destination of int->fp conversions. While ix86_insn_cost hook already contains logic to incrase cost of the zero-extend the costs was not enough. gcc/ChangeLog: PR target/119900 * config/i386/i386.cc (ix86_can_change_mode_class): Add TODO comment. (ix86_rtx_costs): Make VEC_SELECT equivalent to SUBREG cost 1.
Diffstat (limited to 'gcc')
-rw-r--r--gcc/config/i386/i386.cc39
1 files changed, 36 insertions, 3 deletions
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index cb348cb..0c808c2 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -20978,7 +20978,11 @@ ix86_can_change_mode_class (machine_mode from, machine_mode to,
return true;
/* x87 registers can't do subreg at all, as all values are reformatted
- to extended precision. */
+ to extended precision.
+
+ ??? middle-end queries mode changes for ALL_REGS and this makes
+ vec_series_lowpart_p to always return false. We probably should
+ restrict this to modes supported by i387 and check if it is enabled. */
if (MAYBE_FLOAT_CLASS_P (regclass))
return false;
@@ -22756,13 +22760,41 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno,
}
return false;
- case VEC_SELECT:
case VEC_CONCAT:
/* ??? Assume all of these vector manipulation patterns are
recognizable. In which case they all pretty much have the
- same cost. */
+ same cost.
+ ??? We should still recruse when computing cost. */
*total = cost->sse_op;
return true;
+
+ case VEC_SELECT:
+ /* Special case extracting lower part from the vector.
+ This by itself needs to code and most of SSE/AVX instructions have
+ packed and single forms where the single form may be represented
+ by such VEC_SELECT.
+
+ Use cost 1 (despite the fact that functionally equivalent SUBREG has
+ cost 0). Making VEC_SELECT completely free, for example instructs CSE
+ to forward propagate VEC_SELECT into
+
+ (set (reg eax) (reg src))
+
+ which then prevents fwprop and combining. See i.e.
+ gcc.target/i386/pr91103-1.c.
+
+ ??? rtvec_series_p test should be, for valid patterns, equivalent to
+ vec_series_lowpart_p but is not, since the latter calls
+ can_cange_mode_class on ALL_REGS and this return false since x87 does
+ not support subregs at all. */
+ if (rtvec_series_p (XVEC (XEXP (x, 1), 0), 0))
+ *total = rtx_cost (XEXP (x, 0), GET_MODE (XEXP (x, 0)),
+ outer_code, opno, speed) + 1;
+ else
+ /* ??? We should still recruse when computing cost. */
+ *total = cost->sse_op;
+ return true;
+
case VEC_DUPLICATE:
*total = rtx_cost (XEXP (x, 0),
GET_MODE (XEXP (x, 0)),
@@ -22780,6 +22812,7 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno,
if (TARGET_AVX512F && register_operand (mask, GET_MODE (mask)))
*total = rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed);
else
+ /* ??? We should still recruse when computing cost. */
*total = cost->sse_op;
return true;