diff options
author | Jan Hubicka <hubicka@ucw.cz> | 2025-05-02 15:53:35 +0200 |
---|---|---|
committer | Jan Hubicka <hubicka@ucw.cz> | 2025-05-02 15:54:23 +0200 |
commit | c85148d036d17295bb2560e10020c924c83a5d13 (patch) | |
tree | 8eb00da0d387a38582f8443c2d564d82615f1823 /gcc | |
parent | 2d7a0d38e2f8e281ab2269cfe6c048410fa3c886 (diff) | |
download | gcc-c85148d036d17295bb2560e10020c924c83a5d13.zip gcc-c85148d036d17295bb2560e10020c924c83a5d13.tar.gz gcc-c85148d036d17295bb2560e10020c924c83a5d13.tar.bz2 |
Make ix86 cost of VEC_SELECT equivalent to SUBREG cost 1
This patch fixes regression of imagick with PGO and AVX512 where correcting size
cost of SSE operations (to be 4 instead of 2 originally cut&pasted from x87)
made late combine to eliminate zero registers introduced by rapd. The problem
is that cost-model mistakely accounts VEC_SELECT as real instruction while it is
optimized to nothing if src==dest (which is the case of these testcases).
This register is used to eliminate false dependency between source and destination
of int->fp conversions.
While ix86_insn_cost hook already contains logic to incrase cost of the zero-extend
the costs was not enough.
gcc/ChangeLog:
PR target/119900
* config/i386/i386.cc (ix86_can_change_mode_class): Add TODO
comment.
(ix86_rtx_costs): Make VEC_SELECT equivalent to SUBREG cost 1.
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/config/i386/i386.cc | 39 |
1 files changed, 36 insertions, 3 deletions
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc index cb348cb..0c808c2 100644 --- a/gcc/config/i386/i386.cc +++ b/gcc/config/i386/i386.cc @@ -20978,7 +20978,11 @@ ix86_can_change_mode_class (machine_mode from, machine_mode to, return true; /* x87 registers can't do subreg at all, as all values are reformatted - to extended precision. */ + to extended precision. + + ??? middle-end queries mode changes for ALL_REGS and this makes + vec_series_lowpart_p to always return false. We probably should + restrict this to modes supported by i387 and check if it is enabled. */ if (MAYBE_FLOAT_CLASS_P (regclass)) return false; @@ -22756,13 +22760,41 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno, } return false; - case VEC_SELECT: case VEC_CONCAT: /* ??? Assume all of these vector manipulation patterns are recognizable. In which case they all pretty much have the - same cost. */ + same cost. + ??? We should still recruse when computing cost. */ *total = cost->sse_op; return true; + + case VEC_SELECT: + /* Special case extracting lower part from the vector. + This by itself needs to code and most of SSE/AVX instructions have + packed and single forms where the single form may be represented + by such VEC_SELECT. + + Use cost 1 (despite the fact that functionally equivalent SUBREG has + cost 0). Making VEC_SELECT completely free, for example instructs CSE + to forward propagate VEC_SELECT into + + (set (reg eax) (reg src)) + + which then prevents fwprop and combining. See i.e. + gcc.target/i386/pr91103-1.c. + + ??? rtvec_series_p test should be, for valid patterns, equivalent to + vec_series_lowpart_p but is not, since the latter calls + can_cange_mode_class on ALL_REGS and this return false since x87 does + not support subregs at all. */ + if (rtvec_series_p (XVEC (XEXP (x, 1), 0), 0)) + *total = rtx_cost (XEXP (x, 0), GET_MODE (XEXP (x, 0)), + outer_code, opno, speed) + 1; + else + /* ??? We should still recruse when computing cost. */ + *total = cost->sse_op; + return true; + case VEC_DUPLICATE: *total = rtx_cost (XEXP (x, 0), GET_MODE (XEXP (x, 0)), @@ -22780,6 +22812,7 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno, if (TARGET_AVX512F && register_operand (mask, GET_MODE (mask))) *total = rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed); else + /* ??? We should still recruse when computing cost. */ *total = cost->sse_op; return true; |