aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSurya Kumari Jangala <jskumari@linux.ibm.com>2023-09-14 02:12:50 -0500
committerSurya Kumari Jangala <jskumari@linux.ibm.com>2023-09-19 20:52:34 -0500
commit677249a23243b5b51832611767366d6eb199bdc7 (patch)
tree4a2bfc759be6e5df71cc2df32fe73589b4e358aa
parentc44926fe2dc570103339dbb120914dabf655be07 (diff)
downloadgcc-677249a23243b5b51832611767366d6eb199bdc7.zip
gcc-677249a23243b5b51832611767366d6eb199bdc7.tar.gz
gcc-677249a23243b5b51832611767366d6eb199bdc7.tar.bz2
ira: Consider save/restore costs of callee-save registers [PR110071]
In improve_allocation() routine, IRA checks for each allocno if spilling any conflicting allocnos can improve the allocation of this allocno. This routine computes the cost improvement for usage of each profitable hard register for a given allocno. The existing code in improve_allocation() does not consider the save/restore costs of callee save registers while computing the cost improvement. This can result in a callee save register being assigned to a pseudo that is live in the entire function and across a call, overriding a non-callee save register assigned to the pseudo by graph coloring. So the entry basic block requires a prolog, thereby causing shrink wrap to fail. 2023-09-14 Surya Kumari Jangala <jskumari@linux.ibm.com> gcc/ PR rtl-optimization/110071 * ira-color.cc (improve_allocation): Consider cost of callee save registers. gcc/testsuite/ PR rtl-optimization/110071 * gcc.target/powerpc/pr110071.c: New test.
-rw-r--r--gcc/ira-color.cc21
-rw-r--r--gcc/testsuite/gcc.target/powerpc/pr110071.c15
2 files changed, 35 insertions, 1 deletions
diff --git a/gcc/ira-color.cc b/gcc/ira-color.cc
index 5807d6d..f2e8ea3 100644
--- a/gcc/ira-color.cc
+++ b/gcc/ira-color.cc
@@ -3150,13 +3150,15 @@ improve_allocation (void)
int j, k, n, hregno, conflict_hregno, base_cost, class_size, word, nwords;
int check, spill_cost, min_cost, nregs, conflict_nregs, r, best;
bool try_p;
- enum reg_class aclass;
+ enum reg_class aclass, rclass;
machine_mode mode;
int *allocno_costs;
int costs[FIRST_PSEUDO_REGISTER];
HARD_REG_SET conflicting_regs[2], profitable_hard_regs;
ira_allocno_t a;
bitmap_iterator bi;
+ int saved_nregs;
+ int add_cost;
/* Don't bother to optimize the code with static chain pointer and
non-local goto in order not to spill the chain pointer
@@ -3194,6 +3196,7 @@ improve_allocation (void)
conflicting_regs,
&profitable_hard_regs);
class_size = ira_class_hard_regs_num[aclass];
+ mode = ALLOCNO_MODE (a);
/* Set up cost improvement for usage of each profitable hard
register for allocno A. */
for (j = 0; j < class_size; j++)
@@ -3207,6 +3210,22 @@ improve_allocation (void)
costs[hregno] = (allocno_costs == NULL
? ALLOCNO_UPDATED_CLASS_COST (a) : allocno_costs[k]);
costs[hregno] -= allocno_copy_cost_saving (a, hregno);
+
+ if ((saved_nregs = calculate_saved_nregs (hregno, mode)) != 0)
+ {
+ /* We need to save/restore the hard register in
+ epilogue/prologue. Therefore we increase the cost.
+ Since the prolog is placed in the entry BB, the frequency
+ of the entry BB is considered while computing the cost. */
+ rclass = REGNO_REG_CLASS (hregno);
+ add_cost = ((ira_memory_move_cost[mode][rclass][0]
+ + ira_memory_move_cost[mode][rclass][1])
+ * saved_nregs / hard_regno_nregs (hregno,
+ mode) - 1)
+ * REG_FREQ_FROM_BB (ENTRY_BLOCK_PTR_FOR_FN (cfun));
+ costs[hregno] += add_cost;
+ }
+
costs[hregno] -= base_cost;
if (costs[hregno] < 0)
try_p = true;
diff --git a/gcc/testsuite/gcc.target/powerpc/pr110071.c b/gcc/testsuite/gcc.target/powerpc/pr110071.c
new file mode 100644
index 0000000..282349c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr110071.c
@@ -0,0 +1,15 @@
+/* { dg-do compile { target lp64 } } */
+/* { dg-options "-O2 -fdump-rtl-pro_and_epilogue" } */
+
+/* Verify there is an early return without the prolog and shrink-wrap
+ the function. */
+void bar ();
+long
+foo (long i, long cond)
+{
+ if (cond)
+ bar ();
+ return i+1;
+}
+
+/* { dg-final { scan-rtl-dump-times "Performing shrink-wrapping" 1 "pro_and_epilogue" } } */