 gcc/ira-build.c                                | 55
 gcc/ira-color.c                                | 53
 gcc/ira-int.h                                  | 37
 gcc/testsuite/gcc.target/aarch64/reg-alloc-2.c | 47
 4 files changed, 169 insertions(+), 23 deletions(-)
diff --git a/gcc/ira-build.c b/gcc/ira-build.c
index 0547edf..875b4d8 100644
--- a/gcc/ira-build.c
+++ b/gcc/ira-build.c
@@ -497,6 +497,7 @@ ira_create_allocno (int regno, bool cap_p,
bitmap_set_bit (loop_tree_node->all_allocnos, ALLOCNO_NUM (a));
ALLOCNO_NREFS (a) = 0;
ALLOCNO_FREQ (a) = 0;
+ ALLOCNO_MIGHT_CONFLICT_WITH_PARENT_P (a) = false;
ALLOCNO_HARD_REGNO (a) = -1;
ALLOCNO_CALL_FREQ (a) = 0;
ALLOCNO_CALLS_CROSSED_NUM (a) = 0;
@@ -1991,6 +1992,35 @@ propagate_modified_regnos (ira_loop_tree_node_t loop_tree_node)
loop_tree_node->modified_regnos);
}
+/* Propagate ALLOCNO_HARD_REG_COSTS from A to PARENT_A. Use SPILL_COST
+ as the cost of spilling a register throughout A (which we have to do
+ for PARENT_A allocations that conflict with A). */
+static void
+ira_propagate_hard_reg_costs (ira_allocno_t parent_a, ira_allocno_t a,
+ int spill_cost)
+{
+ HARD_REG_SET conflicts = ira_total_conflict_hard_regs (a);
+ conflicts &= ~ira_total_conflict_hard_regs (parent_a);
+
+ auto costs = ALLOCNO_HARD_REG_COSTS (a);
+ if (!hard_reg_set_empty_p (conflicts))
+ ALLOCNO_MIGHT_CONFLICT_WITH_PARENT_P (a) = true;
+ else if (!costs)
+ return;
+
+ auto aclass = ALLOCNO_CLASS (a);
+ ira_allocate_and_set_costs (&ALLOCNO_HARD_REG_COSTS (parent_a),
+ aclass, ALLOCNO_CLASS_COST (parent_a));
+ auto parent_costs = ALLOCNO_HARD_REG_COSTS (parent_a);
+ for (int i = 0; i < ira_class_hard_regs_num[aclass]; ++i)
+ if (TEST_HARD_REG_BIT (conflicts, ira_class_hard_regs[aclass][i]))
+ parent_costs[i] += spill_cost;
+ else if (costs)
+ /* The cost to A of allocating this register to PARENT_A can't
+ be more than the cost of spilling the register throughout A. */
+ parent_costs[i] += MIN (costs[i], spill_cost);
+}
+
/* Propagate new info about allocno A (see comments about accumulated
info in allocno definition) to the corresponding allocno on upper
loop tree level. So allocnos on upper levels accumulate
@@ -2019,12 +2049,27 @@ propagate_allocno_info (void)
&& bitmap_bit_p (ALLOCNO_LOOP_TREE_NODE (a)->border_allocnos,
ALLOCNO_NUM (a)))
{
+ /* Calculate the cost of storing to memory on entry to A's loop,
+ referencing as memory within A's loop, and restoring from
+ memory on exit from A's loop. */
+ ira_loop_border_costs border_costs (a);
+ int spill_cost = INT_MAX;
+ if (ira_subloop_allocnos_can_differ_p (parent_a))
+ spill_cost = (border_costs.spill_inside_loop_cost ()
+ + ALLOCNO_MEMORY_COST (a));
+
if (! ALLOCNO_BAD_SPILL_P (a))
ALLOCNO_BAD_SPILL_P (parent_a) = false;
ALLOCNO_NREFS (parent_a) += ALLOCNO_NREFS (a);
ALLOCNO_FREQ (parent_a) += ALLOCNO_FREQ (a);
+
+ /* If A's allocation can differ from PARENT_A's, we can if necessary
+ spill PARENT_A on entry to A's loop and restore it afterwards.
+ Doing that has cost SPILL_COST. */
+ if (!ira_subloop_allocnos_can_differ_p (parent_a))
+ merge_hard_reg_conflicts (a, parent_a, true);
+
ALLOCNO_CALL_FREQ (parent_a) += ALLOCNO_CALL_FREQ (a);
- merge_hard_reg_conflicts (a, parent_a, true);
ALLOCNO_CALLS_CROSSED_NUM (parent_a)
+= ALLOCNO_CALLS_CROSSED_NUM (a);
ALLOCNO_CHEAP_CALLS_CROSSED_NUM (parent_a)
@@ -2037,15 +2082,15 @@ propagate_allocno_info (void)
+= ALLOCNO_EXCESS_PRESSURE_POINTS_NUM (a);
aclass = ALLOCNO_CLASS (a);
ira_assert (aclass == ALLOCNO_CLASS (parent_a));
- ira_allocate_and_accumulate_costs
- (&ALLOCNO_HARD_REG_COSTS (parent_a), aclass,
- ALLOCNO_HARD_REG_COSTS (a));
+ ira_propagate_hard_reg_costs (parent_a, a, spill_cost);
ira_allocate_and_accumulate_costs
(&ALLOCNO_CONFLICT_HARD_REG_COSTS (parent_a),
aclass,
ALLOCNO_CONFLICT_HARD_REG_COSTS (a));
+ /* The cost to A of allocating a register to PARENT_A can't be
+ more than the cost of spilling the register throughout A. */
ALLOCNO_CLASS_COST (parent_a)
- += ALLOCNO_CLASS_COST (a);
+ += MIN (ALLOCNO_CLASS_COST (a), spill_cost);
ALLOCNO_MEMORY_COST (parent_a) += ALLOCNO_MEMORY_COST (a);
}
}
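To make the capping rule in ira_propagate_hard_reg_costs above easier to follow, here is a minimal standalone sketch of the same idea (not part of the patch), assuming plain int arrays and hypothetical names (propagate_costs, parent_costs, child_costs, conflicts) in place of the real IRA allocno data: a hard register that conflicts inside the subloop is charged the full spill cost, while every other register is charged at most the spill cost.

  // Standalone sketch, not GCC code: propagate per-hard-register costs
  // from a subloop allocno to its parent, capping each contribution at
  // the cost of spilling the register throughout the subloop.
  #include <algorithm>
  #include <cstdio>

  constexpr int NREGS = 4;

  void
  propagate_costs (int parent_costs[], const int child_costs[],
                   const bool conflicts[], int spill_cost)
  {
    for (int i = 0; i < NREGS; ++i)
      if (conflicts[i])
        // The parent cannot keep this register live across the subloop,
        // so using it in the parent means spilling it around the subloop.
        parent_costs[i] += spill_cost;
      else
        // Using the register in the parent costs at most a spill.
        parent_costs[i] += std::min (child_costs[i], spill_cost);
  }

  int
  main ()
  {
    int parent[NREGS] = { 10, 10, 10, 10 };
    int child[NREGS] = { 2, 50, 8, 120 };
    bool conflicts[NREGS] = { false, false, true, false };
    propagate_costs (parent, child, conflicts, /*spill_cost=*/40);
    for (int i = 0; i < NREGS; ++i)
      printf ("reg %d: %d\n", i, parent[i]);  // prints 12, 50, 50, 50
    return 0;
  }

With spill_cost = 40, the register that conflicts (index 2) and the register whose subloop cost exceeds the spill cost (index 3) are both charged 40, which is what stops one expensive subloop from vetoing an otherwise good allocation in the parent.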
diff --git a/gcc/ira-color.c b/gcc/ira-color.c
index ae0f08a..4344ee6 100644
--- a/gcc/ira-color.c
+++ b/gcc/ira-color.c
@@ -3344,7 +3344,7 @@ color_pass (ira_loop_tree_node_t loop_tree_node)
unsigned int j;
bitmap_iterator bi;
machine_mode mode;
- enum reg_class rclass, aclass, pclass;
+ enum reg_class rclass, aclass;
ira_allocno_t a, subloop_allocno;
ira_loop_tree_node_t subloop_node;
@@ -3389,10 +3389,9 @@ color_pass (ira_loop_tree_node_t loop_tree_node)
/* Remove from processing in the next loop. */
bitmap_clear_bit (consideration_allocno_bitmap, j);
rclass = ALLOCNO_CLASS (a);
- pclass = ira_pressure_class_translate[rclass];
- if (flag_ira_region == IRA_REGION_MIXED
- && (loop_tree_node->reg_pressure[pclass]
- <= ira_class_hard_regs_num[pclass]))
+ subloop_allocno = ALLOCNO_CAP_MEMBER (a);
+ subloop_node = ALLOCNO_LOOP_TREE_NODE (subloop_allocno);
+ if (ira_single_region_allocno_p (a, subloop_allocno))
{
mode = ALLOCNO_MODE (a);
hard_regno = ALLOCNO_HARD_REGNO (a);
@@ -3402,8 +3401,6 @@ color_pass (ira_loop_tree_node_t loop_tree_node)
ira_assert (index >= 0);
}
regno = ALLOCNO_REGNO (a);
- subloop_allocno = ALLOCNO_CAP_MEMBER (a);
- subloop_node = ALLOCNO_LOOP_TREE_NODE (subloop_allocno);
ira_assert (!ALLOCNO_ASSIGNED_P (subloop_allocno));
ALLOCNO_HARD_REGNO (subloop_allocno) = hard_regno;
ALLOCNO_ASSIGNED_P (subloop_allocno) = true;
@@ -3426,7 +3423,6 @@ color_pass (ira_loop_tree_node_t loop_tree_node)
ira_assert (ALLOCNO_CAP_MEMBER (a) == NULL);
mode = ALLOCNO_MODE (a);
rclass = ALLOCNO_CLASS (a);
- pclass = ira_pressure_class_translate[rclass];
hard_regno = ALLOCNO_HARD_REGNO (a);
/* Use hard register class here. ??? */
if (hard_regno >= 0)
@@ -3443,11 +3439,11 @@ color_pass (ira_loop_tree_node_t loop_tree_node)
ira_assert (ALLOCNO_CLASS (subloop_allocno) == rclass);
ira_assert (bitmap_bit_p (subloop_node->all_allocnos,
ALLOCNO_NUM (subloop_allocno)));
- if ((flag_ira_region == IRA_REGION_MIXED
- && (loop_tree_node->reg_pressure[pclass]
- <= ira_class_hard_regs_num[pclass]))
+ if (ira_single_region_allocno_p (a, subloop_allocno)
|| !ira_subloop_allocnos_can_differ_p (a, hard_regno >= 0))
{
+ gcc_assert (!ALLOCNO_MIGHT_CONFLICT_WITH_PARENT_P
+ (subloop_allocno));
if (! ALLOCNO_ASSIGNED_P (subloop_allocno))
{
ALLOCNO_HARD_REGNO (subloop_allocno) = hard_regno;
@@ -3583,14 +3579,35 @@ move_spill_restore (void)
if (subloop_allocno == NULL)
continue;
ira_assert (rclass == ALLOCNO_CLASS (subloop_allocno));
- /* We have accumulated cost. To get the real cost of
- allocno usage in the loop we should subtract costs of
- the subloop allocnos. */
- cost -= (ALLOCNO_MEMORY_COST (subloop_allocno)
- - (ALLOCNO_HARD_REG_COSTS (subloop_allocno) == NULL
- ? ALLOCNO_CLASS_COST (subloop_allocno)
- : ALLOCNO_HARD_REG_COSTS (subloop_allocno)[index]));
ira_loop_border_costs border_costs (subloop_allocno);
+
+ /* We have accumulated cost. To get the real cost of
+ allocno usage in the loop we should subtract the costs
+ added by propagate_allocno_info for the subloop allocnos. */
+ int reg_cost
+ = (ALLOCNO_HARD_REG_COSTS (subloop_allocno) == NULL
+ ? ALLOCNO_CLASS_COST (subloop_allocno)
+ : ALLOCNO_HARD_REG_COSTS (subloop_allocno)[index]);
+
+ int spill_cost
+ = (border_costs.spill_inside_loop_cost ()
+ + ALLOCNO_MEMORY_COST (subloop_allocno));
+
+ /* If HARD_REGNO conflicts with SUBLOOP_A then
+ propagate_allocno_info will have propagated
+ the cost of spilling HARD_REGNO in SUBLOOP_NODE.
+ (ira_subloop_allocnos_can_differ_p must be true
+ in that case.) Otherwise, SPILL_COST acted as
+ a cap on the propagated register cost, in cases
+ where the allocations can differ. */
+ auto conflicts = ira_total_conflict_hard_regs (subloop_allocno);
+ if (TEST_HARD_REG_BIT (conflicts, hard_regno))
+ reg_cost = spill_cost;
+ else if (ira_subloop_allocnos_can_differ_p (a))
+ reg_cost = MIN (reg_cost, spill_cost);
+
+ cost -= ALLOCNO_MEMORY_COST (subloop_allocno) - reg_cost;
+
if ((hard_regno2 = ALLOCNO_HARD_REGNO (subloop_allocno)) < 0)
/* The register was spilled in the subloop. If we spill
it in the outer loop too then we'll no longer need to
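The move_spill_restore change mirrors the propagation above: it has to undo exactly what propagate_allocno_info added for each subloop allocno. Here is a rough standalone sketch of that per-subloop adjustment (not part of the patch), assuming scalar stand-ins (accumulated_reg_cost, spill_cost, memory_cost, conflicts_with_hard_regno, can_differ) for the values the patch reads from the allocno:

  // Standalone sketch, not GCC code: the reg_cost selection that
  // move_spill_restore now applies per subloop allocno.
  #include <algorithm>
  #include <cstdio>

  int
  subloop_adjustment (int accumulated_reg_cost, int spill_cost,
                      int memory_cost, bool conflicts_with_hard_regno,
                      bool can_differ)
  {
    int reg_cost = accumulated_reg_cost;
    if (conflicts_with_hard_regno)
      // propagate_allocno_info charged a full spill for this register.
      reg_cost = spill_cost;
    else if (can_differ)
      // SPILL_COST acted as a cap on the propagated register cost.
      reg_cost = std::min (reg_cost, spill_cost);
    // Amount to subtract from the accumulated cost to recover the cost
    // of keeping the allocno in a register in this loop alone.
    return memory_cost - reg_cost;
  }

  int
  main ()
  {
    // Example: memory cost 60, propagated register cost 45, spill cost 40.
    printf ("%d\n", subloop_adjustment (45, 40, 60, false, true));  // 20
    printf ("%d\n", subloop_adjustment (45, 40, 60, true, true));   // 20
    printf ("%d\n", subloop_adjustment (45, 40, 60, false, false)); // 15
    return 0;
  }

In the patch itself the returned quantity corresponds to the subtraction cost -= ALLOCNO_MEMORY_COST (subloop_allocno) - reg_cost, so the value removed here matches the contribution computed in propagate_allocno_info.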
diff --git a/gcc/ira-int.h b/gcc/ira-int.h
index c5b1a13..8b87498 100644
--- a/gcc/ira-int.h
+++ b/gcc/ira-int.h
@@ -314,6 +314,13 @@ struct ira_allocno
vector where a bit with given index represents allocno with the
same number. */
unsigned int conflict_vec_p : 1;
+ /* True if the parent loop has an allocno for the same register and
+ if the parent allocno's assignment might not be valid in this loop.
+ This means that we cannot merge this allocno and the parent allocno
+ together.
+
+ This is only ever true for non-cap allocnos. */
+ unsigned int might_conflict_with_parent_p : 1;
/* Hard register assigned to given allocno. Negative value means
that memory was allocated to the allocno. During the reload,
spilled allocno has value equal to the corresponding stack slot
@@ -423,6 +430,8 @@ struct ira_allocno
#define ALLOCNO_CAP_MEMBER(A) ((A)->cap_member)
#define ALLOCNO_NREFS(A) ((A)->nrefs)
#define ALLOCNO_FREQ(A) ((A)->freq)
+#define ALLOCNO_MIGHT_CONFLICT_WITH_PARENT_P(A) \
+ ((A)->might_conflict_with_parent_p)
#define ALLOCNO_HARD_REGNO(A) ((A)->hard_regno)
#define ALLOCNO_CALL_FREQ(A) ((A)->call_freq)
#define ALLOCNO_CALLS_CROSSED_NUM(A) ((A)->calls_crossed_num)
@@ -1623,4 +1632,32 @@ ira_subloop_allocnos_can_differ_p (ira_allocno_t a, bool allocated_p = true)
return true;
}
+/* Return true if we should treat A and SUBLOOP_A as belonging to a
+ single region. */
+inline bool
+ira_single_region_allocno_p (ira_allocno_t a, ira_allocno_t subloop_a)
+{
+ if (flag_ira_region != IRA_REGION_MIXED)
+ return false;
+
+ if (ALLOCNO_MIGHT_CONFLICT_WITH_PARENT_P (subloop_a))
+ return false;
+
+ auto rclass = ALLOCNO_CLASS (a);
+ auto pclass = ira_pressure_class_translate[rclass];
+ auto loop_used_regs = ALLOCNO_LOOP_TREE_NODE (a)->reg_pressure[pclass];
+ return loop_used_regs <= ira_class_hard_regs_num[pclass];
+}
+
+/* Return the set of all hard registers that conflict with A. */
+inline HARD_REG_SET
+ira_total_conflict_hard_regs (ira_allocno_t a)
+{
+ auto obj_0 = ALLOCNO_OBJECT (a, 0);
+ HARD_REG_SET conflicts = OBJECT_TOTAL_CONFLICT_HARD_REGS (obj_0);
+ for (int i = 1; i < ALLOCNO_NUM_OBJECTS (a); i++)
+ conflicts |= OBJECT_TOTAL_CONFLICT_HARD_REGS (ALLOCNO_OBJECT (a, i));
+ return conflicts;
+}
+
#endif /* GCC_IRA_INT_H */
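For readers unfamiliar with IRA's object model, the new ira_total_conflict_hard_regs helper simply unions the conflict sets of all of an allocno's objects (a multi-word allocno has roughly one object per word). A standalone sketch of that idea (not part of the patch), using std::bitset in place of HARD_REG_SET and a vector in place of the allocno's objects:

  // Standalone sketch, not GCC code: union the per-object conflict sets,
  // as ira_total_conflict_hard_regs does for an allocno's objects.
  #include <bitset>
  #include <cstdio>
  #include <vector>

  constexpr int NREGS = 32;
  typedef std::bitset<NREGS> hard_reg_set;

  hard_reg_set
  total_conflict_hard_regs (const std::vector<hard_reg_set> &objects)
  {
    hard_reg_set conflicts;
    for (const hard_reg_set &obj : objects)
      conflicts |= obj;  // accumulate conflicts from every object
    return conflicts;
  }

  int
  main ()
  {
    std::vector<hard_reg_set> objs = { hard_reg_set ("0011"),
                                       hard_reg_set ("0101") };
    printf ("%zu\n", total_conflict_hard_regs (objs).count ());  // prints 3
    return 0;
  }

ira_single_region_allocno_p then combines this with register pressure: the parent and subloop allocnos are treated as one region only under -fira-region=mixed, when the subloop allocno is not marked might_conflict_with_parent_p, and when the loop's pressure for the allocno's pressure class fits in the available hard registers.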
diff --git a/gcc/testsuite/gcc.target/aarch64/reg-alloc-2.c b/gcc/testsuite/gcc.target/aarch64/reg-alloc-2.c
new file mode 100644
index 0000000..d4d260c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/reg-alloc-2.c
@@ -0,0 +1,47 @@
+/* { dg-options "-O2 -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
+
+#define PROB 0.1
+
+struct L
+{
+ int data;
+ volatile struct L *next;
+ volatile struct L *inner;
+};
+
+/* The thing we're testing here is that the !head->inner path of the outer loop
+ body has no stack accesses. It's possible that we'll need to update this
+ pattern for unrelated code changes, but the test should be XFAILed rather
+ than changed if any new stack accesses occur on the !head->inner path. */
+/*
+** foo:
+** ...
+** ldr (w[0-9]+), \[(x[0-9]+)\]
+** add (w[0-9]+), (?:\3, \1|\1, \3)
+** ldr (x[0-9]+), \[\2, #?16\]
+** str \3, \[\2\]
+** ldr \2, \[\2, #?8\]
+** cbn?z \4, .*
+** ...
+** ret
+*/
+void
+foo (volatile struct L *head, int inc)
+{
+ while (head)
+ {
+ inc = head->data + inc;
+ volatile struct L *inner = head->inner;
+ head->data = inc;
+ head = head->next;
+ if (__builtin_expect_with_probability (inner != 0, 0, PROB))
+ for (int i = 0; i < 1000; ++i)
+ /* Leave x30 for i. */
+ asm volatile ("// foo" :::
+ "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
+ "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
+ "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
+ "x24", "x25", "x26", "x27", "x28");
+ }
+}