-rw-r--r--   gcc/ira-build.c                                | 55
-rw-r--r--   gcc/ira-color.c                                | 53
-rw-r--r--   gcc/ira-int.h                                  | 37
-rw-r--r--   gcc/testsuite/gcc.target/aarch64/reg-alloc-2.c | 47
4 files changed, 169 insertions, 23 deletions
diff --git a/gcc/ira-build.c b/gcc/ira-build.c
index 0547edf..875b4d8 100644
--- a/gcc/ira-build.c
+++ b/gcc/ira-build.c
@@ -497,6 +497,7 @@ ira_create_allocno (int regno, bool cap_p,
   bitmap_set_bit (loop_tree_node->all_allocnos, ALLOCNO_NUM (a));
   ALLOCNO_NREFS (a) = 0;
   ALLOCNO_FREQ (a) = 0;
+  ALLOCNO_MIGHT_CONFLICT_WITH_PARENT_P (a) = false;
   ALLOCNO_HARD_REGNO (a) = -1;
   ALLOCNO_CALL_FREQ (a) = 0;
   ALLOCNO_CALLS_CROSSED_NUM (a) = 0;
@@ -1991,6 +1992,35 @@ propagate_modified_regnos (ira_loop_tree_node_t loop_tree_node)
                      loop_tree_node->modified_regnos);
 }
 
+/* Propagate ALLOCNO_HARD_REG_COSTS from A to PARENT_A.  Use SPILL_COST
+   as the cost of spilling a register throughout A (which we have to do
+   for PARENT_A allocations that conflict with A).  */
+static void
+ira_propagate_hard_reg_costs (ira_allocno_t parent_a, ira_allocno_t a,
+                              int spill_cost)
+{
+  HARD_REG_SET conflicts = ira_total_conflict_hard_regs (a);
+  conflicts &= ~ira_total_conflict_hard_regs (parent_a);
+
+  auto costs = ALLOCNO_HARD_REG_COSTS (a);
+  if (!hard_reg_set_empty_p (conflicts))
+    ALLOCNO_MIGHT_CONFLICT_WITH_PARENT_P (a) = true;
+  else if (!costs)
+    return;
+
+  auto aclass = ALLOCNO_CLASS (a);
+  ira_allocate_and_set_costs (&ALLOCNO_HARD_REG_COSTS (parent_a),
+                              aclass, ALLOCNO_CLASS_COST (parent_a));
+  auto parent_costs = ALLOCNO_HARD_REG_COSTS (parent_a);
+  for (int i = 0; i < ira_class_hard_regs_num[aclass]; ++i)
+    if (TEST_HARD_REG_BIT (conflicts, ira_class_hard_regs[aclass][i]))
+      parent_costs[i] += spill_cost;
+    else if (costs)
+      /* The cost to A of allocating this register to PARENT_A can't
+         be more than the cost of spilling the register throughout A.  */
+      parent_costs[i] += MIN (costs[i], spill_cost);
+}
+
 /* Propagate new info about allocno A (see comments about accumulated
    info in allocno definition) to the corresponding allocno on upper
    loop tree level.  So allocnos on upper levels accumulate
@@ -2019,12 +2049,27 @@ propagate_allocno_info (void)
         && bitmap_bit_p (ALLOCNO_LOOP_TREE_NODE (a)->border_allocnos,
                          ALLOCNO_NUM (a)))
         {
+          /* Calculate the cost of storing to memory on entry to A's loop,
+             referencing as memory within A's loop, and restoring from
+             memory on exit from A's loop.  */
+          ira_loop_border_costs border_costs (a);
+          int spill_cost = INT_MAX;
+          if (ira_subloop_allocnos_can_differ_p (parent_a))
+            spill_cost = (border_costs.spill_inside_loop_cost ()
+                          + ALLOCNO_MEMORY_COST (a));
+
           if (! ALLOCNO_BAD_SPILL_P (a))
             ALLOCNO_BAD_SPILL_P (parent_a) = false;
           ALLOCNO_NREFS (parent_a) += ALLOCNO_NREFS (a);
           ALLOCNO_FREQ (parent_a) += ALLOCNO_FREQ (a);
+
+          /* If A's allocation can differ from PARENT_A's, we can if necessary
+             spill PARENT_A on entry to A's loop and restore it afterwards.
+             Doing that has cost SPILL_COST.  */
+          if (!ira_subloop_allocnos_can_differ_p (parent_a))
+            merge_hard_reg_conflicts (a, parent_a, true);
+
           ALLOCNO_CALL_FREQ (parent_a) += ALLOCNO_CALL_FREQ (a);
-          merge_hard_reg_conflicts (a, parent_a, true);
           ALLOCNO_CALLS_CROSSED_NUM (parent_a)
             += ALLOCNO_CALLS_CROSSED_NUM (a);
           ALLOCNO_CHEAP_CALLS_CROSSED_NUM (parent_a)
@@ -2037,15 +2082,15 @@ propagate_allocno_info (void)
             += ALLOCNO_EXCESS_PRESSURE_POINTS_NUM (a);
           aclass = ALLOCNO_CLASS (a);
           ira_assert (aclass == ALLOCNO_CLASS (parent_a));
-          ira_allocate_and_accumulate_costs
-            (&ALLOCNO_HARD_REG_COSTS (parent_a), aclass,
-             ALLOCNO_HARD_REG_COSTS (a));
+          ira_propagate_hard_reg_costs (parent_a, a, spill_cost);
           ira_allocate_and_accumulate_costs
             (&ALLOCNO_CONFLICT_HARD_REG_COSTS (parent_a), aclass,
              ALLOCNO_CONFLICT_HARD_REG_COSTS (a));
+          /* The cost to A of allocating a register to PARENT_A can't be
+             more than the cost of spilling the register throughout A.  */
           ALLOCNO_CLASS_COST (parent_a)
-            += ALLOCNO_CLASS_COST (a);
+            += MIN (ALLOCNO_CLASS_COST (a), spill_cost);
           ALLOCNO_MEMORY_COST (parent_a) += ALLOCNO_MEMORY_COST (a);
         }
 }
 
diff --git a/gcc/ira-color.c b/gcc/ira-color.c
index ae0f08a..4344ee6 100644
--- a/gcc/ira-color.c
+++ b/gcc/ira-color.c
@@ -3344,7 +3344,7 @@ color_pass (ira_loop_tree_node_t loop_tree_node)
   unsigned int j;
   bitmap_iterator bi;
   machine_mode mode;
-  enum reg_class rclass, aclass, pclass;
+  enum reg_class rclass, aclass;
   ira_allocno_t a, subloop_allocno;
   ira_loop_tree_node_t subloop_node;
 
@@ -3389,10 +3389,9 @@ color_pass (ira_loop_tree_node_t loop_tree_node)
         /* Remove from processing in the next loop.  */
         bitmap_clear_bit (consideration_allocno_bitmap, j);
         rclass = ALLOCNO_CLASS (a);
-        pclass = ira_pressure_class_translate[rclass];
-        if (flag_ira_region == IRA_REGION_MIXED
-            && (loop_tree_node->reg_pressure[pclass]
-                <= ira_class_hard_regs_num[pclass]))
+        subloop_allocno = ALLOCNO_CAP_MEMBER (a);
+        subloop_node = ALLOCNO_LOOP_TREE_NODE (subloop_allocno);
+        if (ira_single_region_allocno_p (a, subloop_allocno))
           {
             mode = ALLOCNO_MODE (a);
             hard_regno = ALLOCNO_HARD_REGNO (a);
@@ -3402,8 +3401,6 @@ color_pass (ira_loop_tree_node_t loop_tree_node)
                 ira_assert (index >= 0);
               }
             regno = ALLOCNO_REGNO (a);
-            subloop_allocno = ALLOCNO_CAP_MEMBER (a);
-            subloop_node = ALLOCNO_LOOP_TREE_NODE (subloop_allocno);
             ira_assert (!ALLOCNO_ASSIGNED_P (subloop_allocno));
             ALLOCNO_HARD_REGNO (subloop_allocno) = hard_regno;
             ALLOCNO_ASSIGNED_P (subloop_allocno) = true;
@@ -3426,7 +3423,6 @@ color_pass (ira_loop_tree_node_t loop_tree_node)
       ira_assert (ALLOCNO_CAP_MEMBER (a) == NULL);
       mode = ALLOCNO_MODE (a);
       rclass = ALLOCNO_CLASS (a);
-      pclass = ira_pressure_class_translate[rclass];
       hard_regno = ALLOCNO_HARD_REGNO (a);
       /* Use hard register class here.  ??? */
       if (hard_regno >= 0)
@@ -3443,11 +3439,11 @@ color_pass (ira_loop_tree_node_t loop_tree_node)
           ira_assert (ALLOCNO_CLASS (subloop_allocno) == rclass);
           ira_assert (bitmap_bit_p (subloop_node->all_allocnos,
                                     ALLOCNO_NUM (subloop_allocno)));
-          if ((flag_ira_region == IRA_REGION_MIXED
-               && (loop_tree_node->reg_pressure[pclass]
-                   <= ira_class_hard_regs_num[pclass]))
+          if (ira_single_region_allocno_p (a, subloop_allocno)
               || !ira_subloop_allocnos_can_differ_p (a, hard_regno >= 0))
             {
+              gcc_assert (!ALLOCNO_MIGHT_CONFLICT_WITH_PARENT_P
+                           (subloop_allocno));
               if (! ALLOCNO_ASSIGNED_P (subloop_allocno))
                 {
                   ALLOCNO_HARD_REGNO (subloop_allocno) = hard_regno;
@@ -3583,14 +3579,35 @@ move_spill_restore (void)
               if (subloop_allocno == NULL)
                 continue;
               ira_assert (rclass == ALLOCNO_CLASS (subloop_allocno));
-              /* We have accumulated cost.  To get the real cost of
-                 allocno usage in the loop we should subtract costs of
-                 the subloop allocnos.  */
-              cost -= (ALLOCNO_MEMORY_COST (subloop_allocno)
-                       - (ALLOCNO_HARD_REG_COSTS (subloop_allocno) == NULL
-                          ? ALLOCNO_CLASS_COST (subloop_allocno)
-                          : ALLOCNO_HARD_REG_COSTS (subloop_allocno)[index]));
               ira_loop_border_costs border_costs (subloop_allocno);
+
+              /* We have accumulated cost.  To get the real cost of
+                 allocno usage in the loop we should subtract the costs
+                 added by propagate_allocno_info for the subloop allocnos.  */
+              int reg_cost
+                = (ALLOCNO_HARD_REG_COSTS (subloop_allocno) == NULL
+                   ? ALLOCNO_CLASS_COST (subloop_allocno)
+                   : ALLOCNO_HARD_REG_COSTS (subloop_allocno)[index]);
+
+              int spill_cost
+                = (border_costs.spill_inside_loop_cost ()
+                   + ALLOCNO_MEMORY_COST (subloop_allocno));
+
+              /* If HARD_REGNO conflicts with SUBLOOP_A then
+                 propagate_allocno_info will have propagated
+                 the cost of spilling HARD_REGNO in SUBLOOP_NODE.
+                 (ira_subloop_allocnos_can_differ_p must be true
+                 in that case.)  Otherwise, SPILL_COST acted as
+                 a cap on the propagated register cost, in cases
+                 where the allocations can differ.  */
+              auto conflicts = ira_total_conflict_hard_regs (subloop_allocno);
+              if (TEST_HARD_REG_BIT (conflicts, hard_regno))
+                reg_cost = spill_cost;
+              else if (ira_subloop_allocnos_can_differ_p (a))
+                reg_cost = MIN (reg_cost, spill_cost);
+
+              cost -= ALLOCNO_MEMORY_COST (subloop_allocno) - reg_cost;
+
               if ((hard_regno2 = ALLOCNO_HARD_REGNO (subloop_allocno)) < 0)
                 /* The register was spilled in the subloop.  If we spill
                    it in the outer loop too then we'll no longer need to
diff --git a/gcc/ira-int.h b/gcc/ira-int.h
index c5b1a13..8b87498 100644
--- a/gcc/ira-int.h
+++ b/gcc/ira-int.h
@@ -314,6 +314,13 @@ struct ira_allocno
      vector where a bit with given index represents allocno with the
      same number.  */
   unsigned int conflict_vec_p : 1;
+  /* True if the parent loop has an allocno for the same register and
+     if the parent allocno's assignment might not be valid in this loop.
+     This means that we cannot merge this allocno and the parent allocno
+     together.
+
+     This is only ever true for non-cap allocnos.  */
+  unsigned int might_conflict_with_parent_p : 1;
   /* Hard register assigned to given allocno.  Negative value means
      that memory was allocated to the allocno.  During the reload,
      spilled allocno has value equal to the corresponding stack slot
@@ -423,6 +430,8 @@ struct ira_allocno
 #define ALLOCNO_CAP_MEMBER(A) ((A)->cap_member)
 #define ALLOCNO_NREFS(A) ((A)->nrefs)
 #define ALLOCNO_FREQ(A) ((A)->freq)
+#define ALLOCNO_MIGHT_CONFLICT_WITH_PARENT_P(A) \
+  ((A)->might_conflict_with_parent_p)
 #define ALLOCNO_HARD_REGNO(A) ((A)->hard_regno)
 #define ALLOCNO_CALL_FREQ(A) ((A)->call_freq)
 #define ALLOCNO_CALLS_CROSSED_NUM(A) ((A)->calls_crossed_num)
@@ -1623,4 +1632,32 @@ ira_subloop_allocnos_can_differ_p (ira_allocno_t a, bool allocated_p = true)
   return true;
 }
 
+/* Return true if we should treat A and SUBLOOP_A as belonging to a
+   single region.  */
+inline bool
+ira_single_region_allocno_p (ira_allocno_t a, ira_allocno_t subloop_a)
+{
+  if (flag_ira_region != IRA_REGION_MIXED)
+    return false;
+
+  if (ALLOCNO_MIGHT_CONFLICT_WITH_PARENT_P (subloop_a))
+    return false;
+
+  auto rclass = ALLOCNO_CLASS (a);
+  auto pclass = ira_pressure_class_translate[rclass];
+  auto loop_used_regs = ALLOCNO_LOOP_TREE_NODE (a)->reg_pressure[pclass];
+  return loop_used_regs <= ira_class_hard_regs_num[pclass];
+}
+
+/* Return the set of all hard registers that conflict with A.  */
+inline HARD_REG_SET
+ira_total_conflict_hard_regs (ira_allocno_t a)
+{
+  auto obj_0 = ALLOCNO_OBJECT (a, 0);
+  HARD_REG_SET conflicts = OBJECT_TOTAL_CONFLICT_HARD_REGS (obj_0);
+  for (int i = 1; i < ALLOCNO_NUM_OBJECTS (a); i++)
+    conflicts |= OBJECT_TOTAL_CONFLICT_HARD_REGS (ALLOCNO_OBJECT (a, i));
+  return conflicts;
+}
+
 #endif /* GCC_IRA_INT_H */
diff --git a/gcc/testsuite/gcc.target/aarch64/reg-alloc-2.c b/gcc/testsuite/gcc.target/aarch64/reg-alloc-2.c
new file mode 100644
index 0000000..d4d260c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/reg-alloc-2.c
@@ -0,0 +1,47 @@
+/* { dg-options "-O2 -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
+
+#define PROB 0.1
+
+struct L
+{
+  int data;
+  volatile struct L *next;
+  volatile struct L *inner;
+};
+
+/* The thing we're testing here is that the !head->inner path of the outer loop
+   body has no stack accesses.  It's possible that we'll need to update this
+   pattern for unrelated code changes, but the test should be XFAILed rather
+   than changed if any new stack accesses occur on the !head->inner path.  */
+/*
+** foo:
+**	...
+**	ldr	(w[0-9]+), \[(x[0-9]+)\]
+**	add	(w[0-9]+), (?:\3, \1|\1, \3)
+**	ldr	(x[0-9]+), \[\2, #?16\]
+**	str	\3, \[\2\]
+**	ldr	\2, \[\2, #?8\]
+**	cbn?z	\4, .*
+**	...
+**	ret
+*/
+void
+foo (volatile struct L *head, int inc)
+{
+  while (head)
+    {
+      inc = head->data + inc;
+      volatile struct L *inner = head->inner;
+      head->data = inc;
+      head = head->next;
+      if (__builtin_expect_with_probability (inner != 0, 0, PROB))
+	for (int i = 0; i < 1000; ++i)
+	  /* Leave x30 for i.  */
+	  asm volatile ("// foo" :::
+			"x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
+			"x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
+			"x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
+			"x24", "x25", "x26", "x27", "x28");
+    }
+}
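
Note on the cost propagation above: for each hard register, the parent allocno now pays either the subloop allocno's own cost for that register, capped at the cost of spilling the register around and throughout the loop, or the full spill cost if the register conflicts with the subloop allocno. The sketch below is illustrative only, not GCC code; all names and numbers are made up, and it simply replays the MIN (cost, spill_cost) arithmetic performed by ira_propagate_hard_reg_costs.

#include <algorithm>
#include <cstddef>
#include <cstdio>
#include <vector>

int
main ()
{
  /* Hypothetical per-hard-register costs recorded for the subloop
     allocno A (ALLOCNO_HARD_REG_COSTS in the real code).  */
  std::vector<int> subloop_costs = {4, 4, 12, 20};
  /* Hard registers that conflict with A inside the loop.  */
  std::vector<bool> conflicts = {false, false, true, false};
  /* Cost of spilling A's register throughout the loop: store on entry,
     memory references inside, load on exit (spill_inside_loop_cost ()
     plus ALLOCNO_MEMORY_COST in the patch).  */
  int spill_cost = 10;
  /* Costs accumulated so far for the parent allocno.  */
  std::vector<int> parent_costs = {2, 3, 2, 2};

  for (std::size_t i = 0; i < parent_costs.size (); ++i)
    if (conflicts[i])
      /* The register can't be used inside the loop, so the parent pays
	 the full cost of spilling it around and throughout the loop.  */
      parent_costs[i] += spill_cost;
    else
      /* Otherwise the propagated cost is capped at SPILL_COST, since the
	 parent can always fall back to spilling around the loop.  */
      parent_costs[i] += std::min (subloop_costs[i], spill_cost);

  for (std::size_t i = 0; i < parent_costs.size (); ++i)
    std::printf ("hard reg %zu: propagated cost %d\n", i, parent_costs[i]);
  return 0;
}

With these made-up numbers, register 3's own cost of 20 is capped at the spill cost of 10, so the parent sees 12 rather than 22, while the conflicting register 2 is charged the full spill cost.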