diff options
-rw-r--r-- | gcc/config/aarch64/aarch64.cc | 118 | ||||
-rw-r--r-- | gcc/config/i386/i386.cc | 27 | ||||
-rw-r--r-- | gcc/doc/tm.texi | 77 | ||||
-rw-r--r-- | gcc/doc/tm.texi.in | 6 | ||||
-rw-r--r-- | gcc/hard-reg-set.h | 15 | ||||
-rw-r--r-- | gcc/ira-color.cc | 83 | ||||
-rw-r--r-- | gcc/target.def | 87 | ||||
-rw-r--r-- | gcc/target.h | 12 | ||||
-rw-r--r-- | gcc/targhooks.cc | 27 | ||||
-rw-r--r-- | gcc/targhooks.h | 5 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/aarch64/callee_save_1.c | 12 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/aarch64/callee_save_2.c | 14 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/aarch64/callee_save_3.c | 12 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/aarch64/pr103350-1.c | 2 |
14 files changed, 458 insertions, 39 deletions
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc index 9196b8d..9bea8ce 100644 --- a/gcc/config/aarch64/aarch64.cc +++ b/gcc/config/aarch64/aarch64.cc @@ -15873,6 +15873,118 @@ aarch64_memory_move_cost (machine_mode mode, reg_class_t rclass_i, bool in) : base + aarch64_tune_params.memmov_cost.store_int); } +/* CALLEE_SAVED_REGS is the set of callee-saved registers that the + RA has already decided to use. Return the total number of registers + in class RCLASS that need to be saved and restored, including the + frame link registers. */ +static int +aarch64_count_saves (const HARD_REG_SET &callee_saved_regs, reg_class rclass) +{ + auto saved_gprs = callee_saved_regs & reg_class_contents[rclass]; + auto nregs = hard_reg_set_popcount (saved_gprs); + + if (TEST_HARD_REG_BIT (reg_class_contents[rclass], LR_REGNUM)) + { + if (aarch64_needs_frame_chain ()) + nregs += 2; + else if (!crtl->is_leaf || df_regs_ever_live_p (LR_REGNUM)) + nregs += 1; + } + return nregs; +} + +/* CALLEE_SAVED_REGS is the set of callee-saved registers that the + RA has already decided to use. Return the total number of registers + that need to be saved above the hard frame pointer, including the + frame link registers. */ +static int +aarch64_count_above_hard_fp_saves (const HARD_REG_SET &callee_saved_regs) +{ + /* FP and Advanced SIMD registers are saved above the frame pointer + but SVE registers are saved below it. */ + if (known_le (GET_MODE_SIZE (aarch64_reg_save_mode (V8_REGNUM)), 16U)) + return aarch64_count_saves (callee_saved_regs, POINTER_AND_FP_REGS); + return aarch64_count_saves (callee_saved_regs, POINTER_REGS); +} + +/* Implement TARGET_CALLEE_SAVE_COST. 
*/ +static int +aarch64_callee_save_cost (spill_cost_type spill_type, unsigned int regno, + machine_mode mode, unsigned int nregs, int mem_cost, + const HARD_REG_SET &callee_saved_regs, + bool existing_spill_p) +{ + /* If we've already committed to saving an odd number of GPRs, assume that + saving one more will involve turning an STR into an STP and an LDR + into an LDP. This should still be more expensive than not spilling + (meaning that the minimum cost is 1), but it should usually be cheaper + than a separate store or load. */ + if (GP_REGNUM_P (regno) + && nregs == 1 + && (aarch64_count_saves (callee_saved_regs, GENERAL_REGS) & 1)) + return 1; + + /* Similarly for saving FP registers, if we only need to save the low + 64 bits. (We can also use STP/LDP instead of STR/LDR for Q registers, + but that is less likely to be a saving.) */ + if (FP_REGNUM_P (regno) + && nregs == 1 + && known_eq (GET_MODE_SIZE (aarch64_reg_save_mode (regno)), 8U) + && (aarch64_count_saves (callee_saved_regs, FP_REGS) & 1)) + return 1; + + /* If this would be the first register that we save, add the cost of + allocating or deallocating the frame. For GPR, FPR, and Advanced SIMD + saves, the allocation and deallocation can be folded into the save and + restore. */ + if (!existing_spill_p + && !GP_REGNUM_P (regno) + && !(FP_REGNUM_P (regno) + && known_le (GET_MODE_SIZE (aarch64_reg_save_mode (regno)), 16U))) + return default_callee_save_cost (spill_type, regno, mode, nregs, mem_cost, + callee_saved_regs, existing_spill_p); + + return mem_cost; +} + +/* Implement TARGET_FRAME_ALLOCATION_COST. */ +static int +aarch64_frame_allocation_cost (frame_cost_type, + const HARD_REG_SET &callee_saved_regs) +{ + /* The intention is to model the relative costs of different approaches + to storing data on the stack, rather than to model the cost of saving + data vs not saving it. This means that we should return 0 if: + + - any frame is going to be allocated with: + + stp x29, x30, [sp, #-...]! 
+ + to create a frame link. + + - any frame is going to be allocated with: + + str x30, [sp, #-...]! + + to save the link register. + + In both cases, the allocation and deallocation instructions are the + same however we store data to the stack. (In the second case, the STR + could be converted to an STP by saving an extra call-preserved register, + but that is modeled by aarch64_callee_save_cost.) + + In other cases, assume that a frame would need to be allocated with a + separate subtraction and deallocated with a separate addition. Saves + of call-preserved registers can then reclaim this cost using a + predecrement store and a postincrement load. + + For simplicity, give this addition or subtraction the same cost as + a GPR move. We could parameterize this if necessary. */ + if (aarch64_count_above_hard_fp_saves (callee_saved_regs) == 0) + return aarch64_tune_params.regmove_cost->GP2GP; + return 0; +} + /* Implement TARGET_INSN_COST. We have the opportunity to do something much more productive here, such as using insn attributes to cost things. But we don't, not yet. @@ -31568,6 +31680,12 @@ aarch64_libgcc_floating_mode_supported_p #undef TARGET_MEMORY_MOVE_COST #define TARGET_MEMORY_MOVE_COST aarch64_memory_move_cost +#undef TARGET_CALLEE_SAVE_COST +#define TARGET_CALLEE_SAVE_COST aarch64_callee_save_cost + +#undef TARGET_FRAME_ALLOCATION_COST +#define TARGET_FRAME_ALLOCATION_COST aarch64_frame_allocation_cost + #undef TARGET_MIN_DIVISIONS_FOR_RECIP_MUL #define TARGET_MIN_DIVISIONS_FOR_RECIP_MUL aarch64_min_divisions_for_recip_mul diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc index fb93a6f..be5e27f 100644 --- a/gcc/config/i386/i386.cc +++ b/gcc/config/i386/i386.cc @@ -20600,12 +20600,26 @@ ix86_class_likely_spilled_p (reg_class_t rclass) return false; } -/* Implement TARGET_IRA_CALLEE_SAVED_REGISTER_COST_SCALE. */ +/* Implement TARGET_CALLEE_SAVE_COST. 
*/ static int -ix86_ira_callee_saved_register_cost_scale (int) -{ - return 1; +ix86_callee_save_cost (spill_cost_type, unsigned int hard_regno, machine_mode, + unsigned int, int mem_cost, const HARD_REG_SET &, bool) +{ + /* Account for the fact that push and pop are shorter and do their + own allocation and deallocation. */ + if (GENERAL_REGNO_P (hard_regno)) + { + /* push is 1 byte while typical spill is 4-5 bytes. + ??? We probably should adjust size costs accordingly. + Costs are relative to reg-reg move that has 2 bytes for 32bit + and 3 bytes otherwise. Be sure that no cost table sets cost + to 2, so we end up with 0. */ + if (mem_cost <= 2 || optimize_function_for_size_p (cfun)) + return 1; + return mem_cost - 2; + } + return mem_cost; } /* Return true if a set of DST by the expression SRC should be allowed. @@ -27092,9 +27106,8 @@ ix86_libgcc_floating_mode_supported_p #define TARGET_SMALL_REGISTER_CLASSES_FOR_MODE_P hook_bool_mode_true #undef TARGET_CLASS_LIKELY_SPILLED_P #define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p -#undef TARGET_IRA_CALLEE_SAVED_REGISTER_COST_SCALE -#define TARGET_IRA_CALLEE_SAVED_REGISTER_COST_SCALE \ - ix86_ira_callee_saved_register_cost_scale +#undef TARGET_CALLEE_SAVE_COST +#define TARGET_CALLEE_SAVE_COST ix86_callee_save_cost #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \ diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi index 9f42913..a96700c 100644 --- a/gcc/doc/tm.texi +++ b/gcc/doc/tm.texi @@ -3047,14 +3047,6 @@ A target hook which can change allocno class for given pseudo from The default version of this target hook always returns given class. @end deftypefn -@deftypefn {Target Hook} int TARGET_IRA_CALLEE_SAVED_REGISTER_COST_SCALE (int @var{hard_regno}) -A target hook which returns the callee-saved register @var{hard_regno} -cost scale in epilogue and prologue used by IRA. 
- -The default version of this target hook returns 1 if optimizing for -size, otherwise returns the entry block frequency. -@end deftypefn - @deftypefn {Target Hook} bool TARGET_LRA_P (void) A target hook which returns true if we use LRA instead of reload pass. @@ -7011,6 +7003,75 @@ value to the result of that function. The arguments to that function are the same as to this target hook. @end deftypefn +@deftypefn {Target Hook} int TARGET_CALLEE_SAVE_COST (spill_cost_type @var{cost_type}, unsigned int @var{hard_regno}, machine_mode @var{mode}, unsigned int @var{nregs}, int @var{mem_cost}, const HARD_REG_SET @var{&allocated_callee_regs}, bool @var{existing_spills_p}) +Return the one-off cost of saving or restoring callee-saved registers +(also known as call-preserved registers or non-volatile registers). +The parameters are as follows: + +@itemize +@item +@var{cost_type} is @samp{spill_cost_type::SAVE} for saving a register +and @samp{spill_cost_type::RESTORE} for restoring a register. + +@item +@var{hard_regno} and @var{mode} represent the whole register that +the register allocator is considering using; of these, +@var{nregs} registers are fully or partially callee-saved. + +@item +@var{mem_cost} is the normal cost for storing (for saves) +or loading (for restores) the @var{nregs} registers. + +@item +@var{allocated_callee_regs} is the set of callee-saved registers +that are already in use. + +@item +@var{existing_spills_p} is true if the register allocator has +already decided to spill registers to memory. +@end itemize + +If @var{existing_spills_p} is false, the cost of a save should account +for frame allocations in a way that is consistent with +@code{TARGET_FRAME_ALLOCATION_COST}'s handling of allocations for spills. +Similarly, the cost of a restore should then account for frame deallocations +in a way that is consistent with @code{TARGET_FRAME_ALLOCATION_COST}'s +handling of deallocations. 
+ +Note that this hook should not attempt to apply a frequency scale +to the cost: it is the caller's responsibility to do that where +appropriate. + +The default implementation returns @var{mem_cost}, plus the allocation +or deallocation cost returned by @code{TARGET_FRAME_ALLOCATION_COST}, +where appropriate. +@end deftypefn + +@deftypefn {Target Hook} int TARGET_FRAME_ALLOCATION_COST (frame_cost_type @var{cost_type}, const HARD_REG_SET @var{&allocated_callee_regs}) +Return the cost of allocating or deallocating a frame for the sake of +a spill; @var{cost_type} chooses between allocation and deallocation. +The term ``spill'' here includes both forcing a pseudo register to memory +and using caller-saved registers for pseudo registers that are live across +a call. + +This hook is only called if the register allocator has not so far +decided to spill. The allocator may have decided to use callee-saved +registers; if so, @var{allocated_callee_regs} is the set of callee-saved +registers that the allocator has used. There might also be other reasons +why a stack frame is already needed; for example, @samp{get_frame_size ()} +might be nonzero, or the target might already require a frame for +target-specific reasons. + +When the register allocator uses this hook to cost spills, it also uses +@code{TARGET_CALLEE_SAVE_COST} to cost new callee-saved registers, passing +@samp{false} as the @var{existing_spills_p} argument. The intention is to +allow the target to apply an apples-for-apples comparison between the +cost of using callee-saved registers and using spills in cases where the +allocator has not yet committed to using both strategies. + +The default implementation returns 0. +@end deftypefn + @defmac BRANCH_COST (@var{speed_p}, @var{predictable_p}) A C expression for the cost of a branch instruction. A value of 1 is the default; other values are interpreted relative to that. 
Parameter diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in index 6dbe225..eccc4d8 100644 --- a/gcc/doc/tm.texi.in +++ b/gcc/doc/tm.texi.in @@ -2388,8 +2388,6 @@ in the reload pass. @hook TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS -@hook TARGET_IRA_CALLEE_SAVED_REGISTER_COST_SCALE - @hook TARGET_LRA_P @hook TARGET_REGISTER_PRIORITY @@ -4584,6 +4582,10 @@ These macros are obsolete, new ports should use the target hook @hook TARGET_MEMORY_MOVE_COST +@hook TARGET_CALLEE_SAVE_COST + +@hook TARGET_FRAME_ALLOCATION_COST + @defmac BRANCH_COST (@var{speed_p}, @var{predictable_p}) A C expression for the cost of a branch instruction. A value of 1 is the default; other values are interpreted relative to that. Parameter diff --git a/gcc/hard-reg-set.h b/gcc/hard-reg-set.h index 48025d2..0d03aed 100644 --- a/gcc/hard-reg-set.h +++ b/gcc/hard-reg-set.h @@ -191,6 +191,12 @@ hard_reg_set_empty_p (const_hard_reg_set x) return x == HARD_CONST (0); } +inline int +hard_reg_set_popcount (const_hard_reg_set x) +{ + return popcount_hwi (x); +} + #else inline void @@ -254,6 +260,15 @@ hard_reg_set_empty_p (const_hard_reg_set x) bad |= x.elts[i]; return bad == 0; } + +inline int +hard_reg_set_popcount (const_hard_reg_set x) +{ + int count = 0; + for (unsigned int i = 0; i < ARRAY_SIZE (x.elts); ++i) + count += popcount_hwi (x.elts[i]); + return count; +} #endif /* Iterator for hard register sets. */ diff --git a/gcc/ira-color.cc b/gcc/ira-color.cc index 233060e..4b92960 100644 --- a/gcc/ira-color.cc +++ b/gcc/ira-color.cc @@ -1195,10 +1195,16 @@ finish_update_cost_records (void) update_cost_record_pool.release (); } +/* True if we have allocated memory, or intend to do so. */ +static bool allocated_memory_p; + /* Array whose element value is TRUE if the corresponding hard register was already allocated for an allocno. */ static bool allocated_hardreg_p[FIRST_PSEUDO_REGISTER]; +/* Which callee-saved hard registers we've decided to save. 
*/ +static HARD_REG_SET allocated_callee_save_regs; + /* Describes one element in a queue of allocnos whose costs need to be updated. Each allocno in the queue is known to have an allocno class. */ @@ -1740,6 +1746,20 @@ check_hard_reg_p (ira_allocno_t a, int hard_regno, return j == nregs; } +/* Record that we have allocated NREGS registers starting at HARD_REGNO. */ + +static void +record_allocation (int hard_regno, int nregs) +{ + for (int i = 0; i < nregs; ++i) + if (!allocated_hardreg_p[hard_regno + i]) + { + allocated_hardreg_p[hard_regno + i] = true; + if (!crtl->abi->clobbers_full_reg_p (hard_regno + i)) + SET_HARD_REG_BIT (allocated_callee_save_regs, hard_regno + i); + } +} + /* Return number of registers needed to be saved and restored at function prologue/epilogue if we allocate HARD_REGNO to hold value of MODE. */ @@ -1961,6 +1981,12 @@ assign_hard_reg (ira_allocno_t a, bool retry_p) #endif auto_bitmap allocnos_to_spill; HARD_REG_SET soft_conflict_regs = {}; + int entry_freq = REG_FREQ_FROM_BB (ENTRY_BLOCK_PTR_FOR_FN (cfun)); + int exit_freq = REG_FREQ_FROM_BB (EXIT_BLOCK_PTR_FOR_FN (cfun)); + int spill_cost = 0; + /* Whether we have spilled pseudos or used caller-saved registers for values + that are live across a call. */ + bool existing_spills_p = allocated_memory_p || caller_save_needed; ira_assert (! 
ALLOCNO_ASSIGNED_P (a)); get_conflict_and_start_profitable_regs (a, retry_p, @@ -1979,6 +2005,18 @@ assign_hard_reg (ira_allocno_t a, bool retry_p) start_update_cost (); mem_cost += ALLOCNO_UPDATED_MEMORY_COST (a); + if (!existing_spills_p) + { + auto entry_cost = targetm.frame_allocation_cost + (frame_cost_type::ALLOCATION, allocated_callee_save_regs); + spill_cost += entry_cost * entry_freq; + + auto exit_cost = targetm.frame_allocation_cost + (frame_cost_type::DEALLOCATION, allocated_callee_save_regs); + spill_cost += exit_cost * exit_freq; + } + mem_cost += spill_cost; + ira_allocate_and_copy_costs (&ALLOCNO_UPDATED_HARD_REG_COSTS (a), aclass, ALLOCNO_HARD_REG_COSTS (a)); a_costs = ALLOCNO_UPDATED_HARD_REG_COSTS (a); @@ -2175,16 +2213,37 @@ assign_hard_reg (ira_allocno_t a, bool retry_p) /* We need to save/restore the hard register in epilogue/prologue. Therefore we increase the cost. */ { + int nregs = hard_regno_nregs (hard_regno, mode); + add_cost = 0; rclass = REGNO_REG_CLASS (hard_regno); - add_cost = ((ira_memory_move_cost[mode][rclass][0] - + ira_memory_move_cost[mode][rclass][1]) - * saved_nregs / hard_regno_nregs (hard_regno, - mode) - 1) - * targetm.ira_callee_saved_register_cost_scale (hard_regno); + + auto entry_cost = targetm.callee_save_cost + (spill_cost_type::SAVE, hard_regno, mode, saved_nregs, + ira_memory_move_cost[mode][rclass][0] * saved_nregs / nregs, + allocated_callee_save_regs, existing_spills_p); + /* In the event of a tie between caller-save and callee-save, + prefer callee-save. We apply this to the entry cost rather + than the exit cost since the entry frequency must be at + least as high as the exit frequency. 
*/ + if (entry_cost > 1) + entry_cost -= 1; + add_cost += entry_cost * entry_freq; + + auto exit_cost = targetm.callee_save_cost + (spill_cost_type::RESTORE, hard_regno, mode, saved_nregs, + ira_memory_move_cost[mode][rclass][1] * saved_nregs / nregs, + allocated_callee_save_regs, existing_spills_p); + add_cost += exit_cost * exit_freq; + cost += add_cost; full_cost += add_cost; } } + if (ira_need_caller_save_p (a, hard_regno)) + { + cost += spill_cost; + full_cost += spill_cost; + } if (min_cost > cost) min_cost = cost; if (min_full_cost > full_cost) @@ -2211,11 +2270,13 @@ assign_hard_reg (ira_allocno_t a, bool retry_p) fail: if (best_hard_regno >= 0) { - for (i = hard_regno_nregs (best_hard_regno, mode) - 1; i >= 0; i--) - allocated_hardreg_p[best_hard_regno + i] = true; + record_allocation (best_hard_regno, + hard_regno_nregs (best_hard_regno, mode)); spill_soft_conflicts (a, allocnos_to_spill, soft_conflict_regs, best_hard_regno); } + else + allocated_memory_p = true; if (! retry_p) restore_costs_from_copies (a); ALLOCNO_HARD_REGNO (a) = best_hard_regno; @@ -3368,8 +3429,7 @@ improve_allocation (void) /* Assign the best chosen hard register to A. */ ALLOCNO_HARD_REGNO (a) = best; - for (j = nregs - 1; j >= 0; j--) - allocated_hardreg_p[best + j] = true; + record_allocation (best, nregs); if (internal_flag_ira_verbose > 2 && ira_dump_file != NULL) fprintf (ira_dump_file, "Assigning %d to a%dr%d\n", @@ -5199,6 +5259,7 @@ color (void) { allocno_stack_vec.create (ira_allocnos_num); memset (allocated_hardreg_p, 0, sizeof (allocated_hardreg_p)); + CLEAR_HARD_REG_SET (allocated_callee_save_regs); ira_initiate_assign (); do_coloring (); ira_finish_assign (); @@ -5327,10 +5388,14 @@ ira_color (void) ira_allocno_iterator ai; /* Setup updated costs. 
*/ + allocated_memory_p = false; FOR_EACH_ALLOCNO (a, ai) { ALLOCNO_UPDATED_MEMORY_COST (a) = ALLOCNO_MEMORY_COST (a); ALLOCNO_UPDATED_CLASS_COST (a) = ALLOCNO_CLASS_COST (a); + if (ALLOCNO_CLASS (a) == NO_REGS + && !ira_equiv_no_lvalue_p (ALLOCNO_REGNO (a))) + allocated_memory_p = true; } if (ira_conflicts_p) color (); diff --git a/gcc/target.def b/gcc/target.def index c348b15..6c7cdc8 100644 --- a/gcc/target.def +++ b/gcc/target.def @@ -3776,6 +3776,81 @@ are the same as to this target hook.", default_memory_move_cost) DEFHOOK +(callee_save_cost, + "Return the one-off cost of saving or restoring callee-saved registers\n\ +(also known as call-preserved registers or non-volatile registers).\n\ +The parameters are as follows:\n\ +\n\ +@itemize\n\ +@item\n\ +@var{cost_type} is @samp{spill_cost_type::SAVE} for saving a register\n\ +and @samp{spill_cost_type::RESTORE} for restoring a register.\n\ +\n\ +@item\n\ +@var{hard_regno} and @var{mode} represent the whole register that\n\ +the register allocator is considering using; of these,\n\ +@var{nregs} registers are fully or partially callee-saved.\n\ +\n\ +@item\n\ +@var{mem_cost} is the normal cost for storing (for saves)\n\ +or loading (for restores) the @var{nregs} registers.\n\ +\n\ +@item\n\ +@var{allocated_callee_regs} is the set of callee-saved registers\n\ +that are already in use.\n\ +\n\ +@item\n\ +@var{existing_spills_p} is true if the register allocator has\n\ +already decided to spill registers to memory.\n\ +@end itemize\n\ +\n\ +If @var{existing_spills_p} is false, the cost of a save should account\n\ +for frame allocations in a way that is consistent with\n\ +@code{TARGET_FRAME_ALLOCATION_COST}'s handling of allocations for spills.\n\ +Similarly, the cost of a restore should then account for frame deallocations\n\ +in a way that is consistent with @code{TARGET_FRAME_ALLOCATION_COST}'s\n\ +handling of deallocations.\n\ +\n\ +Note that this hook should not attempt to apply a frequency scale\n\ +to the 
cost: it is the caller's responsibility to do that where\n\ +appropriate.\n\ +\n\ +The default implementation returns @var{mem_cost}, plus the allocation\n\ +or deallocation cost returned by @code{TARGET_FRAME_ALLOCATION_COST},\n\ +where appropriate.", + int, (spill_cost_type cost_type, unsigned int hard_regno, + machine_mode mode, unsigned int nregs, int mem_cost, + const HARD_REG_SET &allocated_callee_regs, bool existing_spills_p), + default_callee_save_cost) + +DEFHOOK +(frame_allocation_cost, + "Return the cost of allocating or deallocating a frame for the sake of\n\ +a spill; @var{cost_type} chooses between allocation and deallocation.\n\ +The term ``spill'' here includes both forcing a pseudo register to memory\n\ +and using caller-saved registers for pseudo registers that are live across\n\ +a call.\n\ +\n\ +This hook is only called if the register allocator has not so far\n\ +decided to spill. The allocator may have decided to use callee-saved\n\ +registers; if so, @var{allocated_callee_regs} is the set of callee-saved\n\ +registers that the allocator has used. There might also be other reasons\n\ +why a stack frame is already needed; for example, @samp{get_frame_size ()}\n\ +might be nonzero, or the target might already require a frame for\n\ +target-specific reasons.\n\ +\n\ +When the register allocator uses this hook to cost spills, it also uses\n\ +@code{TARGET_CALLEE_SAVE_COST} to cost new callee-saved registers, passing\n\ +@samp{false} as the @var{existing_spills_p} argument. 
The intention is to\n\ +allow the target to apply an apples-for-apples comparison between the\n\ +cost of using callee-saved registers and using spills in cases where the\n\ +allocator has not yet committed to using both strategies.\n\ +\n\ +The default implementation returns 0.", + int, (frame_cost_type cost_type, const HARD_REG_SET &allocated_callee_regs), + default_frame_allocation_cost) + +DEFHOOK (use_by_pieces_infrastructure_p, "GCC will attempt several strategies when asked to copy between\n\ two areas of memory, or to set, clear or store to memory, for example\n\ @@ -5714,18 +5789,6 @@ DEFHOOK reg_class_t, (int, reg_class_t, reg_class_t), default_ira_change_pseudo_allocno_class) -/* Scale of callee-saved register cost in epilogue and prologue used by - IRA. */ -DEFHOOK -(ira_callee_saved_register_cost_scale, - "A target hook which returns the callee-saved register @var{hard_regno}\n\ -cost scale in epilogue and prologue used by IRA.\n\ -\n\ -The default version of this target hook returns 1 if optimizing for\n\ -size, otherwise returns the entry block frequency.", - int, (int hard_regno), - default_ira_callee_saved_register_cost_scale) - /* Return true if we use LRA instead of reload. */ DEFHOOK (lra_p, diff --git a/gcc/target.h b/gcc/target.h index 3e1ee68..2bf35e2 100644 --- a/gcc/target.h +++ b/gcc/target.h @@ -284,6 +284,18 @@ enum poly_value_estimate_kind POLY_VALUE_LIKELY }; +enum class spill_cost_type +{ + SAVE, + RESTORE +}; + +enum class frame_cost_type +{ + ALLOCATION, + DEALLOCATION +}; + typedef void (*emit_support_tinfos_callback) (tree); extern bool verify_type_context (location_t, type_context_kind, const_tree, diff --git a/gcc/targhooks.cc b/gcc/targhooks.cc index 344075e..c79458e 100644 --- a/gcc/targhooks.cc +++ b/gcc/targhooks.cc @@ -2083,6 +2083,33 @@ default_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED, #endif } +/* The default implementation of TARGET_CALLEE_SAVE_COST. 
*/ + +int +default_callee_save_cost (spill_cost_type spill_type, unsigned int, + machine_mode, unsigned int, int mem_cost, + const HARD_REG_SET &callee_saved_regs, + bool existing_spills_p) +{ + if (!existing_spills_p) + { + auto frame_type = (spill_type == spill_cost_type::SAVE + ? frame_cost_type::ALLOCATION + : frame_cost_type::DEALLOCATION); + mem_cost += targetm.frame_allocation_cost (frame_type, + callee_saved_regs); + } + return mem_cost; +} + +/* The default implementation of TARGET_FRAME_ALLOCATION_COST. */ + +int +default_frame_allocation_cost (frame_cost_type, const HARD_REG_SET &) +{ + return 0; +} + /* The default implementation of TARGET_SLOW_UNALIGNED_ACCESS. */ bool diff --git a/gcc/targhooks.h b/gcc/targhooks.h index 8871e01..f16b587 100644 --- a/gcc/targhooks.h +++ b/gcc/targhooks.h @@ -235,6 +235,11 @@ extern tree default_builtin_tm_load_store (tree); extern int default_memory_move_cost (machine_mode, reg_class_t, bool); extern int default_register_move_cost (machine_mode, reg_class_t, reg_class_t); +extern int default_callee_save_cost (spill_cost_type, unsigned int, + machine_mode, unsigned int, int, + const HARD_REG_SET &, bool); +extern int default_frame_allocation_cost (frame_cost_type, + const HARD_REG_SET &); extern bool default_slow_unaligned_access (machine_mode, unsigned int); extern HOST_WIDE_INT default_estimated_poly_value (poly_int64, poly_value_estimate_kind); diff --git a/gcc/testsuite/gcc.target/aarch64/callee_save_1.c b/gcc/testsuite/gcc.target/aarch64/callee_save_1.c new file mode 100644 index 0000000..f284861 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/callee_save_1.c @@ -0,0 +1,12 @@ +/* { dg-options "-O2" } */ + +int test (int x), test2 (int x); + +int foo (int x, int y) { + test (x); + int lhs = test2 (y); + return x + lhs; +} + +/* { dg-final { scan-assembler {\tstp\tx19, x20, \[sp,} } } */ +/* { dg-final { scan-assembler {\tldp\tx19, x20, \[sp,} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/callee_save_2.c 
b/gcc/testsuite/gcc.target/aarch64/callee_save_2.c new file mode 100644 index 0000000..744b464 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/callee_save_2.c @@ -0,0 +1,14 @@ +/* { dg-options "-O2 -fomit-frame-pointer" } */ + +int test (int x), test2 (int x); + +int foo (int x, int y) { + test (x); + int lhs = test2 (y); + return x + lhs; +} + +/* { dg-final { scan-assembler {\tstp\tx30, x19, \[sp,} } } */ +/* { dg-final { scan-assembler {\tldp\tx30, x19, \[sp\],} } } */ +/* { dg-final { scan-assembler {\tstr\tw1, \[sp,} } } */ +/* { dg-final { scan-assembler {\tldr\tw0, \[sp,} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/callee_save_3.c b/gcc/testsuite/gcc.target/aarch64/callee_save_3.c new file mode 100644 index 0000000..50b6853 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/callee_save_3.c @@ -0,0 +1,12 @@ +/* { dg-options "-O2" } */ + +float test (); +float g; + +float foo (float x, float y) { + g = x + test (); + return (x + test ()) * y; +} + +/* { dg-final { scan-assembler {\tstp\td14, d15, \[sp,} } } */ +/* { dg-final { scan-assembler {\tldp\td14, d15, \[sp,} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/pr103350-1.c b/gcc/testsuite/gcc.target/aarch64/pr103350-1.c index a0e764e..129c6ac 100644 --- a/gcc/testsuite/gcc.target/aarch64/pr103350-1.c +++ b/gcc/testsuite/gcc.target/aarch64/pr103350-1.c @@ -1,5 +1,5 @@ /* { dg-do run { target le } } */ -/* { dg-additional-options "-Os -fno-tree-ter -save-temps -fdump-rtl-ree-all -free -std=c99 -w" } */ +/* { dg-additional-options "-Os -fno-tree-ter -save-temps -fdump-rtl-ree-all -free -std=c99 -w -fno-caller-saves" } */ typedef unsigned char u8; typedef unsigned char __attribute__((__vector_size__ (8))) v64u8; |