diff options
author | Jan Hubicka <jh@suse.cz> | 2003-03-19 13:51:28 +0100 |
---|---|---|
committer | Jan Hubicka <hubicka@gcc.gnu.org> | 2003-03-19 12:51:28 +0000 |
commit | d9b40e8dbeca786fec7b2f01af13be0e8e892c39 (patch) | |
tree | 1c90fa89017a7fc684eead643f2ef294b91c3aa7 /gcc | |
parent | 38b2a9713feb97b645f4ecec5de23c0051261d6c (diff) | |
download | gcc-d9b40e8dbeca786fec7b2f01af13be0e8e892c39.zip gcc-d9b40e8dbeca786fec7b2f01af13be0e8e892c39.tar.gz gcc-d9b40e8dbeca786fec7b2f01af13be0e8e892c39.tar.bz2 |
i386.h (machine_function): New field use_fast_prologue_epilogue.
* i386.h (machine_function): New field use_fast_prologue_epilogue.
* i386.c (use_fast_prologue_epilogue): Remove.
(ix86_frame): New field save_regs_using_mov.
(ix86_compute_frame_layout): Decide on fast prologues;
allocate saved registers in red zone.
(ix86_expand_epilogue, ix86_expand_prologue): Obey new parameters.
From-SVN: r64579
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/ChangeLog | 9 | ||||
-rw-r--r-- | gcc/config/i386/i386.c | 93 | ||||
-rw-r--r-- | gcc/config/i386/i386.h | 3 |
3 files changed, 66 insertions, 39 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index cacac6f..29a35b8 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,12 @@ +Wed Mar 19 11:28:45 CET 2003 Jan Hubicka <jh@suse.cz> + + * i386.h (machine_function): New fields use_fast_prologue_epilogue. + * i386.c (use_fast_prologue_epilogue): Remove. + (ix86_frame): New field save_regs-using_mov; + (ix86_compute_frame_layout): Decide on fast prologues; + allocate saved registers in red zone. + (ix86_expand_epilogue, ix86_expand_prolgoues): Obey new parameters. + 2003-03-19 Nick Clifton <nickc@redhat.com> * config/mcore/mcore.h (CPP_SPEC): Remove trailing semi-colon. diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 2dddfb7..5959a91 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -530,10 +530,6 @@ const int x86_ext_80387_constants = m_K6 | m_ATHLON | m_PENT4 | m_PPRO; epilogue code. */ #define FAST_PROLOGUE_INSN_COUNT 20 -/* Set by prologue expander and used by epilogue expander to determine - the style used. */ -static int use_fast_prologue_epilogue; - /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */ static const char *const qi_reg_name[] = QI_REGISTER_NAMES; static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES; @@ -724,6 +720,10 @@ struct ix86_frame HOST_WIDE_INT frame_pointer_offset; HOST_WIDE_INT hard_frame_pointer_offset; HOST_WIDE_INT stack_pointer_offset; + + /* When save_regs_using_mov is set, emit prologue using + move instead of push instructions. */ + bool save_regs_using_mov; }; /* Used to enable/disable debugging features. */ @@ -4914,6 +4914,37 @@ ix86_compute_frame_layout (frame) frame->nregs = ix86_nsaved_regs (); total_size = size; + if (!optimize_size && !reload_completed) + { + int count = frame->nregs; + + /* The fast prologue uses move instead of push to save registers. This + is significantly longer, but also executes faster as modern hardware + can execute the moves in parallel, but can't do that for push/pop. 
+ + Be careful about choosing what prologue to emit: When function takes + many instructions to execute we may use slow version as well as in + case function is known to be outside hot spot (this is known with + feedback only). Weight the size of function by number of registers + to save as it is cheap to use one or two push instructions but very + slow to use many of them. */ + if (count) + count = (count - 1) * FAST_PROLOGUE_INSN_COUNT; + if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL + || (flag_branch_probabilities + && cfun->function_frequency < FUNCTION_FREQUENCY_HOT)) + cfun->machine->use_fast_prologue_epilogue = false; + else + cfun->machine->use_fast_prologue_epilogue + = !expensive_function_p (count); + } + if (TARGET_PROLOGUE_USING_MOVE + && cfun->machine->use_fast_prologue_epilogue) + frame->save_regs_using_mov = true; + else + frame->save_regs_using_mov = false; + + /* Skip return address and saved base pointer. */ offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD; @@ -4986,10 +5017,15 @@ ix86_compute_frame_layout (frame) (size + frame->padding1 + frame->padding2 + frame->outgoing_arguments_size + frame->va_arg_size); + if (!frame->to_allocate && frame->nregs <= 1) + frame->save_regs_using_mov = false; + if (TARGET_64BIT && TARGET_RED_ZONE && current_function_sp_is_unchanging && current_function_is_leaf) { frame->red_zone_size = frame->to_allocate; + if (frame->save_regs_using_mov) + frame->red_zone_size += frame->nregs * UNITS_PER_WORD; if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE) frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE; } @@ -5058,35 +5094,9 @@ ix86_expand_prologue () rtx insn; bool pic_reg_used; struct ix86_frame frame; - int use_mov = 0; HOST_WIDE_INT allocate; ix86_compute_frame_layout (&frame); - if (!optimize_size) - { - int count = frame.nregs; - - /* The fast prologue uses move instead of push to save registers. 
This - is significantly longer, but also executes faster as modern hardware - can execute the moves in parallel, but can't do that for push/pop. - - Be careful about choosing what prologue to emit: When function takes - many instructions to execute we may use slow version as well as in - case function is known to be outside hot spot (this is known with - feedback only). Weight the size of function by number of registers - to save as it is cheap to use one or two push instructions but very - slow to use many of them. */ - if (count) - count = (count - 1) * FAST_PROLOGUE_INSN_COUNT; - if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL - || (flag_branch_probabilities - && cfun->function_frequency < FUNCTION_FREQUENCY_HOT)) - use_fast_prologue_epilogue = 0; - else - use_fast_prologue_epilogue = !expensive_function_p (count); - if (TARGET_PROLOGUE_USING_MOVE) - use_mov = use_fast_prologue_epilogue; - } /* Note: AT&T enter does NOT have reversed args. Enter is probably slower on all targets. Also sdb doesn't like it. */ @@ -5101,16 +5111,19 @@ ix86_expand_prologue () } allocate = frame.to_allocate; - /* In case we are dealing only with single register and empty frame, - push is equivalent of the mov+add sequence. */ - if (allocate == 0 && frame.nregs <= 1) - use_mov = 0; - if (!use_mov) + if (!frame.save_regs_using_mov) ix86_emit_save_regs (); else allocate += frame.nregs * UNITS_PER_WORD; + /* When using red zone we may start register saving before allocating + the stack frame saving one cycle of the prologue. */ + if (TARGET_RED_ZONE && frame.save_regs_using_mov) + ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx + : stack_pointer_rtx, + -frame.nregs * UNITS_PER_WORD); + if (allocate == 0) ; else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT) @@ -5144,7 +5157,7 @@ ix86_expand_prologue () call. 
*/ emit_insn (gen_blockage (const0_rtx)); } - if (use_mov) + if (frame.save_regs_using_mov && !TARGET_RED_ZONE) { if (!frame_pointer_needed || !frame.to_allocate) ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate); @@ -5243,11 +5256,12 @@ ix86_expand_epilogue (style) tuning in future. */ if ((!sp_valid && frame.nregs <= 1) || (TARGET_EPILOGUE_USING_MOVE - && use_fast_prologue_epilogue + && cfun->machine->use_fast_prologue_epilogue && (frame.nregs > 1 || frame.to_allocate)) || (frame_pointer_needed && !frame.nregs && frame.to_allocate) || (frame_pointer_needed && TARGET_USE_LEAVE - && use_fast_prologue_epilogue && frame.nregs == 1) + && cfun->machine->use_fast_prologue_epilogue + && frame.nregs == 1) || current_function_calls_eh_return) { /* Restore registers. We can use ebp or esp to address the memory @@ -5294,7 +5308,8 @@ ix86_expand_epilogue (style) GEN_INT (frame.to_allocate + frame.nregs * UNITS_PER_WORD))); /* If not an i386, mov & pop is faster than "leave". */ - else if (TARGET_USE_LEAVE || optimize_size || !use_fast_prologue_epilogue) + else if (TARGET_USE_LEAVE || optimize_size + || !cfun->machine->use_fast_prologue_epilogue) emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ()); else { diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index 4b21c86..ce66622 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -3220,6 +3220,9 @@ struct machine_function GTY(()) int save_varrargs_registers; int accesses_prev_frame; int optimize_mode_switching; + /* Set by ix86_compute_frame_layout and used by prologue/epilogue expander to + determine the style used. */ + int use_fast_prologue_epilogue; }; #define ix86_stack_locals (cfun->machine->stack_locals) |