diff options
-rw-r--r-- | gcc/ChangeLog | 12 | ||||
-rw-r--r-- | gcc/config/arm/arm.c | 290 |
2 files changed, 300 insertions, 2 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 98e170e..92b4a77 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,15 @@ +2013-04-16 Greta Yorsh <Greta.Yorsh at arm.com> + + * config/arm/arm.c (emit_multi_reg_push): New declaration + for an existing function. + (arm_emit_strd_push): New function. + (arm_expand_prologue): Used here. + (arm_emit_ldrd_pop): New function. + (arm_expand_epilogue): Used here. + (arm_get_frame_offsets): Update condition. + (arm_emit_multi_reg_pop): Add a special case for load of a single + register with writeback. + 2013-04-16 Uros Bizjak <ubizjak@gmail.com> * doc/invoke.texi (i386 Option): Reword -mstack-protector-guard diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c index 89affa7..bac709f 100644 --- a/gcc/config/arm/arm.c +++ b/gcc/config/arm/arm.c @@ -173,6 +173,7 @@ static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int); static tree arm_builtin_decl (unsigned, bool); static void emit_constant_insn (rtx cond, rtx pattern); static rtx emit_set_insn (rtx, rtx); +static rtx emit_multi_reg_push (unsigned long); static int arm_arg_partial_bytes (cumulative_args_t, enum machine_mode, tree, bool); static rtx arm_function_arg (cumulative_args_t, enum machine_mode, @@ -16693,6 +16694,148 @@ thumb2_emit_strd_push (unsigned long saved_regs_mask) return; } +/* STRD in ARM mode requires consecutive registers. This function emits STRD + whenever possible, otherwise it emits single-word stores. The first store + also allocates stack space for all saved registers, using writeback with + pre-indexed addressing mode. All other stores use offset addressing. If no STRD + can be emitted, this function emits a sequence of single-word stores, + and not an STM as before, because single-word stores provide more scheduling + freedom and can be turned into an STM by peephole optimizations. SAVED_REGS_MASK is the bitmask of registers to push; it must select at least one register and must not include SP or PC. 
*/ +static void +arm_emit_strd_push (unsigned long saved_regs_mask) +{ + int num_regs = 0; + int i, j, dwarf_index = 0; + int offset = 0; + rtx dwarf = NULL_RTX; + rtx insn = NULL_RTX; + rtx tmp, mem; + + /* TODO: A more efficient code can be emitted by changing the + layout, e.g., first push all pairs that can use STRD to keep the + stack aligned, and then push all other registers. */ + for (i = 0; i <= LAST_ARM_REGNUM; i++) + if (saved_regs_mask & (1 << i)) + num_regs++; + + gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM))); + gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM))); + gcc_assert (num_regs > 0); + + /* Create sequence for DWARF info. */ + dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1)); + + /* For dwarf info, we generate explicit stack update. */ + tmp = gen_rtx_SET (VOIDmode, + stack_pointer_rtx, + plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs)); + RTX_FRAME_RELATED_P (tmp) = 1; + XVECEXP (dwarf, 0, dwarf_index++) = tmp; + + /* Save registers. */ + offset = - 4 * num_regs; + j = 0; + while (j <= LAST_ARM_REGNUM) + if (saved_regs_mask & (1 << j)) + { + if ((j % 2 == 0) + && (saved_regs_mask & (1 << (j + 1)))) + { + /* Current register and next register form a register pair for + which STRD can be generated. */ + if (offset < 0) + { + /* Allocate stack space for all saved registers. */ + tmp = plus_constant (Pmode, stack_pointer_rtx, offset); + tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp); + mem = gen_frame_mem (DImode, tmp); + offset = 0; + } + else if (offset > 0) + mem = gen_frame_mem (DImode, + plus_constant (Pmode, + stack_pointer_rtx, + offset)); + else + mem = gen_frame_mem (DImode, stack_pointer_rtx); + + tmp = gen_rtx_SET (DImode, mem, gen_rtx_REG (DImode, j)); + RTX_FRAME_RELATED_P (tmp) = 1; + tmp = emit_insn (tmp); + + /* Record the first store insn (dwarf_index is 1 until the first register save is added to the DWARF sequence). */ + if (dwarf_index == 1) + insn = tmp; + + /* Generate dwarf info, describing the STRD as two SImode stores. 
*/ + mem = gen_frame_mem (SImode, + plus_constant (Pmode, + stack_pointer_rtx, + offset)); + tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j)); + RTX_FRAME_RELATED_P (tmp) = 1; + XVECEXP (dwarf, 0, dwarf_index++) = tmp; + + mem = gen_frame_mem (SImode, + plus_constant (Pmode, + stack_pointer_rtx, + offset + 4)); + tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j + 1)); + RTX_FRAME_RELATED_P (tmp) = 1; + XVECEXP (dwarf, 0, dwarf_index++) = tmp; + + offset += 8; + j += 2; + } + else + { + /* Emit a single word store. */ + if (offset < 0) + { + /* Allocate stack space for all saved registers. */ + tmp = plus_constant (Pmode, stack_pointer_rtx, offset); + tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp); + mem = gen_frame_mem (SImode, tmp); + offset = 0; + } + else if (offset > 0) + mem = gen_frame_mem (SImode, + plus_constant (Pmode, + stack_pointer_rtx, + offset)); + else + mem = gen_frame_mem (SImode, stack_pointer_rtx); + + tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j)); + RTX_FRAME_RELATED_P (tmp) = 1; + tmp = emit_insn (tmp); + + /* Record the first store insn (dwarf_index is 1 until the first register save is added to the DWARF sequence). */ + if (dwarf_index == 1) + insn = tmp; + + /* Generate dwarf info. */ + mem = gen_frame_mem (SImode, + plus_constant(Pmode, + stack_pointer_rtx, + offset)); + tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j)); + RTX_FRAME_RELATED_P (tmp) = 1; + XVECEXP (dwarf, 0, dwarf_index++) = tmp; + + offset += 4; + j += 1; + } + } + else + j++; + + /* Attach dwarf info to the first insn we generate. */ + gcc_assert (insn != NULL_RTX); + add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf); + RTX_FRAME_RELATED_P (insn) = 1; +} + /* Generate and emit an insn that we will recognize as a push_multi. 
Unfortunately, since this insn does not reflect very well the actual semantics of the operation, we need to annotate the insn for the benefit @@ -16892,6 +17035,17 @@ arm_emit_multi_reg_pop (unsigned long saved_regs_mask) if (saved_regs_mask & (1 << i)) { reg = gen_rtx_REG (SImode, i); + if ((num_regs == 1) && emit_update && !return_in_pc) + { + /* Emit single load with writeback. */ + tmp = gen_frame_mem (SImode, + gen_rtx_POST_INC (Pmode, + stack_pointer_rtx)); + tmp = emit_insn (gen_rtx_SET (VOIDmode, reg, tmp)); + REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf); + return; + } + tmp = gen_rtx_SET (VOIDmode, reg, gen_frame_mem @@ -17123,6 +17277,129 @@ thumb2_emit_ldrd_pop (unsigned long saved_regs_mask) return; } +/* LDRD in ARM mode needs consecutive registers as operands. This function + emits LDRD whenever possible, otherwise it emits single-word loads. It uses + offset addressing and then generates one separate stack update. This provides + more scheduling freedom, compared to writeback on every load. However, + if the function returns using load into PC directly + (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated + before the last load. TODO: Add a peephole optimization to recognize + the new epilogue sequence as an LDM instruction whenever possible. TODO: Add + peephole optimization to merge the load at stack-offset zero + with the stack update instruction using load with writeback + in post-index addressing mode. SAVED_REGS_MASK is the bitmask of registers to pop; it must not include SP. */ +static void +arm_emit_ldrd_pop (unsigned long saved_regs_mask) +{ + int j = 0; + int offset = 0; + rtx par = NULL_RTX; + rtx dwarf = NULL_RTX; + rtx tmp, mem; + + /* Restore saved registers. */ + gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM)))); + j = 0; + while (j <= LAST_ARM_REGNUM) + if (saved_regs_mask & (1 << j)) + { + if ((j % 2) == 0 + && (saved_regs_mask & (1 << (j + 1))) + && (j + 1) != PC_REGNUM) + { + /* Current register and next register form register pair for which + LDRD can be generated. 
PC is always the last register popped, and + we handle it separately. */ + if (offset > 0) + mem = gen_frame_mem (DImode, + plus_constant (Pmode, + stack_pointer_rtx, + offset)); + else + mem = gen_frame_mem (DImode, stack_pointer_rtx); + + tmp = gen_rtx_SET (DImode, gen_rtx_REG (DImode, j), mem); + RTX_FRAME_RELATED_P (tmp) = 1; + tmp = emit_insn (tmp); + + /* Generate dwarf info: one REG_CFA_RESTORE note for each register of the pair. */ + + dwarf = alloc_reg_note (REG_CFA_RESTORE, + gen_rtx_REG (SImode, j), + NULL_RTX); + dwarf = alloc_reg_note (REG_CFA_RESTORE, + gen_rtx_REG (SImode, j + 1), + dwarf); + + REG_NOTES (tmp) = dwarf; + + offset += 8; + j += 2; + } + else if (j != PC_REGNUM) + { + /* Emit a single word load. */ + if (offset > 0) + mem = gen_frame_mem (SImode, + plus_constant (Pmode, + stack_pointer_rtx, + offset)); + else + mem = gen_frame_mem (SImode, stack_pointer_rtx); + + tmp = gen_rtx_SET (SImode, gen_rtx_REG (SImode, j), mem); + RTX_FRAME_RELATED_P (tmp) = 1; + tmp = emit_insn (tmp); + + /* Generate dwarf info. */ + REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, + gen_rtx_REG (SImode, j), + NULL_RTX); + + offset += 4; + j += 1; + } + else /* j == PC_REGNUM */ + j++; + } + else + j++; + + /* Update the stack once, by the total size of the registers loaded above. */ + if (offset > 0) + { + tmp = gen_rtx_SET (Pmode, + stack_pointer_rtx, + plus_constant (Pmode, + stack_pointer_rtx, + offset)); + RTX_FRAME_RELATED_P (tmp) = 1; + emit_insn (tmp); + offset = 0; + } + + if (saved_regs_mask & (1 << PC_REGNUM)) + { + /* Only PC is left to be popped; load it with post-increment of SP as part of the return. */ + par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2)); + XVECEXP (par, 0, 0) = ret_rtx; + tmp = gen_rtx_SET (SImode, + gen_rtx_REG (SImode, PC_REGNUM), + gen_frame_mem (SImode, + gen_rtx_POST_INC (SImode, + stack_pointer_rtx))); + RTX_FRAME_RELATED_P (tmp) = 1; + XVECEXP (par, 0, 1) = tmp; + par = emit_jump_insn (par); + + /* Generate dwarf info. 
*/ + dwarf = alloc_reg_note (REG_CFA_RESTORE, + gen_rtx_REG (SImode, PC_REGNUM), + NULL_RTX); + REG_NOTES (par) = dwarf; + } +} + /* Calculate the size of the return value that is passed in registers. */ static unsigned arm_size_return_regs (void) @@ -17332,9 +17609,10 @@ arm_get_frame_offsets (void) /* If it is safe to use r3, then do so. This sometimes generates better code on Thumb-2 by avoiding the need to use 32-bit push/pop instructions. */ - if (! any_sibcall_uses_r3 () + if (! any_sibcall_uses_r3 () && arm_size_return_regs () <= 12 - && (offsets->saved_regs_mask & (1 << 3)) == 0) + && (offsets->saved_regs_mask & (1 << 3)) == 0 + && (TARGET_THUMB2 || !current_tune->prefer_ldrd_strd)) { reg = 3; } @@ -17766,6 +18044,12 @@ arm_expand_prologue (void) { thumb2_emit_strd_push (live_regs_mask); } + else if (TARGET_ARM + && !TARGET_APCS_FRAME + && !IS_INTERRUPT (func_type)) + { + arm_emit_strd_push (live_regs_mask); + } else { insn = emit_multi_reg_push (live_regs_mask); @@ -23952,6 +24236,8 @@ arm_expand_epilogue (bool really_return) { if (TARGET_THUMB2) thumb2_emit_ldrd_pop (saved_regs_mask); + else if (TARGET_ARM && !IS_INTERRUPT (func_type)) + arm_emit_ldrd_pop (saved_regs_mask); else arm_emit_multi_reg_pop (saved_regs_mask); } |