author		Richard Sandiford <richard.sandiford@linaro.org>	2018-01-11 13:13:54 +0000
committer	Richard Sandiford <rsandifo@gcc.gnu.org>	2018-01-11 13:13:54 +0000
commit		f5470a77425a54efebfe1732488c40f05ef176d0 (patch)
tree		7c9ad00e360700db9b5efc7ff32f9f1e50591383
parent		0d0e0188d80f337d2d997ac787b62cc7e8387612 (diff)
[AArch64] Rework interface to add constant/offset routines
The port had aarch64_add_offset and aarch64_add_constant routines
that did similar things.  This patch replaces them with an expanded
version of aarch64_add_offset that takes separate source and
destination registers.  The new routine also takes a poly_int64 offset
instead of a HOST_WIDE_INT offset, but it leaves the HOST_WIDE_INT
case to aarch64_add_offset_1, which is basically a repurposed
aarch64_add_constant_internal.  The SVE patch will put the handling
of VL-based constants in aarch64_add_offset, while still using
aarch64_add_offset_1 for the constant part.

The vcall_offset == 0 path in aarch64_output_mi_thunk will use temp0
as well as temp1 once SVE is added.

A side-effect of the patch is that we now generate:

	mov	x29, sp

instead of:

	add	x29, sp, 0

in the pr70044.c test.

2018-01-11  Richard Sandiford  <richard.sandiford@linaro.org>
	    Alan Hayward  <alan.hayward@arm.com>
	    David Sherwood  <david.sherwood@arm.com>

gcc/
	* config/aarch64/aarch64.c (aarch64_force_temporary): Assert that
	x exists before using it.
	(aarch64_add_constant_internal): Rename to...
	(aarch64_add_offset_1): ...this.  Replace regnum with separate
	src and dest rtxes.  Handle the case in which they're different,
	including when the offset is zero.  Replace scratchreg with an rtx.
	Use 2 additions if there is no spare register into which we can
	move a 16-bit constant.
	(aarch64_add_constant): Delete.
	(aarch64_add_offset): Replace reg with separate src and dest
	rtxes.  Take a poly_int64 offset instead of a HOST_WIDE_INT.
	Use aarch64_add_offset_1.
	(aarch64_add_sp, aarch64_sub_sp): Take the scratch register as
	an rtx rather than an int.  Take the delta as a poly_int64
	rather than a HOST_WIDE_INT.  Use aarch64_add_offset.
	(aarch64_expand_mov_immediate): Update uses of aarch64_add_offset.
	(aarch64_expand_prologue): Update calls to aarch64_sub_sp,
	aarch64_allocate_and_probe_stack_space and aarch64_add_offset.
	(aarch64_expand_epilogue): Update calls to aarch64_add_offset
	and aarch64_add_sp.
	(aarch64_output_mi_thunk): Use aarch64_add_offset rather than
	aarch64_add_constant.

gcc/testsuite/
	* gcc.target/aarch64/pr70044.c: Allow "mov x29, sp" too.

Co-Authored-By: Alan Hayward <alan.hayward@arm.com>
Co-Authored-By: David Sherwood <david.sherwood@arm.com>

From-SVN: r256532
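For illustration only (not part of the original commit message): below is a
minimal standalone C sketch of the sub-24-bit split that aarch64_add_offset_1
retains in this patch, built from the 0xfff mask and 0x1000000 limit visible
in the hunks further down.  The program, its variable names and the example
offset are hypothetical; it models only the arithmetic, not the RTL emission
GCC performs.

	/* Hypothetical standalone model of the two-addition split in
	   aarch64_add_offset_1; not GCC source.  */
	#include <stdio.h>
	#include <stdlib.h>

	int
	main (void)
	{
	  long long offset = -0x123456;            /* example adjustment */
	  long long moffset = llabs (offset);

	  if (moffset < 0x1000000)                 /* fits in 24 bits */
	    {
	      long long low_off = moffset & 0xfff; /* low 12 bits */
	      low_off = offset < 0 ? -low_off : low_off;

	      /* The first add/sub covers the low 12 bits, the second the
		 remaining 12-bit-shifted part; both are valid AArch64
		 add/sub immediates.  */
	      printf ("first adjustment:  %lld\n", low_off);
	      printf ("second adjustment: %lld\n", offset - low_off);
	    }
	  return 0;
	}

Run as-is, this prints -1110 and -1191936 for the example offset, i.e. the
equivalent of a "sub ..., #0x456" followed by a "sub ..., #0x123, lsl #12".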
-rw-r--r--	gcc/ChangeLog	27
-rw-r--r--	gcc/config/aarch64/aarch64.c	205
-rw-r--r--	gcc/testsuite/ChangeLog	6
-rw-r--r--	gcc/testsuite/gcc.target/aarch64/pr70044.c	2
4 files changed, 153 insertions, 87 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index fbd23bf..19a3757 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,4 +1,31 @@
2018-01-11 Richard Sandiford <richard.sandiford@linaro.org>
+ Alan Hayward <alan.hayward@arm.com>
+ David Sherwood <david.sherwood@arm.com>
+
+ * config/aarch64/aarch64.c (aarch64_force_temporary): Assert that
+ x exists before using it.
+ (aarch64_add_constant_internal): Rename to...
+ (aarch64_add_offset_1): ...this. Replace regnum with separate
+ src and dest rtxes. Handle the case in which they're different,
+ including when the offset is zero. Replace scratchreg with an rtx.
+ Use 2 additions if there is no spare register into which we can
+ move a 16-bit constant.
+ (aarch64_add_constant): Delete.
+ (aarch64_add_offset): Replace reg with separate src and dest
+ rtxes. Take a poly_int64 offset instead of a HOST_WIDE_INT.
+ Use aarch64_add_offset_1.
+ (aarch64_add_sp, aarch64_sub_sp): Take the scratch register as
+ an rtx rather than an int. Take the delta as a poly_int64
+ rather than a HOST_WIDE_INT. Use aarch64_add_offset.
+ (aarch64_expand_mov_immediate): Update uses of aarch64_add_offset.
+ (aarch64_expand_prologue): Update calls to aarch64_sub_sp,
+ aarch64_allocate_and_probe_stack_space and aarch64_add_offset.
+ (aarch64_expand_epilogue): Update calls to aarch64_add_offset
+ and aarch64_add_sp.
+ (aarch64_output_mi_thunk): Use aarch64_add_offset rather than
+ aarch64_add_constant.
+
+2018-01-11 Richard Sandiford <richard.sandiford@linaro.org>
* config/aarch64/aarch64.c (aarch64_reinterpret_float_as_int):
Use scalar_float_mode.
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index d8ae9d2..42d97d5 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -1883,30 +1883,13 @@ aarch64_force_temporary (machine_mode mode, rtx x, rtx value)
return force_reg (mode, value);
else
{
- x = aarch64_emit_move (x, value);
+ gcc_assert (x);
+ aarch64_emit_move (x, value);
return x;
}
}
-static rtx
-aarch64_add_offset (scalar_int_mode mode, rtx temp, rtx reg,
- HOST_WIDE_INT offset)
-{
- if (!aarch64_plus_immediate (GEN_INT (offset), mode))
- {
- rtx high;
- /* Load the full offset into a register. This
- might be improvable in the future. */
- high = GEN_INT (offset);
- offset = 0;
- high = aarch64_force_temporary (mode, temp, high);
- reg = aarch64_force_temporary (mode, temp,
- gen_rtx_PLUS (mode, high, reg));
- }
- return plus_constant (mode, reg, offset);
-}
-
static int
aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate,
scalar_int_mode mode)
@@ -2031,12 +2014,16 @@ aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate,
return num_insns;
}
-/* Add DELTA to REGNUM in mode MODE. SCRATCHREG can be used to hold a
- temporary value if necessary. FRAME_RELATED_P should be true if
- the RTX_FRAME_RELATED flag should be set and CFA adjustments added
- to the generated instructions. If SCRATCHREG is known to hold
- abs (delta), EMIT_MOVE_IMM can be set to false to avoid emitting the
- immediate again.
+/* A subroutine of aarch64_add_offset. Set DEST to SRC + OFFSET for
+ a non-polynomial OFFSET. MODE is the mode of the addition.
+ FRAME_RELATED_P is true if the RTX_FRAME_RELATED flag should
+ be set and CFA adjustments added to the generated instructions.
+
+ TEMP1, if nonnull, is a register of mode MODE that can be used as a
+ temporary if register allocation is already complete. This temporary
+ register may overlap DEST but must not overlap SRC. If TEMP1 is known
+ to hold abs (OFFSET), EMIT_MOVE_IMM can be set to false to avoid emitting
+ the immediate again.
Since this function may be used to adjust the stack pointer, we must
ensure that it cannot cause transient stack deallocation (for example
@@ -2044,73 +2031,119 @@ aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate,
large immediate). */
static void
-aarch64_add_constant_internal (scalar_int_mode mode, int regnum,
- int scratchreg, HOST_WIDE_INT delta,
- bool frame_related_p, bool emit_move_imm)
+aarch64_add_offset_1 (scalar_int_mode mode, rtx dest,
+ rtx src, HOST_WIDE_INT offset, rtx temp1,
+ bool frame_related_p, bool emit_move_imm)
{
- HOST_WIDE_INT mdelta = abs_hwi (delta);
- rtx this_rtx = gen_rtx_REG (mode, regnum);
+ gcc_assert (emit_move_imm || temp1 != NULL_RTX);
+ gcc_assert (temp1 == NULL_RTX || !reg_overlap_mentioned_p (temp1, src));
+
+ HOST_WIDE_INT moffset = abs_hwi (offset);
rtx_insn *insn;
- if (!mdelta)
- return;
+ if (!moffset)
+ {
+ if (!rtx_equal_p (dest, src))
+ {
+ insn = emit_insn (gen_rtx_SET (dest, src));
+ RTX_FRAME_RELATED_P (insn) = frame_related_p;
+ }
+ return;
+ }
/* Single instruction adjustment. */
- if (aarch64_uimm12_shift (mdelta))
+ if (aarch64_uimm12_shift (moffset))
{
- insn = emit_insn (gen_add2_insn (this_rtx, GEN_INT (delta)));
+ insn = emit_insn (gen_add3_insn (dest, src, GEN_INT (offset)));
RTX_FRAME_RELATED_P (insn) = frame_related_p;
return;
}
- /* Emit 2 additions/subtractions if the adjustment is less than 24 bits.
- Only do this if mdelta is not a 16-bit move as adjusting using a move
- is better. */
- if (mdelta < 0x1000000 && !aarch64_move_imm (mdelta, mode))
+ /* Emit 2 additions/subtractions if the adjustment is less than 24 bits
+ and either:
+
+ a) the offset cannot be loaded by a 16-bit move or
+ b) there is no spare register into which we can move it. */
+ if (moffset < 0x1000000
+ && ((!temp1 && !can_create_pseudo_p ())
+ || !aarch64_move_imm (moffset, mode)))
{
- HOST_WIDE_INT low_off = mdelta & 0xfff;
+ HOST_WIDE_INT low_off = moffset & 0xfff;
- low_off = delta < 0 ? -low_off : low_off;
- insn = emit_insn (gen_add2_insn (this_rtx, GEN_INT (low_off)));
+ low_off = offset < 0 ? -low_off : low_off;
+ insn = emit_insn (gen_add3_insn (dest, src, GEN_INT (low_off)));
RTX_FRAME_RELATED_P (insn) = frame_related_p;
- insn = emit_insn (gen_add2_insn (this_rtx, GEN_INT (delta - low_off)));
+ insn = emit_insn (gen_add2_insn (dest, GEN_INT (offset - low_off)));
RTX_FRAME_RELATED_P (insn) = frame_related_p;
return;
}
/* Emit a move immediate if required and an addition/subtraction. */
- rtx scratch_rtx = gen_rtx_REG (mode, scratchreg);
if (emit_move_imm)
- aarch64_internal_mov_immediate (scratch_rtx, GEN_INT (mdelta), true, mode);
- insn = emit_insn (delta < 0 ? gen_sub2_insn (this_rtx, scratch_rtx)
- : gen_add2_insn (this_rtx, scratch_rtx));
+ {
+ gcc_assert (temp1 != NULL_RTX || can_create_pseudo_p ());
+ temp1 = aarch64_force_temporary (mode, temp1, GEN_INT (moffset));
+ }
+ insn = emit_insn (offset < 0
+ ? gen_sub3_insn (dest, src, temp1)
+ : gen_add3_insn (dest, src, temp1));
if (frame_related_p)
{
RTX_FRAME_RELATED_P (insn) = frame_related_p;
- rtx adj = plus_constant (mode, this_rtx, delta);
- add_reg_note (insn , REG_CFA_ADJUST_CFA, gen_rtx_SET (this_rtx, adj));
+ rtx adj = plus_constant (mode, src, offset);
+ add_reg_note (insn, REG_CFA_ADJUST_CFA, gen_rtx_SET (dest, adj));
}
}
-static inline void
-aarch64_add_constant (scalar_int_mode mode, int regnum, int scratchreg,
- HOST_WIDE_INT delta)
+/* Set DEST to SRC + OFFSET. MODE is the mode of the addition.
+ FRAME_RELATED_P is true if the RTX_FRAME_RELATED flag should
+ be set and CFA adjustments added to the generated instructions.
+
+ TEMP1, if nonnull, is a register of mode MODE that can be used as a
+ temporary if register allocation is already complete. This temporary
+ register may overlap DEST but must not overlap SRC. If TEMP1 is known
+ to hold abs (OFFSET), EMIT_MOVE_IMM can be set to false to avoid emitting
+ the immediate again.
+
+ Since this function may be used to adjust the stack pointer, we must
+ ensure that it cannot cause transient stack deallocation (for example
+ by first incrementing SP and then decrementing when adjusting by a
+ large immediate). */
+
+static void
+aarch64_add_offset (scalar_int_mode mode, rtx dest, rtx src,
+ poly_int64 offset, rtx temp1, bool frame_related_p,
+ bool emit_move_imm = true)
{
- aarch64_add_constant_internal (mode, regnum, scratchreg, delta, false, true);
+ gcc_assert (emit_move_imm || temp1 != NULL_RTX);
+ gcc_assert (temp1 == NULL_RTX || !reg_overlap_mentioned_p (temp1, src));
+
+ /* SVE support will go here. */
+ HOST_WIDE_INT constant = offset.to_constant ();
+ aarch64_add_offset_1 (mode, dest, src, constant, temp1,
+ frame_related_p, emit_move_imm);
}
+/* Add DELTA to the stack pointer, marking the instructions frame-related.
+ TEMP1 is available as a temporary if nonnull. EMIT_MOVE_IMM is false
+ if TEMP1 already contains abs (DELTA). */
+
static inline void
-aarch64_add_sp (int scratchreg, HOST_WIDE_INT delta, bool emit_move_imm)
+aarch64_add_sp (rtx temp1, poly_int64 delta, bool emit_move_imm)
{
- aarch64_add_constant_internal (Pmode, SP_REGNUM, scratchreg, delta,
- true, emit_move_imm);
+ aarch64_add_offset (Pmode, stack_pointer_rtx, stack_pointer_rtx, delta,
+ temp1, true, emit_move_imm);
}
+/* Subtract DELTA from the stack pointer, marking the instructions
+ frame-related if FRAME_RELATED_P. TEMP1 is available as a temporary
+ if nonnull. */
+
static inline void
-aarch64_sub_sp (int scratchreg, HOST_WIDE_INT delta, bool frame_related_p)
+aarch64_sub_sp (rtx temp1, poly_int64 delta, bool frame_related_p)
{
- aarch64_add_constant_internal (Pmode, SP_REGNUM, scratchreg, -delta,
- frame_related_p, true);
+ aarch64_add_offset (Pmode, stack_pointer_rtx, stack_pointer_rtx, -delta,
+ temp1, frame_related_p);
}
void
@@ -2143,9 +2176,8 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm)
{
gcc_assert (can_create_pseudo_p ());
base = aarch64_force_temporary (int_mode, dest, base);
- base = aarch64_add_offset (int_mode, NULL, base,
- INTVAL (offset));
- aarch64_emit_move (dest, base);
+ aarch64_add_offset (int_mode, dest, base, INTVAL (offset),
+ NULL_RTX, false);
return;
}
@@ -2184,9 +2216,8 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm)
{
gcc_assert(can_create_pseudo_p ());
base = aarch64_force_temporary (int_mode, dest, base);
- base = aarch64_add_offset (int_mode, NULL, base,
- INTVAL (offset));
- aarch64_emit_move (dest, base);
+ aarch64_add_offset (int_mode, dest, base, INTVAL (offset),
+ NULL_RTX, false);
return;
}
/* FALLTHRU */
@@ -3738,7 +3769,10 @@ aarch64_expand_prologue (void)
aarch64_emit_probe_stack_range (get_stack_check_protect (), frame_size);
}
- aarch64_sub_sp (IP0_REGNUM, initial_adjust, true);
+ rtx ip0_rtx = gen_rtx_REG (Pmode, IP0_REGNUM);
+ rtx ip1_rtx = gen_rtx_REG (Pmode, IP1_REGNUM);
+
+ aarch64_sub_sp (ip0_rtx, initial_adjust, true);
if (callee_adjust != 0)
aarch64_push_regs (reg1, reg2, callee_adjust);
@@ -3748,10 +3782,9 @@ aarch64_expand_prologue (void)
if (callee_adjust == 0)
aarch64_save_callee_saves (DImode, callee_offset, R29_REGNUM,
R30_REGNUM, false);
- insn = emit_insn (gen_add3_insn (hard_frame_pointer_rtx,
- stack_pointer_rtx,
- GEN_INT (callee_offset)));
- RTX_FRAME_RELATED_P (insn) = frame_pointer_needed;
+ aarch64_add_offset (Pmode, hard_frame_pointer_rtx,
+ stack_pointer_rtx, callee_offset, ip1_rtx,
+ frame_pointer_needed);
emit_insn (gen_stack_tie (stack_pointer_rtx, hard_frame_pointer_rtx));
}
@@ -3759,7 +3792,7 @@ aarch64_expand_prologue (void)
callee_adjust != 0 || emit_frame_chain);
aarch64_save_callee_saves (DFmode, callee_offset, V0_REGNUM, V31_REGNUM,
callee_adjust != 0 || emit_frame_chain);
- aarch64_sub_sp (IP1_REGNUM, final_adjust, !frame_pointer_needed);
+ aarch64_sub_sp (ip1_rtx, final_adjust, !frame_pointer_needed);
}
/* Return TRUE if we can use a simple_return insn.
@@ -3815,17 +3848,16 @@ aarch64_expand_epilogue (bool for_sibcall)
/* Restore the stack pointer from the frame pointer if it may not
be the same as the stack pointer. */
+ rtx ip0_rtx = gen_rtx_REG (Pmode, IP0_REGNUM);
+ rtx ip1_rtx = gen_rtx_REG (Pmode, IP1_REGNUM);
if (frame_pointer_needed && (final_adjust || cfun->calls_alloca))
- {
- insn = emit_insn (gen_add3_insn (stack_pointer_rtx,
- hard_frame_pointer_rtx,
- GEN_INT (-callee_offset)));
- /* If writeback is used when restoring callee-saves, the CFA
- is restored on the instruction doing the writeback. */
- RTX_FRAME_RELATED_P (insn) = callee_adjust == 0;
- }
+ /* If writeback is used when restoring callee-saves, the CFA
+ is restored on the instruction doing the writeback. */
+ aarch64_add_offset (Pmode, stack_pointer_rtx,
+ hard_frame_pointer_rtx, -callee_offset,
+ ip1_rtx, callee_adjust == 0);
else
- aarch64_add_sp (IP1_REGNUM, final_adjust, df_regs_ever_live_p (IP1_REGNUM));
+ aarch64_add_sp (ip1_rtx, final_adjust, df_regs_ever_live_p (IP1_REGNUM));
aarch64_restore_callee_saves (DImode, callee_offset, R0_REGNUM, R30_REGNUM,
callee_adjust != 0, &cfi_ops);
@@ -3848,7 +3880,7 @@ aarch64_expand_epilogue (bool for_sibcall)
cfi_ops = NULL;
}
- aarch64_add_sp (IP0_REGNUM, initial_adjust, df_regs_ever_live_p (IP0_REGNUM));
+ aarch64_add_sp (ip0_rtx, initial_adjust, df_regs_ever_live_p (IP0_REGNUM));
if (cfi_ops)
{
@@ -3953,16 +3985,16 @@ aarch64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
reload_completed = 1;
emit_note (NOTE_INSN_PROLOGUE_END);
+ this_rtx = gen_rtx_REG (Pmode, this_regno);
+ temp0 = gen_rtx_REG (Pmode, IP0_REGNUM);
+ temp1 = gen_rtx_REG (Pmode, IP1_REGNUM);
+
if (vcall_offset == 0)
- aarch64_add_constant (Pmode, this_regno, IP1_REGNUM, delta);
+ aarch64_add_offset (Pmode, this_rtx, this_rtx, delta, temp1, false);
else
{
gcc_assert ((vcall_offset & (POINTER_BYTES - 1)) == 0);
- this_rtx = gen_rtx_REG (Pmode, this_regno);
- temp0 = gen_rtx_REG (Pmode, IP0_REGNUM);
- temp1 = gen_rtx_REG (Pmode, IP1_REGNUM);
-
addr = this_rtx;
if (delta != 0)
{
@@ -3970,7 +4002,8 @@ aarch64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
addr = gen_rtx_PRE_MODIFY (Pmode, this_rtx,
plus_constant (Pmode, this_rtx, delta));
else
- aarch64_add_constant (Pmode, this_regno, IP1_REGNUM, delta);
+ aarch64_add_offset (Pmode, this_rtx, this_rtx, delta, temp1,
+ false);
}
if (Pmode == ptr_mode)
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index d54bc9f..afd7327 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,9 @@
+2018-01-11 Richard Sandiford <richard.sandiford@linaro.org>
+ Alan Hayward <alan.hayward@arm.com>
+ David Sherwood <david.sherwood@arm.com>
+
+ * gcc.target/aarch64/pr70044.c: Allow "mov x29, sp" too.
+
2018-01-11 Tamar Christina <tamar.christina@arm.com>
* gcc.target/aarch64/advsimd-intrinsics/vdot-compile-2.c: New.
diff --git a/gcc/testsuite/gcc.target/aarch64/pr70044.c b/gcc/testsuite/gcc.target/aarch64/pr70044.c
index 1a84941..6080a07 100644
--- a/gcc/testsuite/gcc.target/aarch64/pr70044.c
+++ b/gcc/testsuite/gcc.target/aarch64/pr70044.c
@@ -11,4 +11,4 @@ main (int argc, char **argv)
}
/* Check that the frame pointer really is created. */
-/* { dg-final { scan-lto-assembler "add x29, sp," } } */
+/* { dg-final { scan-lto-assembler "(mov|add) x29, sp" } } */