aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRichard Earnshaw <rearnsha@arm.com>2025-02-07 13:55:58 +0000
committerRichard Earnshaw <rearnsha@arm.com>2025-02-07 17:19:58 +0000
commit0b6453d5575d4aa773a1fe25060123bc6f539891 (patch)
treedf520db2760ae2a6712e3264a35c3446dfe3ef86
parent7bee37094c502de7c191ee5f2f9ce72789d27c99 (diff)
downloadgcc-0b6453d5575d4aa773a1fe25060123bc6f539891.zip
gcc-0b6453d5575d4aa773a1fe25060123bc6f539891.tar.gz
gcc-0b6453d5575d4aa773a1fe25060123bc6f539891.tar.bz2
arm: Prefer POP {lo-reg} over LDR lo-reg, ... for thumb2 [PR118089]
For thumb2, popping a single low register off the stack should prefer
POP over LDR to mirror the behaviour of the PUSH on entry. This saves
a couple of bytes in the resulting image. This is a relatively niche
case as it's rare to push a single low register onto the stack, but
still worth getting right.

Whilst fixing this I've also restructured the code here somewhat to fix
a bug I observed by inspection and to improve the code slightly.

Firstly, the single register case is hoisted above the main loop. This
not only avoids creating some RTL that immediately becomes garbage but
also avoids us needing to check for this case in every iteration of the
main loop body.

Secondly, we iterate over just the non-zero bits in the reg mask rather
than every bit and then checking if there's work to do for that bit.

Finally, when emitting a pop that also pops SP off the stack we
shouldn't be emitting a stack-adjust CFA note. The new SP value comes
from the popped value, not from an adjustment of the previous SP value.

gcc:
	PR target/118089
	* config/arm/arm.cc (arm_emit_multi_reg_pop): Restructure.
	Don't emit LDR on thumb2 when POP can be used for smaller code.
	Don't add a CFA adjust note when SP is popped off the stack.

gcc/testsuite:
	PR target/118089
	* gcc.target/arm/thumb2-pop-loreg.c: New test.
-rw-r--r--  gcc/config/arm/arm.cc  99
-rw-r--r--  gcc/testsuite/gcc.target/arm/thumb2-pop-loreg.c  18
2 files changed, 75 insertions, 42 deletions
diff --git a/gcc/config/arm/arm.cc b/gcc/config/arm/arm.cc
index 5034015..a95ddf8 100644
--- a/gcc/config/arm/arm.cc
+++ b/gcc/config/arm/arm.cc
@@ -22543,24 +22543,50 @@ static void
arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
{
int num_regs = 0;
- int i, j;
rtx par;
rtx dwarf = NULL_RTX;
rtx tmp, reg;
bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
int offset_adj;
int emit_update;
+ unsigned long reg_bits;
offset_adj = return_in_pc ? 1 : 0;
- for (i = 0; i <= LAST_ARM_REGNUM; i++)
- if (saved_regs_mask & (1 << i))
- num_regs++;
+ for (reg_bits = saved_regs_mask; reg_bits;
+ reg_bits &= ~(reg_bits & -reg_bits))
+ num_regs++;
gcc_assert (num_regs && num_regs <= 16);
/* If SP is in reglist, then we don't emit SP update insn. */
emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;
+ /* If popping just one register, use LDR reg, [SP], #4, unless
+ we're generating Thumb code and reg is a low reg. */
+ if (num_regs == 1
+ && emit_update
+ && !return_in_pc
+ && (TARGET_ARM
+ /* For Thumb we want to use POP for a single low register. */
+ || (saved_regs_mask & ~0xff)))
+ {
+ int i = exact_log2 (saved_regs_mask);
+
+ rtx dwarf_reg = reg = gen_rtx_REG (SImode, i);
+ if (arm_current_function_pac_enabled_p () && i == IP_REGNUM)
+ dwarf_reg = gen_rtx_REG (SImode, RA_AUTH_CODE);
+ /* Emit single load with writeback. */
+ tmp = gen_frame_mem (SImode,
+ gen_rtx_POST_INC (Pmode,
+ stack_pointer_rtx));
+ tmp = emit_insn (gen_rtx_SET (reg, tmp));
+ REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, dwarf_reg,
+ dwarf);
+ arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD,
+ stack_pointer_rtx, stack_pointer_rtx);
+ return;
+ }
+
/* The parallel needs to hold num_regs SETs
and one SET for the stack update. */
par = gen_rtx_PARALLEL (VOIDmode,
@@ -22582,50 +22608,39 @@ arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
}
/* Now restore every reg, which may include PC. */
- for (j = 0, i = 0; j < num_regs; i++)
- if (saved_regs_mask & (1 << i))
- {
- rtx dwarf_reg = reg = gen_rtx_REG (SImode, i);
- if (arm_current_function_pac_enabled_p () && i == IP_REGNUM)
- dwarf_reg = gen_rtx_REG (SImode, RA_AUTH_CODE);
- if ((num_regs == 1) && emit_update && !return_in_pc)
- {
- /* Emit single load with writeback. */
- tmp = gen_frame_mem (SImode,
- gen_rtx_POST_INC (Pmode,
- stack_pointer_rtx));
- tmp = emit_insn (gen_rtx_SET (reg, tmp));
- REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, dwarf_reg,
- dwarf);
- arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD,
- stack_pointer_rtx, stack_pointer_rtx);
- return;
- }
-
- tmp = gen_rtx_SET (reg,
- gen_frame_mem
- (SImode,
- plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
- RTX_FRAME_RELATED_P (tmp) = 1;
- XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;
-
- /* We need to maintain a sequence for DWARF info too. As dwarf info
- should not have PC, skip PC. */
- if (i != PC_REGNUM)
- dwarf = alloc_reg_note (REG_CFA_RESTORE, dwarf_reg, dwarf);
+ int j = 0;
+ int elt = emit_update + offset_adj;
+ for (reg_bits = saved_regs_mask; reg_bits;
+ reg_bits &= ~(reg_bits & -reg_bits))
+ {
+ int i = exact_log2 (reg_bits & -reg_bits);
+ rtx dwarf_reg = reg = gen_rtx_REG (SImode, i);
- j++;
- }
+ if (i == IP_REGNUM && arm_current_function_pac_enabled_p ())
+ dwarf_reg = gen_rtx_REG (SImode, RA_AUTH_CODE);
+ tmp = gen_rtx_SET (reg,
+ gen_frame_mem
+ (SImode,
+ plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
+ RTX_FRAME_RELATED_P (tmp) = 1;
+ XVECEXP (par, 0, elt) = tmp;
- if (return_in_pc)
- par = emit_jump_insn (par);
- else
- par = emit_insn (par);
+ /* We need to maintain a sequence for DWARF info too. As dwarf info
+ should not have PC, skip PC. */
+ if (i != PC_REGNUM)
+ dwarf = alloc_reg_note (REG_CFA_RESTORE, dwarf_reg, dwarf);
+ j++;
+ elt++;
+ }
+ par = return_in_pc ? emit_jump_insn (par) : emit_insn (par);
REG_NOTES (par) = dwarf;
- if (!return_in_pc)
+
+ if (!return_in_pc && emit_update)
arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
stack_pointer_rtx, stack_pointer_rtx);
+ else if (!return_in_pc)
+ RTX_FRAME_RELATED_P (par) = 1;
}
/* Generate and emit an insn pattern that we will recognize as a pop_multi
diff --git a/gcc/testsuite/gcc.target/arm/thumb2-pop-loreg.c b/gcc/testsuite/gcc.target/arm/thumb2-pop-loreg.c
new file mode 100644
index 0000000..6db66b8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/thumb2-pop-loreg.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_thumb2_ok } */
+/* { dg-options "-Os" } */
+
+int __attribute__((noinline)) f (void)
+{
+ asm ("");
+}
+
+int g (void)
+{
+ char buf[32];
+ register char *x asm ("r4") = buf;
+ asm volatile ("" : : "r" (x));
+ return f();
+}
+/* Unstacking a single low register in thumb2 should use POP. */
+/* { dg-final { scan-assembler "pop\t{r4}" } } */