diff options
author | John Brawn <john.brawn@arm.com> | 2023-07-31 16:55:49 +0100 |
---|---|---|
committer | John Brawn <john.brawn@arm.com> | 2023-08-07 17:53:32 +0100 |
commit | f83ab2b3beb0303e0775ea187d9575faa4a048eb (patch) | |
tree | 3089fd787140a6915d9e08f62f631cfeb92a172e /llvm/lib | |
parent | a749b32a11055aa680db7cdcb68687345132abe5 (diff) | |
download | llvm-f83ab2b3beb0303e0775ea187d9575faa4a048eb.zip llvm-f83ab2b3beb0303e0775ea187d9575faa4a048eb.tar.gz llvm-f83ab2b3beb0303e0775ea187d9575faa4a048eb.tar.bz2 |
[ARM] Improve generation of thumb stack accesses
Currently, when a stack access is out of range of an sp-relative ldr or
str, we jump straight to generating the offset with a literal pool
load or mov32 pseudo-instruction. This patch improves that in two
ways:
* If the offset is within range of an sp-relative add plus an ldr then
use that.
* When we use the mov32 pseudo-instruction, if putting part of the
offset into the ldr will simplify the expansion of the mov32 then
do so.
Differential Revision: https://reviews.llvm.org/D156875
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Target/ARM/ThumbRegisterInfo.cpp | 47 |
1 files changed, 37 insertions, 10 deletions
diff --git a/llvm/lib/Target/ARM/ThumbRegisterInfo.cpp b/llvm/lib/Target/ARM/ThumbRegisterInfo.cpp index 0c010ed..d8eb6c3 100644 --- a/llvm/lib/Target/ARM/ThumbRegisterInfo.cpp +++ b/llvm/lib/Target/ARM/ThumbRegisterInfo.cpp @@ -128,6 +128,19 @@ static void emitThumbRegPlusImmInReg( const ARMBaseRegisterInfo &MRI, unsigned MIFlags = MachineInstr::NoFlags) { MachineFunction &MF = *MBB.getParent(); const ARMSubtarget &ST = MF.getSubtarget<ARMSubtarget>(); + + // Use a single sp-relative add if the immediate is small enough. + if (BaseReg == ARM::SP && + (DestReg.isVirtual() || isARMLowRegister(DestReg)) && NumBytes >= 0 && + NumBytes <= 1020 && (NumBytes % 4) == 0) { + BuildMI(MBB, MBBI, dl, TII.get(ARM::tADDrSPi), DestReg) + .addReg(ARM::SP) + .addImm(NumBytes / 4) + .add(predOps(ARMCC::AL)) + .setMIFlags(MIFlags); + return; + } + bool isHigh = !isARMLowRegister(DestReg) || (BaseReg != 0 && !isARMLowRegister(BaseReg)); bool isSub = false; @@ -422,19 +435,33 @@ bool ThumbRegisterInfo::rewriteFrameIndex(MachineBasicBlock::iterator II, return true; } + // The offset doesn't fit, but we may be able to put some of the offset into + // the ldr to simplify the generation of the rest of it. NumBits = 5; Mask = (1 << NumBits) - 1; - - // If this is a thumb spill / restore, we will be using a constpool load to - // materialize the offset. - if (Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi) { - ImmOp.ChangeToImmediate(0); - } else { - // Otherwise, it didn't fit. Pull in what we can to simplify the immed. - ImmedOffset = ImmedOffset & Mask; - ImmOp.ChangeToImmediate(ImmedOffset); - Offset &= ~(Mask * Scale); + InstrOffs = 0; + auto &ST = MF.getSubtarget<ARMSubtarget>(); + // If using the maximum ldr offset will put the rest into the range of a + // single sp-relative add then do so. 
+ if (FrameReg == ARM::SP && Offset - (Mask * Scale) <= 1020) { + InstrOffs = Mask; + } else if (ST.genExecuteOnly()) { + // With execute-only the offset is generated either with movw+movt or an + // add+lsl sequence. If subtracting an offset will make the top half zero + // then that saves a movt or lsl+add. Otherwise if we don't have movw then + // we may be able to subtract a value such that it makes the bottom byte + // zero, saving an add. + unsigned BottomBits = (Offset / Scale) & Mask; + bool CanMakeBottomByteZero = ((Offset - BottomBits * Scale) & 0xff) == 0; + bool TopHalfZero = (Offset & 0xffff0000) == 0; + bool CanMakeTopHalfZero = ((Offset - Mask * Scale) & 0xffff0000) == 0; + if (!TopHalfZero && CanMakeTopHalfZero) + InstrOffs = Mask; + else if (!ST.useMovt() && CanMakeBottomByteZero) + InstrOffs = BottomBits; } + ImmOp.ChangeToImmediate(InstrOffs); + Offset -= InstrOffs * Scale; } return Offset == 0; |