about summary refs log tree commit diff
path: root/llvm/lib
diff options
context:
space:
mode:
authorJohn Brawn <john.brawn@arm.com>2023-07-31 16:55:49 +0100
committerJohn Brawn <john.brawn@arm.com>2023-08-07 17:53:32 +0100
commitf83ab2b3beb0303e0775ea187d9575faa4a048eb (patch)
tree3089fd787140a6915d9e08f62f631cfeb92a172e /llvm/lib
parenta749b32a11055aa680db7cdcb68687345132abe5 (diff)
downloadllvm-f83ab2b3beb0303e0775ea187d9575faa4a048eb.zip
llvm-f83ab2b3beb0303e0775ea187d9575faa4a048eb.tar.gz
llvm-f83ab2b3beb0303e0775ea187d9575faa4a048eb.tar.bz2
[ARM] Improve generation of thumb stack accesses
Currently when a stack access is out of range of an sp-relative ldr or str then we jump straight to generating the offset with a literal pool load or mov32 pseudo-instruction. This patch improves that in two ways: * If the offset is within range of sp-relative add plus an ldr then use that. * When we use the mov32 pseudo-instruction, if putting part of the offset into the ldr will simplify the expansion of the mov32 then do so. Differential Revision: https://reviews.llvm.org/D156875
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/Target/ARM/ThumbRegisterInfo.cpp47
1 file changed, 37 insertions, 10 deletions
diff --git a/llvm/lib/Target/ARM/ThumbRegisterInfo.cpp b/llvm/lib/Target/ARM/ThumbRegisterInfo.cpp
index 0c010ed..d8eb6c3 100644
--- a/llvm/lib/Target/ARM/ThumbRegisterInfo.cpp
+++ b/llvm/lib/Target/ARM/ThumbRegisterInfo.cpp
@@ -128,6 +128,19 @@ static void emitThumbRegPlusImmInReg(
const ARMBaseRegisterInfo &MRI, unsigned MIFlags = MachineInstr::NoFlags) {
MachineFunction &MF = *MBB.getParent();
const ARMSubtarget &ST = MF.getSubtarget<ARMSubtarget>();
+
+ // Use a single sp-relative add if the immediate is small enough.
+ if (BaseReg == ARM::SP &&
+ (DestReg.isVirtual() || isARMLowRegister(DestReg)) && NumBytes >= 0 &&
+ NumBytes <= 1020 && (NumBytes % 4) == 0) {
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tADDrSPi), DestReg)
+ .addReg(ARM::SP)
+ .addImm(NumBytes / 4)
+ .add(predOps(ARMCC::AL))
+ .setMIFlags(MIFlags);
+ return;
+ }
+
bool isHigh = !isARMLowRegister(DestReg) ||
(BaseReg != 0 && !isARMLowRegister(BaseReg));
bool isSub = false;
@@ -422,19 +435,33 @@ bool ThumbRegisterInfo::rewriteFrameIndex(MachineBasicBlock::iterator II,
return true;
}
+ // The offset doesn't fit, but we may be able to put some of the offset into
+ // the ldr to simplify the generation of the rest of it.
NumBits = 5;
Mask = (1 << NumBits) - 1;
-
- // If this is a thumb spill / restore, we will be using a constpool load to
- // materialize the offset.
- if (Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi) {
- ImmOp.ChangeToImmediate(0);
- } else {
- // Otherwise, it didn't fit. Pull in what we can to simplify the immed.
- ImmedOffset = ImmedOffset & Mask;
- ImmOp.ChangeToImmediate(ImmedOffset);
- Offset &= ~(Mask * Scale);
+ InstrOffs = 0;
+ auto &ST = MF.getSubtarget<ARMSubtarget>();
+ // If using the maximum ldr offset will put the rest into the range of a
+ // single sp-relative add then do so.
+ if (FrameReg == ARM::SP && Offset - (Mask * Scale) <= 1020) {
+ InstrOffs = Mask;
+ } else if (ST.genExecuteOnly()) {
+ // With execute-only the offset is generated either with movw+movt or an
+ // add+lsl sequence. If subtracting an offset will make the top half zero
+ // then that saves a movt or lsl+add. Otherwise if we don't have movw then
+ // we may be able to subtract a value such that it makes the bottom byte
+ // zero, saving an add.
+ unsigned BottomBits = (Offset / Scale) & Mask;
+ bool CanMakeBottomByteZero = ((Offset - BottomBits * Scale) & 0xff) == 0;
+ bool TopHalfZero = (Offset & 0xffff0000) == 0;
+ bool CanMakeTopHalfZero = ((Offset - Mask * Scale) & 0xffff0000) == 0;
+ if (!TopHalfZero && CanMakeTopHalfZero)
+ InstrOffs = Mask;
+ else if (!ST.useMovt() && CanMakeBottomByteZero)
+ InstrOffs = BottomBits;
}
+ ImmOp.ChangeToImmediate(InstrOffs);
+ Offset -= InstrOffs * Scale;
}
return Offset == 0;