diff options
author | AtariDreams <gfunni234@gmail.com> | 2024-04-05 05:18:22 -0400 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-04-05 10:18:22 +0100 |
commit | c5d000b1a84cfee99db157c6819e0a9c518f8ac8 (patch) | |
tree | 222ebfbdba2148abb72fcb38c547c2de01facf30 /llvm/lib | |
parent | 5ed60ffd790a2d1090dca9c685d6ccf09b3f08da (diff) | |
download | llvm-c5d000b1a84cfee99db157c6819e0a9c518f8ac8.zip llvm-c5d000b1a84cfee99db157c6819e0a9c518f8ac8.tar.gz llvm-c5d000b1a84cfee99db157c6819e0a9c518f8ac8.tar.bz2 |
[Thumb] Resolve FIXME: Use 'mov hi, $src; mov $dst, hi' (#81908)
Consider the following:
ldr r0, [r4]
ldr r7, [r0, #4]
cmp r7, r3
bhi .LBB0_6
cmp r0, r2
push {r0}
pop {r4}
bne .LBB0_3
movs r0, r6
pop {r4, r5, r6, r7}
pop {r1}
bx r1
The above is a snippet of the Thumb1 code that clang currently generates
for the K&R malloc function.
The value saved by push {r0} is immediately restored into r4 by pop {r4}, i.e. the pair is just a copy of r0 into r4 routed through the stack.
movs r4, r0 would destroy the flags set by cmp right above.
Apart from going through the stack, the compiler has exactly one alternative
in this case: transferring the value through a high register.
However, LLVM does not seem to consider this a valid
approach, even though clobbering an unused high register is free.
This patch resolves the FIXME so that the compiler copies through a high
register (r10, r11, or r12) whenever one of them is free.
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Target/ARM/Thumb1InstrInfo.cpp | 49 |
1 files changed, 42 insertions, 7 deletions
diff --git a/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp b/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp index 85eabdb..5b0b799 100644 --- a/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp +++ b/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp @@ -12,6 +12,8 @@ #include "Thumb1InstrInfo.h" #include "ARMSubtarget.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/CodeGen/LiveRegUnits.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineMemOperand.h" @@ -47,24 +49,57 @@ void Thumb1InstrInfo::copyPhysReg(MachineBasicBlock &MBB, assert(ARM::GPRRegClass.contains(DestReg, SrcReg) && "Thumb1 can only copy GPR registers"); - if (st.hasV6Ops() || ARM::hGPRRegClass.contains(SrcReg) - || !ARM::tGPRRegClass.contains(DestReg)) + if (st.hasV6Ops() || ARM::hGPRRegClass.contains(SrcReg) || + !ARM::tGPRRegClass.contains(DestReg)) BuildMI(MBB, I, DL, get(ARM::tMOVr), DestReg) .addReg(SrcReg, getKillRegState(KillSrc)) .add(predOps(ARMCC::AL)); else { - // FIXME: Can also use 'mov hi, $src; mov $dst, hi', - // with hi as either r10 or r11. - const TargetRegisterInfo *RegInfo = st.getRegisterInfo(); - if (MBB.computeRegisterLiveness(RegInfo, ARM::CPSR, I) - == MachineBasicBlock::LQR_Dead) { + LiveRegUnits UsedRegs(*RegInfo); + UsedRegs.addLiveOuts(MBB); + + auto InstUpToI = MBB.end(); + while (InstUpToI != I) + // The pre-decrement is on purpose here. + // We want to have the liveness right before I. + UsedRegs.stepBackward(*--InstUpToI); + + if (UsedRegs.available(ARM::CPSR)) { BuildMI(MBB, I, DL, get(ARM::tMOVSr), DestReg) .addReg(SrcReg, getKillRegState(KillSrc)) ->addRegisterDead(ARM::CPSR, RegInfo); return; } + // Use high register to move source to destination + // if movs is not an option. 
+ BitVector Allocatable = RegInfo->getAllocatableSet( + MF, RegInfo->getRegClass(ARM::hGPRRegClassID)); + + Register TmpReg = ARM::NoRegister; + // Prefer R12 as it is known to not be preserved anyway + if (UsedRegs.available(ARM::R12) && Allocatable.test(ARM::R12)) { + TmpReg = ARM::R12; + } else { + for (Register Reg : Allocatable.set_bits()) { + if (UsedRegs.available(Reg)) { + TmpReg = Reg; + break; + } + } + } + + if (TmpReg) { + BuildMI(MBB, I, DL, get(ARM::tMOVr), TmpReg) + .addReg(SrcReg, getKillRegState(KillSrc)) + .add(predOps(ARMCC::AL)); + BuildMI(MBB, I, DL, get(ARM::tMOVr), DestReg) + .addReg(TmpReg, getKillRegState(true)) + .add(predOps(ARMCC::AL)); + return; + } + // 'MOV lo, lo' is unpredictable on < v6, so use the stack to do it BuildMI(MBB, I, DL, get(ARM::tPUSH)) .add(predOps(ARMCC::AL)) |