author     AtariDreams <gfunni234@gmail.com>  2024-04-05 05:18:22 -0400
committer  GitHub <noreply@github.com>  2024-04-05 10:18:22 +0100
commit     c5d000b1a84cfee99db157c6819e0a9c518f8ac8 (patch)
tree       222ebfbdba2148abb72fcb38c547c2de01facf30 /llvm/lib
parent     5ed60ffd790a2d1090dca9c685d6ccf09b3f08da (diff)
[Thumb] Resolve FIXME: Use 'mov hi, $src; mov $dst, hi' (#81908)
Consider the following snippet of the Thumb1 code that clang currently generates for the K&R malloc function:

    ldr  r0, [r4]
    ldr  r7, [r0, #4]
    cmp  r7, r3
    bhi  .LBB0_6
    cmp  r0, r2
    push {r0}
    pop  {r4}
    bne  .LBB0_3
    movs r0, r6
    pop  {r4, r5, r6, r7}
    pop  {r1}
    bx   r1

The value in r0 is copied to r4 with a push {r0} / pop {r4} pair, because the obvious movs r4, r0 would destroy the flags set by the cmp right above. The compiler does have one other option: transferring the value through a high register, which leaves the flags untouched and is free whenever such a register is available. LLVM did not consider this a valid approach. This patch resolves the FIXME so the compiler can route the copy through r10, r11, or r12 when one of them is free.
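For illustration, here is a minimal sketch of what the 'mov hi, $src; mov $dst, hi' pattern looks like for the example above, assuming r12 is free (the patch prefers r12 and falls back to any other allocatable high register):

    cmp  r0, r2          @ flags must survive until the bne below
    mov  r12, r0         @ hi-register mov, does not update the flags
    mov  r4, r12         @ finish the lo-to-lo copy through r12
    bne  .LBB0_3         @ still sees the flags set by cmp

With CPSR live across the copy, the flag-setting movs cannot be used, and on pre-v6 targets a plain lo-to-lo mov is unpredictable, so the two hi-register moves can replace the push/pop pair whenever an allocatable high register is available.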
Diffstat (limited to 'llvm/lib')
-rw-r--r--  llvm/lib/Target/ARM/Thumb1InstrInfo.cpp | 49
1 file changed, 42 insertions(+), 7 deletions(-)
diff --git a/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp b/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp
index 85eabdb..5b0b799 100644
--- a/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp
+++ b/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp
@@ -12,6 +12,8 @@
#include "Thumb1InstrInfo.h"
#include "ARMSubtarget.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/CodeGen/LiveRegUnits.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
@@ -47,24 +49,57 @@ void Thumb1InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
assert(ARM::GPRRegClass.contains(DestReg, SrcReg) &&
"Thumb1 can only copy GPR registers");
- if (st.hasV6Ops() || ARM::hGPRRegClass.contains(SrcReg)
- || !ARM::tGPRRegClass.contains(DestReg))
+ if (st.hasV6Ops() || ARM::hGPRRegClass.contains(SrcReg) ||
+ !ARM::tGPRRegClass.contains(DestReg))
BuildMI(MBB, I, DL, get(ARM::tMOVr), DestReg)
.addReg(SrcReg, getKillRegState(KillSrc))
.add(predOps(ARMCC::AL));
else {
- // FIXME: Can also use 'mov hi, $src; mov $dst, hi',
- // with hi as either r10 or r11.
-
const TargetRegisterInfo *RegInfo = st.getRegisterInfo();
- if (MBB.computeRegisterLiveness(RegInfo, ARM::CPSR, I)
- == MachineBasicBlock::LQR_Dead) {
+ LiveRegUnits UsedRegs(*RegInfo);
+ UsedRegs.addLiveOuts(MBB);
+
+ auto InstUpToI = MBB.end();
+ while (InstUpToI != I)
+ // The pre-decrement is on purpose here.
+ // We want to have the liveness right before I.
+ UsedRegs.stepBackward(*--InstUpToI);
+
+ if (UsedRegs.available(ARM::CPSR)) {
BuildMI(MBB, I, DL, get(ARM::tMOVSr), DestReg)
.addReg(SrcReg, getKillRegState(KillSrc))
->addRegisterDead(ARM::CPSR, RegInfo);
return;
}
+ // Use high register to move source to destination
+ // if movs is not an option.
+ BitVector Allocatable = RegInfo->getAllocatableSet(
+ MF, RegInfo->getRegClass(ARM::hGPRRegClassID));
+
+ Register TmpReg = ARM::NoRegister;
+ // Prefer R12 as it is known to not be preserved anyway
+ if (UsedRegs.available(ARM::R12) && Allocatable.test(ARM::R12)) {
+ TmpReg = ARM::R12;
+ } else {
+ for (Register Reg : Allocatable.set_bits()) {
+ if (UsedRegs.available(Reg)) {
+ TmpReg = Reg;
+ break;
+ }
+ }
+ }
+
+ if (TmpReg) {
+ BuildMI(MBB, I, DL, get(ARM::tMOVr), TmpReg)
+ .addReg(SrcReg, getKillRegState(KillSrc))
+ .add(predOps(ARMCC::AL));
+ BuildMI(MBB, I, DL, get(ARM::tMOVr), DestReg)
+ .addReg(TmpReg, getKillRegState(true))
+ .add(predOps(ARMCC::AL));
+ return;
+ }
+
// 'MOV lo, lo' is unpredictable on < v6, so use the stack to do it
BuildMI(MBB, I, DL, get(ARM::tPUSH))
.add(predOps(ARMCC::AL))