[Thumb] Resolve FIXME: Use 'mov hi, $src; mov $dst, hi' (#81908)

Consider the following:

    ldr r0, [r4]
    ldr r7, [r0, #4]
    cmp r7, r3
    bhi .LBB0_6
    cmp r0, r2
    push {r0}
    pop {r4}
    bne .LBB0_3
    movs r0, r6
    pop {r4, r5, r6, r7}
    pop {r1}
    bx r1

This is a snippet of the Thumb1 code that clang currently generates for the K&R malloc function. The value pushed by push {r0} is immediately popped into r4 by pop {r4}; the pair implements a copy from r0 to r4. A movs r4, r0 cannot be used here because it would destroy the flags set by the cmp right above, which the following bne still needs. The compiler has only one alternative in this case: transferring the value through a high register. However, LLVM currently does not consider this a valid approach, even though clobbering an unused high register is free. This patch addresses the FIXME so that the compiler copies through r10, r11, or r12 whenever one of them is available.
author: AtariDreams <gfunni234@gmail.com> 2024-04-05 05:18:22 -0400
committer: GitHub <noreply@github.com> 2024-04-05 10:18:22 +0100
commit: c5d000b1a84cfee99db157c6819e0a9c518f8ac8 (patch)
tree: 222ebfbdba2148abb72fcb38c547c2de01facf30 /llvm/lib
parent: 5ed60ffd790a2d1090dca9c685d6ccf09b3f08da (diff)
download: llvm-c5d000b1a84cfee99db157c6819e0a9c518f8ac8.zip
llvm-c5d000b1a84cfee99db157c6819e0a9c518f8ac8.tar.gz
llvm-c5d000b1a84cfee99db157c6819e0a9c518f8ac8.tar.bz2
1 files changed, 42 insertions, 7 deletions
diff --git a/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp b/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp
index 85eabdb..5b0b799 100644
--- a/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp
+++ b/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp
@@ -12,6 +12,8 @@
 
 #include "Thumb1InstrInfo.h"
 #include "ARMSubtarget.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/CodeGen/LiveRegUnits.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineMemOperand.h"
@@ -47,24 +49,57 @@ void Thumb1InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
   assert(ARM::GPRRegClass.contains(DestReg, SrcReg) &&
          "Thumb1 can only copy GPR registers");
 
-  if (st.hasV6Ops() || ARM::hGPRRegClass.contains(SrcReg)
-      || !ARM::tGPRRegClass.contains(DestReg))
+  if (st.hasV6Ops() || ARM::hGPRRegClass.contains(SrcReg) ||
+      !ARM::tGPRRegClass.contains(DestReg))
     BuildMI(MBB, I, DL, get(ARM::tMOVr), DestReg)
         .addReg(SrcReg, getKillRegState(KillSrc))
         .add(predOps(ARMCC::AL));
   else {
-    // FIXME: Can also use 'mov hi, $src; mov $dst, hi',
-    // with hi as either r10 or r11.
-
     const TargetRegisterInfo *RegInfo = st.getRegisterInfo();
-    if (MBB.computeRegisterLiveness(RegInfo, ARM::CPSR, I)
-        == MachineBasicBlock::LQR_Dead) {
+    LiveRegUnits UsedRegs(*RegInfo);
+    UsedRegs.addLiveOuts(MBB);
+
+    auto InstUpToI = MBB.end();
+    while (InstUpToI != I)
+      // The pre-decrement is on purpose here.
+      // We want to have the liveness right before I.
+      UsedRegs.stepBackward(*--InstUpToI);
+
+    if (UsedRegs.available(ARM::CPSR)) {
       BuildMI(MBB, I, DL, get(ARM::tMOVSr), DestReg)
           .addReg(SrcReg, getKillRegState(KillSrc))
           ->addRegisterDead(ARM::CPSR, RegInfo);
       return;
     }
 
+    // Use high register to move source to destination
+    // if movs is not an option.
+    BitVector Allocatable = RegInfo->getAllocatableSet(
+        MF, RegInfo->getRegClass(ARM::hGPRRegClassID));
+
+    Register TmpReg = ARM::NoRegister;
+    // Prefer R12 as it is known to not be preserved anyway
+    if (UsedRegs.available(ARM::R12) && Allocatable.test(ARM::R12)) {
+      TmpReg = ARM::R12;
+    } else {
+      for (Register Reg : Allocatable.set_bits()) {
+        if (UsedRegs.available(Reg)) {
+          TmpReg = Reg;
+          break;
+        }
+      }
+    }
+
+    if (TmpReg) {
+      BuildMI(MBB, I, DL, get(ARM::tMOVr), TmpReg)
+          .addReg(SrcReg, getKillRegState(KillSrc))
+          .add(predOps(ARMCC::AL));
+      BuildMI(MBB, I, DL, get(ARM::tMOVr), DestReg)
+          .addReg(TmpReg, getKillRegState(true))
+          .add(predOps(ARMCC::AL));
+      return;
+    }
+
     // 'MOV lo, lo' is unpredictable on < v6, so use the stack to do it
     BuildMI(MBB, I, DL, get(ARM::tPUSH))
         .add(predOps(ARMCC::AL))
author	AtariDreams <gfunni234@gmail.com>	2024-04-05 05:18:22 -0400
committer	GitHub <noreply@github.com>	2024-04-05 10:18:22 +0100
commit	c5d000b1a84cfee99db157c6819e0a9c518f8ac8 (patch)
tree	222ebfbdba2148abb72fcb38c547c2de01facf30 /llvm/lib
parent	5ed60ffd790a2d1090dca9c685d6ccf09b3f08da (diff)
download	llvm-c5d000b1a84cfee99db157c6819e0a9c518f8ac8.zip llvm-c5d000b1a84cfee99db157c6819e0a9c518f8ac8.tar.gz llvm-c5d000b1a84cfee99db157c6819e0a9c518f8ac8.tar.bz2