aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author wanglei <wanglei@loongson.cn> 2023-10-19 09:20:27 +0800
committer Tobias Hieta <tobias@hieta.se> 2023-10-27 14:48:09 +0200
commit 4b7f4152a8d166166c6756f933d7dcaa82c0e55e (patch)
tree 3ea32ce0492dd237c5d22fb37c8c7953883819fd
parent fb62a201a199663dcaf6511bd7a453ff669438e4 (diff)
download llvm-4b7f4152a8d166166c6756f933d7dcaa82c0e55e.zip
llvm-4b7f4152a8d166166c6756f933d7dcaa82c0e55e.tar.gz
llvm-4b7f4152a8d166166c6756f933d7dcaa82c0e55e.tar.bz2
[LoongArch] Implement COPY instruction between CFRs (#69300)
With this patch, all CFRs can be used for register allocation. (cherry picked from commit 271087e3a0875672b26c185a28b3552d5600d2fb)
-rw-r--r-- llvm/lib/Target/LoongArch/LoongArch.h | 2
-rw-r--r-- llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp | 121
-rw-r--r-- llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td | 17
-rw-r--r-- llvm/lib/Target/LoongArch/LoongArchFloatInstrFormats.td | 12
-rw-r--r-- llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp | 6
-rw-r--r-- llvm/lib/Target/LoongArch/LoongArchRegisterInfo.cpp | 7
-rw-r--r-- llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp | 1
-rw-r--r-- llvm/test/CodeGen/LoongArch/O0-pipeline.ll | 1
-rw-r--r-- llvm/test/CodeGen/LoongArch/cfr-copy.mir | 34
-rw-r--r-- llvm/test/CodeGen/LoongArch/cfr-pseudo-copy.mir | 26
-rw-r--r-- llvm/test/CodeGen/LoongArch/inline-asm-clobbers-fcc.mir | 13
-rw-r--r-- llvm/test/CodeGen/LoongArch/opt-pipeline.ll | 1
12 files changed, 227 insertions, 14 deletions
diff --git a/llvm/lib/Target/LoongArch/LoongArch.h b/llvm/lib/Target/LoongArch/LoongArch.h
index 05f4ac8..09ca089 100644
--- a/llvm/lib/Target/LoongArch/LoongArch.h
+++ b/llvm/lib/Target/LoongArch/LoongArch.h
@@ -36,9 +36,11 @@ bool lowerLoongArchMachineOperandToMCOperand(const MachineOperand &MO,
FunctionPass *createLoongArchExpandAtomicPseudoPass();
FunctionPass *createLoongArchISelDag(LoongArchTargetMachine &TM);
FunctionPass *createLoongArchPreRAExpandPseudoPass();
+FunctionPass *createLoongArchExpandPseudoPass();
void initializeLoongArchDAGToDAGISelPass(PassRegistry &);
void initializeLoongArchExpandAtomicPseudoPass(PassRegistry &);
void initializeLoongArchPreRAExpandPseudoPass(PassRegistry &);
+void initializeLoongArchExpandPseudoPass(PassRegistry &);
} // end namespace llvm
#endif // LLVM_LIB_TARGET_LOONGARCH_LOONGARCH_H
diff --git a/llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp b/llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp
index dd0b2cf..72c1f1c 100644
--- a/llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp
@@ -29,6 +29,8 @@ using namespace llvm;
#define LOONGARCH_PRERA_EXPAND_PSEUDO_NAME \
"LoongArch Pre-RA pseudo instruction expansion pass"
+#define LOONGARCH_EXPAND_PSEUDO_NAME \
+ "LoongArch pseudo instruction expansion pass"
namespace {
@@ -513,15 +515,134 @@ bool LoongArchPreRAExpandPseudo::expandFunctionCALL(
return true;
}
+class LoongArchExpandPseudo : public MachineFunctionPass {
+public:
+ const LoongArchInstrInfo *TII;
+ static char ID;
+
+ LoongArchExpandPseudo() : MachineFunctionPass(ID) {
+ initializeLoongArchExpandPseudoPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ StringRef getPassName() const override {
+ return LOONGARCH_EXPAND_PSEUDO_NAME;
+ }
+
+private:
+ bool expandMBB(MachineBasicBlock &MBB);
+ bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+ MachineBasicBlock::iterator &NextMBBI);
+ bool expandCopyCFR(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+ MachineBasicBlock::iterator &NextMBBI);
+};
+
+char LoongArchExpandPseudo::ID = 0;
+
+bool LoongArchExpandPseudo::runOnMachineFunction(MachineFunction &MF) {
+ TII =
+ static_cast<const LoongArchInstrInfo *>(MF.getSubtarget().getInstrInfo());
+
+ bool Modified = false;
+ for (auto &MBB : MF)
+ Modified |= expandMBB(MBB);
+
+ return Modified;
+}
+
+bool LoongArchExpandPseudo::expandMBB(MachineBasicBlock &MBB) {
+ bool Modified = false;
+
+ MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
+ while (MBBI != E) {
+ MachineBasicBlock::iterator NMBBI = std::next(MBBI);
+ Modified |= expandMI(MBB, MBBI, NMBBI);
+ MBBI = NMBBI;
+ }
+
+ return Modified;
+}
+
+bool LoongArchExpandPseudo::expandMI(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ MachineBasicBlock::iterator &NextMBBI) {
+ switch (MBBI->getOpcode()) {
+ case LoongArch::PseudoCopyCFR:
+ return expandCopyCFR(MBB, MBBI, NextMBBI);
+ }
+
+ return false;
+}
+
+bool LoongArchExpandPseudo::expandCopyCFR(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+ MachineBasicBlock::iterator &NextMBBI) {
+ MachineFunction *MF = MBB.getParent();
+ MachineInstr &MI = *MBBI;
+ DebugLoc DL = MI.getDebugLoc();
+
+ // Expand:
+ // MBB:
+ // fcmp.caf.s $dst, $fa0, $fa0 # set $dst 0(false)
+ // bceqz $src, SinkBB
+ // FalseBB:
+ // fcmp.cueq.s $dst, $fa0, $fa0 # set $dst 1(true)
+ // SinkBB:
+ // fallthrough
+
+ const BasicBlock *LLVM_BB = MBB.getBasicBlock();
+ auto *FalseBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ auto *SinkBB = MF->CreateMachineBasicBlock(LLVM_BB);
+
+ MF->insert(++MBB.getIterator(), FalseBB);
+ MF->insert(++FalseBB->getIterator(), SinkBB);
+
+ Register DestReg = MI.getOperand(0).getReg();
+ Register SrcReg = MI.getOperand(1).getReg();
+ // DestReg = 0
+ BuildMI(MBB, MBBI, DL, TII->get(LoongArch::SET_CFR_FALSE), DestReg);
+ // Insert branch instruction.
+ BuildMI(MBB, MBBI, DL, TII->get(LoongArch::BCEQZ))
+ .addReg(SrcReg)
+ .addMBB(SinkBB);
+ // DestReg = 1
+ BuildMI(FalseBB, DL, TII->get(LoongArch::SET_CFR_TRUE), DestReg);
+
+ FalseBB->addSuccessor(SinkBB);
+
+ SinkBB->splice(SinkBB->end(), &MBB, MI, MBB.end());
+ SinkBB->transferSuccessors(&MBB);
+
+ MBB.addSuccessor(FalseBB);
+ MBB.addSuccessor(SinkBB);
+
+ NextMBBI = MBB.end();
+ MI.eraseFromParent();
+
+ // Make sure live-ins are correctly attached to the new basic blocks.
+ LivePhysRegs LiveRegs;
+ computeAndAddLiveIns(LiveRegs, *FalseBB);
+ computeAndAddLiveIns(LiveRegs, *SinkBB);
+
+ return true;
+}
+
} // end namespace
INITIALIZE_PASS(LoongArchPreRAExpandPseudo, "loongarch-prera-expand-pseudo",
LOONGARCH_PRERA_EXPAND_PSEUDO_NAME, false, false)
+INITIALIZE_PASS(LoongArchExpandPseudo, "loongarch-expand-pseudo",
+ LOONGARCH_EXPAND_PSEUDO_NAME, false, false)
+
namespace llvm {
FunctionPass *createLoongArchPreRAExpandPseudoPass() {
return new LoongArchPreRAExpandPseudo();
}
+FunctionPass *createLoongArchExpandPseudoPass() {
+ return new LoongArchExpandPseudo();
+}
} // end namespace llvm
diff --git a/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td
index eb49ae3..826db54 100644
--- a/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td
@@ -126,6 +126,23 @@ def PseudoST_CFR : Pseudo<(outs),
let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in
def PseudoLD_CFR : Pseudo<(outs CFR:$ccd),
(ins GPR:$rj, grlenimm:$imm)>;
+
+// SET_CFR_{FALSE,TRUE}
+// These instructions are defined in order to avoid the errors reported by
+// expensive checks when regular instruction patterns are used.
+// fcmp.caf.s $dst, $fa0, $fa0
+def SET_CFR_FALSE : SET_CFR<0x0c100000, "fcmp.caf.s">;
+// fcmp.cueq.s $dst, $fa0, $fa0
+def SET_CFR_TRUE : SET_CFR<0x0c160000, "fcmp.cueq.s">;
+
+// Pseudo instruction for copying CFRs.
+def PseudoCopyCFR : Pseudo<(outs CFR:$dst), (ins CFR:$src)> {
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+ let Size = 12;
+}
+
} // Predicates = [HasBasicF]
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/LoongArch/LoongArchFloatInstrFormats.td b/llvm/lib/Target/LoongArch/LoongArchFloatInstrFormats.td
index f853fca..f66f620 100644
--- a/llvm/lib/Target/LoongArch/LoongArchFloatInstrFormats.td
+++ b/llvm/lib/Target/LoongArch/LoongArchFloatInstrFormats.td
@@ -218,3 +218,15 @@ class FP_STORE_2RI12<bits<32> op, RegisterClass rc = FPR32>
: FPFmt2RI12<op, (outs), (ins rc:$fd, GPR:$rj, simm12:$imm12),
"$fd, $rj, $imm12">;
} // hasSideEffects = 0, mayLoad = 0, mayStore = 1
+
+// This class is used to define `SET_CFR_{FALSE,TRUE}` instructions which are
+// used to expand `PseudoCopyCFR`.
+class SET_CFR<bits<32> op, string opcstr>
+ : FP_CMP<op> {
+ let isCodeGenOnly = 1;
+ let fj = 0; // fa0
+ let fk = 0; // fa0
+ let AsmString = opcstr # "\t$cd, $$fa0, $$fa0";
+ let OutOperandList = (outs CFR:$cd);
+ let InOperandList = (ins);
+}
diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp
index f5e32c4..ef79b8a 100644
--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp
@@ -61,6 +61,12 @@ void LoongArchInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
.addReg(SrcReg, getKillRegState(KillSrc));
return;
}
+ // CFR->CFR copy.
+ if (LoongArch::CFRRegClass.contains(DstReg, SrcReg)) {
+ BuildMI(MBB, MBBI, DL, get(LoongArch::PseudoCopyCFR), DstReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ return;
+ }
// FPR->FPR copies.
unsigned Opc;
diff --git a/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.cpp b/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.cpp
index 4037c4d..257b947 100644
--- a/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.cpp
@@ -98,13 +98,6 @@ LoongArchRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
if (TFI->hasBP(MF))
markSuperRegs(Reserved, LoongArchABI::getBPReg()); // bp
- // FIXME: To avoid generating COPY instructions between CFRs, only use $fcc0.
- // This is required to work around the fact that COPY instruction between CFRs
- // is not provided in LoongArch.
- if (MF.getSubtarget<LoongArchSubtarget>().hasBasicF())
- for (size_t Reg = LoongArch::FCC1; Reg <= LoongArch::FCC7; ++Reg)
- markSuperRegs(Reserved, Reg);
-
assert(checkAllSuperRegsMarked(Reserved));
return Reserved;
}
diff --git a/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp b/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp
index 46e4a06..d0a4e93 100644
--- a/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp
@@ -180,6 +180,7 @@ LoongArchTargetMachine::getTargetTransformInfo(const Function &F) const {
void LoongArchPassConfig::addPreEmitPass() { addPass(&BranchRelaxationPassID); }
void LoongArchPassConfig::addPreEmitPass2() {
+ addPass(createLoongArchExpandPseudoPass());
// Schedule the expansion of AtomicPseudos at the last possible moment,
// avoiding the possibility for other passes to break the requirements for
// forward progress in the LL/SC block.
diff --git a/llvm/test/CodeGen/LoongArch/O0-pipeline.ll b/llvm/test/CodeGen/LoongArch/O0-pipeline.ll
index 327e461..84d235d 100644
--- a/llvm/test/CodeGen/LoongArch/O0-pipeline.ll
+++ b/llvm/test/CodeGen/LoongArch/O0-pipeline.ll
@@ -69,6 +69,7 @@
; CHECK-NEXT: Lazy Machine Block Frequency Analysis
; CHECK-NEXT: Machine Optimization Remark Emitter
; CHECK-NEXT: Stack Frame Layout Analysis
+; CHECK-NEXT: LoongArch pseudo instruction expansion pass
; CHECK-NEXT: LoongArch atomic pseudo instruction expansion pass
; CHECK-NEXT: Lazy Machine Block Frequency Analysis
; CHECK-NEXT: Machine Optimization Remark Emitter
diff --git a/llvm/test/CodeGen/LoongArch/cfr-copy.mir b/llvm/test/CodeGen/LoongArch/cfr-copy.mir
new file mode 100644
index 0000000..4224c99
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/cfr-copy.mir
@@ -0,0 +1,34 @@
+# NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
+# RUN: llc --mtriple=loongarch64 --mattr=+d %s -o - | FileCheck %s
+
+## Check the expansion of the PseudoCopyCFR instruction.
+
+--- |
+ target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128"
+ target triple = "loongarch64"
+
+ define void @test() {
+ ; CHECK-LABEL: test:
+ ; CHECK: # %bb.0:
+ ; CHECK-NEXT: fcmp.caf.s $fcc1, $fa0, $fa0
+ ; CHECK-NEXT: bceqz $fcc0, .LBB0_2
+ ; CHECK-NEXT: # %bb.1:
+ ; CHECK-NEXT: fcmp.cueq.s $fcc1, $fa0, $fa0
+ ; CHECK-NEXT: .LBB0_2:
+ ; CHECK-NEXT: movcf2gr $a0, $fcc1
+ ; CHECK-NEXT: ret
+ ret void
+ }
+...
+---
+name: test
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $fcc0
+
+ $fcc1 = COPY $fcc0
+ $r4 = COPY $fcc1
+ PseudoRET implicit killed $r4
+
+...
diff --git a/llvm/test/CodeGen/LoongArch/cfr-pseudo-copy.mir b/llvm/test/CodeGen/LoongArch/cfr-pseudo-copy.mir
new file mode 100644
index 0000000..c5a6da7
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/cfr-pseudo-copy.mir
@@ -0,0 +1,26 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc --mtriple=loongarch64 --mattr=+d --stop-after=postrapseudos %s \
+# RUN: -o - | FileCheck %s
+
+## Check the COPY instruction between CFRs.
+## A pseudo (PseudoCopyCFR) is generated after the postrapseudos pass.
+
+...
+---
+name: test
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $fcc0
+
+ ; CHECK-LABEL: name: test
+ ; CHECK: liveins: $fcc0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $fcc1 = PseudoCopyCFR $fcc0
+ ; CHECK-NEXT: $r4 = MOVCF2GR killed $fcc1
+ ; CHECK-NEXT: PseudoRET implicit killed $r4
+ $fcc1 = COPY $fcc0
+ $r4 = COPY $fcc1
+ PseudoRET implicit killed $r4
+
+...
diff --git a/llvm/test/CodeGen/LoongArch/inline-asm-clobbers-fcc.mir b/llvm/test/CodeGen/LoongArch/inline-asm-clobbers-fcc.mir
index fa5fccb..18dbc5c 100644
--- a/llvm/test/CodeGen/LoongArch/inline-asm-clobbers-fcc.mir
+++ b/llvm/test/CodeGen/LoongArch/inline-asm-clobbers-fcc.mir
@@ -1,5 +1,6 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc --mtriple=loongarch64 --mattr=+d --run-pass=greedy %s -o - | FileCheck %s
+# RUN: llc --mtriple=loongarch64 --mattr=+d --regalloc=fast \
+# RUN: --stop-before=postra-machine-sink %s -o - | FileCheck %s
## Check that fcc register clobbered by inlineasm is correctly saved by examining
## a pair of pseudos (PseudoST_CFR and PseudoLD_CFR) are generated before and
@@ -15,13 +16,11 @@ body: |
; CHECK-LABEL: name: test
; CHECK: liveins: $f0_64, $f1_64
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr64 = COPY $f1_64
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr64 = COPY $f0_64
- ; CHECK-NEXT: [[FCMP_CLT_D:%[0-9]+]]:cfr = FCMP_CLT_D [[COPY]], [[COPY1]]
- ; CHECK-NEXT: PseudoST_CFR [[FCMP_CLT_D]], %stack.0, 0 :: (store (s64) into %stack.0)
+ ; CHECK-NEXT: renamable $fcc0 = FCMP_CLT_D renamable $f1_64, renamable $f0_64
+ ; CHECK-NEXT: PseudoST_CFR $fcc0, %stack.0, 0 :: (store (s64) into %stack.0)
; CHECK-NEXT: INLINEASM &nop, 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def dead early-clobber $fcc0
- ; CHECK-NEXT: [[PseudoLD_CFR:%[0-9]+]]:cfr = PseudoLD_CFR %stack.0, 0 :: (load (s64) from %stack.0)
- ; CHECK-NEXT: $r4 = COPY [[PseudoLD_CFR]]
+ ; CHECK-NEXT: $fcc0 = PseudoLD_CFR %stack.0, 0 :: (load (s64) from %stack.0)
+ ; CHECK-NEXT: $r4 = COPY killed renamable $fcc0
; CHECK-NEXT: PseudoRET implicit killed $r4
%1:fpr64 = COPY $f1_64
%0:fpr64 = COPY $f0_64
diff --git a/llvm/test/CodeGen/LoongArch/opt-pipeline.ll b/llvm/test/CodeGen/LoongArch/opt-pipeline.ll
index 8b1d635b..3134d94 100644
--- a/llvm/test/CodeGen/LoongArch/opt-pipeline.ll
+++ b/llvm/test/CodeGen/LoongArch/opt-pipeline.ll
@@ -165,6 +165,7 @@
; CHECK-NEXT: Lazy Machine Block Frequency Analysis
; CHECK-NEXT: Machine Optimization Remark Emitter
; CHECK-NEXT: Stack Frame Layout Analysis
+; CHECK-NEXT: LoongArch pseudo instruction expansion pass
; CHECK-NEXT: LoongArch atomic pseudo instruction expansion pass
; CHECK-NEXT: Lazy Machine Block Frequency Analysis
; CHECK-NEXT: Machine Optimization Remark Emitter