diff options
author | wanglei <wanglei@loongson.cn> | 2023-10-19 09:20:27 +0800 |
---|---|---|
committer | Tobias Hieta <tobias@hieta.se> | 2023-10-27 14:48:09 +0200 |
commit | 4b7f4152a8d166166c6756f933d7dcaa82c0e55e (patch) | |
tree | 3ea32ce0492dd237c5d22fb37c8c7953883819fd | |
parent | fb62a201a199663dcaf6511bd7a453ff669438e4 (diff) | |
download | llvm-4b7f4152a8d166166c6756f933d7dcaa82c0e55e.zip llvm-4b7f4152a8d166166c6756f933d7dcaa82c0e55e.tar.gz llvm-4b7f4152a8d166166c6756f933d7dcaa82c0e55e.tar.bz2 |
[LoongArch] Implement COPY instruction between CFRs (#69300)
With this patch, all CFRs can be used for register allocation.
(cherry picked from commit 271087e3a0875672b26c185a28b3552d5600d2fb)
-rw-r--r-- | llvm/lib/Target/LoongArch/LoongArch.h | 2 | ||||
-rw-r--r-- | llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp | 121 | ||||
-rw-r--r-- | llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td | 17 | ||||
-rw-r--r-- | llvm/lib/Target/LoongArch/LoongArchFloatInstrFormats.td | 12 | ||||
-rw-r--r-- | llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp | 6 | ||||
-rw-r--r-- | llvm/lib/Target/LoongArch/LoongArchRegisterInfo.cpp | 7 | ||||
-rw-r--r-- | llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp | 1 | ||||
-rw-r--r-- | llvm/test/CodeGen/LoongArch/O0-pipeline.ll | 1 | ||||
-rw-r--r-- | llvm/test/CodeGen/LoongArch/cfr-copy.mir | 34 | ||||
-rw-r--r-- | llvm/test/CodeGen/LoongArch/cfr-pseudo-copy.mir | 26 | ||||
-rw-r--r-- | llvm/test/CodeGen/LoongArch/inline-asm-clobbers-fcc.mir | 13 | ||||
-rw-r--r-- | llvm/test/CodeGen/LoongArch/opt-pipeline.ll | 1 |
12 files changed, 227 insertions, 14 deletions
diff --git a/llvm/lib/Target/LoongArch/LoongArch.h b/llvm/lib/Target/LoongArch/LoongArch.h index 05f4ac8..09ca089 100644 --- a/llvm/lib/Target/LoongArch/LoongArch.h +++ b/llvm/lib/Target/LoongArch/LoongArch.h @@ -36,9 +36,11 @@ bool lowerLoongArchMachineOperandToMCOperand(const MachineOperand &MO, FunctionPass *createLoongArchExpandAtomicPseudoPass(); FunctionPass *createLoongArchISelDag(LoongArchTargetMachine &TM); FunctionPass *createLoongArchPreRAExpandPseudoPass(); +FunctionPass *createLoongArchExpandPseudoPass(); void initializeLoongArchDAGToDAGISelPass(PassRegistry &); void initializeLoongArchExpandAtomicPseudoPass(PassRegistry &); void initializeLoongArchPreRAExpandPseudoPass(PassRegistry &); +void initializeLoongArchExpandPseudoPass(PassRegistry &); } // end namespace llvm #endif // LLVM_LIB_TARGET_LOONGARCH_LOONGARCH_H diff --git a/llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp b/llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp index dd0b2cf..72c1f1c 100644 --- a/llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp @@ -29,6 +29,8 @@ using namespace llvm; #define LOONGARCH_PRERA_EXPAND_PSEUDO_NAME \ "LoongArch Pre-RA pseudo instruction expansion pass" +#define LOONGARCH_EXPAND_PSEUDO_NAME \ + "LoongArch pseudo instruction expansion pass" namespace { @@ -513,15 +515,134 @@ bool LoongArchPreRAExpandPseudo::expandFunctionCALL( return true; } +class LoongArchExpandPseudo : public MachineFunctionPass { +public: + const LoongArchInstrInfo *TII; + static char ID; + + LoongArchExpandPseudo() : MachineFunctionPass(ID) { + initializeLoongArchExpandPseudoPass(*PassRegistry::getPassRegistry()); + } + + bool runOnMachineFunction(MachineFunction &MF) override; + + StringRef getPassName() const override { + return LOONGARCH_EXPAND_PSEUDO_NAME; + } + +private: + bool expandMBB(MachineBasicBlock &MBB); + bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI); + bool expandCopyCFR(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI); +}; + +char LoongArchExpandPseudo::ID = 0; + +bool LoongArchExpandPseudo::runOnMachineFunction(MachineFunction &MF) { + TII = + static_cast<const LoongArchInstrInfo *>(MF.getSubtarget().getInstrInfo()); + + bool Modified = false; + for (auto &MBB : MF) + Modified |= expandMBB(MBB); + + return Modified; +} + +bool LoongArchExpandPseudo::expandMBB(MachineBasicBlock &MBB) { + bool Modified = false; + + MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); + while (MBBI != E) { + MachineBasicBlock::iterator NMBBI = std::next(MBBI); + Modified |= expandMI(MBB, MBBI, NMBBI); + MBBI = NMBBI; + } + + return Modified; +} + +bool LoongArchExpandPseudo::expandMI(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI) { + switch (MBBI->getOpcode()) { + case LoongArch::PseudoCopyCFR: + return expandCopyCFR(MBB, MBBI, NextMBBI); + } + + return false; +} + +bool LoongArchExpandPseudo::expandCopyCFR( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI) { + MachineFunction *MF = MBB.getParent(); + MachineInstr &MI = *MBBI; + DebugLoc DL = MI.getDebugLoc(); + + // Expand: + // MBB: + // fcmp.caf.s $dst, $fa0, $fa0 # set $dst 0(false) + // bceqz $src, SinkBB + // FalseBB: + // fcmp.cueq.s $dst, $fa0, $fa0 # set $dst 1(true) + // SinkBB: + // fallthrough + + const BasicBlock *LLVM_BB = MBB.getBasicBlock(); + auto *FalseBB = MF->CreateMachineBasicBlock(LLVM_BB); + auto *SinkBB = MF->CreateMachineBasicBlock(LLVM_BB); + + MF->insert(++MBB.getIterator(), FalseBB); + MF->insert(++FalseBB->getIterator(), SinkBB); + + Register DestReg = MI.getOperand(0).getReg(); + Register SrcReg = MI.getOperand(1).getReg(); + // DestReg = 0 + BuildMI(MBB, MBBI, DL, TII->get(LoongArch::SET_CFR_FALSE), DestReg); + // Insert branch instruction. + BuildMI(MBB, MBBI, DL, TII->get(LoongArch::BCEQZ)) + .addReg(SrcReg) + .addMBB(SinkBB); + // DestReg = 1 + BuildMI(FalseBB, DL, TII->get(LoongArch::SET_CFR_TRUE), DestReg); + + FalseBB->addSuccessor(SinkBB); + + SinkBB->splice(SinkBB->end(), &MBB, MI, MBB.end()); + SinkBB->transferSuccessors(&MBB); + + MBB.addSuccessor(FalseBB); + MBB.addSuccessor(SinkBB); + + NextMBBI = MBB.end(); + MI.eraseFromParent(); + + // Make sure live-ins are correctly attached to this new basic block. + LivePhysRegs LiveRegs; + computeAndAddLiveIns(LiveRegs, *FalseBB); + computeAndAddLiveIns(LiveRegs, *SinkBB); + + return true; +} + } // end namespace INITIALIZE_PASS(LoongArchPreRAExpandPseudo, "loongarch-prera-expand-pseudo", LOONGARCH_PRERA_EXPAND_PSEUDO_NAME, false, false) +INITIALIZE_PASS(LoongArchExpandPseudo, "loongarch-expand-pseudo", + LOONGARCH_EXPAND_PSEUDO_NAME, false, false) + namespace llvm { FunctionPass *createLoongArchPreRAExpandPseudoPass() { return new LoongArchPreRAExpandPseudo(); } +FunctionPass *createLoongArchExpandPseudoPass() { + return new LoongArchExpandPseudo(); +} } // end namespace llvm diff --git a/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td index eb49ae3..826db54 100644 --- a/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td +++ b/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td @@ -126,6 +126,23 @@ def PseudoST_CFR : Pseudo<(outs), let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in def PseudoLD_CFR : Pseudo<(outs CFR:$ccd), (ins GPR:$rj, grlenimm:$imm)>; + +// SET_CFR_{FALSE,TRUE} +// These instructions are defined in order to avoid expensive check error if +// regular instruction patterns are used. +// fcmp.caf.s $dst, $fa0, $fa0 +def SET_CFR_FALSE : SET_CFR<0x0c100000, "fcmp.caf.s">; +// fcmp.cueq.s $dst, $fa0, $fa0 +def SET_CFR_TRUE : SET_CFR<0x0c160000, "fcmp.cueq.s">; + +// Pseudo instruction for copying CFRs. +def PseudoCopyCFR : Pseudo<(outs CFR:$dst), (ins CFR:$src)> { + let mayLoad = 0; + let mayStore = 0; + let hasSideEffects = 0; + let Size = 12; +} + } // Predicates = [HasBasicF] //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/LoongArch/LoongArchFloatInstrFormats.td b/llvm/lib/Target/LoongArch/LoongArchFloatInstrFormats.td index f853fca..f66f620 100644 --- a/llvm/lib/Target/LoongArch/LoongArchFloatInstrFormats.td +++ b/llvm/lib/Target/LoongArch/LoongArchFloatInstrFormats.td @@ -218,3 +218,15 @@ class FP_STORE_2RI12<bits<32> op, RegisterClass rc = FPR32> : FPFmt2RI12<op, (outs), (ins rc:$fd, GPR:$rj, simm12:$imm12), "$fd, $rj, $imm12">; } // hasSideEffects = 0, mayLoad = 0, mayStore = 1 + +// This class is used to define `SET_CFR_{FALSE,TRUE}` instructions which are +// used to expand `PseudoCopyCFR`. +class SET_CFR<bits<32> op, string opcstr> + : FP_CMP<op> { + let isCodeGenOnly = 1; + let fj = 0; // fa0 + let fk = 0; // fa0 + let AsmString = opcstr # "\t$cd, $$fa0, $$fa0"; + let OutOperandList = (outs CFR:$cd); + let InOperandList = (ins); +} diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp index f5e32c4..ef79b8a 100644 --- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp @@ -61,6 +61,12 @@ void LoongArchInstrInfo::copyPhysReg(MachineBasicBlock &MBB, .addReg(SrcReg, getKillRegState(KillSrc)); return; } + // CFR->CFR copy. + if (LoongArch::CFRRegClass.contains(DstReg, SrcReg)) { + BuildMI(MBB, MBBI, DL, get(LoongArch::PseudoCopyCFR), DstReg) + .addReg(SrcReg, getKillRegState(KillSrc)); + return; + } // FPR->FPR copies. unsigned Opc; diff --git a/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.cpp b/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.cpp index 4037c4d..257b947 100644 --- a/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.cpp @@ -98,13 +98,6 @@ LoongArchRegisterInfo::getReservedRegs(const MachineFunction &MF) const { if (TFI->hasBP(MF)) markSuperRegs(Reserved, LoongArchABI::getBPReg()); // bp - // FIXME: To avoid generating COPY instructions between CFRs, only use $fcc0. - // This is required to work around the fact that COPY instruction between CFRs - // is not provided in LoongArch. - if (MF.getSubtarget<LoongArchSubtarget>().hasBasicF()) - for (size_t Reg = LoongArch::FCC1; Reg <= LoongArch::FCC7; ++Reg) - markSuperRegs(Reserved, Reg); - assert(checkAllSuperRegsMarked(Reserved)); return Reserved; } diff --git a/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp b/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp index 46e4a06..d0a4e93 100644 --- a/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp @@ -180,6 +180,7 @@ LoongArchTargetMachine::getTargetTransformInfo(const Function &F) const { void LoongArchPassConfig::addPreEmitPass() { addPass(&BranchRelaxationPassID); } void LoongArchPassConfig::addPreEmitPass2() { + addPass(createLoongArchExpandPseudoPass()); // Schedule the expansion of AtomicPseudos at the last possible moment, // avoiding the possibility for other passes to break the requirements for // forward progress in the LL/SC block. diff --git a/llvm/test/CodeGen/LoongArch/O0-pipeline.ll b/llvm/test/CodeGen/LoongArch/O0-pipeline.ll index 327e461..84d235d 100644 --- a/llvm/test/CodeGen/LoongArch/O0-pipeline.ll +++ b/llvm/test/CodeGen/LoongArch/O0-pipeline.ll @@ -69,6 +69,7 @@ ; CHECK-NEXT: Lazy Machine Block Frequency Analysis ; CHECK-NEXT: Machine Optimization Remark Emitter ; CHECK-NEXT: Stack Frame Layout Analysis +; CHECK-NEXT: LoongArch pseudo instruction expansion pass ; CHECK-NEXT: LoongArch atomic pseudo instruction expansion pass ; CHECK-NEXT: Lazy Machine Block Frequency Analysis ; CHECK-NEXT: Machine Optimization Remark Emitter diff --git a/llvm/test/CodeGen/LoongArch/cfr-copy.mir b/llvm/test/CodeGen/LoongArch/cfr-copy.mir new file mode 100644 index 0000000..4224c99 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/cfr-copy.mir @@ -0,0 +1,34 @@ +# NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 +# RUN: llc --mtriple=loongarch64 --mattr=+d %s -o - | FileCheck %s + +## Check the PseudoCopyCFR instruction expand. + +--- | + target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128" + target triple = "loongarch64" + + define void @test() { + ; CHECK-LABEL: test: + ; CHECK: # %bb.0: + ; CHECK-NEXT: fcmp.caf.s $fcc1, $fa0, $fa0 + ; CHECK-NEXT: bceqz $fcc0, .LBB0_2 + ; CHECK-NEXT: # %bb.1: + ; CHECK-NEXT: fcmp.cueq.s $fcc1, $fa0, $fa0 + ; CHECK-NEXT: .LBB0_2: + ; CHECK-NEXT: movcf2gr $a0, $fcc1 + ; CHECK-NEXT: ret + ret void + } +... +--- +name: test +tracksRegLiveness: true +body: | + bb.0: + liveins: $fcc0 + + $fcc1 = COPY $fcc0 + $r4 = COPY $fcc1 + PseudoRET implicit killed $r4 + +... diff --git a/llvm/test/CodeGen/LoongArch/cfr-pseudo-copy.mir b/llvm/test/CodeGen/LoongArch/cfr-pseudo-copy.mir new file mode 100644 index 0000000..c5a6da7 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/cfr-pseudo-copy.mir @@ -0,0 +1,26 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc --mtriple=loongarch64 --mattr=+d --stop-after=postrapseudos %s \ +# RUN: -o - | FileCheck %s + +## Check the COPY instruction between CFRs. +## A pseudo (PseudoCopyCFR) is generated after postrapseudos pass. + +... +--- +name: test +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $fcc0 + + ; CHECK-LABEL: name: test + ; CHECK: liveins: $fcc0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $fcc1 = PseudoCopyCFR $fcc0 + ; CHECK-NEXT: $r4 = MOVCF2GR killed $fcc1 + ; CHECK-NEXT: PseudoRET implicit killed $r4 + $fcc1 = COPY $fcc0 + $r4 = COPY $fcc1 + PseudoRET implicit killed $r4 + +... diff --git a/llvm/test/CodeGen/LoongArch/inline-asm-clobbers-fcc.mir b/llvm/test/CodeGen/LoongArch/inline-asm-clobbers-fcc.mir index fa5fccb..18dbc5c 100644 --- a/llvm/test/CodeGen/LoongArch/inline-asm-clobbers-fcc.mir +++ b/llvm/test/CodeGen/LoongArch/inline-asm-clobbers-fcc.mir @@ -1,5 +1,6 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc --mtriple=loongarch64 --mattr=+d --run-pass=greedy %s -o - | FileCheck %s +# RUN: llc --mtriple=loongarch64 --mattr=+d --regalloc=fast \ +# RUN: --stop-before=postra-machine-sink %s -o - | FileCheck %s ## Check that fcc register clobbered by inlineasm is correctly saved by examing ## a pair of pseudos (PseudoST_CFR and PseudoLD_CFR) are generated before and @@ -15,13 +16,11 @@ body: | ; CHECK-LABEL: name: test ; CHECK: liveins: $f0_64, $f1_64 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr64 = COPY $f1_64 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr64 = COPY $f0_64 - ; CHECK-NEXT: [[FCMP_CLT_D:%[0-9]+]]:cfr = FCMP_CLT_D [[COPY]], [[COPY1]] - ; CHECK-NEXT: PseudoST_CFR [[FCMP_CLT_D]], %stack.0, 0 :: (store (s64) into %stack.0) + ; CHECK-NEXT: renamable $fcc0 = FCMP_CLT_D renamable $f1_64, renamable $f0_64 + ; CHECK-NEXT: PseudoST_CFR $fcc0, %stack.0, 0 :: (store (s64) into %stack.0) ; CHECK-NEXT: INLINEASM &nop, 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def dead early-clobber $fcc0 - ; CHECK-NEXT: [[PseudoLD_CFR:%[0-9]+]]:cfr = PseudoLD_CFR %stack.0, 0 :: (load (s64) from %stack.0) - ; CHECK-NEXT: $r4 = COPY [[PseudoLD_CFR]] + ; CHECK-NEXT: $fcc0 = PseudoLD_CFR %stack.0, 0 :: (load (s64) from %stack.0) + ; CHECK-NEXT: $r4 = COPY killed renamable $fcc0 ; CHECK-NEXT: PseudoRET implicit killed $r4 %1:fpr64 = COPY $f1_64 %0:fpr64 = COPY $f0_64 diff --git a/llvm/test/CodeGen/LoongArch/opt-pipeline.ll b/llvm/test/CodeGen/LoongArch/opt-pipeline.ll index 8b1d635b..3134d94 100644 --- a/llvm/test/CodeGen/LoongArch/opt-pipeline.ll +++ b/llvm/test/CodeGen/LoongArch/opt-pipeline.ll @@ -165,6 +165,7 @@ ; CHECK-NEXT: Lazy Machine Block Frequency Analysis ; CHECK-NEXT: Machine Optimization Remark Emitter ; CHECK-NEXT: Stack Frame Layout Analysis +; CHECK-NEXT: LoongArch pseudo instruction expansion pass ; CHECK-NEXT: LoongArch atomic pseudo instruction expansion pass ; CHECK-NEXT: Lazy Machine Block Frequency Analysis ; CHECK-NEXT: Machine Optimization Remark Emitter |