//===-------------- BPFMIPeephole.cpp - MI Peephole Cleanups -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass performs peephole optimizations to clean up ugly code sequences
// at the MachineInstruction layer.
//
// Currently, there are two optimizations implemented:
//  - One pre-RA MachineSSA pass to eliminate type promotion sequences, those
//    zero extend 32-bit subregisters to 64-bit registers, if the compiler
//    can prove the subregisters are defined by 32-bit operations, in which
//    case the upper half of the underlying 64-bit registers is zeroed
//    implicitly.
//
//  - One post-RA PreEmit pass to do final cleanup on some redundant
//    instructions generated due to bad RA on subregisters.
//===----------------------------------------------------------------------===//

#include "BPF.h"
#include "BPFInstrInfo.h"
#include "BPFTargetMachine.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/Debug.h"
#include <set>

using namespace llvm;

#define DEBUG_TYPE "bpf-mi-zext-elim"

static cl::opt<int> GotolAbsLowBound("gotol-abs-low-bound", cl::Hidden,
  cl::init(INT16_MAX >> 1), cl::desc("Specify gotol lower bound"));

STATISTIC(ZExtElemNum, "Number of zero extension shifts eliminated");

namespace {

struct BPFMIPeephole : public MachineFunctionPass {

  static char ID;
  const BPFInstrInfo *TII;
  MachineFunction *MF;
  MachineRegisterInfo *MRI;

  BPFMIPeephole() : MachineFunctionPass(ID) {
    initializeBPFMIPeepholePass(*PassRegistry::getPassRegistry());
  }

private:
  // Initialize class variables.
  void initialize(MachineFunction &MFParm);

  bool isCopyFrom32Def(MachineInstr *CopyMI);
  bool isInsnFrom32Def(MachineInstr *DefInsn);
  bool isPhiFrom32Def(MachineInstr *PhiMI);
  bool isMovFrom32Def(MachineInstr *MovMI);
  bool eliminateZExtSeq();
  bool eliminateZExt();

  std::set<MachineInstr *> PhiInsns;

public:

  // Main entry point for this pass.
  bool runOnMachineFunction(MachineFunction &MF) override {
    if (skipFunction(MF.getFunction()))
      return false;

    initialize(MF);

    // First try to eliminate (zext, lshift, rshift) and then
    // try to eliminate zext.
    bool ZExtSeqExist, ZExtExist;
    ZExtSeqExist = eliminateZExtSeq();
    ZExtExist = eliminateZExt();
    return ZExtSeqExist || ZExtExist;
  }
};

// Initialize class variables.
void BPFMIPeephole::initialize(MachineFunction &MFParm) {
  MF = &MFParm;
  MRI = &MF->getRegInfo();
  TII = MF->getSubtarget<BPFSubtarget>().getInstrInfo();
  LLVM_DEBUG(dbgs() << "*** BPF MachineSSA ZEXT Elim peephole pass ***\n\n");
}

bool BPFMIPeephole::isCopyFrom32Def(MachineInstr *CopyMI) {
  MachineOperand &opnd = CopyMI->getOperand(1);

  if (!opnd.isReg())
    return false;

  // Return false if getting value from a 32bit physical register.
  // Most likely, this physical register is aliased to
  // the function call return value or current function parameters.
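  // For example (illustrative, not exhaustive): a 32-bit call return value
  // lives in the physical subregister W0 and incoming arguments in W1-W5;
  // those values are defined outside this function body, so we cannot prove
  // here that their upper 32 bits are zero.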
  Register Reg = opnd.getReg();
  if (!Reg.isVirtual())
    return false;

  if (MRI->getRegClass(Reg) == &BPF::GPRRegClass)
    return false;

  MachineInstr *DefInsn = MRI->getVRegDef(Reg);
  if (!isInsnFrom32Def(DefInsn))
    return false;

  return true;
}

bool BPFMIPeephole::isPhiFrom32Def(MachineInstr *PhiMI) {
  for (unsigned i = 1, e = PhiMI->getNumOperands(); i < e; i += 2) {
    MachineOperand &opnd = PhiMI->getOperand(i);

    if (!opnd.isReg())
      return false;

    MachineInstr *PhiDef = MRI->getVRegDef(opnd.getReg());
    if (!PhiDef)
      return false;
    if (PhiDef->isPHI()) {
      if (!PhiInsns.insert(PhiDef).second)
        return false;
      if (!isPhiFrom32Def(PhiDef))
        return false;
    }
    if (PhiDef->getOpcode() == BPF::COPY && !isCopyFrom32Def(PhiDef))
      return false;
  }

  return true;
}

// The \p DefInsn instruction defines a virtual register.
bool BPFMIPeephole::isInsnFrom32Def(MachineInstr *DefInsn) {
  if (!DefInsn)
    return false;

  if (DefInsn->isPHI()) {
    if (!PhiInsns.insert(DefInsn).second)
      return false;
    if (!isPhiFrom32Def(DefInsn))
      return false;
  } else if (DefInsn->getOpcode() == BPF::COPY) {
    if (!isCopyFrom32Def(DefInsn))
      return false;
  }

  return true;
}

bool BPFMIPeephole::isMovFrom32Def(MachineInstr *MovMI) {
  MachineInstr *DefInsn = MRI->getVRegDef(MovMI->getOperand(1).getReg());

  LLVM_DEBUG(dbgs() << "  Def of Mov Src:");
  LLVM_DEBUG(DefInsn->dump());

  PhiInsns.clear();
  if (!isInsnFrom32Def(DefInsn))
    return false;

  LLVM_DEBUG(dbgs() << "  One ZExt elim sequence identified.\n");

  return true;
}

bool BPFMIPeephole::eliminateZExtSeq() {
  MachineInstr* ToErase = nullptr;
  bool Eliminated = false;

  for (MachineBasicBlock &MBB : *MF) {
    for (MachineInstr &MI : MBB) {
      // If the previous instruction was marked for elimination, remove it now.
      if (ToErase) {
        ToErase->eraseFromParent();
        ToErase = nullptr;
      }

      // Eliminate the 32-bit to 64-bit zero extension sequence when possible.
      //
      //   MOV_32_64 rB, wA
      //   SLL_ri    rB, rB, 32
      //   SRL_ri    rB, rB, 32
      if (MI.getOpcode() == BPF::SRL_ri &&
          MI.getOperand(2).getImm() == 32) {
        Register DstReg = MI.getOperand(0).getReg();
        Register ShfReg = MI.getOperand(1).getReg();
        MachineInstr *SllMI = MRI->getVRegDef(ShfReg);

        LLVM_DEBUG(dbgs() << "Starting SRL found:");
        LLVM_DEBUG(MI.dump());

        if (!SllMI ||
            SllMI->isPHI() ||
            SllMI->getOpcode() != BPF::SLL_ri ||
            SllMI->getOperand(2).getImm() != 32)
          continue;

        LLVM_DEBUG(dbgs() << "  SLL found:");
        LLVM_DEBUG(SllMI->dump());

        MachineInstr *MovMI = MRI->getVRegDef(SllMI->getOperand(1).getReg());
        if (!MovMI ||
            MovMI->isPHI() ||
            MovMI->getOpcode() != BPF::MOV_32_64)
          continue;

        LLVM_DEBUG(dbgs() << "  Type cast Mov found:");
        LLVM_DEBUG(MovMI->dump());

        Register SubReg = MovMI->getOperand(1).getReg();
        if (!isMovFrom32Def(MovMI)) {
          LLVM_DEBUG(dbgs()
                     << "  One ZExt elim sequence failed qualifying elim.\n");
          continue;
        }

        BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(BPF::SUBREG_TO_REG), DstReg)
          .addImm(0).addReg(SubReg).addImm(BPF::sub_32);

        SllMI->eraseFromParent();
        MovMI->eraseFromParent();

        // MI is the right shift, we can't erase it in its own iteration.
        // Mark it in ToErase, and erase it in the next iteration.
        ToErase = &MI;
        ZExtElemNum++;
        Eliminated = true;
      }
    }
  }

  return Eliminated;
}

bool BPFMIPeephole::eliminateZExt() {
  MachineInstr* ToErase = nullptr;
  bool Eliminated = false;

  for (MachineBasicBlock &MBB : *MF) {
    for (MachineInstr &MI : MBB) {
      // If the previous instruction was marked for elimination, remove it now.
      if (ToErase) {
        ToErase->eraseFromParent();
        ToErase = nullptr;
      }

      if (MI.getOpcode() != BPF::MOV_32_64)
        continue;

      // Eliminate MOV_32_64 if possible.
      //
      //   MOV_32_64 rA, wB
      //
      // If wB has been zero extended, replace it with a SUBREG_TO_REG.
      // This is to work around BPF programs where pkt->{data, data_end}
      // is encoded as u32, but the verifier actually populates them
      // as 64bit pointers. The MOV_32_64 will zero out the top 32 bits.
      LLVM_DEBUG(dbgs() << "Candidate MOV_32_64 instruction:");
      LLVM_DEBUG(MI.dump());

      if (!isMovFrom32Def(&MI))
        continue;

      LLVM_DEBUG(dbgs() << "Removing the MOV_32_64 instruction\n");

      Register dst = MI.getOperand(0).getReg();
      Register src = MI.getOperand(1).getReg();

      // Build a SUBREG_TO_REG instruction.
      BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(BPF::SUBREG_TO_REG), dst)
        .addImm(0).addReg(src).addImm(BPF::sub_32);

      ToErase = &MI;
      Eliminated = true;
    }
  }

  return Eliminated;
}

} // end anonymous namespace

INITIALIZE_PASS(BPFMIPeephole, DEBUG_TYPE,
                "BPF MachineSSA Peephole Optimization For ZEXT Eliminate",
                false, false)

char BPFMIPeephole::ID = 0;
FunctionPass* llvm::createBPFMIPeepholePass() { return new BPFMIPeephole(); }

STATISTIC(RedundantMovElemNum, "Number of redundant moves eliminated");

namespace {

struct BPFMIPreEmitPeephole : public MachineFunctionPass {

  static char ID;
  MachineFunction *MF;
  const TargetRegisterInfo *TRI;
  const BPFInstrInfo *TII;
  bool SupportGotol;

  BPFMIPreEmitPeephole() : MachineFunctionPass(ID) {
    initializeBPFMIPreEmitPeepholePass(*PassRegistry::getPassRegistry());
  }

private:
  // Initialize class variables.
  void initialize(MachineFunction &MFParm);

  bool in16BitRange(int Num);
  bool eliminateRedundantMov();
  bool adjustBranch();

public:

  // Main entry point for this pass.
  bool runOnMachineFunction(MachineFunction &MF) override {
    if (skipFunction(MF.getFunction()))
      return false;

    initialize(MF);

    bool Changed;
    Changed = eliminateRedundantMov();
    if (SupportGotol)
      Changed = adjustBranch() || Changed;
    return Changed;
  }
};

// Initialize class variables.
void BPFMIPreEmitPeephole::initialize(MachineFunction &MFParm) {
  MF = &MFParm;
  TII = MF->getSubtarget<BPFSubtarget>().getInstrInfo();
  TRI = MF->getSubtarget<BPFSubtarget>().getRegisterInfo();
  SupportGotol = MF->getSubtarget<BPFSubtarget>().hasGotol();
  LLVM_DEBUG(dbgs() << "*** BPF PreEmit peephole pass ***\n\n");
}

bool BPFMIPreEmitPeephole::eliminateRedundantMov() {
  MachineInstr* ToErase = nullptr;
  bool Eliminated = false;

  for (MachineBasicBlock &MBB : *MF) {
    for (MachineInstr &MI : MBB) {
      // If the previous instruction was marked for elimination, remove it now.
      if (ToErase) {
        LLVM_DEBUG(dbgs() << "  Redundant Mov Eliminated:");
        LLVM_DEBUG(ToErase->dump());
        ToErase->eraseFromParent();
        ToErase = nullptr;
      }

      // Eliminate identical move:
      //
      //   MOV rA, rA
      //
      // Note that we cannot remove
      //   MOV_32_64  rA, wA
      //   MOV_rr_32  wA, wA
      // as these two instructions have a side effect: zeroing out
      // the top 32 bits of rA.
      unsigned Opcode = MI.getOpcode();
      if (Opcode == BPF::MOV_rr) {
        Register dst = MI.getOperand(0).getReg();
        Register src = MI.getOperand(1).getReg();

        if (dst != src)
          continue;

        ToErase = &MI;
        RedundantMovElemNum++;
        Eliminated = true;
      }
    }
  }

  return Eliminated;
}

bool BPFMIPreEmitPeephole::in16BitRange(int Num) {
  // The cut-off is not precisely at the 16bit range since
  // new insns are added during the transformation. So let us
  // be a little bit conservative.
  return Num >= -GotolAbsLowBound && Num <= GotolAbsLowBound;
}

// Before cpu=v4, only a 16bit branch target offset (-0x8000 to 0x7fff)
// is supported for both unconditional (JMP) and conditional (JEQ, JSGT,
// etc.) branches. In certain cases, e.g., full unrolling, the branch
// target offset might exceed the 16bit range.
// If this happens, LLVM will generate incorrect code as the offset is
// truncated to 16bit.
//
// To fix this rare case, a new insn JMPL is introduced. This new insn
// supports a 32bit branch target offset. The compiler does not use this
// insn during insn selection. Rather, the BPF backend will estimate the
// branch target offset and do JMP -> JMPL and JEQ -> JEQ + JMPL
// conversion if the estimated branch target offset is beyond 16bit.
bool BPFMIPreEmitPeephole::adjustBranch() {
  bool Changed = false;
  int CurrNumInsns = 0;
  DenseMap<MachineBasicBlock *, int> SoFarNumInsns;
  DenseMap<MachineBasicBlock *, MachineBasicBlock *> FollowThroughBB;
  std::vector<MachineBasicBlock *> MBBs;

  MachineBasicBlock *PrevBB = nullptr;
  for (MachineBasicBlock &MBB : *MF) {
    // MBB.size() is the number of insns in this basic block, including some
    // debug info, e.g., DEBUG_VALUE, so we may over-count a little bit.
    // Typically we have way more normal insns than DEBUG_VALUE insns.
    // Also, if we indeed need to convert a conditional branch like JEQ to
    // JEQ + JMPL, we actually introduce some new insns like below.
    CurrNumInsns += (int)MBB.size();
    SoFarNumInsns[&MBB] = CurrNumInsns;
    if (PrevBB != nullptr)
      FollowThroughBB[PrevBB] = &MBB;
    PrevBB = &MBB;
    // A list of original BBs to make later traversal easier.
    MBBs.push_back(&MBB);
  }
  FollowThroughBB[PrevBB] = nullptr;

  for (unsigned i = 0; i < MBBs.size(); i++) {
    // We have four cases here:
    //  (1). no terminator, simple follow through.
    //  (2). jmp to another bb.
    //  (3). conditional jmp to another bb or follow through.
    //  (4). conditional jmp followed by an unconditional jmp.
    MachineInstr *CondJmp = nullptr, *UncondJmp = nullptr;
    MachineBasicBlock *MBB = MBBs[i];

    for (MachineInstr &Term : MBB->terminators()) {
      if (Term.isConditionalBranch()) {
        assert(CondJmp == nullptr);
        CondJmp = &Term;
      } else if (Term.isUnconditionalBranch()) {
        assert(UncondJmp == nullptr);
        UncondJmp = &Term;
      }
    }

    // (1). no terminator, simple follow through.
    if (!CondJmp && !UncondJmp)
      continue;

    MachineBasicBlock *CondTargetBB, *JmpBB;
    CurrNumInsns = SoFarNumInsns[MBB];

    // (2). jmp to another bb.
    if (!CondJmp && UncondJmp) {
      JmpBB = UncondJmp->getOperand(0).getMBB();
      if (in16BitRange(SoFarNumInsns[JmpBB] - JmpBB->size() - CurrNumInsns))
        continue;

      // Replace this insn with a JMPL.
      BuildMI(MBB, UncondJmp->getDebugLoc(), TII->get(BPF::JMPL)).addMBB(JmpBB);
      UncondJmp->eraseFromParent();
      Changed = true;
      continue;
    }

    const BasicBlock *TermBB = MBB->getBasicBlock();
    int Dist;

    // (3). conditional jmp to another bb or follow through.
    if (!UncondJmp) {
      CondTargetBB = CondJmp->getOperand(2).getMBB();
      MachineBasicBlock *FollowBB = FollowThroughBB[MBB];
      Dist = SoFarNumInsns[CondTargetBB] - CondTargetBB->size() - CurrNumInsns;
      if (in16BitRange(Dist))
        continue;

      // We have
      //   B2: ...
      //       if (cond) goto B5
      //   B3: ...
      // where B2 -> B5 is beyond the 16bit range.
      //
      // We do not have a 32bit cond jmp insn. So we try to do
      // the following.
      //   B2:     ...
      //           if (cond) goto New_B1
      //   New_B0: goto B3
      //   New_B1: gotol B5
      //   B3:     ...
      // Basically two new basic blocks are created.
      MachineBasicBlock *New_B0 = MF->CreateMachineBasicBlock(TermBB);
      MachineBasicBlock *New_B1 = MF->CreateMachineBasicBlock(TermBB);

      // Insert New_B0 and New_B1 into the function block list.
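      // Sketch of the intended layout after the insertions below (assuming
      // MBB is B2 and B3 is its original fall-through):
      //   B2, New_B0, New_B1, B3, ...
      // New_B0 becomes the new fall-through of B2 and jumps back to B3,
      // while New_B1 only holds the long-range gotol to CondTargetBB.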
      MachineFunction::iterator MBB_I = ++MBB->getIterator();
      MF->insert(MBB_I, New_B0);
      MF->insert(MBB_I, New_B1);

      // Replace the B2 cond jump.
      if (CondJmp->getOperand(1).isReg())
        BuildMI(*MBB, MachineBasicBlock::iterator(*CondJmp),
                CondJmp->getDebugLoc(), TII->get(CondJmp->getOpcode()))
            .addReg(CondJmp->getOperand(0).getReg())
            .addReg(CondJmp->getOperand(1).getReg())
            .addMBB(New_B1);
      else
        BuildMI(*MBB, MachineBasicBlock::iterator(*CondJmp),
                CondJmp->getDebugLoc(), TII->get(CondJmp->getOpcode()))
            .addReg(CondJmp->getOperand(0).getReg())
            .addImm(CondJmp->getOperand(1).getImm())
            .addMBB(New_B1);

      // It is possible that CondTargetBB and FollowBB are the same. But the
      // above Dist check should have already filtered out this case.
      MBB->removeSuccessor(CondTargetBB);
      MBB->removeSuccessor(FollowBB);
      MBB->addSuccessor(New_B0);
      MBB->addSuccessor(New_B1);

      // Populate insns in New_B0 and New_B1.
      BuildMI(New_B0, CondJmp->getDebugLoc(), TII->get(BPF::JMP)).addMBB(FollowBB);
      BuildMI(New_B1, CondJmp->getDebugLoc(), TII->get(BPF::JMPL))
          .addMBB(CondTargetBB);

      New_B0->addSuccessor(FollowBB);
      New_B1->addSuccessor(CondTargetBB);
      CondJmp->eraseFromParent();
      Changed = true;
      continue;
    }

    // (4). conditional jmp followed by an unconditional jmp.
    CondTargetBB = CondJmp->getOperand(2).getMBB();
    JmpBB = UncondJmp->getOperand(0).getMBB();

    // We have
    //   B2: ...
    //       if (cond) goto B5
    //       JMP B7
    //   B3: ...
    //
    // If only B2->B5 is out of the 16bit range, we can do
    //   B2:     ...
    //           if (cond) goto New_B
    //           JMP B7
    //   New_B:  gotol B5
    //   B3:     ...
    //
    // If only 'JMP B7' is out of the 16bit range, we can replace
    // 'JMP B7' with 'JMPL B7'.
    //
    // If both B2->B5 and 'JMP B7' are out of range, just do
    // both of the above transformations.
    Dist = SoFarNumInsns[CondTargetBB] - CondTargetBB->size() - CurrNumInsns;
    if (!in16BitRange(Dist)) {
      MachineBasicBlock *New_B = MF->CreateMachineBasicBlock(TermBB);

      // Insert New_B into the function block list.
      MF->insert(++MBB->getIterator(), New_B);

      // Replace the B2 cond jump.
      if (CondJmp->getOperand(1).isReg())
        BuildMI(*MBB, MachineBasicBlock::iterator(*CondJmp),
                CondJmp->getDebugLoc(), TII->get(CondJmp->getOpcode()))
            .addReg(CondJmp->getOperand(0).getReg())
            .addReg(CondJmp->getOperand(1).getReg())
            .addMBB(New_B);
      else
        BuildMI(*MBB, MachineBasicBlock::iterator(*CondJmp),
                CondJmp->getDebugLoc(), TII->get(CondJmp->getOpcode()))
            .addReg(CondJmp->getOperand(0).getReg())
            .addImm(CondJmp->getOperand(1).getImm())
            .addMBB(New_B);

      if (CondTargetBB != JmpBB)
        MBB->removeSuccessor(CondTargetBB);
      MBB->addSuccessor(New_B);

      // Populate insn in New_B.
      BuildMI(New_B, CondJmp->getDebugLoc(), TII->get(BPF::JMPL)).addMBB(CondTargetBB);

      New_B->addSuccessor(CondTargetBB);
      CondJmp->eraseFromParent();
      Changed = true;
    }

    if (!in16BitRange(SoFarNumInsns[JmpBB] - CurrNumInsns)) {
      BuildMI(MBB, UncondJmp->getDebugLoc(), TII->get(BPF::JMPL)).addMBB(JmpBB);
      UncondJmp->eraseFromParent();
      Changed = true;
    }
  }

  return Changed;
}

} // end anonymous namespace

INITIALIZE_PASS(BPFMIPreEmitPeephole, "bpf-mi-pemit-peephole",
                "BPF PreEmit Peephole Optimization", false, false)

char BPFMIPreEmitPeephole::ID = 0;
FunctionPass* llvm::createBPFMIPreEmitPeepholePass() {
  return new BPFMIPreEmitPeephole();
}
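// Note: these factory functions are expected to be hooked into the BPF pass
// pipeline by the target machine (see BPFTargetMachine.cpp), the ZEXT
// elimination pass as a MachineSSA optimization and the PreEmit peephole
// pass as a pre-emit pass.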