diff options
author | Thomas Lively <tlively@google.com> | 2020-01-14 14:22:49 -0800 |
---|---|---|
committer | Thomas Lively <tlively@google.com> | 2020-02-18 14:56:09 -0800 |
commit | 52861809994c9199ceb45b98d982ab736a376e67 (patch) | |
tree | b96ce700921b7df80eb394cfd8b2a8b0bd320a90 /llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp | |
parent | b91d9ec0bb8caedcdd1ddf0506fc19d6c55efae3 (diff) | |
download | llvm-52861809994c9199ceb45b98d982ab736a376e67.zip llvm-52861809994c9199ceb45b98d982ab736a376e67.tar.gz llvm-52861809994c9199ceb45b98d982ab736a376e67.tar.bz2 |
[WebAssembly] Fix RegStackify and ExplicitLocals to handle multivalue
Summary:
There is still room for improvement in the handling of multivalue
nodes in both passes, but the current algorithm is at least correct
and optimizes some simpler cases. In order to make future
optimizations of these passes easier and build confidence that the
current algorithms are correct, this CL also adds a script that
automatically and exhaustively generates interesting multivalue test
cases.
Reviewers: aheejin, dschuff
Subscribers: sbc100, jgravelle-google, hiraditya, sunfish, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D72902
Diffstat (limited to 'llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp')
-rw-r--r-- | llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp | 151 |
1 files changed, 106 insertions, 45 deletions
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp index e490344..ad4a95c 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp @@ -36,6 +36,7 @@ #include "llvm/CodeGen/Passes.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include <iterator> using namespace llvm; #define DEBUG_TYPE "wasm-reg-stackify" @@ -120,6 +121,7 @@ static void convertImplicitDefToConstZero(MachineInstr *MI, Type::getDoubleTy(MF.getFunction().getContext()))); MI->addOperand(MachineOperand::CreateFPImm(Val)); } else if (RegClass == &WebAssembly::V128RegClass) { + // TODO: Replace this with v128.const 0 once that is supported in V8 Register TempReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass); MI->setDesc(TII->get(WebAssembly::SPLAT_v4i32)); MI->addOperand(MachineOperand::CreateReg(TempReg, false)); @@ -312,25 +314,59 @@ static bool hasOneUse(unsigned Reg, MachineInstr *Def, MachineRegisterInfo &MRI, // walking the block. // TODO: Compute memory dependencies in a way that uses AliasAnalysis to be // more precise. -static bool isSafeToMove(const MachineInstr *Def, const MachineInstr *Insert, - AliasAnalysis &AA, const MachineRegisterInfo &MRI) { - assert(Def->getParent() == Insert->getParent()); +static bool isSafeToMove(const MachineOperand *Def, const MachineOperand *Use, + const MachineInstr *Insert, AliasAnalysis &AA, + const WebAssemblyFunctionInfo &MFI, + const MachineRegisterInfo &MRI) { + const MachineInstr *DefI = Def->getParent(); + const MachineInstr *UseI = Use->getParent(); + assert(DefI->getParent() == Insert->getParent()); + assert(UseI->getParent() == Insert->getParent()); + + // The first def of a multivalue instruction can be stackified by moving, + // since the later defs can always be placed into locals if necessary. Later + // defs can only be stackified if all previous defs are already stackified + // since ExplicitLocals will not know how to place a def in a local if a + // subsequent def is stackified. But only one def can be stackified by moving + // the instruction, so it must be the first one. + // + // TODO: This could be loosened to be the first *live* def, but care would + // have to be taken to ensure the drops of the initial dead defs can be + // placed. This would require checking that no previous defs are used in the + // same instruction as subsequent defs. + if (Def != DefI->defs().begin()) + return false; + + // If any subsequent def is used prior to the current value by the same + // instruction in which the current value is used, we cannot + // stackify. Stackifying in this case would require that def moving below the + // current def in the stack, which cannot be achieved, even with locals. + for (const auto &SubsequentDef : drop_begin(DefI->defs(), 1)) { + for (const auto &PriorUse : UseI->uses()) { + if (&PriorUse == Use) + break; + if (PriorUse.isReg() && SubsequentDef.getReg() == PriorUse.getReg()) + return false; + } + } + + // If moving is a semantic nop, it is always allowed + const MachineBasicBlock *MBB = DefI->getParent(); + auto NextI = std::next(MachineBasicBlock::const_iterator(DefI)); + for (auto E = MBB->end(); NextI != E && NextI->isDebugInstr(); ++NextI) + ; + if (NextI == Insert) + return true; // 'catch' and 'extract_exception' should be the first instruction of a BB and // cannot move. - if (Def->getOpcode() == WebAssembly::CATCH || - Def->getOpcode() == WebAssembly::EXTRACT_EXCEPTION_I32) { - const MachineBasicBlock *MBB = Def->getParent(); - auto NextI = std::next(MachineBasicBlock::const_iterator(Def)); - for (auto E = MBB->end(); NextI != E && NextI->isDebugInstr(); ++NextI) - ; - if (NextI != Insert) - return false; - } + if (DefI->getOpcode() == WebAssembly::CATCH || + DefI->getOpcode() == WebAssembly::EXTRACT_EXCEPTION_I32) + return false; // Check for register dependencies. SmallVector<unsigned, 4> MutableRegisters; - for (const MachineOperand &MO : Def->operands()) { + for (const MachineOperand &MO : DefI->operands()) { if (!MO.isReg() || MO.isUndef()) continue; Register Reg = MO.getReg(); @@ -360,7 +396,7 @@ static bool isSafeToMove(const MachineInstr *Def, const MachineInstr *Insert, } bool Read = false, Write = false, Effects = false, StackPointer = false; - query(*Def, AA, Read, Write, Effects, StackPointer); + query(*DefI, AA, Read, Write, Effects, StackPointer); // If the instruction does not access memory and has no side effects, it has // no additional dependencies. @@ -368,8 +404,8 @@ static bool isSafeToMove(const MachineInstr *Def, const MachineInstr *Insert, if (!Read && !Write && !Effects && !StackPointer && !HasMutableRegisters) return true; - // Scan through the intervening instructions between Def and Insert. - MachineBasicBlock::const_iterator D(Def), I(Insert); + // Scan through the intervening instructions between DefI and Insert. + MachineBasicBlock::const_iterator D(DefI), I(Insert); for (--I; I != D; --I) { bool InterveningRead = false; bool InterveningWrite = false; @@ -800,32 +836,32 @@ bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) { CommutingState Commuting; TreeWalkerState TreeWalker(Insert); while (!TreeWalker.done()) { - MachineOperand &Op = TreeWalker.pop(); + MachineOperand &Use = TreeWalker.pop(); // We're only interested in explicit virtual register operands. - if (!Op.isReg()) + if (!Use.isReg()) continue; - Register Reg = Op.getReg(); - assert(Op.isUse() && "explicit_uses() should only iterate over uses"); - assert(!Op.isImplicit() && + Register Reg = Use.getReg(); + assert(Use.isUse() && "explicit_uses() should only iterate over uses"); + assert(!Use.isImplicit() && "explicit_uses() should only iterate over explicit operands"); if (Register::isPhysicalRegister(Reg)) continue; // Identify the definition for this register at this point. - MachineInstr *Def = getVRegDef(Reg, Insert, MRI, LIS); - if (!Def) + MachineInstr *DefI = getVRegDef(Reg, Insert, MRI, LIS); + if (!DefI) continue; // Don't nest an INLINE_ASM def into anything, because we don't have // constraints for $pop outputs. - if (Def->isInlineAsm()) + if (DefI->isInlineAsm()) continue; // Argument instructions represent live-in registers and not real // instructions. - if (WebAssembly::isArgument(Def->getOpcode())) + if (WebAssembly::isArgument(DefI->getOpcode())) continue; // Currently catch's return value register cannot be stackified, because @@ -842,34 +878,38 @@ bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) { // register should be assigned to a local to be propagated across // 'block' boundary now. // - // TODO Fix this once we support the multi-value proposal. - if (Def->getOpcode() == WebAssembly::CATCH) + // TODO: Fix this once we support the multivalue blocks + if (DefI->getOpcode() == WebAssembly::CATCH) continue; + MachineOperand *Def = DefI->findRegisterDefOperand(Reg); + assert(Def != nullptr); + // Decide which strategy to take. Prefer to move a single-use value // over cloning it, and prefer cloning over introducing a tee. // For moving, we require the def to be in the same block as the use; // this makes things simpler (LiveIntervals' handleMove function only // supports intra-block moves) and it's MachineSink's job to catch all // the sinking opportunities anyway. - bool SameBlock = Def->getParent() == &MBB; - bool CanMove = SameBlock && isSafeToMove(Def, Insert, AA, MRI) && + bool SameBlock = DefI->getParent() == &MBB; + bool CanMove = SameBlock && + isSafeToMove(Def, &Use, Insert, AA, MFI, MRI) && !TreeWalker.isOnStack(Reg); - if (CanMove && hasOneUse(Reg, Def, MRI, MDT, LIS)) { - Insert = moveForSingleUse(Reg, Op, Def, MBB, Insert, LIS, MFI, MRI); + if (CanMove && hasOneUse(Reg, DefI, MRI, MDT, LIS)) { + Insert = moveForSingleUse(Reg, Use, DefI, MBB, Insert, LIS, MFI, MRI); // If we are removing the frame base reg completely, remove the debug // info as well. // TODO: Encode this properly as a stackified value. if (MFI.isFrameBaseVirtual() && MFI.getFrameBaseVreg() == Reg) MFI.clearFrameBaseVreg(); - } else if (shouldRematerialize(*Def, AA, TII)) { + } else if (shouldRematerialize(*DefI, AA, TII)) { Insert = - rematerializeCheapDef(Reg, Op, *Def, MBB, Insert->getIterator(), + rematerializeCheapDef(Reg, Use, *DefI, MBB, Insert->getIterator(), LIS, MFI, MRI, TII, TRI); - } else if (CanMove && - oneUseDominatesOtherUses(Reg, Op, MBB, MRI, MDT, LIS, MFI)) { - Insert = moveAndTeeForMultiUse(Reg, Op, Def, MBB, Insert, LIS, MFI, + } else if (CanMove && oneUseDominatesOtherUses(Reg, Use, MBB, MRI, MDT, + LIS, MFI)) { + Insert = moveAndTeeForMultiUse(Reg, Use, DefI, MBB, Insert, LIS, MFI, MRI, TII); } else { // We failed to stackify the operand. If the problem was ordering @@ -880,6 +920,25 @@ bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) { continue; } + // Stackifying a multivalue def may unlock in-place stackification of + // subsequent defs. TODO: Handle the case where the consecutive uses are + // not all in the same instruction. + auto *SubsequentDef = DefI->defs().begin(); + auto *SubsequentUse = &Use; + while (SubsequentDef != DefI->defs().end() && + SubsequentUse != Use.getParent()->uses().end()) { + if (!SubsequentDef->isReg() || !SubsequentUse->isReg()) + break; + unsigned DefReg = SubsequentDef->getReg(); + unsigned UseReg = SubsequentUse->getReg(); + // TODO: This single-use restriction could be relaxed by using tees + if (DefReg != UseReg || !MRI.hasOneUse(DefReg)) + break; + MFI.stackifyVReg(DefReg); + ++SubsequentDef; + ++SubsequentUse; + } + // If the instruction we just stackified is an IMPLICIT_DEF, convert it // to a constant 0 so that the def is explicit, and the push/pop // correspondence is maintained. @@ -917,18 +976,20 @@ bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) { for (MachineInstr &MI : MBB) { if (MI.isDebugInstr()) continue; - for (MachineOperand &MO : reverse(MI.explicit_operands())) { + for (MachineOperand &MO : reverse(MI.explicit_uses())) { if (!MO.isReg()) continue; Register Reg = MO.getReg(); - - if (MFI.isVRegStackified(Reg)) { - if (MO.isDef()) - Stack.push_back(Reg); - else - assert(Stack.pop_back_val() == Reg && - "Register stack pop should be paired with a push"); - } + if (MFI.isVRegStackified(Reg)) + assert(Stack.pop_back_val() == Reg && + "Register stack pop should be paired with a push"); + } + for (MachineOperand &MO : MI.defs()) { + if (!MO.isReg()) + continue; + Register Reg = MO.getReg(); + if (MFI.isVRegStackified(Reg)) + Stack.push_back(MO.getReg()); } } // TODO: Generalize this code to support keeping values on the stack across |