aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/CodeGen/CodeGenPrepare.cpp59
-rw-r--r--llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp4
-rw-r--r--llvm/lib/CodeGen/MachineDebugify.cpp18
-rw-r--r--llvm/lib/CodeGen/MachineFunctionAnalysis.cpp6
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp5
-rw-r--r--llvm/lib/IR/DebugInfo.cpp25
-rw-r--r--llvm/lib/IR/DebugInfoMetadata.cpp9
-rw-r--r--llvm/lib/Object/IRSymtab.cpp45
-rw-r--r--llvm/lib/Passes/PassRegistry.def1
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPU.h3
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def1
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUPrepareAGPRAlloc.cpp108
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUPrepareAGPRAlloc.h23
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp234
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp13
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h2
-rw-r--r--llvm/lib/Target/AMDGPU/CMakeLists.txt1
-rw-r--r--llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp19
-rw-r--r--llvm/lib/Target/AMDGPU/SIISelLowering.cpp148
-rw-r--r--llvm/lib/Target/AMDGPU/SIInstrInfo.cpp113
-rw-r--r--llvm/lib/Target/AMDGPU/SIInstrInfo.h11
-rw-r--r--llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp8
-rw-r--r--llvm/lib/Target/AMDGPU/SIRegisterInfo.h6
-rw-r--r--llvm/lib/Target/AMDGPU/VOP1Instructions.td5
-rw-r--r--llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp7
-rw-r--r--llvm/lib/Target/RISCV/RISCVFrameLowering.cpp122
-rw-r--r--llvm/lib/Target/RISCV/RISCVVectorMaskDAGMutation.cpp22
-rw-r--r--llvm/lib/Target/X86/X86CallingConv.cpp2
-rw-r--r--llvm/lib/TargetParser/X86TargetParser.cpp6
-rw-r--r--llvm/lib/Transforms/Coroutines/CoroFrame.cpp52
-rw-r--r--llvm/lib/Transforms/Coroutines/CoroInternal.h5
-rw-r--r--llvm/lib/Transforms/Coroutines/CoroSplit.cpp23
-rw-r--r--llvm/lib/Transforms/Coroutines/SpillUtils.cpp6
-rw-r--r--llvm/lib/Transforms/IPO/MergeFunctions.cpp19
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineInternal.h3
-rw-r--r--llvm/lib/Transforms/InstCombine/InstructionCombining.cpp78
-rw-r--r--llvm/lib/Transforms/Scalar/ConstraintElimination.cpp1
-rw-r--r--llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp1
-rw-r--r--llvm/lib/Transforms/Scalar/GVN.cpp13
-rw-r--r--llvm/lib/Transforms/Scalar/JumpThreading.cpp39
-rw-r--r--llvm/lib/Transforms/Scalar/LoopInterchange.cpp24
-rw-r--r--llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp179
-rw-r--r--llvm/lib/Transforms/Utils/CodeExtractor.cpp8
-rw-r--r--llvm/lib/Transforms/Utils/Debugify.cpp7
-rw-r--r--llvm/lib/Transforms/Utils/InlineFunction.cpp3
-rw-r--r--llvm/lib/Transforms/Utils/Local.cpp103
-rw-r--r--llvm/lib/Transforms/Utils/LoopRotationUtils.cpp23
-rw-r--r--llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp49
-rw-r--r--llvm/lib/Transforms/Utils/SSAUpdater.cpp22
-rw-r--r--llvm/lib/Transforms/Utils/SimplifyCFG.cpp30
-rw-r--r--llvm/lib/Transforms/Vectorize/LoopVectorize.cpp39
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp9
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlanAnalysis.h6
53 files changed, 730 insertions, 1038 deletions
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index d9d41f1..dc81843 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -445,7 +445,6 @@ private:
bool optimizeSwitchInst(SwitchInst *SI);
bool optimizeExtractElementInst(Instruction *Inst);
bool dupRetToEnableTailCallOpts(BasicBlock *BB, ModifyDT &ModifiedDT);
- bool fixupDbgValue(Instruction *I);
bool fixupDbgVariableRecord(DbgVariableRecord &I);
bool fixupDbgVariableRecordsOnInst(Instruction &I);
bool placeDbgValues(Function &F);
@@ -2762,9 +2761,6 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, ModifyDT &ModifiedDT) {
case Intrinsic::fshl:
case Intrinsic::fshr:
return optimizeFunnelShift(II);
- case Intrinsic::dbg_assign:
- case Intrinsic::dbg_value:
- return fixupDbgValue(II);
case Intrinsic::masked_gather:
return optimizeGatherScatterInst(II, II->getArgOperand(0));
case Intrinsic::masked_scatter:
@@ -3554,8 +3550,6 @@ class TypePromotionTransaction {
/// Keep track of the original uses (pair Instruction, Index).
SmallVector<InstructionAndIdx, 4> OriginalUses;
/// Keep track of the debug users.
- SmallVector<DbgValueInst *, 1> DbgValues;
- /// And non-instruction debug-users too.
SmallVector<DbgVariableRecord *, 1> DbgVariableRecords;
/// Keep track of the new value so that we can undo it by replacing
@@ -3577,7 +3571,9 @@ class TypePromotionTransaction {
}
// Record the debug uses separately. They are not in the instruction's
// use list, but they are replaced by RAUW.
+ SmallVector<DbgValueInst *> DbgValues;
findDbgValues(DbgValues, Inst, &DbgVariableRecords);
+ assert(DbgValues.empty());
// Now, we can replace the uses.
Inst->replaceAllUsesWith(New);
@@ -3591,11 +3587,7 @@ class TypePromotionTransaction {
// RAUW has replaced all original uses with references to the new value,
// including the debug uses. Since we are undoing the replacements,
// the original debug uses must also be reinstated to maintain the
- // correctness and utility of debug value instructions.
- for (auto *DVI : DbgValues)
- DVI->replaceVariableLocationOp(New, Inst);
- // Similar story with DbgVariableRecords, the non-instruction
- // representation of dbg.values.
+ // correctness and utility of debug value records.
for (DbgVariableRecord *DVR : DbgVariableRecords)
DVR->replaceVariableLocationOp(New, Inst);
}
@@ -8933,32 +8925,6 @@ bool CodeGenPrepare::optimizeBlock(BasicBlock &BB, ModifyDT &ModifiedDT) {
return MadeChange;
}
-// Some CGP optimizations may move or alter what's computed in a block. Check
-// whether a dbg.value intrinsic could be pointed at a more appropriate operand.
-bool CodeGenPrepare::fixupDbgValue(Instruction *I) {
- assert(isa<DbgValueInst>(I));
- DbgValueInst &DVI = *cast<DbgValueInst>(I);
-
- // Does this dbg.value refer to a sunk address calculation?
- bool AnyChange = false;
- SmallDenseSet<Value *> LocationOps(DVI.location_ops().begin(),
- DVI.location_ops().end());
- for (Value *Location : LocationOps) {
- WeakTrackingVH SunkAddrVH = SunkAddrs[Location];
- Value *SunkAddr = SunkAddrVH.pointsToAliveValue() ? SunkAddrVH : nullptr;
- if (SunkAddr) {
- // Point dbg.value at locally computed address, which should give the best
- // opportunity to be accurately lowered. This update may change the type
- // of pointer being referred to; however this makes no difference to
- // debugging information, and we can't generate bitcasts that may affect
- // codegen.
- DVI.replaceVariableLocationOp(Location, SunkAddr);
- AnyChange = true;
- }
- }
- return AnyChange;
-}
-
bool CodeGenPrepare::fixupDbgVariableRecordsOnInst(Instruction &I) {
bool AnyChange = false;
for (DbgVariableRecord &DVR : filterDbgVars(I.getDbgRecordRange()))
@@ -8993,14 +8959,6 @@ bool CodeGenPrepare::fixupDbgVariableRecord(DbgVariableRecord &DVR) {
return AnyChange;
}
-static void DbgInserterHelper(DbgValueInst *DVI, BasicBlock::iterator VI) {
- DVI->removeFromParent();
- if (isa<PHINode>(VI))
- DVI->insertBefore(VI->getParent()->getFirstInsertionPt());
- else
- DVI->insertAfter(VI);
-}
-
static void DbgInserterHelper(DbgVariableRecord *DVR, BasicBlock::iterator VI) {
DVR->removeFromParent();
BasicBlock *VIBB = VI->getParent();
@@ -9065,15 +9023,8 @@ bool CodeGenPrepare::placeDbgValues(Function &F) {
for (BasicBlock &BB : F) {
for (Instruction &Insn : llvm::make_early_inc_range(BB)) {
- // Process dbg.value intrinsics.
- DbgValueInst *DVI = dyn_cast<DbgValueInst>(&Insn);
- if (DVI) {
- DbgProcessor(DVI, DVI);
- continue;
- }
-
- // If this isn't a dbg.value, process any attached DbgVariableRecord
- // records attached to this instruction.
+ // Process any DbgVariableRecord records attached to this
+ // instruction.
for (DbgVariableRecord &DVR : llvm::make_early_inc_range(
filterDbgVars(Insn.getDbgRecordRange()))) {
if (DVR.Type != DbgVariableRecord::LocationType::Value)
diff --git a/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp b/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp
index 9daacfd..e7fa082 100644
--- a/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp
+++ b/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp
@@ -202,8 +202,8 @@ MachineBlockFrequencyInfo::MachineBlockFrequencyInfo(
MachineBlockFrequencyInfo &&) = default;
MachineBlockFrequencyInfo::MachineBlockFrequencyInfo(
- MachineFunction &F, MachineBranchProbabilityInfo &MBPI,
- MachineLoopInfo &MLI) {
+ const MachineFunction &F, const MachineBranchProbabilityInfo &MBPI,
+ const MachineLoopInfo &MLI) {
calculate(F, MBPI, MLI);
}
diff --git a/llvm/lib/CodeGen/MachineDebugify.cpp b/llvm/lib/CodeGen/MachineDebugify.cpp
index 9b9cebc..1a20fe5 100644
--- a/llvm/lib/CodeGen/MachineDebugify.cpp
+++ b/llvm/lib/CodeGen/MachineDebugify.cpp
@@ -63,24 +63,9 @@ bool applyDebugifyMetadataToMachineFunction(MachineModuleInfo &MMI,
// which cover a wide range of lines can help stress the debug info passes:
// if we can't do that, fall back to using the local variable which precedes
// all the others.
- Function *DbgValF = M.getFunction("llvm.dbg.value");
- DbgValueInst *EarliestDVI = nullptr;
DbgVariableRecord *EarliestDVR = nullptr;
DenseMap<unsigned, DILocalVariable *> Line2Var;
DIExpression *Expr = nullptr;
- if (DbgValF) {
- for (const Use &U : DbgValF->uses()) {
- auto *DVI = dyn_cast<DbgValueInst>(U.getUser());
- if (!DVI || DVI->getFunction() != &F)
- continue;
- unsigned Line = DVI->getDebugLoc().getLine();
- assert(Line != 0 && "debugify should not insert line 0 locations");
- Line2Var[Line] = DVI->getVariable();
- if (!EarliestDVI || Line < EarliestDVI->getDebugLoc().getLine())
- EarliestDVI = DVI;
- Expr = DVI->getExpression();
- }
- }
for (BasicBlock &BB : F) {
for (Instruction &I : BB) {
for (DbgVariableRecord &DVR : filterDbgVars(I.getDbgRecordRange())) {
@@ -125,8 +110,7 @@ bool applyDebugifyMetadataToMachineFunction(MachineModuleInfo &MMI,
unsigned Line = MI.getDebugLoc().getLine();
auto It = Line2Var.find(Line);
if (It == Line2Var.end()) {
- Line = EarliestDVI ? EarliestDVI->getDebugLoc().getLine()
- : EarliestDVR->getDebugLoc().getLine();
+ Line = EarliestDVR->getDebugLoc().getLine();
It = Line2Var.find(Line);
assert(It != Line2Var.end());
}
diff --git a/llvm/lib/CodeGen/MachineFunctionAnalysis.cpp b/llvm/lib/CodeGen/MachineFunctionAnalysis.cpp
index e7a4d6d6..116a919 100644
--- a/llvm/lib/CodeGen/MachineFunctionAnalysis.cpp
+++ b/llvm/lib/CodeGen/MachineFunctionAnalysis.cpp
@@ -45,3 +45,9 @@ MachineFunctionAnalysis::run(Function &F, FunctionAnalysisManager &FAM) {
return Result(std::move(MF));
}
+
+PreservedAnalyses FreeMachineFunctionPass::run(Function &F,
+ FunctionAnalysisManager &FAM) {
+ FAM.clearAnalysis<MachineFunctionAnalysis>(F);
+ return PreservedAnalyses::all();
+}
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 682d93d..5453828 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -5569,6 +5569,7 @@ bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts,
case ISD::BUILD_VECTOR:
case ISD::BUILD_PAIR:
case ISD::SPLAT_VECTOR:
+ case ISD::FABS:
return false;
case ISD::ABS:
@@ -6750,7 +6751,9 @@ SDValue SelectionDAG::FoldSymbolOffset(unsigned Opcode, EVT VT,
return SDValue();
int64_t Offset = C2->getSExtValue();
switch (Opcode) {
- case ISD::ADD: break;
+ case ISD::ADD:
+ case ISD::PTRADD:
+ break;
case ISD::SUB: Offset = -uint64_t(Offset); break;
default: return SDValue();
}
diff --git a/llvm/lib/IR/DebugInfo.cpp b/llvm/lib/IR/DebugInfo.cpp
index 84a56058..8fb33c3 100644
--- a/llvm/lib/IR/DebugInfo.cpp
+++ b/llvm/lib/IR/DebugInfo.cpp
@@ -2288,39 +2288,36 @@ bool AssignmentTrackingPass::runOnFunction(Function &F) {
// Collect a map of {backing storage : dbg.declares} (currently "backing
// storage" is limited to Allocas). We'll use this to find dbg.declares to
// delete after running `trackAssignments`.
- DenseMap<const AllocaInst *, SmallPtrSet<DbgDeclareInst *, 2>> DbgDeclares;
DenseMap<const AllocaInst *, SmallPtrSet<DbgVariableRecord *, 2>> DVRDeclares;
// Create another similar map of {storage : variables} that we'll pass to
// trackAssignments.
StorageToVarsMap Vars;
- auto ProcessDeclare = [&](auto *Declare, auto &DeclareList) {
+ auto ProcessDeclare = [&](DbgVariableRecord &Declare) {
// FIXME: trackAssignments doesn't let you specify any modifiers to the
// variable (e.g. fragment) or location (e.g. offset), so we have to
// leave dbg.declares with non-empty expressions in place.
- if (Declare->getExpression()->getNumElements() != 0)
+ if (Declare.getExpression()->getNumElements() != 0)
return;
- if (!Declare->getAddress())
+ if (!Declare.getAddress())
return;
if (AllocaInst *Alloca =
- dyn_cast<AllocaInst>(Declare->getAddress()->stripPointerCasts())) {
+ dyn_cast<AllocaInst>(Declare.getAddress()->stripPointerCasts())) {
// FIXME: Skip VLAs for now (let these variables use dbg.declares).
if (!Alloca->isStaticAlloca())
return;
// Similarly, skip scalable vectors (use dbg.declares instead).
if (auto Sz = Alloca->getAllocationSize(*DL); Sz && Sz->isScalable())
return;
- DeclareList[Alloca].insert(Declare);
- Vars[Alloca].insert(VarRecord(Declare));
+ DVRDeclares[Alloca].insert(&Declare);
+ Vars[Alloca].insert(VarRecord(&Declare));
}
};
for (auto &BB : F) {
for (auto &I : BB) {
for (DbgVariableRecord &DVR : filterDbgVars(I.getDbgRecordRange())) {
if (DVR.isDbgDeclare())
- ProcessDeclare(&DVR, DVRDeclares);
+ ProcessDeclare(DVR);
}
- if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(&I))
- ProcessDeclare(DDI, DbgDeclares);
}
}
@@ -2336,8 +2333,8 @@ bool AssignmentTrackingPass::runOnFunction(Function &F) {
trackAssignments(F.begin(), F.end(), Vars, *DL);
// Delete dbg.declares for variables now tracked with assignment tracking.
- auto DeleteSubsumedDeclare = [&](const auto &Markers, auto &Declares) {
- (void)Markers;
+ for (auto &[Insts, Declares] : DVRDeclares) {
+ auto Markers = at::getDVRAssignmentMarkers(Insts);
for (auto *Declare : Declares) {
// Assert that the alloca that Declare uses is now linked to a dbg.assign
// describing the same variable (i.e. check that this dbg.declare has
@@ -2356,10 +2353,6 @@ bool AssignmentTrackingPass::runOnFunction(Function &F) {
Changed = true;
}
};
- for (auto &P : DbgDeclares)
- DeleteSubsumedDeclare(at::getAssignmentMarkers(P.first), P.second);
- for (auto &P : DVRDeclares)
- DeleteSubsumedDeclare(at::getDVRAssignmentMarkers(P.first), P.second);
return Changed;
}
diff --git a/llvm/lib/IR/DebugInfoMetadata.cpp b/llvm/lib/IR/DebugInfoMetadata.cpp
index 2270923..f16963d 100644
--- a/llvm/lib/IR/DebugInfoMetadata.cpp
+++ b/llvm/lib/IR/DebugInfoMetadata.cpp
@@ -49,20 +49,11 @@ uint32_t DIType::getAlignInBits() const {
const DIExpression::FragmentInfo DebugVariable::DefaultFragment = {
std::numeric_limits<uint64_t>::max(), std::numeric_limits<uint64_t>::min()};
-DebugVariable::DebugVariable(const DbgVariableIntrinsic *DII)
- : Variable(DII->getVariable()),
- Fragment(DII->getExpression()->getFragmentInfo()),
- InlinedAt(DII->getDebugLoc().getInlinedAt()) {}
-
DebugVariable::DebugVariable(const DbgVariableRecord *DVR)
: Variable(DVR->getVariable()),
Fragment(DVR->getExpression()->getFragmentInfo()),
InlinedAt(DVR->getDebugLoc().getInlinedAt()) {}
-DebugVariableAggregate::DebugVariableAggregate(const DbgVariableIntrinsic *DVI)
- : DebugVariable(DVI->getVariable(), std::nullopt,
- DVI->getDebugLoc()->getInlinedAt()) {}
-
DILocation::DILocation(LLVMContext &C, StorageType Storage, unsigned Line,
unsigned Column, uint64_t AtomGroup, uint8_t AtomRank,
ArrayRef<Metadata *> MDs, bool ImplicitCode)
diff --git a/llvm/lib/Object/IRSymtab.cpp b/llvm/lib/Object/IRSymtab.cpp
index 79eeb08..2579fa3 100644
--- a/llvm/lib/Object/IRSymtab.cpp
+++ b/llvm/lib/Object/IRSymtab.cpp
@@ -54,11 +54,6 @@ static const char *PreservedSymbols[] = {
"__stack_chk_guard",
};
-static bool isPreservedGlobalVarName(StringRef Name) {
- return StringRef(PreservedSymbols[0]) == Name ||
- StringRef(PreservedSymbols[1]) == Name;
-}
-
namespace {
const char *getExpectedProducerName() {
@@ -86,16 +81,12 @@ struct Builder {
// The StringTableBuilder does not create a copy of any strings added to it,
// so this provides somewhere to store any strings that we create.
Builder(SmallVector<char, 0> &Symtab, StringTableBuilder &StrtabBuilder,
- BumpPtrAllocator &Alloc, const Triple &TT)
- : Symtab(Symtab), StrtabBuilder(StrtabBuilder), Saver(Alloc), TT(TT),
- Libcalls(TT) {}
+ BumpPtrAllocator &Alloc)
+ : Symtab(Symtab), StrtabBuilder(StrtabBuilder), Saver(Alloc) {}
DenseMap<const Comdat *, int> ComdatMap;
Mangler Mang;
- const Triple &TT;
-
- // FIXME: This shouldn't be here.
- RTLIB::RuntimeLibcallsInfo Libcalls;
+ Triple TT;
std::vector<storage::Comdat> Comdats;
std::vector<storage::Module> Mods;
@@ -107,10 +98,6 @@ struct Builder {
std::vector<storage::Str> DependentLibraries;
- bool isPreservedLibFuncName(StringRef Name) {
- return Libcalls.getSupportedLibcallImpl(Name) != RTLIB::Unsupported;
- }
-
void setStr(storage::Str &S, StringRef Value) {
S.Offset = StrtabBuilder.add(Value);
S.Size = Value.size();
@@ -226,6 +213,18 @@ Expected<int> Builder::getComdatIndex(const Comdat *C, const Module *M) {
return P.first->second;
}
+static DenseSet<StringRef> buildPreservedSymbolsSet(const Triple &TT) {
+ DenseSet<StringRef> PreservedSymbolSet(std::begin(PreservedSymbols),
+ std::end(PreservedSymbols));
+ // FIXME: Do we need to pass in ABI fields from TargetOptions?
+ RTLIB::RuntimeLibcallsInfo Libcalls(TT);
+ for (RTLIB::LibcallImpl Impl : Libcalls.getLibcallImpls()) {
+ if (Impl != RTLIB::Unsupported)
+ PreservedSymbolSet.insert(Libcalls.getLibcallImplName(Impl));
+ }
+ return PreservedSymbolSet;
+}
+
Error Builder::addSymbol(const ModuleSymbolTable &Msymtab,
const SmallPtrSet<GlobalValue *, 4> &Used,
ModuleSymbolTable::Symbol Msym) {
@@ -279,11 +278,13 @@ Error Builder::addSymbol(const ModuleSymbolTable &Msymtab,
return Error::success();
}
- StringRef GVName = GV->getName();
- setStr(Sym.IRName, GVName);
+ setStr(Sym.IRName, GV->getName());
+
+ static const DenseSet<StringRef> PreservedSymbolsSet =
+ buildPreservedSymbolsSet(GV->getParent()->getTargetTriple());
+ bool IsPreservedSymbol = PreservedSymbolsSet.contains(GV->getName());
- if (Used.count(GV) || isPreservedLibFuncName(GVName) ||
- isPreservedGlobalVarName(GVName))
+ if (Used.count(GV) || IsPreservedSymbol)
Sym.Flags |= 1 << storage::Symbol::FB_used;
if (GV->isThreadLocal())
Sym.Flags |= 1 << storage::Symbol::FB_tls;
@@ -350,6 +351,7 @@ Error Builder::build(ArrayRef<Module *> IRMods) {
setStr(Hdr.Producer, kExpectedProducerName);
setStr(Hdr.TargetTriple, IRMods[0]->getTargetTriple().str());
setStr(Hdr.SourceFileName, IRMods[0]->getSourceFileName());
+ TT = IRMods[0]->getTargetTriple();
for (auto *M : IRMods)
if (Error Err = addModule(M))
@@ -375,8 +377,7 @@ Error Builder::build(ArrayRef<Module *> IRMods) {
Error irsymtab::build(ArrayRef<Module *> Mods, SmallVector<char, 0> &Symtab,
StringTableBuilder &StrtabBuilder,
BumpPtrAllocator &Alloc) {
- const Triple &TT = Mods[0]->getTargetTriple();
- return Builder(Symtab, StrtabBuilder, Alloc, TT).build(Mods);
+ return Builder(Symtab, StrtabBuilder, Alloc).build(Mods);
}
// Upgrade a vector of bitcode modules created by an old version of LLVM by
diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def
index 9a94315..caa78b6 100644
--- a/llvm/lib/Passes/PassRegistry.def
+++ b/llvm/lib/Passes/PassRegistry.def
@@ -434,6 +434,7 @@ FUNCTION_PASS("extra-vector-passes",
FUNCTION_PASS("fix-irreducible", FixIrreduciblePass())
FUNCTION_PASS("flatten-cfg", FlattenCFGPass())
FUNCTION_PASS("float2int", Float2IntPass())
+FUNCTION_PASS("free-machine-function", FreeMachineFunctionPass())
FUNCTION_PASS("gc-lowering", GCLoweringPass())
FUNCTION_PASS("guard-widening", GuardWideningPass())
FUNCTION_PASS("gvn-hoist", GVNHoistPass())
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h
index 23f106a..007b481 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -153,6 +153,9 @@ private:
const TargetMachine &TM;
};
+void initializeAMDGPUPrepareAGPRAllocLegacyPass(PassRegistry &);
+extern char &AMDGPUPrepareAGPRAllocLegacyID;
+
void initializeAMDGPUReserveWWMRegsLegacyPass(PassRegistry &);
extern char &AMDGPUReserveWWMRegsLegacyID;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
index 250547a..b6c6d92 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
@@ -114,6 +114,7 @@ MACHINE_FUNCTION_PASS("amdgpu-rewrite-partial-reg-uses", GCNRewritePartialRegUse
MACHINE_FUNCTION_PASS("amdgpu-set-wave-priority", AMDGPUSetWavePriorityPass())
MACHINE_FUNCTION_PASS("amdgpu-pre-ra-optimizations", GCNPreRAOptimizationsPass())
MACHINE_FUNCTION_PASS("amdgpu-preload-kern-arg-prolog", AMDGPUPreloadKernArgPrologPass())
+MACHINE_FUNCTION_PASS("amdgpu-prepare-agpr-alloc", AMDGPUPrepareAGPRAllocPass())
MACHINE_FUNCTION_PASS("amdgpu-nsa-reassign", GCNNSAReassignPass())
MACHINE_FUNCTION_PASS("amdgpu-wait-sgpr-hazards", AMDGPUWaitSGPRHazardsPass())
MACHINE_FUNCTION_PASS("gcn-create-vopd", GCNCreateVOPDPass())
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPrepareAGPRAlloc.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPrepareAGPRAlloc.cpp
new file mode 100644
index 0000000..3b06e9b
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPrepareAGPRAlloc.cpp
@@ -0,0 +1,108 @@
+//===-- AMDGPUPrepareAGPRAlloc.cpp ----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Make simple transformations to relax register constraints for cases which can
+// allocate to AGPRs or VGPRs. Replace materialize of inline immediates into
+// AGPR or VGPR with a pseudo with an AV_* class register constraint. This
+// allows later passes to inflate the register class if necessary. The register
+// allocator does not know to replace instructions to relax constraints.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPUPrepareAGPRAlloc.h"
+#include "AMDGPU.h"
+#include "GCNSubtarget.h"
+#include "SIMachineFunctionInfo.h"
+#include "SIRegisterInfo.h"
+#include "llvm/CodeGen/LiveIntervals.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/InitializePasses.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "amdgpu-prepare-agpr-alloc"
+
+namespace {
+
+class AMDGPUPrepareAGPRAllocImpl {
+private:
+ const SIInstrInfo &TII;
+ MachineRegisterInfo &MRI;
+
+public:
+ AMDGPUPrepareAGPRAllocImpl(const GCNSubtarget &ST, MachineRegisterInfo &MRI)
+ : TII(*ST.getInstrInfo()), MRI(MRI) {}
+ bool run(MachineFunction &MF);
+};
+
+class AMDGPUPrepareAGPRAllocLegacy : public MachineFunctionPass {
+public:
+ static char ID;
+
+ AMDGPUPrepareAGPRAllocLegacy() : MachineFunctionPass(ID) {
+ initializeAMDGPUPrepareAGPRAllocLegacyPass(
+ *PassRegistry::getPassRegistry());
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ StringRef getPassName() const override { return "AMDGPU Prepare AGPR Alloc"; }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+};
+} // End anonymous namespace.
+
+INITIALIZE_PASS_BEGIN(AMDGPUPrepareAGPRAllocLegacy, DEBUG_TYPE,
+ "AMDGPU Prepare AGPR Alloc", false, false)
+INITIALIZE_PASS_END(AMDGPUPrepareAGPRAllocLegacy, DEBUG_TYPE,
+ "AMDGPU Prepare AGPR Alloc", false, false)
+
+char AMDGPUPrepareAGPRAllocLegacy::ID = 0;
+
+char &llvm::AMDGPUPrepareAGPRAllocLegacyID = AMDGPUPrepareAGPRAllocLegacy::ID;
+
+bool AMDGPUPrepareAGPRAllocLegacy::runOnMachineFunction(MachineFunction &MF) {
+ if (skipFunction(MF.getFunction()))
+ return false;
+
+ const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+ return AMDGPUPrepareAGPRAllocImpl(ST, MF.getRegInfo()).run(MF);
+}
+
+PreservedAnalyses
+AMDGPUPrepareAGPRAllocPass::run(MachineFunction &MF,
+ MachineFunctionAnalysisManager &MFAM) {
+ const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+ AMDGPUPrepareAGPRAllocImpl(ST, MF.getRegInfo()).run(MF);
+ return PreservedAnalyses::all();
+}
+
+bool AMDGPUPrepareAGPRAllocImpl::run(MachineFunction &MF) {
+ if (MRI.isReserved(AMDGPU::AGPR0))
+ return false;
+
+ const MCInstrDesc &AVImmPseudo = TII.get(AMDGPU::AV_MOV_B32_IMM_PSEUDO);
+
+ bool Changed = false;
+ for (MachineBasicBlock &MBB : MF) {
+ for (MachineInstr &MI : MBB) {
+ if ((MI.getOpcode() == AMDGPU::V_MOV_B32_e32 &&
+ TII.isInlineConstant(MI, 1)) ||
+ (MI.getOpcode() == AMDGPU::V_ACCVGPR_WRITE_B32_e64 &&
+ MI.getOperand(1).isImm())) {
+ MI.setDesc(AVImmPseudo);
+ Changed = true;
+ }
+ }
+ }
+
+ return Changed;
+}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPrepareAGPRAlloc.h b/llvm/lib/Target/AMDGPU/AMDGPUPrepareAGPRAlloc.h
new file mode 100644
index 0000000..dc598c9
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPrepareAGPRAlloc.h
@@ -0,0 +1,23 @@
+//===- AMDGPUPrepareAGPRAlloc.h ---------------------------------*- C++- *-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUPREPAREAGPRALLOC_H
+#define LLVM_LIB_TARGET_AMDGPU_AMDGPUPREPAREAGPRALLOC_H
+
+#include "llvm/CodeGen/MachinePassManager.h"
+
+namespace llvm {
+class AMDGPUPrepareAGPRAllocPass
+ : public PassInfoMixin<AMDGPUPrepareAGPRAllocPass> {
+public:
+ PreservedAnalyses run(MachineFunction &MF,
+ MachineFunctionAnalysisManager &MFAM);
+};
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUPREPAREAGPRALLOC_H
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp b/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp
index 46027b8..8101c68 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp
@@ -167,77 +167,39 @@ AMDGPUResourceUsageAnalysisImpl::analyzeResourceUsage(
Info.UsesVCC =
MRI.isPhysRegUsed(AMDGPU::VCC_LO) || MRI.isPhysRegUsed(AMDGPU::VCC_HI);
+ Info.NumExplicitSGPR = TRI.getNumUsedPhysRegs(MRI, AMDGPU::SGPR_32RegClass,
+ /*IncludeCalls=*/false);
+ if (ST.hasMAIInsts())
+ Info.NumAGPR = TRI.getNumUsedPhysRegs(MRI, AMDGPU::AGPR_32RegClass,
+ /*IncludeCalls=*/false);
// If there are no calls, MachineRegisterInfo can tell us the used register
// count easily.
// A tail call isn't considered a call for MachineFrameInfo's purposes.
if (!FrameInfo.hasCalls() && !FrameInfo.hasTailCall()) {
- Info.NumVGPR = TRI.getNumUsedPhysRegs(MRI, AMDGPU::VGPR_32RegClass);
- Info.NumExplicitSGPR = TRI.getNumUsedPhysRegs(MRI, AMDGPU::SGPR_32RegClass);
- if (ST.hasMAIInsts())
- Info.NumAGPR = TRI.getNumUsedPhysRegs(MRI, AMDGPU::AGPR_32RegClass);
+ Info.NumVGPR = TRI.getNumUsedPhysRegs(MRI, AMDGPU::VGPR_32RegClass,
+ /*IncludeCalls=*/false);
return Info;
}
int32_t MaxVGPR = -1;
- int32_t MaxAGPR = -1;
- int32_t MaxSGPR = -1;
Info.CalleeSegmentSize = 0;
for (const MachineBasicBlock &MBB : MF) {
for (const MachineInstr &MI : MBB) {
- // TODO: Check regmasks? Do they occur anywhere except calls?
- for (const MachineOperand &MO : MI.operands()) {
- unsigned Width = 0;
- bool IsSGPR = false;
- bool IsAGPR = false;
+ for (unsigned I = 0; I < MI.getNumOperands(); ++I) {
+ const MachineOperand &MO = MI.getOperand(I);
if (!MO.isReg())
continue;
Register Reg = MO.getReg();
switch (Reg) {
- case AMDGPU::EXEC:
- case AMDGPU::EXEC_LO:
- case AMDGPU::EXEC_HI:
- case AMDGPU::SCC:
- case AMDGPU::M0:
- case AMDGPU::M0_LO16:
- case AMDGPU::M0_HI16:
- case AMDGPU::SRC_SHARED_BASE_LO:
- case AMDGPU::SRC_SHARED_BASE:
- case AMDGPU::SRC_SHARED_LIMIT_LO:
- case AMDGPU::SRC_SHARED_LIMIT:
- case AMDGPU::SRC_PRIVATE_BASE_LO:
- case AMDGPU::SRC_PRIVATE_BASE:
- case AMDGPU::SRC_PRIVATE_LIMIT_LO:
- case AMDGPU::SRC_PRIVATE_LIMIT:
- case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
- case AMDGPU::SGPR_NULL:
- case AMDGPU::SGPR_NULL64:
- case AMDGPU::MODE:
- continue;
-
case AMDGPU::NoRegister:
assert(MI.isDebugInstr() &&
"Instruction uses invalid noreg register");
continue;
- case AMDGPU::VCC:
- case AMDGPU::VCC_LO:
- case AMDGPU::VCC_HI:
- case AMDGPU::VCC_LO_LO16:
- case AMDGPU::VCC_LO_HI16:
- case AMDGPU::VCC_HI_LO16:
- case AMDGPU::VCC_HI_HI16:
- Info.UsesVCC = true;
- continue;
-
- case AMDGPU::FLAT_SCR:
- case AMDGPU::FLAT_SCR_LO:
- case AMDGPU::FLAT_SCR_HI:
- continue;
-
case AMDGPU::XNACK_MASK:
case AMDGPU::XNACK_MASK_LO:
case AMDGPU::XNACK_MASK_HI:
@@ -267,170 +229,22 @@ AMDGPUResourceUsageAnalysisImpl::analyzeResourceUsage(
break;
}
- if (AMDGPU::SGPR_32RegClass.contains(Reg) ||
- AMDGPU::SGPR_LO16RegClass.contains(Reg) ||
- AMDGPU::SGPR_HI16RegClass.contains(Reg)) {
- IsSGPR = true;
- Width = 1;
- } else if (AMDGPU::VGPR_32RegClass.contains(Reg) ||
- AMDGPU::VGPR_16RegClass.contains(Reg)) {
- IsSGPR = false;
- Width = 1;
- } else if (AMDGPU::AGPR_32RegClass.contains(Reg) ||
- AMDGPU::AGPR_LO16RegClass.contains(Reg)) {
- IsSGPR = false;
- IsAGPR = true;
- Width = 1;
- } else if (AMDGPU::SGPR_64RegClass.contains(Reg)) {
- IsSGPR = true;
- Width = 2;
- } else if (AMDGPU::VReg_64RegClass.contains(Reg)) {
- IsSGPR = false;
- Width = 2;
- } else if (AMDGPU::AReg_64RegClass.contains(Reg)) {
- IsSGPR = false;
- IsAGPR = true;
- Width = 2;
- } else if (AMDGPU::VReg_96RegClass.contains(Reg)) {
- IsSGPR = false;
- Width = 3;
- } else if (AMDGPU::SReg_96RegClass.contains(Reg)) {
- IsSGPR = true;
- Width = 3;
- } else if (AMDGPU::AReg_96RegClass.contains(Reg)) {
- IsSGPR = false;
- IsAGPR = true;
- Width = 3;
- } else if (AMDGPU::SGPR_128RegClass.contains(Reg)) {
- IsSGPR = true;
- Width = 4;
- } else if (AMDGPU::VReg_128RegClass.contains(Reg)) {
- IsSGPR = false;
- Width = 4;
- } else if (AMDGPU::AReg_128RegClass.contains(Reg)) {
- IsSGPR = false;
- IsAGPR = true;
- Width = 4;
- } else if (AMDGPU::VReg_160RegClass.contains(Reg)) {
- IsSGPR = false;
- Width = 5;
- } else if (AMDGPU::SReg_160RegClass.contains(Reg)) {
- IsSGPR = true;
- Width = 5;
- } else if (AMDGPU::AReg_160RegClass.contains(Reg)) {
- IsSGPR = false;
- IsAGPR = true;
- Width = 5;
- } else if (AMDGPU::VReg_192RegClass.contains(Reg)) {
- IsSGPR = false;
- Width = 6;
- } else if (AMDGPU::SReg_192RegClass.contains(Reg)) {
- IsSGPR = true;
- Width = 6;
- } else if (AMDGPU::AReg_192RegClass.contains(Reg)) {
- IsSGPR = false;
- IsAGPR = true;
- Width = 6;
- } else if (AMDGPU::VReg_224RegClass.contains(Reg)) {
- IsSGPR = false;
- Width = 7;
- } else if (AMDGPU::SReg_224RegClass.contains(Reg)) {
- IsSGPR = true;
- Width = 7;
- } else if (AMDGPU::AReg_224RegClass.contains(Reg)) {
- IsSGPR = false;
- IsAGPR = true;
- Width = 7;
- } else if (AMDGPU::SReg_256RegClass.contains(Reg)) {
- IsSGPR = true;
- Width = 8;
- } else if (AMDGPU::VReg_256RegClass.contains(Reg)) {
- IsSGPR = false;
- Width = 8;
- } else if (AMDGPU::AReg_256RegClass.contains(Reg)) {
- IsSGPR = false;
- IsAGPR = true;
- Width = 8;
- } else if (AMDGPU::VReg_288RegClass.contains(Reg)) {
- IsSGPR = false;
- Width = 9;
- } else if (AMDGPU::SReg_288RegClass.contains(Reg)) {
- IsSGPR = true;
- Width = 9;
- } else if (AMDGPU::AReg_288RegClass.contains(Reg)) {
- IsSGPR = false;
- IsAGPR = true;
- Width = 9;
- } else if (AMDGPU::VReg_320RegClass.contains(Reg)) {
- IsSGPR = false;
- Width = 10;
- } else if (AMDGPU::SReg_320RegClass.contains(Reg)) {
- IsSGPR = true;
- Width = 10;
- } else if (AMDGPU::AReg_320RegClass.contains(Reg)) {
- IsSGPR = false;
- IsAGPR = true;
- Width = 10;
- } else if (AMDGPU::VReg_352RegClass.contains(Reg)) {
- IsSGPR = false;
- Width = 11;
- } else if (AMDGPU::SReg_352RegClass.contains(Reg)) {
- IsSGPR = true;
- Width = 11;
- } else if (AMDGPU::AReg_352RegClass.contains(Reg)) {
- IsSGPR = false;
- IsAGPR = true;
- Width = 11;
- } else if (AMDGPU::VReg_384RegClass.contains(Reg)) {
- IsSGPR = false;
- Width = 12;
- } else if (AMDGPU::SReg_384RegClass.contains(Reg)) {
- IsSGPR = true;
- Width = 12;
- } else if (AMDGPU::AReg_384RegClass.contains(Reg)) {
- IsSGPR = false;
- IsAGPR = true;
- Width = 12;
- } else if (AMDGPU::SReg_512RegClass.contains(Reg)) {
- IsSGPR = true;
- Width = 16;
- } else if (AMDGPU::VReg_512RegClass.contains(Reg)) {
- IsSGPR = false;
- Width = 16;
- } else if (AMDGPU::AReg_512RegClass.contains(Reg)) {
- IsSGPR = false;
- IsAGPR = true;
- Width = 16;
- } else if (AMDGPU::SReg_1024RegClass.contains(Reg)) {
- IsSGPR = true;
- Width = 32;
- } else if (AMDGPU::VReg_1024RegClass.contains(Reg)) {
- IsSGPR = false;
- Width = 32;
- } else if (AMDGPU::AReg_1024RegClass.contains(Reg)) {
- IsSGPR = false;
- IsAGPR = true;
- Width = 32;
- } else {
- // We only expect TTMP registers or registers that do not belong to
- // any RC.
- assert((AMDGPU::TTMP_32RegClass.contains(Reg) ||
- AMDGPU::TTMP_64RegClass.contains(Reg) ||
- AMDGPU::TTMP_128RegClass.contains(Reg) ||
- AMDGPU::TTMP_256RegClass.contains(Reg) ||
- AMDGPU::TTMP_512RegClass.contains(Reg) ||
- !TRI.getPhysRegBaseClass(Reg)) &&
- "Unknown register class");
- }
+ const TargetRegisterClass *RC = TRI.getPhysRegBaseClass(Reg);
+ assert((!RC || TRI.isVGPRClass(RC) || TRI.isSGPRClass(RC) ||
+ TRI.isAGPRClass(RC) || AMDGPU::TTMP_32RegClass.contains(Reg) ||
+ AMDGPU::TTMP_64RegClass.contains(Reg) ||
+ AMDGPU::TTMP_128RegClass.contains(Reg) ||
+ AMDGPU::TTMP_256RegClass.contains(Reg) ||
+ AMDGPU::TTMP_512RegClass.contains(Reg)) &&
+ "Unknown register class");
+
+ if (!RC || !TRI.isVGPRClass(RC))
+ continue;
+
+ unsigned Width = divideCeil(TRI.getRegSizeInBits(*RC), 32);
unsigned HWReg = TRI.getHWRegIndex(Reg);
int MaxUsed = HWReg + Width - 1;
- if (IsSGPR) {
- MaxSGPR = MaxUsed > MaxSGPR ? MaxUsed : MaxSGPR;
- } else if (IsAGPR) {
- MaxAGPR = MaxUsed > MaxAGPR ? MaxUsed : MaxAGPR;
- } else {
- MaxVGPR = MaxUsed > MaxVGPR ? MaxUsed : MaxVGPR;
- }
+ MaxVGPR = std::max(MaxUsed, MaxVGPR);
}
if (MI.isCall()) {
@@ -492,9 +306,7 @@ AMDGPUResourceUsageAnalysisImpl::analyzeResourceUsage(
}
}
- Info.NumExplicitSGPR = MaxSGPR + 1;
Info.NumVGPR = MaxVGPR + 1;
- Info.NumAGPR = MaxAGPR + 1;
return Info;
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 31a80e0..c865082 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -25,6 +25,7 @@
#include "AMDGPUMacroFusion.h"
#include "AMDGPUPerfHintAnalysis.h"
#include "AMDGPUPreloadKernArgProlog.h"
+#include "AMDGPUPrepareAGPRAlloc.h"
#include "AMDGPURemoveIncompatibleFunctions.h"
#include "AMDGPUReserveWWMRegs.h"
#include "AMDGPUResourceUsageAnalysis.h"
@@ -499,6 +500,7 @@ extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
initializeGlobalISel(*PR);
initializeAMDGPUAsmPrinterPass(*PR);
initializeAMDGPUDAGToDAGISelLegacyPass(*PR);
+ initializeAMDGPUPrepareAGPRAllocLegacyPass(*PR);
initializeGCNDPPCombineLegacyPass(*PR);
initializeSILowerI1CopiesLegacyPass(*PR);
initializeAMDGPUGlobalISelDivergenceLoweringPass(*PR);
@@ -1196,6 +1198,7 @@ public:
bool addRegBankSelect() override;
void addPreGlobalInstructionSelect() override;
bool addGlobalInstructionSelect() override;
+ void addPreRegAlloc() override;
void addFastRegAlloc() override;
void addOptimizedRegAlloc() override;
@@ -1539,6 +1542,11 @@ void GCNPassConfig::addFastRegAlloc() {
TargetPassConfig::addFastRegAlloc();
}
+void GCNPassConfig::addPreRegAlloc() {
+ if (getOptLevel() != CodeGenOptLevel::None)
+ addPass(&AMDGPUPrepareAGPRAllocLegacyID);
+}
+
void GCNPassConfig::addOptimizedRegAlloc() {
if (EnableDCEInRA)
insertPass(&DetectDeadLanesID, &DeadMachineInstructionElimID);
@@ -2235,6 +2243,11 @@ void AMDGPUCodeGenPassBuilder::addOptimizedRegAlloc(
Base::addOptimizedRegAlloc(addPass);
}
+void AMDGPUCodeGenPassBuilder::addPreRegAlloc(AddMachinePass &addPass) const {
+ if (getOptLevel() != CodeGenOptLevel::None)
+ addPass(AMDGPUPrepareAGPRAllocPass());
+}
+
Error AMDGPUCodeGenPassBuilder::addRegAssignmentOptimized(
AddMachinePass &addPass) const {
// TODO: Check --regalloc-npm option
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
index 3b2f39c..e0f1296 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
@@ -181,7 +181,9 @@ public:
void addMachineSSAOptimization(AddMachinePass &) const;
void addPostRegAlloc(AddMachinePass &) const;
void addPreEmitPass(AddMachinePass &) const;
+ void addPreEmitRegAlloc(AddMachinePass &) const;
Error addRegAssignmentOptimized(AddMachinePass &) const;
+ void addPreRegAlloc(AddMachinePass &) const;
void addOptimizedRegAlloc(AddMachinePass &) const;
void addPreSched2(AddMachinePass &) const;
diff --git a/llvm/lib/Target/AMDGPU/CMakeLists.txt b/llvm/lib/Target/AMDGPU/CMakeLists.txt
index e3519f1..42edec0 100644
--- a/llvm/lib/Target/AMDGPU/CMakeLists.txt
+++ b/llvm/lib/Target/AMDGPU/CMakeLists.txt
@@ -74,6 +74,7 @@ add_llvm_target(AMDGPUCodeGen
AMDGPULowerKernelArguments.cpp
AMDGPULowerKernelAttributes.cpp
AMDGPULowerModuleLDSPass.cpp
+ AMDGPUPrepareAGPRAlloc.cpp
AMDGPUSwLowerLDS.cpp
AMDGPUMachineFunction.cpp
AMDGPUMachineModuleInfo.cpp
diff --git a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
index 44d9ef5..f018f77 100644
--- a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
@@ -947,13 +947,18 @@ void SIFixSGPRCopies::analyzeVGPRToSGPRCopy(MachineInstr* MI) {
// Copies and REG_SEQUENCE do not contribute to the final assembly
// So, skip them but take care of the SGPR to VGPR copies bookkeeping.
- if (Inst->isCopy() || Inst->isRegSequence()) {
- if (TRI->isVGPR(*MRI, Inst->getOperand(0).getReg())) {
- if (!Inst->isCopy() ||
- !tryChangeVGPRtoSGPRinCopy(*Inst, TRI, TII)) {
- Info.NumSVCopies++;
- continue;
- }
+ if (Inst->isRegSequence() &&
+ TRI->isVGPR(*MRI, Inst->getOperand(0).getReg())) {
+ Info.NumSVCopies++;
+ continue;
+ }
+ if (Inst->isCopy()) {
+ const TargetRegisterClass *SrcRC, *DstRC;
+ std::tie(SrcRC, DstRC) = getCopyRegClasses(*Inst, *TRI, *MRI);
+ if (isSGPRToVGPRCopy(SrcRC, DstRC, *TRI) &&
+ !tryChangeVGPRtoSGPRinCopy(*Inst, TRI, TII)) {
+ Info.NumSVCopies++;
+ continue;
}
}
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 27212fda..0c76ff2 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -33,6 +33,7 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/SDPatternMatch.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicInst.h"
@@ -46,6 +47,7 @@
#include <optional>
using namespace llvm;
+using namespace llvm::SDPatternMatch;
#define DEBUG_TYPE "si-lower"
@@ -11131,7 +11133,7 @@ SDValue SITargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
assert(VT.getSizeInBits() == 64);
SDLoc DL(Op);
- SDValue Cond = Op.getOperand(0);
+ SDValue Cond = DAG.getFreeze(Op.getOperand(0));
SDValue Zero = DAG.getConstant(0, DL, MVT::i32);
SDValue One = DAG.getConstant(1, DL, MVT::i32);
@@ -14561,7 +14563,7 @@ static SDValue tryFoldMADwithSRL(SelectionDAG &DAG, const SDLoc &SL,
// instead of a tree.
SDValue SITargetLowering::tryFoldToMad64_32(SDNode *N,
DAGCombinerInfo &DCI) const {
- assert(N->getOpcode() == ISD::ADD);
+ assert(N->isAnyAdd());
SelectionDAG &DAG = DCI.DAG;
EVT VT = N->getValueType(0);
@@ -14594,7 +14596,7 @@ SDValue SITargetLowering::tryFoldToMad64_32(SDNode *N,
for (SDNode *User : LHS->users()) {
// There is a use that does not feed into addition, so the multiply can't
// be removed. We prefer MUL + ADD + ADDC over MAD + MUL.
- if (User->getOpcode() != ISD::ADD)
+ if (!User->isAnyAdd())
return SDValue();
// We prefer 2xMAD over MUL + 2xADD + 2xADDC (code density), and prefer
@@ -14706,8 +14708,11 @@ SITargetLowering::foldAddSub64WithZeroLowBitsTo32(SDNode *N,
SDValue Hi = getHiHalf64(LHS, DAG);
SDValue ConstHi32 = DAG.getConstant(Hi_32(Val), SL, MVT::i32);
+ unsigned Opcode = N->getOpcode();
+ if (Opcode == ISD::PTRADD)
+ Opcode = ISD::ADD;
SDValue AddHi =
- DAG.getNode(N->getOpcode(), SL, MVT::i32, Hi, ConstHi32, N->getFlags());
+ DAG.getNode(Opcode, SL, MVT::i32, Hi, ConstHi32, N->getFlags());
SDValue Lo = DAG.getNode(ISD::TRUNCATE, SL, MVT::i32, LHS);
return DAG.getNode(ISD::BUILD_PAIR, SL, MVT::i64, Lo, AddHi);
@@ -15181,42 +15186,123 @@ SDValue SITargetLowering::performPtrAddCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
SDLoc DL(N);
+ EVT VT = N->getValueType(0);
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- if (N1.getOpcode() == ISD::ADD) {
- // (ptradd x, (add y, z)) -> (ptradd (ptradd x, y), z) if z is a constant,
- // y is not, and (add y, z) is used only once.
- // (ptradd x, (add y, z)) -> (ptradd (ptradd x, z), y) if y is a constant,
- // z is not, and (add y, z) is used only once.
- // The goal is to move constant offsets to the outermost ptradd, to create
- // more opportunities to fold offsets into memory instructions.
- // Together with the generic combines in DAGCombiner.cpp, this also
- // implements (ptradd (ptradd x, y), z) -> (ptradd (ptradd x, z), y)).
- //
- // This transform is here instead of in the general DAGCombiner as it can
- // turn in-bounds pointer arithmetic out-of-bounds, which is problematic for
- // AArch64's CPA.
- SDValue X = N0;
- SDValue Y = N1.getOperand(0);
- SDValue Z = N1.getOperand(1);
- if (N1.hasOneUse()) {
- bool YIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Y);
- bool ZIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Z);
- if (ZIsConstant != YIsConstant) {
- // If both additions in the original were NUW, the new ones are as well.
- SDNodeFlags Flags =
- (N->getFlags() & N1->getFlags()) & SDNodeFlags::NoUnsignedWrap;
- if (YIsConstant)
- std::swap(Y, Z);
+ // The following folds transform PTRADDs into regular arithmetic in cases
+ // where the PTRADD wouldn't be folded as an immediate offset into memory
+ // instructions anyway. They are target-specific in that other targets might
+ // prefer to not lose information about the pointer arithmetic.
+
+ // Fold (ptradd x, shl(0 - v, k)) -> sub(x, shl(v, k)).
+ // Adapted from DAGCombiner::visitADDLikeCommutative.
+ SDValue V, K;
+ if (sd_match(N1, m_Shl(m_Neg(m_Value(V)), m_Value(K)))) {
+ SDNodeFlags ShlFlags = N1->getFlags();
+ // If the original shl is NUW and NSW, the first k+1 bits of 0-v are all 0,
+ // so v is either 0 or the first k+1 bits of v are all 1 -> NSW can be
+ // preserved.
+ SDNodeFlags NewShlFlags =
+ ShlFlags.hasNoUnsignedWrap() && ShlFlags.hasNoSignedWrap()
+ ? SDNodeFlags::NoSignedWrap
+ : SDNodeFlags();
+ SDValue Inner = DAG.getNode(ISD::SHL, DL, VT, V, K, NewShlFlags);
+ DCI.AddToWorklist(Inner.getNode());
+ return DAG.getNode(ISD::SUB, DL, VT, N0, Inner);
+ }
+
+ // Fold into Mad64 if the right-hand side is a MUL. Analogous to a fold in
+ // performAddCombine.
+ if (N1.getOpcode() == ISD::MUL) {
+ if (Subtarget->hasMad64_32()) {
+ if (SDValue Folded = tryFoldToMad64_32(N, DCI))
+ return Folded;
+ }
+ }
- SDValue Inner = DAG.getMemBasePlusOffset(X, Y, DL, Flags);
+ // If the 32 low bits of the constant are all zero, there is nothing to fold
+ // into an immediate offset, so it's better to eliminate the unnecessary
+ // addition for the lower 32 bits than to preserve the PTRADD.
+ // Analogous to a fold in performAddCombine.
+ if (VT == MVT::i64) {
+ if (SDValue Folded = foldAddSub64WithZeroLowBitsTo32(N, DCI))
+ return Folded;
+ }
+
+ if (N0.getOpcode() == ISD::PTRADD && N1.getOpcode() == ISD::Constant) {
+ // Fold (ptradd (ptradd GA, v), c) -> (ptradd (ptradd GA, c) v) with
+ // global address GA and constant c, such that c can be folded into GA.
+ SDValue GAValue = N0.getOperand(0);
+ if (const GlobalAddressSDNode *GA =
+ dyn_cast<GlobalAddressSDNode>(GAValue)) {
+ if (DCI.isBeforeLegalizeOps() && isOffsetFoldingLegal(GA)) {
+ // If both additions in the original were NUW, reassociation preserves
+ // that.
+ SDNodeFlags Flags =
+ (N->getFlags() & N0->getFlags()) & SDNodeFlags::NoUnsignedWrap;
+ SDValue Inner = DAG.getMemBasePlusOffset(GAValue, N1, DL, Flags);
DCI.AddToWorklist(Inner.getNode());
- return DAG.getMemBasePlusOffset(Inner, Z, DL, Flags);
+ return DAG.getMemBasePlusOffset(Inner, N0.getOperand(1), DL, Flags);
}
}
}
+ if (N1.getOpcode() != ISD::ADD || !N1.hasOneUse())
+ return SDValue();
+
+ // (ptradd x, (add y, z)) -> (ptradd (ptradd x, y), z) if z is a constant,
+ // y is not, and (add y, z) is used only once.
+ // (ptradd x, (add y, z)) -> (ptradd (ptradd x, z), y) if y is a constant,
+ // z is not, and (add y, z) is used only once.
+ // The goal is to move constant offsets to the outermost ptradd, to create
+ // more opportunities to fold offsets into memory instructions.
+ // Together with the generic combines in DAGCombiner.cpp, this also
+ // implements (ptradd (ptradd x, y), z) -> (ptradd (ptradd x, z), y)).
+ //
+ // This transform is here instead of in the general DAGCombiner as it can
+ // turn in-bounds pointer arithmetic out-of-bounds, which is problematic for
+ // AArch64's CPA.
+ SDValue X = N0;
+ SDValue Y = N1.getOperand(0);
+ SDValue Z = N1.getOperand(1);
+ bool YIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Y);
+ bool ZIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Z);
+
+ // If both additions in the original were NUW, reassociation preserves that.
+ SDNodeFlags ReassocFlags =
+ (N->getFlags() & N1->getFlags()) & SDNodeFlags::NoUnsignedWrap;
+
+ if (ZIsConstant != YIsConstant) {
+ if (YIsConstant)
+ std::swap(Y, Z);
+ SDValue Inner = DAG.getMemBasePlusOffset(X, Y, DL, ReassocFlags);
+ DCI.AddToWorklist(Inner.getNode());
+ return DAG.getMemBasePlusOffset(Inner, Z, DL, ReassocFlags);
+ }
+
+ // If one of Y and Z is constant, they have been handled above. If both were
+ // constant, the addition would have been folded in SelectionDAG::getNode
+ // already. This ensures that the generic DAG combines won't undo the
+ // following reassociation.
+ assert(!YIsConstant && !ZIsConstant);
+
+ if (!X->isDivergent() && Y->isDivergent() != Z->isDivergent()) {
+ // Reassociate (ptradd x, (add y, z)) -> (ptradd (ptradd x, y), z) if x and
+ // y are uniform and z isn't.
+ // Reassociate (ptradd x, (add y, z)) -> (ptradd (ptradd x, z), y) if x and
+ // z are uniform and y isn't.
+ // The goal is to push uniform operands up in the computation, so that they
+ // can be handled with scalar operations. We can't use reassociateScalarOps
+ // for this since it requires two identical commutative operations to
+ // reassociate.
+ if (Y->isDivergent())
+ std::swap(Y, Z);
+ SDValue UniformInner = DAG.getMemBasePlusOffset(X, Y, DL, ReassocFlags);
+ DCI.AddToWorklist(UniformInner.getNode());
+ return DAG.getMemBasePlusOffset(UniformInner, Z, DL, ReassocFlags);
+ }
+
return SDValue();
}
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 9da8a1c..c8935f0 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -687,7 +687,8 @@ static void indirectCopyToAGPR(const SIInstrInfo &TII,
if (!SafeToPropagate)
break;
- DefOp.setIsKill(false);
+ for (auto I = Def; I != MI; ++I)
+ I->clearRegisterKills(DefOp.getReg(), &RI);
}
MachineInstrBuilder Builder =
@@ -1625,41 +1626,6 @@ static unsigned getVGPRSpillSaveOpcode(unsigned Size) {
}
}
-static unsigned getAGPRSpillSaveOpcode(unsigned Size) {
- switch (Size) {
- case 4:
- return AMDGPU::SI_SPILL_A32_SAVE;
- case 8:
- return AMDGPU::SI_SPILL_A64_SAVE;
- case 12:
- return AMDGPU::SI_SPILL_A96_SAVE;
- case 16:
- return AMDGPU::SI_SPILL_A128_SAVE;
- case 20:
- return AMDGPU::SI_SPILL_A160_SAVE;
- case 24:
- return AMDGPU::SI_SPILL_A192_SAVE;
- case 28:
- return AMDGPU::SI_SPILL_A224_SAVE;
- case 32:
- return AMDGPU::SI_SPILL_A256_SAVE;
- case 36:
- return AMDGPU::SI_SPILL_A288_SAVE;
- case 40:
- return AMDGPU::SI_SPILL_A320_SAVE;
- case 44:
- return AMDGPU::SI_SPILL_A352_SAVE;
- case 48:
- return AMDGPU::SI_SPILL_A384_SAVE;
- case 64:
- return AMDGPU::SI_SPILL_A512_SAVE;
- case 128:
- return AMDGPU::SI_SPILL_A1024_SAVE;
- default:
- llvm_unreachable("unknown register size");
- }
-}
-
static unsigned getAVSpillSaveOpcode(unsigned Size) {
switch (Size) {
case 4:
@@ -1707,22 +1673,20 @@ static unsigned getWWMRegSpillSaveOpcode(unsigned Size,
return AMDGPU::SI_SPILL_WWM_V32_SAVE;
}
-static unsigned getVectorRegSpillSaveOpcode(Register Reg,
- const TargetRegisterClass *RC,
- unsigned Size,
- const SIRegisterInfo &TRI,
- const SIMachineFunctionInfo &MFI) {
- bool IsVectorSuperClass = TRI.isVectorSuperClass(RC);
+unsigned SIInstrInfo::getVectorRegSpillSaveOpcode(
+ Register Reg, const TargetRegisterClass *RC, unsigned Size,
+ const SIMachineFunctionInfo &MFI) const {
+ bool IsVectorSuperClass = RI.isVectorSuperClass(RC);
// Choose the right opcode if spilling a WWM register.
if (MFI.checkFlag(Reg, AMDGPU::VirtRegFlag::WWM_REG))
return getWWMRegSpillSaveOpcode(Size, IsVectorSuperClass);
- if (IsVectorSuperClass)
+ // TODO: Check if AGPRs are available
+ if (ST.hasMAIInsts())
return getAVSpillSaveOpcode(Size);
- return TRI.isAGPRClass(RC) ? getAGPRSpillSaveOpcode(Size)
- : getVGPRSpillSaveOpcode(Size);
+ return getVGPRSpillSaveOpcode(Size);
}
void SIInstrInfo::storeRegToStackSlot(
@@ -1770,8 +1734,8 @@ void SIInstrInfo::storeRegToStackSlot(
return;
}
- unsigned Opcode = getVectorRegSpillSaveOpcode(VReg ? VReg : SrcReg, RC,
- SpillSize, RI, *MFI);
+ unsigned Opcode =
+ getVectorRegSpillSaveOpcode(VReg ? VReg : SrcReg, RC, SpillSize, *MFI);
MFI->setHasSpilledVGPRs();
BuildMI(MBB, MI, DL, get(Opcode))
@@ -1854,41 +1818,6 @@ static unsigned getVGPRSpillRestoreOpcode(unsigned Size) {
}
}
-static unsigned getAGPRSpillRestoreOpcode(unsigned Size) {
- switch (Size) {
- case 4:
- return AMDGPU::SI_SPILL_A32_RESTORE;
- case 8:
- return AMDGPU::SI_SPILL_A64_RESTORE;
- case 12:
- return AMDGPU::SI_SPILL_A96_RESTORE;
- case 16:
- return AMDGPU::SI_SPILL_A128_RESTORE;
- case 20:
- return AMDGPU::SI_SPILL_A160_RESTORE;
- case 24:
- return AMDGPU::SI_SPILL_A192_RESTORE;
- case 28:
- return AMDGPU::SI_SPILL_A224_RESTORE;
- case 32:
- return AMDGPU::SI_SPILL_A256_RESTORE;
- case 36:
- return AMDGPU::SI_SPILL_A288_RESTORE;
- case 40:
- return AMDGPU::SI_SPILL_A320_RESTORE;
- case 44:
- return AMDGPU::SI_SPILL_A352_RESTORE;
- case 48:
- return AMDGPU::SI_SPILL_A384_RESTORE;
- case 64:
- return AMDGPU::SI_SPILL_A512_RESTORE;
- case 128:
- return AMDGPU::SI_SPILL_A1024_RESTORE;
- default:
- llvm_unreachable("unknown register size");
- }
-}
-
static unsigned getAVSpillRestoreOpcode(unsigned Size) {
switch (Size) {
case 4:
@@ -1930,27 +1859,27 @@ static unsigned getWWMRegSpillRestoreOpcode(unsigned Size,
if (Size != 4)
llvm_unreachable("unknown wwm register spill size");
- if (IsVectorSuperClass)
+ if (IsVectorSuperClass) // TODO: Always use this if there are AGPRs
return AMDGPU::SI_SPILL_WWM_AV32_RESTORE;
return AMDGPU::SI_SPILL_WWM_V32_RESTORE;
}
-static unsigned
-getVectorRegSpillRestoreOpcode(Register Reg, const TargetRegisterClass *RC,
- unsigned Size, const SIRegisterInfo &TRI,
- const SIMachineFunctionInfo &MFI) {
- bool IsVectorSuperClass = TRI.isVectorSuperClass(RC);
+unsigned SIInstrInfo::getVectorRegSpillRestoreOpcode(
+ Register Reg, const TargetRegisterClass *RC, unsigned Size,
+ const SIMachineFunctionInfo &MFI) const {
+ bool IsVectorSuperClass = RI.isVectorSuperClass(RC);
// Choose the right opcode if restoring a WWM register.
if (MFI.checkFlag(Reg, AMDGPU::VirtRegFlag::WWM_REG))
return getWWMRegSpillRestoreOpcode(Size, IsVectorSuperClass);
- if (IsVectorSuperClass)
+ // TODO: Check if AGPRs are available
+ if (ST.hasMAIInsts())
return getAVSpillRestoreOpcode(Size);
- return TRI.isAGPRClass(RC) ? getAGPRSpillRestoreOpcode(Size)
- : getVGPRSpillRestoreOpcode(Size);
+ assert(!RI.isAGPRClass(RC));
+ return getVGPRSpillRestoreOpcode(Size);
}
void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
@@ -1998,7 +1927,7 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
}
unsigned Opcode = getVectorRegSpillRestoreOpcode(VReg ? VReg : DestReg, RC,
- SpillSize, RI, *MFI);
+ SpillSize, *MFI);
BuildMI(MBB, MI, DL, get(Opcode), DestReg)
.addFrameIndex(FrameIndex) // vaddr
.addReg(MFI->getStackPtrOffsetReg()) // scratch_offset
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index 3a48e65..5e92921 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -33,6 +33,7 @@ class LiveVariables;
class MachineDominatorTree;
class MachineRegisterInfo;
class RegScavenger;
+class SIMachineFunctionInfo;
class TargetRegisterClass;
class ScheduleHazardRecognizer;
@@ -287,6 +288,15 @@ public:
bool getConstValDefinedInReg(const MachineInstr &MI, const Register Reg,
int64_t &ImmVal) const override;
+ unsigned getVectorRegSpillSaveOpcode(Register Reg,
+ const TargetRegisterClass *RC,
+ unsigned Size,
+ const SIMachineFunctionInfo &MFI) const;
+ unsigned
+ getVectorRegSpillRestoreOpcode(Register Reg, const TargetRegisterClass *RC,
+ unsigned Size,
+ const SIMachineFunctionInfo &MFI) const;
+
void storeRegToStackSlot(
MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register SrcReg,
bool isKill, int FrameIndex, const TargetRegisterClass *RC,
@@ -1103,7 +1113,6 @@ public:
// that will not require an additional 4-bytes; this function assumes that it
// will.
bool isInlineConstant(const MachineOperand &MO, uint8_t OperandType) const {
- assert(!MO.isReg() && "isInlineConstant called on register operand!");
if (!MO.isImm())
return false;
return isInlineConstant(MO.getImm(), OperandType);
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index 9173041..fa2b8db 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -4052,11 +4052,11 @@ SIRegisterInfo::getSubRegAlignmentNumBits(const TargetRegisterClass *RC,
return 0;
}
-unsigned
-SIRegisterInfo::getNumUsedPhysRegs(const MachineRegisterInfo &MRI,
- const TargetRegisterClass &RC) const {
+unsigned SIRegisterInfo::getNumUsedPhysRegs(const MachineRegisterInfo &MRI,
+ const TargetRegisterClass &RC,
+ bool IncludeCalls) const {
for (MCPhysReg Reg : reverse(RC.getRegisters()))
- if (MRI.isPhysRegUsed(Reg))
+ if (MRI.isPhysRegUsed(Reg, /*SkipRegMaskTest=*/!IncludeCalls))
return getHWRegIndex(Reg) + 1;
return 0;
}
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
index 06a7a17..0008e5f 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
@@ -486,9 +486,11 @@ public:
unsigned SubReg) const;
// \returns a number of registers of a given \p RC used in a function.
- // Does not go inside function calls.
+ // Does not go inside function calls. If \p IncludeCalls is true, it will
+ // include registers that may be clobbered by calls.
unsigned getNumUsedPhysRegs(const MachineRegisterInfo &MRI,
- const TargetRegisterClass &RC) const;
+ const TargetRegisterClass &RC,
+ bool IncludeCalls = true) const;
std::optional<uint8_t> getVRegFlagValue(StringRef Name) const override {
return Name == "WWM_REG" ? AMDGPU::VirtRegFlag::WWM_REG
diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
index 8c35fea..1bbbb61 100644
--- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
@@ -530,6 +530,10 @@ defm V_EXP_F16 : VOP1Inst_t16 <"v_exp_f16", VOP_F16_F16, AMDGPUexpf16>;
defm V_SIN_F16 : VOP1Inst_t16 <"v_sin_f16", VOP_F16_F16, AMDGPUsin>;
defm V_COS_F16 : VOP1Inst_t16 <"v_cos_f16", VOP_F16_F16, AMDGPUcos>;
+let SubtargetPredicate = HasTanhInsts in {
+defm V_TANH_F16 : VOP1Inst_t16 <"v_tanh_f16", VOP_F16_F16, int_amdgcn_tanh>;
+}
+
let SubtargetPredicate = HasBF16TransInsts in {
defm V_TANH_BF16 : VOP1Inst_t16 <"v_tanh_bf16", VOP_BF16_BF16, int_amdgcn_tanh>;
defm V_RCP_BF16 : VOP1Inst_t16 <"v_rcp_bf16", VOP_BF16_BF16, AMDGPUrcp>;
@@ -1142,6 +1146,7 @@ defm V_CVT_F32_F16 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12<0x00b>;
defm V_MOV_B64 : VOP1_Real_FULL <GFX1250Gen, 0x1d>;
defm V_TANH_F32 : VOP1_Real_FULL<GFX1250Gen, 0x01e>;
+defm V_TANH_F16 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x01f>;
defm V_TANH_BF16 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x04a>;
defm V_CVT_F32_BF16 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x072, "v_cvt_f32_bf16", "V_CVT_F32_BF16_gfx1250">;
defm V_CVT_PK_F16_FP8 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x075>;
diff --git a/llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp
index ac5e7f3..1493bf4 100644
--- a/llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp
@@ -158,7 +158,12 @@ void LoongArchFrameLowering::processFunctionBeforeFrameFinalized(
// estimateStackSize has been observed to under-estimate the final stack
// size, so give ourselves wiggle-room by checking for stack size
// representable an 11-bit signed field rather than 12-bits.
- if (!isInt<11>(MFI.estimateStackSize(MF)))
+ // For [x]vstelm.{b/h/w/d} memory instructions with 8 imm offset, 7-bit
+ // signed field is fine.
+ unsigned EstimateStackSize = MFI.estimateStackSize(MF);
+ if (!isInt<11>(EstimateStackSize) ||
+ (MF.getSubtarget<LoongArchSubtarget>().hasExtLSX() &&
+ !isInt<7>(EstimateStackSize)))
ScavSlotsNum = std::max(ScavSlotsNum, 1u);
// For CFR spill.
diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
index 6c8e3da..23b4554 100644
--- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
@@ -95,6 +95,11 @@ static const std::pair<MCPhysReg, int8_t> FixedCSRFIQCIInterruptMap[] = {
/* -21, -22, -23, -24 are reserved */
};
+/// Returns true if DWARF CFI instructions ("frame moves") should be emitted.
+static bool needsDwarfCFI(const MachineFunction &MF) {
+ return MF.needsFrameMoves();
+}
+
// For now we use x3, a.k.a gp, as pointer to shadow call stack.
// User should not use x3 in their asm.
static void emitSCSPrologue(MachineFunction &MF, MachineBasicBlock &MBB,
@@ -141,6 +146,9 @@ static void emitSCSPrologue(MachineFunction &MF, MachineBasicBlock &MBB,
.addImm(-SlotSize)
.setMIFlag(MachineInstr::FrameSetup);
+ if (!needsDwarfCFI(MF))
+ return;
+
// Emit a CFI instruction that causes SlotSize to be subtracted from the value
// of the shadow stack pointer when unwinding past this frame.
char DwarfSCSReg = TRI->getDwarfRegNum(SCSPReg, /*IsEH*/ true);
@@ -199,8 +207,10 @@ static void emitSCSEpilogue(MachineFunction &MF, MachineBasicBlock &MBB,
.addReg(SCSPReg)
.addImm(-SlotSize)
.setMIFlag(MachineInstr::FrameDestroy);
- // Restore the SCS pointer
- CFIInstBuilder(MBB, MI, MachineInstr::FrameDestroy).buildRestore(SCSPReg);
+ if (needsDwarfCFI(MF)) {
+ // Restore the SCS pointer
+ CFIInstBuilder(MBB, MI, MachineInstr::FrameDestroy).buildRestore(SCSPReg);
+ }
}
// Insert instruction to swap mscratchsw with sp
@@ -935,6 +945,7 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
MBBI = std::prev(MBBI, getRVVCalleeSavedInfo(MF, CSI).size() +
getUnmanagedCSI(MF, CSI).size());
CFIInstBuilder CFIBuilder(MBB, MBBI, MachineInstr::FrameSetup);
+ bool NeedsDwarfCFI = needsDwarfCFI(MF);
// If libcalls are used to spill and restore callee-saved registers, the frame
// has two sections; the opaque section managed by the libcalls, and the
@@ -962,10 +973,12 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
alignTo((STI.getXLen() / 8) * LibCallRegs, getStackAlign());
RVFI->setLibCallStackSize(LibCallFrameSize);
- CFIBuilder.buildDefCFAOffset(LibCallFrameSize);
- for (const CalleeSavedInfo &CS : getPushOrLibCallsSavedInfo(MF, CSI))
- CFIBuilder.buildOffset(CS.getReg(),
- MFI.getObjectOffset(CS.getFrameIdx()));
+ if (NeedsDwarfCFI) {
+ CFIBuilder.buildDefCFAOffset(LibCallFrameSize);
+ for (const CalleeSavedInfo &CS : getPushOrLibCallsSavedInfo(MF, CSI))
+ CFIBuilder.buildOffset(CS.getReg(),
+ MFI.getObjectOffset(CS.getFrameIdx()));
+ }
}
// FIXME (note copied from Lanai): This appears to be overallocating. Needs
@@ -996,14 +1009,17 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
// could only be the next instruction.
++PossiblePush;
- // Insert the CFI metadata before where we think the `(QC.)CM.PUSH(FP)`
- // could be. The PUSH will also get its own CFI metadata for its own
- // modifications, which should come after the PUSH.
- CFIInstBuilder PushCFIBuilder(MBB, PossiblePush, MachineInstr::FrameSetup);
- PushCFIBuilder.buildDefCFAOffset(QCIInterruptPushAmount);
- for (const CalleeSavedInfo &CS : getQCISavedInfo(MF, CSI))
- PushCFIBuilder.buildOffset(CS.getReg(),
- MFI.getObjectOffset(CS.getFrameIdx()));
+ if (NeedsDwarfCFI) {
+ // Insert the CFI metadata before where we think the `(QC.)CM.PUSH(FP)`
+ // could be. The PUSH will also get its own CFI metadata for its own
+ // modifications, which should come after the PUSH.
+ CFIInstBuilder PushCFIBuilder(MBB, PossiblePush,
+ MachineInstr::FrameSetup);
+ PushCFIBuilder.buildDefCFAOffset(QCIInterruptPushAmount);
+ for (const CalleeSavedInfo &CS : getQCISavedInfo(MF, CSI))
+ PushCFIBuilder.buildOffset(CS.getReg(),
+ MFI.getObjectOffset(CS.getFrameIdx()));
+ }
}
if (RVFI->isPushable(MF) && PossiblePush != MBB.end() &&
@@ -1017,10 +1033,12 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
PossiblePush->getOperand(1).setImm(StackAdj);
StackSize -= StackAdj;
- CFIBuilder.buildDefCFAOffset(RealStackSize - StackSize);
- for (const CalleeSavedInfo &CS : getPushOrLibCallsSavedInfo(MF, CSI))
- CFIBuilder.buildOffset(CS.getReg(),
- MFI.getObjectOffset(CS.getFrameIdx()));
+ if (NeedsDwarfCFI) {
+ CFIBuilder.buildDefCFAOffset(RealStackSize - StackSize);
+ for (const CalleeSavedInfo &CS : getPushOrLibCallsSavedInfo(MF, CSI))
+ CFIBuilder.buildOffset(CS.getReg(),
+ MFI.getObjectOffset(CS.getFrameIdx()));
+ }
}
// Allocate space on the stack if necessary.
@@ -1031,7 +1049,7 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
bool DynAllocation =
MF.getInfo<RISCVMachineFunctionInfo>()->hasDynamicAllocation();
if (StackSize != 0)
- allocateStack(MBB, MBBI, MF, StackSize, RealStackSize, /*EmitCFI=*/true,
+ allocateStack(MBB, MBBI, MF, StackSize, RealStackSize, NeedsDwarfCFI,
NeedProbe, ProbeSize, DynAllocation,
MachineInstr::FrameSetup);
@@ -1049,8 +1067,10 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
// Iterate over list of callee-saved registers and emit .cfi_offset
// directives.
- for (const CalleeSavedInfo &CS : getUnmanagedCSI(MF, CSI))
- CFIBuilder.buildOffset(CS.getReg(), MFI.getObjectOffset(CS.getFrameIdx()));
+ if (NeedsDwarfCFI)
+ for (const CalleeSavedInfo &CS : getUnmanagedCSI(MF, CSI))
+ CFIBuilder.buildOffset(CS.getReg(),
+ MFI.getObjectOffset(CS.getFrameIdx()));
// Generate new FP.
if (hasFP(MF)) {
@@ -1069,7 +1089,8 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
MachineInstr::FrameSetup, getStackAlign());
}
- CFIBuilder.buildDefCFA(FPReg, RVFI->getVarArgsSaveSize());
+ if (NeedsDwarfCFI)
+ CFIBuilder.buildDefCFA(FPReg, RVFI->getVarArgsSaveSize());
}
uint64_t SecondSPAdjustAmount = 0;
@@ -1080,15 +1101,16 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
"SecondSPAdjustAmount should be greater than zero");
allocateStack(MBB, MBBI, MF, SecondSPAdjustAmount,
- getStackSizeWithRVVPadding(MF), !hasFP(MF), NeedProbe,
- ProbeSize, DynAllocation, MachineInstr::FrameSetup);
+ getStackSizeWithRVVPadding(MF), NeedsDwarfCFI && !hasFP(MF),
+ NeedProbe, ProbeSize, DynAllocation,
+ MachineInstr::FrameSetup);
}
if (RVVStackSize) {
if (NeedProbe) {
allocateAndProbeStackForRVV(MF, MBB, MBBI, DL, RVVStackSize,
- MachineInstr::FrameSetup, !hasFP(MF),
- DynAllocation);
+ MachineInstr::FrameSetup,
+ NeedsDwarfCFI && !hasFP(MF), DynAllocation);
} else {
// We must keep the stack pointer aligned through any intermediate
// updates.
@@ -1097,14 +1119,15 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
MachineInstr::FrameSetup, getStackAlign());
}
- if (!hasFP(MF)) {
+ if (NeedsDwarfCFI && !hasFP(MF)) {
// Emit .cfi_def_cfa_expression "sp + StackSize + RVVStackSize * vlenb".
CFIBuilder.insertCFIInst(createDefCFAExpression(
*RI, SPReg, getStackSizeWithRVVPadding(MF), RVVStackSize / 8));
}
std::advance(MBBI, getRVVCalleeSavedInfo(MF, CSI).size());
- emitCalleeSavedRVVPrologCFI(MBB, MBBI, hasFP(MF));
+ if (NeedsDwarfCFI)
+ emitCalleeSavedRVVPrologCFI(MBB, MBBI, hasFP(MF));
}
if (hasFP(MF)) {
@@ -1171,8 +1194,9 @@ void RISCVFrameLowering::deallocateStack(MachineFunction &MF,
MachineInstr::FrameDestroy, getStackAlign());
StackSize = 0;
- CFIInstBuilder(MBB, MBBI, MachineInstr::FrameDestroy)
- .buildDefCFAOffset(CFAOffset);
+ if (needsDwarfCFI(MF))
+ CFIInstBuilder(MBB, MBBI, MachineInstr::FrameDestroy)
+ .buildDefCFAOffset(CFAOffset);
}
void RISCVFrameLowering::emitEpilogue(MachineFunction &MF,
@@ -1212,6 +1236,7 @@ void RISCVFrameLowering::emitEpilogue(MachineFunction &MF,
std::next(MBBI, getRVVCalleeSavedInfo(MF, CSI).size());
CFIInstBuilder CFIBuilder(MBB, FirstScalarCSRRestoreInsn,
MachineInstr::FrameDestroy);
+ bool NeedsDwarfCFI = needsDwarfCFI(MF);
uint64_t FirstSPAdjustAmount = getFirstSPAdjustAmount(MF);
uint64_t RealStackSize = FirstSPAdjustAmount ? FirstSPAdjustAmount
@@ -1232,10 +1257,11 @@ void RISCVFrameLowering::emitEpilogue(MachineFunction &MF,
StackOffset::getScalable(RVVStackSize),
MachineInstr::FrameDestroy, getStackAlign());
- if (!hasFP(MF))
- CFIBuilder.buildDefCFA(SPReg, RealStackSize);
-
- emitCalleeSavedRVVEpilogCFI(MBB, FirstScalarCSRRestoreInsn);
+ if (NeedsDwarfCFI) {
+ if (!hasFP(MF))
+ CFIBuilder.buildDefCFA(SPReg, RealStackSize);
+ emitCalleeSavedRVVEpilogCFI(MBB, FirstScalarCSRRestoreInsn);
+ }
}
if (FirstSPAdjustAmount) {
@@ -1251,7 +1277,7 @@ void RISCVFrameLowering::emitEpilogue(MachineFunction &MF,
StackOffset::getFixed(SecondSPAdjustAmount),
MachineInstr::FrameDestroy, getStackAlign());
- if (!hasFP(MF))
+ if (NeedsDwarfCFI && !hasFP(MF))
CFIBuilder.buildDefCFAOffset(FirstSPAdjustAmount);
}
@@ -1272,7 +1298,7 @@ void RISCVFrameLowering::emitEpilogue(MachineFunction &MF,
getStackAlign());
}
- if (hasFP(MF))
+ if (NeedsDwarfCFI && hasFP(MF))
CFIBuilder.buildDefCFA(SPReg, RealStackSize);
// Skip to after the restores of scalar callee-saved registers
@@ -1295,8 +1321,9 @@ void RISCVFrameLowering::emitEpilogue(MachineFunction &MF,
}
// Recover callee-saved registers.
- for (const CalleeSavedInfo &CS : getUnmanagedCSI(MF, CSI))
- CFIBuilder.buildRestore(CS.getReg());
+ if (NeedsDwarfCFI)
+ for (const CalleeSavedInfo &CS : getUnmanagedCSI(MF, CSI))
+ CFIBuilder.buildRestore(CS.getReg());
if (RVFI->isPushable(MF) && MBBI != MBB.end() && isPop(MBBI->getOpcode())) {
// Use available stack adjustment in pop instruction to deallocate stack
@@ -1315,15 +1342,17 @@ void RISCVFrameLowering::emitEpilogue(MachineFunction &MF,
auto NextI = next_nodbg(MBBI, MBB.end());
if (NextI == MBB.end() || NextI->getOpcode() != RISCV::PseudoRET) {
++MBBI;
- CFIBuilder.setInsertPoint(MBBI);
+ if (NeedsDwarfCFI) {
+ CFIBuilder.setInsertPoint(MBBI);
- for (const CalleeSavedInfo &CS : getPushOrLibCallsSavedInfo(MF, CSI))
- CFIBuilder.buildRestore(CS.getReg());
+ for (const CalleeSavedInfo &CS : getPushOrLibCallsSavedInfo(MF, CSI))
+ CFIBuilder.buildRestore(CS.getReg());
- // Update CFA Offset. If this is a QCI interrupt function, there will be a
- // leftover offset which is deallocated by `QC.C.MILEAVERET`, otherwise
- // getQCIInterruptStackSize() will be 0.
- CFIBuilder.buildDefCFAOffset(RVFI->getQCIInterruptStackSize());
+ // Update CFA Offset. If this is a QCI interrupt function, there will
+ // be a leftover offset which is deallocated by `QC.C.MILEAVERET`,
+ // otherwise getQCIInterruptStackSize() will be 0.
+ CFIBuilder.buildDefCFAOffset(RVFI->getQCIInterruptStackSize());
+ }
}
}
@@ -1812,7 +1841,8 @@ MachineBasicBlock::iterator RISCVFrameLowering::eliminateCallFramePseudoInstr(
// allocateStack.
bool DynAllocation =
MF.getInfo<RISCVMachineFunctionInfo>()->hasDynamicAllocation();
- allocateStack(MBB, MI, MF, -Amount, -Amount, !hasFP(MF),
+ allocateStack(MBB, MI, MF, -Amount, -Amount,
+ needsDwarfCFI(MF) && !hasFP(MF),
/*NeedProbe=*/true, ProbeSize, DynAllocation,
MachineInstr::NoFlags);
} else {
diff --git a/llvm/lib/Target/RISCV/RISCVVectorMaskDAGMutation.cpp b/llvm/lib/Target/RISCV/RISCVVectorMaskDAGMutation.cpp
index c1f4d19..3bd2705 100644
--- a/llvm/lib/Target/RISCV/RISCVVectorMaskDAGMutation.cpp
+++ b/llvm/lib/Target/RISCV/RISCVVectorMaskDAGMutation.cpp
@@ -10,6 +10,10 @@
// instructions and masked instructions, so that we can reduce the live range
// overlaps of mask registers.
//
+// If there are multiple masks producers followed by multiple masked
+// instructions, then at each masked instructions add dependency edges between
+// every producer and masked instruction.
+//
// The reason why we need to do this:
// 1. When tracking register pressure, we don't track physical registers.
// 2. We have a RegisterClass for mask register (which is `VMV0`), but we don't
@@ -67,11 +71,27 @@ public:
void apply(ScheduleDAGInstrs *DAG) override {
SUnit *NearestUseV0SU = nullptr;
+ SmallVector<SUnit *, 2> DefMask;
for (SUnit &SU : DAG->SUnits) {
const MachineInstr *MI = SU.getInstr();
- if (MI->findRegisterUseOperand(RISCV::V0, TRI))
+ bool UseV0 = MI->findRegisterUseOperand(RISCV::V0, TRI);
+ if (isSoleUseCopyToV0(SU) && !UseV0)
+ DefMask.push_back(&SU);
+
+ if (UseV0) {
NearestUseV0SU = &SU;
+ // Copy may not be a real use, so skip it here.
+ if (DefMask.size() > 1 && !MI->isCopy()) {
+ for (SUnit *Def : DefMask)
+ if (DAG->canAddEdge(Def, &SU))
+ DAG->addEdge(Def, SDep(&SU, SDep::Artificial));
+ }
+
+ if (!DefMask.empty())
+ DefMask.erase(DefMask.begin());
+ }
+
if (NearestUseV0SU && NearestUseV0SU != &SU && isSoleUseCopyToV0(SU) &&
// For LMUL=8 cases, there will be more possibilities to spill.
// FIXME: We should use RegPressureTracker to do fine-grained
diff --git a/llvm/lib/Target/X86/X86CallingConv.cpp b/llvm/lib/Target/X86/X86CallingConv.cpp
index 82e8ce4e..5d5a705 100644
--- a/llvm/lib/Target/X86/X86CallingConv.cpp
+++ b/llvm/lib/Target/X86/X86CallingConv.cpp
@@ -389,7 +389,7 @@ static bool CC_X86_32_I128_FP128(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
if (!ArgFlags.isInConsecutiveRegsLast())
return true;
- assert(PendingMembers.size() == 4 && "Should have two parts");
+ assert(PendingMembers.size() == 4 && "Should have four parts");
int64_t Offset = State.AllocateStack(16, Align(16));
PendingMembers[0].convertToMem(Offset);
diff --git a/llvm/lib/TargetParser/X86TargetParser.cpp b/llvm/lib/TargetParser/X86TargetParser.cpp
index 57fbc71..9cd35e3 100644
--- a/llvm/lib/TargetParser/X86TargetParser.cpp
+++ b/llvm/lib/TargetParser/X86TargetParser.cpp
@@ -176,10 +176,10 @@ constexpr FeatureBitset FeaturesArrowlakeS =
FeaturesArrowlake | FeatureAVXVNNIINT16 | FeatureSHA512 | FeatureSM3 |
FeatureSM4;
constexpr FeatureBitset FeaturesPantherlake =
- FeaturesArrowlakeS ^ FeatureWIDEKL | FeaturePREFETCHI;
+ (FeaturesArrowlakeS ^ FeatureWIDEKL) | FeaturePREFETCHI;
constexpr FeatureBitset FeaturesClearwaterforest =
- FeaturesSierraforest ^ FeatureWIDEKL | FeatureAVXVNNIINT16 | FeatureSHA512 |
- FeatureSM3 | FeatureSM4 | FeaturePREFETCHI | FeatureUSERMSR;
+ (FeaturesSierraforest ^ FeatureWIDEKL) | FeatureAVXVNNIINT16 |
+ FeatureSHA512 | FeatureSM3 | FeatureSM4 | FeaturePREFETCHI | FeatureUSERMSR;
// Geode Processor.
constexpr FeatureBitset FeaturesGeode =
diff --git a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
index fbeb721..a65d0fb 100644
--- a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
+++ b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
@@ -1103,14 +1103,13 @@ static void insertSpills(const FrameDataInfo &FrameData, coro::Shape &Shape) {
FrameTy->getElementType(FrameData.getFieldIndex(E.first)), GEP,
SpillAlignment, E.first->getName() + Twine(".reload"));
- TinyPtrVector<DbgDeclareInst *> DIs = findDbgDeclares(Def);
TinyPtrVector<DbgVariableRecord *> DVRs = findDVRDeclares(Def);
// Try best to find dbg.declare. If the spill is a temp, there may not
// be a direct dbg.declare. Walk up the load chain to find one from an
// alias.
if (F->getSubprogram()) {
auto *CurDef = Def;
- while (DIs.empty() && DVRs.empty() && isa<LoadInst>(CurDef)) {
+ while (DVRs.empty() && isa<LoadInst>(CurDef)) {
auto *LdInst = cast<LoadInst>(CurDef);
// Only consider ptr to ptr same type load.
if (LdInst->getPointerOperandType() != LdInst->getType())
@@ -1118,12 +1117,11 @@ static void insertSpills(const FrameDataInfo &FrameData, coro::Shape &Shape) {
CurDef = LdInst->getPointerOperand();
if (!isa<AllocaInst, LoadInst>(CurDef))
break;
- DIs = findDbgDeclares(CurDef);
DVRs = findDVRDeclares(CurDef);
}
}
- auto SalvageOne = [&](auto *DDI) {
+ auto SalvageOne = [&](DbgVariableRecord *DDI) {
// This dbg.declare is preserved for all coro-split function
// fragments. It will be unreachable in the main function, and
// processed by coro::salvageDebugInfo() by the Cloner.
@@ -1137,7 +1135,6 @@ static void insertSpills(const FrameDataInfo &FrameData, coro::Shape &Shape) {
// will be deleted in all coro-split functions.
coro::salvageDebugInfo(ArgToAllocaMap, *DDI, false /*UseEntryValue*/);
};
- for_each(DIs, SalvageOne);
for_each(DVRs, SalvageOne);
}
@@ -1225,8 +1222,7 @@ static void insertSpills(const FrameDataInfo &FrameData, coro::Shape &Shape) {
SmallVector<DbgVariableIntrinsic *, 4> DIs;
SmallVector<DbgVariableRecord *> DbgVariableRecords;
findDbgUsers(DIs, Alloca, &DbgVariableRecords);
- for (auto *DVI : DIs)
- DVI->replaceUsesOfWith(Alloca, G);
+ assert(DIs.empty() && "Should never see debug-intrinsics");
for (auto *DVR : DbgVariableRecords)
DVR->replaceVariableLocationOp(Alloca, G);
@@ -1922,48 +1918,6 @@ salvageDebugInfoImpl(SmallDenseMap<Argument *, AllocaInst *, 4> &ArgToAllocaMap,
void coro::salvageDebugInfo(
SmallDenseMap<Argument *, AllocaInst *, 4> &ArgToAllocaMap,
- DbgVariableIntrinsic &DVI, bool UseEntryValue) {
-
- Function *F = DVI.getFunction();
- // Follow the pointer arithmetic all the way to the incoming
- // function argument and convert into a DIExpression.
- bool SkipOutermostLoad = !isa<DbgValueInst>(DVI);
- Value *OriginalStorage = DVI.getVariableLocationOp(0);
-
- auto SalvagedInfo =
- ::salvageDebugInfoImpl(ArgToAllocaMap, UseEntryValue, F, OriginalStorage,
- DVI.getExpression(), SkipOutermostLoad);
- if (!SalvagedInfo)
- return;
-
- Value *Storage = &SalvagedInfo->first;
- DIExpression *Expr = &SalvagedInfo->second;
-
- DVI.replaceVariableLocationOp(OriginalStorage, Storage);
- DVI.setExpression(Expr);
- // We only hoist dbg.declare today since it doesn't make sense to hoist
- // dbg.value since it does not have the same function wide guarantees that
- // dbg.declare does.
- if (isa<DbgDeclareInst>(DVI)) {
- std::optional<BasicBlock::iterator> InsertPt;
- if (auto *I = dyn_cast<Instruction>(Storage)) {
- InsertPt = I->getInsertionPointAfterDef();
- // Update DILocation only if variable was not inlined.
- DebugLoc ILoc = I->getDebugLoc();
- DebugLoc DVILoc = DVI.getDebugLoc();
- if (ILoc && DVILoc &&
- DVILoc->getScope()->getSubprogram() ==
- ILoc->getScope()->getSubprogram())
- DVI.setDebugLoc(I->getDebugLoc());
- } else if (isa<Argument>(Storage))
- InsertPt = F->getEntryBlock().begin();
- if (InsertPt)
- DVI.moveBefore(*(*InsertPt)->getParent(), *InsertPt);
- }
-}
-
-void coro::salvageDebugInfo(
- SmallDenseMap<Argument *, AllocaInst *, 4> &ArgToAllocaMap,
DbgVariableRecord &DVR, bool UseEntryValue) {
Function *F = DVR.getFunction();
diff --git a/llvm/lib/Transforms/Coroutines/CoroInternal.h b/llvm/lib/Transforms/Coroutines/CoroInternal.h
index b53c5a4..52f4ffe 100644
--- a/llvm/lib/Transforms/Coroutines/CoroInternal.h
+++ b/llvm/lib/Transforms/Coroutines/CoroInternal.h
@@ -34,16 +34,13 @@ void suppressCoroAllocs(CoroIdInst *CoroId);
void suppressCoroAllocs(LLVMContext &Context,
ArrayRef<CoroAllocInst *> CoroAllocs);
-/// Attempts to rewrite the location operand of debug intrinsics in terms of
+/// Attempts to rewrite the location operand of debug records in terms of
/// the coroutine frame pointer, folding pointer offsets into the DIExpression
/// of the intrinsic.
/// If the frame pointer is an Argument, store it into an alloca to enhance the
/// debugability.
void salvageDebugInfo(
SmallDenseMap<Argument *, AllocaInst *, 4> &ArgToAllocaMap,
- DbgVariableIntrinsic &DVI, bool IsEntryPoint);
-void salvageDebugInfo(
- SmallDenseMap<Argument *, AllocaInst *, 4> &ArgToAllocaMap,
DbgVariableRecord &DVR, bool UseEntryValue);
// Keeps data and helper functions for lowering coroutine intrinsics.
diff --git a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
index 5a8a41f..64b33e4 100644
--- a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
+++ b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
@@ -618,19 +618,15 @@ static void replaceSwiftErrorOps(Function &F, coro::Shape &Shape,
}
}
-/// Returns all DbgVariableIntrinsic in F.
-static std::pair<SmallVector<DbgVariableIntrinsic *, 8>,
- SmallVector<DbgVariableRecord *>>
-collectDbgVariableIntrinsics(Function &F) {
- SmallVector<DbgVariableIntrinsic *, 8> Intrinsics;
+/// Returns all debug records in F.
+static SmallVector<DbgVariableRecord *>
+collectDbgVariableRecords(Function &F) {
SmallVector<DbgVariableRecord *> DbgVariableRecords;
for (auto &I : instructions(F)) {
for (DbgVariableRecord &DVR : filterDbgVars(I.getDbgRecordRange()))
DbgVariableRecords.push_back(&DVR);
- if (auto *DVI = dyn_cast<DbgVariableIntrinsic>(&I))
- Intrinsics.push_back(DVI);
}
- return {Intrinsics, DbgVariableRecords};
+ return DbgVariableRecords;
}
void coro::BaseCloner::replaceSwiftErrorOps() {
@@ -638,13 +634,11 @@ void coro::BaseCloner::replaceSwiftErrorOps() {
}
void coro::BaseCloner::salvageDebugInfo() {
- auto [Worklist, DbgVariableRecords] = collectDbgVariableIntrinsics(*NewF);
+ auto DbgVariableRecords = collectDbgVariableRecords(*NewF);
SmallDenseMap<Argument *, AllocaInst *, 4> ArgToAllocaMap;
// Only 64-bit ABIs have a register we can refer to with the entry value.
bool UseEntryValue = OrigF.getParent()->getTargetTriple().isArch64Bit();
- for (DbgVariableIntrinsic *DVI : Worklist)
- coro::salvageDebugInfo(ArgToAllocaMap, *DVI, UseEntryValue);
for (DbgVariableRecord *DVR : DbgVariableRecords)
coro::salvageDebugInfo(ArgToAllocaMap, *DVR, UseEntryValue);
@@ -655,7 +649,7 @@ void coro::BaseCloner::salvageDebugInfo() {
return !isPotentiallyReachable(&NewF->getEntryBlock(), BB, nullptr,
&DomTree);
};
- auto RemoveOne = [&](auto *DVI) {
+ auto RemoveOne = [&](DbgVariableRecord *DVI) {
if (IsUnreachableBlock(DVI->getParent()))
DVI->eraseFromParent();
else if (isa_and_nonnull<AllocaInst>(DVI->getVariableLocationOp(0))) {
@@ -669,7 +663,6 @@ void coro::BaseCloner::salvageDebugInfo() {
DVI->eraseFromParent();
}
};
- for_each(Worklist, RemoveOne);
for_each(DbgVariableRecords, RemoveOne);
}
@@ -2022,9 +2015,7 @@ static void doSplitCoroutine(Function &F, SmallVectorImpl<Function *> &Clones,
// original function. The Cloner has already salvaged debug info in the new
// coroutine funclets.
SmallDenseMap<Argument *, AllocaInst *, 4> ArgToAllocaMap;
- auto [DbgInsts, DbgVariableRecords] = collectDbgVariableIntrinsics(F);
- for (auto *DDI : DbgInsts)
- coro::salvageDebugInfo(ArgToAllocaMap, *DDI, false /*UseEntryValue*/);
+ auto DbgVariableRecords = collectDbgVariableRecords(F);
for (DbgVariableRecord *DVR : DbgVariableRecords)
coro::salvageDebugInfo(ArgToAllocaMap, *DVR, false /*UseEntryValue*/);
diff --git a/llvm/lib/Transforms/Coroutines/SpillUtils.cpp b/llvm/lib/Transforms/Coroutines/SpillUtils.cpp
index 8017db1c..5fd5f7d 100644
--- a/llvm/lib/Transforms/Coroutines/SpillUtils.cpp
+++ b/llvm/lib/Transforms/Coroutines/SpillUtils.cpp
@@ -514,7 +514,7 @@ void collectSpillsAndAllocasFromInsts(
void collectSpillsFromDbgInfo(SpillInfo &Spills, Function &F,
const SuspendCrossingInfo &Checker) {
// We don't want the layout of coroutine frame to be affected
- // by debug information. So we only choose to salvage DbgValueInst for
+ // by debug information. So we only choose to salvage dbg.values for
// whose value is already in the frame.
// We would handle the dbg.values for allocas specially
for (auto &Iter : Spills) {
@@ -522,9 +522,7 @@ void collectSpillsFromDbgInfo(SpillInfo &Spills, Function &F,
SmallVector<DbgValueInst *, 16> DVIs;
SmallVector<DbgVariableRecord *, 16> DVRs;
findDbgValues(DVIs, V, &DVRs);
- for (DbgValueInst *DVI : DVIs)
- if (Checker.isDefinitionAcrossSuspend(*V, DVI))
- Spills[V].push_back(DVI);
+ assert(DVIs.empty());
// Add the instructions which carry debug info that is in the frame.
for (DbgVariableRecord *DVR : DVRs)
if (Checker.isDefinitionAcrossSuspend(*V, DVR->Marker->MarkedInstr))
diff --git a/llvm/lib/Transforms/IPO/MergeFunctions.cpp b/llvm/lib/Transforms/IPO/MergeFunctions.cpp
index d4555e9..f5525de 100644
--- a/llvm/lib/Transforms/IPO/MergeFunctions.cpp
+++ b/llvm/lib/Transforms/IPO/MergeFunctions.cpp
@@ -572,7 +572,7 @@ void MergeFunctions::filterInstsUnrelatedToPDI(
// Work out whether a dbg.value intrinsic or an equivalent DbgVariableRecord
// is a parameter to be preserved.
- auto ExamineDbgValue = [](auto *DbgVal, auto &Container) {
+ auto ExamineDbgValue = [&PDVRRelated](DbgVariableRecord *DbgVal) {
LLVM_DEBUG(dbgs() << " Deciding: ");
LLVM_DEBUG(DbgVal->print(dbgs()));
LLVM_DEBUG(dbgs() << "\n");
@@ -581,7 +581,7 @@ void MergeFunctions::filterInstsUnrelatedToPDI(
LLVM_DEBUG(dbgs() << " Include (parameter): ");
LLVM_DEBUG(DbgVal->print(dbgs()));
LLVM_DEBUG(dbgs() << "\n");
- Container.insert(DbgVal);
+ PDVRRelated.insert(DbgVal);
} else {
LLVM_DEBUG(dbgs() << " Delete (!parameter): ");
LLVM_DEBUG(DbgVal->print(dbgs()));
@@ -589,7 +589,8 @@ void MergeFunctions::filterInstsUnrelatedToPDI(
}
};
- auto ExamineDbgDeclare = [&PDIRelated](auto *DbgDecl, auto &Container) {
+ auto ExamineDbgDeclare = [&PDIRelated,
+ &PDVRRelated](DbgVariableRecord *DbgDecl) {
LLVM_DEBUG(dbgs() << " Deciding: ");
LLVM_DEBUG(DbgDecl->print(dbgs()));
LLVM_DEBUG(dbgs() << "\n");
@@ -616,7 +617,7 @@ void MergeFunctions::filterInstsUnrelatedToPDI(
LLVM_DEBUG(dbgs() << " Include: ");
LLVM_DEBUG(DbgDecl->print(dbgs()));
LLVM_DEBUG(dbgs() << "\n");
- Container.insert(DbgDecl);
+ PDVRRelated.insert(DbgDecl);
} else {
LLVM_DEBUG(dbgs() << " Delete (!parameter): ");
LLVM_DEBUG(SI->print(dbgs()));
@@ -647,18 +648,14 @@ void MergeFunctions::filterInstsUnrelatedToPDI(
// they connected to parameters?
for (DbgVariableRecord &DVR : filterDbgVars(BI->getDbgRecordRange())) {
if (DVR.isDbgValue() || DVR.isDbgAssign()) {
- ExamineDbgValue(&DVR, PDVRRelated);
+ ExamineDbgValue(&DVR);
} else {
assert(DVR.isDbgDeclare());
- ExamineDbgDeclare(&DVR, PDVRRelated);
+ ExamineDbgDeclare(&DVR);
}
}
- if (auto *DVI = dyn_cast<DbgValueInst>(&*BI)) {
- ExamineDbgValue(DVI, PDIRelated);
- } else if (auto *DDI = dyn_cast<DbgDeclareInst>(&*BI)) {
- ExamineDbgDeclare(DDI, PDIRelated);
- } else if (BI->isTerminator() && &*BI == GEntryBlock->getTerminator()) {
+ if (BI->isTerminator() && &*BI == GEntryBlock->getTerminator()) {
LLVM_DEBUG(dbgs() << " Will Include Terminator: ");
LLVM_DEBUG(BI->print(dbgs()));
LLVM_DEBUG(dbgs() << "\n");
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
index 9d7c025..f7fbf08 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -825,9 +825,6 @@ public:
Value *EvaluateInDifferentType(Value *V, Type *Ty, bool isSigned);
bool tryToSinkInstruction(Instruction *I, BasicBlock *DestBlock);
- void tryToSinkInstructionDbgValues(
- Instruction *I, BasicBlock::iterator InsertPos, BasicBlock *SrcBlock,
- BasicBlock *DestBlock, SmallVectorImpl<DbgVariableIntrinsic *> &DbgUsers);
void tryToSinkInstructionDbgVariableRecords(
Instruction *I, BasicBlock::iterator InsertPos, BasicBlock *SrcBlock,
BasicBlock *DestBlock, SmallVectorImpl<DbgVariableRecord *> &DPUsers);
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 684b9a1..503611a 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -1420,21 +1420,16 @@ void InstCombinerImpl::freelyInvertAllUsersOf(Value *I, Value *IgnoredUser) {
SmallVector<DbgValueInst *, 4> DbgValues;
SmallVector<DbgVariableRecord *, 4> DbgVariableRecords;
llvm::findDbgValues(DbgValues, I, &DbgVariableRecords);
+ assert(DbgValues.empty());
- auto InvertDbgValueUse = [&](auto *DbgVal) {
+ for (DbgVariableRecord *DbgVal : DbgVariableRecords) {
SmallVector<uint64_t, 1> Ops = {dwarf::DW_OP_not};
for (unsigned Idx = 0, End = DbgVal->getNumVariableLocationOps();
Idx != End; ++Idx)
if (DbgVal->getVariableLocationOp(Idx) == I)
DbgVal->setExpression(
DIExpression::appendOpsToArg(DbgVal->getExpression(), Ops, Idx));
- };
-
- for (DbgValueInst *DVI : DbgValues)
- InvertDbgValueUse(DVI);
-
- for (DbgVariableRecord *DVR : DbgVariableRecords)
- InvertDbgValueUse(DVR);
+ }
}
/// Given a 'sub' instruction, return the RHS of the instruction if the LHS is a
@@ -3575,6 +3570,7 @@ Instruction *InstCombinerImpl::visitAllocSite(Instruction &MI) {
std::unique_ptr<DIBuilder> DIB;
if (isa<AllocaInst>(MI)) {
findDbgUsers(DVIs, &MI, &DVRs);
+ assert(DVIs.empty());
DIB.reset(new DIBuilder(*MI.getModule(), /*AllowUnresolved=*/false));
}
@@ -5253,8 +5249,7 @@ bool InstCombinerImpl::tryToSinkInstruction(Instruction *I,
SmallVector<DbgVariableIntrinsic *, 2> DbgUsers;
SmallVector<DbgVariableRecord *, 2> DbgVariableRecords;
findDbgUsers(DbgUsers, I, &DbgVariableRecords);
- if (!DbgUsers.empty())
- tryToSinkInstructionDbgValues(I, InsertPos, SrcBlock, DestBlock, DbgUsers);
+ assert(DbgUsers.empty());
if (!DbgVariableRecords.empty())
tryToSinkInstructionDbgVariableRecords(I, InsertPos, SrcBlock, DestBlock,
DbgVariableRecords);
@@ -5271,71 +5266,12 @@ bool InstCombinerImpl::tryToSinkInstruction(Instruction *I,
return true;
}
-void InstCombinerImpl::tryToSinkInstructionDbgValues(
- Instruction *I, BasicBlock::iterator InsertPos, BasicBlock *SrcBlock,
- BasicBlock *DestBlock, SmallVectorImpl<DbgVariableIntrinsic *> &DbgUsers) {
- // For all debug values in the destination block, the sunk instruction
- // will still be available, so they do not need to be dropped.
- SmallVector<DbgVariableIntrinsic *, 2> DbgUsersToSalvage;
- for (auto &DbgUser : DbgUsers)
- if (DbgUser->getParent() != DestBlock)
- DbgUsersToSalvage.push_back(DbgUser);
-
- // Process the sinking DbgUsersToSalvage in reverse order, as we only want
- // to clone the last appearing debug intrinsic for each given variable.
- SmallVector<DbgVariableIntrinsic *, 2> DbgUsersToSink;
- for (DbgVariableIntrinsic *DVI : DbgUsersToSalvage)
- if (DVI->getParent() == SrcBlock)
- DbgUsersToSink.push_back(DVI);
- llvm::sort(DbgUsersToSink,
- [](auto *A, auto *B) { return B->comesBefore(A); });
-
- SmallVector<DbgVariableIntrinsic *, 2> DIIClones;
- SmallSet<DebugVariable, 4> SunkVariables;
- for (auto *User : DbgUsersToSink) {
- // A dbg.declare instruction should not be cloned, since there can only be
- // one per variable fragment. It should be left in the original place
- // because the sunk instruction is not an alloca (otherwise we could not be
- // here).
- if (isa<DbgDeclareInst>(User))
- continue;
-
- DebugVariable DbgUserVariable =
- DebugVariable(User->getVariable(), User->getExpression(),
- User->getDebugLoc()->getInlinedAt());
-
- if (!SunkVariables.insert(DbgUserVariable).second)
- continue;
-
- // Leave dbg.assign intrinsics in their original positions and there should
- // be no need to insert a clone.
- if (isa<DbgAssignIntrinsic>(User))
- continue;
-
- DIIClones.emplace_back(cast<DbgVariableIntrinsic>(User->clone()));
- if (isa<DbgDeclareInst>(User) && isa<CastInst>(I))
- DIIClones.back()->replaceVariableLocationOp(I, I->getOperand(0));
- LLVM_DEBUG(dbgs() << "CLONE: " << *DIIClones.back() << '\n');
- }
-
- // Perform salvaging without the clones, then sink the clones.
- if (!DIIClones.empty()) {
- salvageDebugInfoForDbgValues(*I, DbgUsersToSalvage, {});
- // The clones are in reverse order of original appearance, reverse again to
- // maintain the original order.
- for (auto &DIIClone : llvm::reverse(DIIClones)) {
- DIIClone->insertBefore(InsertPos);
- LLVM_DEBUG(dbgs() << "SINK: " << *DIIClone << '\n');
- }
- }
-}
-
void InstCombinerImpl::tryToSinkInstructionDbgVariableRecords(
Instruction *I, BasicBlock::iterator InsertPos, BasicBlock *SrcBlock,
BasicBlock *DestBlock,
SmallVectorImpl<DbgVariableRecord *> &DbgVariableRecords) {
- // Implementation of tryToSinkInstructionDbgValues, but for the
- // DbgVariableRecord of variable assignments rather than dbg.values.
+ // For all debug values in the destination block, the sunk instruction
+ // will still be available, so they do not need to be dropped.
// Fetch all DbgVariableRecords not already in the destination.
SmallVector<DbgVariableRecord *, 2> DbgVariableRecordsToSalvage;
diff --git a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
index 2786d81..df31602 100644
--- a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
@@ -1489,6 +1489,7 @@ static bool checkAndReplaceCondition(
SmallVector<DbgVariableIntrinsic *> DbgUsers;
SmallVector<DbgVariableRecord *> DVRUsers;
findDbgUsers(DbgUsers, Cmp, &DVRUsers);
+ assert(DbgUsers.empty());
for (auto *DVR : DVRUsers) {
auto *DTN = DT.getNode(DVR->getParent());
diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
index 85dd9a1..0f63ed0 100644
--- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -2079,6 +2079,7 @@ struct DSEState {
AllocFnKind AllocKind =
Attrs.getFnAttr(Attribute::AllocKind).getAllocKind() |
AllocFnKind::Zeroed;
+ AllocKind &= ~AllocFnKind::Uninitialized;
Attrs =
Attrs.addFnAttribute(Ctx, Attribute::getWithAllocKind(Ctx, AllocKind))
.removeFnAttribute(Ctx, "alloc-variant-zeroed");
diff --git a/llvm/lib/Transforms/Scalar/GVN.cpp b/llvm/lib/Transforms/Scalar/GVN.cpp
index 8bff458..f6bf09d 100644
--- a/llvm/lib/Transforms/Scalar/GVN.cpp
+++ b/llvm/lib/Transforms/Scalar/GVN.cpp
@@ -2367,11 +2367,14 @@ uint32_t GVNPass::ValueTable::phiTranslateImpl(const BasicBlock *Pred,
// See if we can refine the value number by looking at the PN incoming value
// for the given predecessor.
if (PHINode *PN = NumberingPhi[Num]) {
- if (PN->getParent() == PhiBlock)
- for (unsigned I = 0; I != PN->getNumIncomingValues(); ++I)
- if (PN->getIncomingBlock(I) == Pred)
- if (uint32_t TransVal = lookup(PN->getIncomingValue(I), false))
- return TransVal;
+ if (PN->getParent() != PhiBlock)
+ return Num;
+ for (unsigned I = 0; I != PN->getNumIncomingValues(); ++I) {
+ if (PN->getIncomingBlock(I) != Pred)
+ continue;
+ if (uint32_t TransVal = lookup(PN->getIncomingValue(I), false))
+ return TransVal;
+ }
return Num;
}
diff --git a/llvm/lib/Transforms/Scalar/JumpThreading.cpp b/llvm/lib/Transforms/Scalar/JumpThreading.cpp
index b5dbef1..4d1f4407 100644
--- a/llvm/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/llvm/lib/Transforms/Scalar/JumpThreading.cpp
@@ -1979,15 +1979,13 @@ void JumpThreadingPass::updateSSA(BasicBlock *BB, BasicBlock *NewBB,
// Find debug values outside of the block
findDbgValues(DbgValues, &I, &DbgVariableRecords);
- llvm::erase_if(DbgValues, [&](const DbgValueInst *DbgVal) {
- return DbgVal->getParent() == BB;
- });
+ assert(DbgValues.empty());
llvm::erase_if(DbgVariableRecords, [&](const DbgVariableRecord *DbgVarRec) {
return DbgVarRec->getParent() == BB;
});
// If there are no uses outside the block, we're done with this instruction.
- if (UsesToRename.empty() && DbgValues.empty() && DbgVariableRecords.empty())
+ if (UsesToRename.empty() && DbgVariableRecords.empty())
continue;
LLVM_DEBUG(dbgs() << "JT: Renaming non-local uses of: " << I << "\n");
@@ -2000,8 +1998,7 @@ void JumpThreadingPass::updateSSA(BasicBlock *BB, BasicBlock *NewBB,
while (!UsesToRename.empty())
SSAUpdate.RewriteUse(*UsesToRename.pop_back_val());
- if (!DbgValues.empty() || !DbgVariableRecords.empty()) {
- SSAUpdate.UpdateDebugValues(&I, DbgValues);
+ if (!DbgVariableRecords.empty()) {
SSAUpdate.UpdateDebugValues(&I, DbgVariableRecords);
DbgValues.clear();
DbgVariableRecords.clear();
@@ -2032,32 +2029,7 @@ void JumpThreadingPass::cloneInstructions(ValueToValueMapTy &ValueMapping,
// copy of the block 'NewBB'. If there are PHI nodes in the source basic
// block, evaluate them to account for entry from PredBB.
- // Retargets llvm.dbg.value to any renamed variables.
- auto RetargetDbgValueIfPossible = [&](Instruction *NewInst) -> bool {
- auto DbgInstruction = dyn_cast<DbgValueInst>(NewInst);
- if (!DbgInstruction)
- return false;
-
- SmallSet<std::pair<Value *, Value *>, 16> OperandsToRemap;
- for (auto DbgOperand : DbgInstruction->location_ops()) {
- auto DbgOperandInstruction = dyn_cast<Instruction>(DbgOperand);
- if (!DbgOperandInstruction)
- continue;
-
- auto I = ValueMapping.find(DbgOperandInstruction);
- if (I != ValueMapping.end()) {
- OperandsToRemap.insert(
- std::pair<Value *, Value *>(DbgOperand, I->second));
- }
- }
-
- for (auto &[OldOp, MappedOp] : OperandsToRemap)
- DbgInstruction->replaceVariableLocationOp(OldOp, MappedOp);
- return true;
- };
-
- // Duplicate implementation of the above dbg.value code, using
- // DbgVariableRecords instead.
+ // Retargets dbg.value to any renamed variables.
auto RetargetDbgVariableRecordIfPossible = [&](DbgVariableRecord *DVR) {
SmallSet<std::pair<Value *, Value *>, 16> OperandsToRemap;
for (auto *Op : DVR->location_ops()) {
@@ -2116,9 +2088,6 @@ void JumpThreadingPass::cloneInstructions(ValueToValueMapTy &ValueMapping,
if (const DebugLoc &DL = New->getDebugLoc())
mapAtomInstance(DL, ValueMapping);
- if (RetargetDbgValueIfPossible(New))
- continue;
-
// Remap operands to patch up intra-block references.
for (unsigned i = 0, e = New->getNumOperands(); i != e; ++i)
if (Instruction *Inst = dyn_cast<Instruction>(New->getOperand(i))) {
diff --git a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
index a0f9f3c..70e9eee 100644
--- a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
@@ -78,6 +78,7 @@ enum class RuleTy {
PerLoopCacheAnalysis,
PerInstrOrderCost,
ForVectorization,
+ Ignore
};
} // end anonymous namespace
@@ -106,14 +107,20 @@ static cl::list<RuleTy> Profitabilities(
clEnumValN(RuleTy::PerInstrOrderCost, "instorder",
"Prioritize the IVs order of each instruction"),
clEnumValN(RuleTy::ForVectorization, "vectorize",
- "Prioritize vectorization")));
+ "Prioritize vectorization"),
+ clEnumValN(RuleTy::Ignore, "ignore",
+ "Ignore profitability, force interchange (does not "
+ "work with other options)")));
#ifndef NDEBUG
-static bool noDuplicateRules(ArrayRef<RuleTy> Rules) {
+static bool noDuplicateRulesAndIgnore(ArrayRef<RuleTy> Rules) {
SmallSet<RuleTy, 4> Set;
- for (RuleTy Rule : Rules)
+ for (RuleTy Rule : Rules) {
if (!Set.insert(Rule).second)
return false;
+ if (Rule == RuleTy::Ignore)
+ return false;
+ }
return true;
}
@@ -1357,6 +1364,13 @@ std::optional<bool> LoopInterchangeProfitability::isProfitableForVectorization(
bool LoopInterchangeProfitability::isProfitable(
const Loop *InnerLoop, const Loop *OuterLoop, unsigned InnerLoopId,
unsigned OuterLoopId, CharMatrix &DepMatrix, CacheCostManager &CCM) {
+
+ // Return true if interchange is forced and the cost-model ignored.
+ if (Profitabilities.size() == 1 && Profitabilities[0] == RuleTy::Ignore)
+ return true;
+ assert(noDuplicateRulesAndIgnore(Profitabilities) &&
+ "Duplicate rules and option 'ignore' are not allowed");
+
// isProfitable() is structured to avoid endless loop interchange. If the
// highest priority rule (isProfitablePerLoopCacheAnalysis by default) could
// decide the profitability then, profitability check will stop and return the
@@ -1365,7 +1379,6 @@ bool LoopInterchangeProfitability::isProfitable(
// second highest priority rule (isProfitablePerInstrOrderCost by default).
// Likewise, if it failed to analysis the profitability then only, the last
// rule (isProfitableForVectorization by default) will decide.
- assert(noDuplicateRules(Profitabilities) && "Detect duplicate rules");
std::optional<bool> shouldInterchange;
for (RuleTy RT : Profitabilities) {
switch (RT) {
@@ -1382,6 +1395,9 @@ bool LoopInterchangeProfitability::isProfitable(
shouldInterchange =
isProfitableForVectorization(InnerLoopId, OuterLoopId, DepMatrix);
break;
+ case RuleTy::Ignore:
+ llvm_unreachable("Option 'ignore' is not supported with other options");
+ break;
}
// If this rule could determine the profitability, don't call subsequent
diff --git a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index dc8fa43..636bd81 100644
--- a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -6630,13 +6630,10 @@ struct SCEVDbgValueBuilder {
/// Holds all the required data to salvage a dbg.value using the pre-LSR SCEVs
/// and DIExpression.
struct DVIRecoveryRec {
- DVIRecoveryRec(DbgValueInst *DbgValue)
- : DbgRef(DbgValue), Expr(DbgValue->getExpression()),
- HadLocationArgList(false) {}
DVIRecoveryRec(DbgVariableRecord *DVR)
: DbgRef(DVR), Expr(DVR->getExpression()), HadLocationArgList(false) {}
- PointerUnion<DbgValueInst *, DbgVariableRecord *> DbgRef;
+ DbgVariableRecord *DbgRef;
DIExpression *Expr;
bool HadLocationArgList;
SmallVector<WeakVH, 2> LocationOps;
@@ -6695,44 +6692,38 @@ static void updateDVIWithLocations(T &DbgVal,
}
/// Write the new expression and new location ops for the dbg.value. If possible
-/// reduce the szie of the dbg.value intrinsic by omitting DIArglist. This
+/// reduce the szie of the dbg.value by omitting DIArglist. This
/// can be omitted if:
/// 1. There is only a single location, refenced by a single DW_OP_llvm_arg.
/// 2. The DW_OP_LLVM_arg is the first operand in the expression.
-static void UpdateDbgValueInst(DVIRecoveryRec &DVIRec,
- SmallVectorImpl<Value *> &NewLocationOps,
- SmallVectorImpl<uint64_t> &NewExpr) {
- auto UpdateDbgValueInstImpl = [&](auto *DbgVal) {
- unsigned NumLLVMArgs = numLLVMArgOps(NewExpr);
- if (NumLLVMArgs == 0) {
- // Location assumed to be on the stack.
- updateDVIWithLocation(*DbgVal, NewLocationOps[0], NewExpr);
- } else if (NumLLVMArgs == 1 && NewExpr[0] == dwarf::DW_OP_LLVM_arg) {
- // There is only a single DW_OP_llvm_arg at the start of the expression,
- // so it can be omitted along with DIArglist.
- assert(NewExpr[1] == 0 &&
- "Lone LLVM_arg in a DIExpression should refer to location-op 0.");
- llvm::SmallVector<uint64_t, 6> ShortenedOps(llvm::drop_begin(NewExpr, 2));
- updateDVIWithLocation(*DbgVal, NewLocationOps[0], ShortenedOps);
- } else {
- // Multiple DW_OP_llvm_arg, so DIArgList is strictly necessary.
- updateDVIWithLocations(*DbgVal, NewLocationOps, NewExpr);
- }
+static void UpdateDbgValue(DVIRecoveryRec &DVIRec,
+ SmallVectorImpl<Value *> &NewLocationOps,
+ SmallVectorImpl<uint64_t> &NewExpr) {
+ DbgVariableRecord *DbgVal = DVIRec.DbgRef;
+ unsigned NumLLVMArgs = numLLVMArgOps(NewExpr);
+ if (NumLLVMArgs == 0) {
+ // Location assumed to be on the stack.
+ updateDVIWithLocation(*DbgVal, NewLocationOps[0], NewExpr);
+ } else if (NumLLVMArgs == 1 && NewExpr[0] == dwarf::DW_OP_LLVM_arg) {
+ // There is only a single DW_OP_llvm_arg at the start of the expression,
+ // so it can be omitted along with DIArglist.
+ assert(NewExpr[1] == 0 &&
+ "Lone LLVM_arg in a DIExpression should refer to location-op 0.");
+ llvm::SmallVector<uint64_t, 6> ShortenedOps(llvm::drop_begin(NewExpr, 2));
+ updateDVIWithLocation(*DbgVal, NewLocationOps[0], ShortenedOps);
+ } else {
+ // Multiple DW_OP_llvm_arg, so DIArgList is strictly necessary.
+ updateDVIWithLocations(*DbgVal, NewLocationOps, NewExpr);
+ }
- // If the DIExpression was previously empty then add the stack terminator.
- // Non-empty expressions have only had elements inserted into them and so
- // the terminator should already be present e.g. stack_value or fragment.
- DIExpression *SalvageExpr = DbgVal->getExpression();
- if (!DVIRec.Expr->isComplex() && SalvageExpr->isComplex()) {
- SalvageExpr =
- DIExpression::append(SalvageExpr, {dwarf::DW_OP_stack_value});
- DbgVal->setExpression(SalvageExpr);
- }
- };
- if (isa<DbgValueInst *>(DVIRec.DbgRef))
- UpdateDbgValueInstImpl(cast<DbgValueInst *>(DVIRec.DbgRef));
- else
- UpdateDbgValueInstImpl(cast<DbgVariableRecord *>(DVIRec.DbgRef));
+ // If the DIExpression was previously empty then add the stack terminator.
+ // Non-empty expressions have only had elements inserted into them and so
+ // the terminator should already be present e.g. stack_value or fragment.
+ DIExpression *SalvageExpr = DbgVal->getExpression();
+ if (!DVIRec.Expr->isComplex() && SalvageExpr->isComplex()) {
+ SalvageExpr = DIExpression::append(SalvageExpr, {dwarf::DW_OP_stack_value});
+ DbgVal->setExpression(SalvageExpr);
+ }
}
/// Cached location ops may be erased during LSR, in which case a poison is
@@ -6746,39 +6737,34 @@ static Value *getValueOrPoison(WeakVH &VH, LLVMContext &C) {
/// Restore the DVI's pre-LSR arguments. Substitute undef for any erased values.
static void restorePreTransformState(DVIRecoveryRec &DVIRec) {
- auto RestorePreTransformStateImpl = [&](auto *DbgVal) {
- LLVM_DEBUG(dbgs() << "scev-salvage: restore dbg.value to pre-LSR state\n"
- << "scev-salvage: post-LSR: " << *DbgVal << '\n');
- assert(DVIRec.Expr && "Expected an expression");
- DbgVal->setExpression(DVIRec.Expr);
-
- // Even a single location-op may be inside a DIArgList and referenced with
- // DW_OP_LLVM_arg, which is valid only with a DIArgList.
- if (!DVIRec.HadLocationArgList) {
- assert(DVIRec.LocationOps.size() == 1 &&
- "Unexpected number of location ops.");
- // LSR's unsuccessful salvage attempt may have added DIArgList, which in
- // this case was not present before, so force the location back to a
- // single uncontained Value.
- Value *CachedValue =
- getValueOrPoison(DVIRec.LocationOps[0], DbgVal->getContext());
- DbgVal->setRawLocation(ValueAsMetadata::get(CachedValue));
- } else {
- SmallVector<ValueAsMetadata *, 3> MetadataLocs;
- for (WeakVH VH : DVIRec.LocationOps) {
- Value *CachedValue = getValueOrPoison(VH, DbgVal->getContext());
- MetadataLocs.push_back(ValueAsMetadata::get(CachedValue));
- }
- auto ValArrayRef = llvm::ArrayRef<llvm::ValueAsMetadata *>(MetadataLocs);
- DbgVal->setRawLocation(
- llvm::DIArgList::get(DbgVal->getContext(), ValArrayRef));
+ DbgVariableRecord *DbgVal = DVIRec.DbgRef;
+ LLVM_DEBUG(dbgs() << "scev-salvage: restore dbg.value to pre-LSR state\n"
+ << "scev-salvage: post-LSR: " << *DbgVal << '\n');
+ assert(DVIRec.Expr && "Expected an expression");
+ DbgVal->setExpression(DVIRec.Expr);
+
+ // Even a single location-op may be inside a DIArgList and referenced with
+ // DW_OP_LLVM_arg, which is valid only with a DIArgList.
+ if (!DVIRec.HadLocationArgList) {
+ assert(DVIRec.LocationOps.size() == 1 &&
+ "Unexpected number of location ops.");
+ // LSR's unsuccessful salvage attempt may have added DIArgList, which in
+ // this case was not present before, so force the location back to a
+ // single uncontained Value.
+ Value *CachedValue =
+ getValueOrPoison(DVIRec.LocationOps[0], DbgVal->getContext());
+ DbgVal->setRawLocation(ValueAsMetadata::get(CachedValue));
+ } else {
+ SmallVector<ValueAsMetadata *, 3> MetadataLocs;
+ for (WeakVH VH : DVIRec.LocationOps) {
+ Value *CachedValue = getValueOrPoison(VH, DbgVal->getContext());
+ MetadataLocs.push_back(ValueAsMetadata::get(CachedValue));
}
- LLVM_DEBUG(dbgs() << "scev-salvage: pre-LSR: " << *DbgVal << '\n');
- };
- if (isa<DbgValueInst *>(DVIRec.DbgRef))
- RestorePreTransformStateImpl(cast<DbgValueInst *>(DVIRec.DbgRef));
- else
- RestorePreTransformStateImpl(cast<DbgVariableRecord *>(DVIRec.DbgRef));
+ auto ValArrayRef = llvm::ArrayRef<llvm::ValueAsMetadata *>(MetadataLocs);
+ DbgVal->setRawLocation(
+ llvm::DIArgList::get(DbgVal->getContext(), ValArrayRef));
+ }
+ LLVM_DEBUG(dbgs() << "scev-salvage: pre-LSR: " << *DbgVal << '\n');
}
static bool SalvageDVI(llvm::Loop *L, ScalarEvolution &SE,
@@ -6786,9 +6772,7 @@ static bool SalvageDVI(llvm::Loop *L, ScalarEvolution &SE,
const SCEV *SCEVInductionVar,
SCEVDbgValueBuilder IterCountExpr) {
- if (isa<DbgValueInst *>(DVIRec.DbgRef)
- ? !cast<DbgValueInst *>(DVIRec.DbgRef)->isKillLocation()
- : !cast<DbgVariableRecord *>(DVIRec.DbgRef)->isKillLocation())
+ if (!DVIRec.DbgRef->isKillLocation())
return false;
// LSR may have caused several changes to the dbg.value in the failed salvage
@@ -6882,13 +6866,8 @@ static bool SalvageDVI(llvm::Loop *L, ScalarEvolution &SE,
DbgBuilder->appendToVectors(NewExpr, NewLocationOps);
}
- UpdateDbgValueInst(DVIRec, NewLocationOps, NewExpr);
- if (isa<DbgValueInst *>(DVIRec.DbgRef))
- LLVM_DEBUG(dbgs() << "scev-salvage: Updated DVI: "
- << *cast<DbgValueInst *>(DVIRec.DbgRef) << "\n");
- else
- LLVM_DEBUG(dbgs() << "scev-salvage: Updated DVI: "
- << *cast<DbgVariableRecord *>(DVIRec.DbgRef) << "\n");
+ UpdateDbgValue(DVIRec, NewLocationOps, NewExpr);
+ LLVM_DEBUG(dbgs() << "scev-salvage: Updated DVI: " << *DVIRec.DbgRef << "\n");
return true;
}
@@ -6934,21 +6913,23 @@ static void DbgRewriteSalvageableDVIs(
/// cacheing and salvaging.
static void DbgGatherSalvagableDVI(
Loop *L, ScalarEvolution &SE,
- SmallVector<std::unique_ptr<DVIRecoveryRec>, 2> &SalvageableDVISCEVs,
- SmallSet<AssertingVH<DbgValueInst>, 2> &DVIHandles) {
+ SmallVector<std::unique_ptr<DVIRecoveryRec>, 2> &SalvageableDVISCEVs) {
for (const auto &B : L->getBlocks()) {
for (auto &I : *B) {
- auto ProcessDbgValue = [&](auto *DbgVal) -> bool {
+ for (DbgVariableRecord &DbgVal : filterDbgVars(I.getDbgRecordRange())) {
+ if (!DbgVal.isDbgValue() && !DbgVal.isDbgAssign())
+ continue;
+
// Ensure that if any location op is undef that the dbg.vlue is not
// cached.
- if (DbgVal->isKillLocation())
- return false;
+ if (DbgVal.isKillLocation())
+ continue;
// Check that the location op SCEVs are suitable for translation to
// DIExpression.
const auto &HasTranslatableLocationOps =
- [&](const auto *DbgValToTranslate) -> bool {
- for (const auto LocOp : DbgValToTranslate->location_ops()) {
+ [&](const DbgVariableRecord &DbgValToTranslate) -> bool {
+ for (const auto LocOp : DbgValToTranslate.location_ops()) {
if (!LocOp)
return false;
@@ -6963,31 +6944,21 @@ static void DbgGatherSalvagableDVI(
};
if (!HasTranslatableLocationOps(DbgVal))
- return false;
+ continue;
std::unique_ptr<DVIRecoveryRec> NewRec =
- std::make_unique<DVIRecoveryRec>(DbgVal);
+ std::make_unique<DVIRecoveryRec>(&DbgVal);
// Each location Op may need a SCEVDbgValueBuilder in order to recover
// it. Pre-allocating a vector will enable quick lookups of the builder
// later during the salvage.
- NewRec->RecoveryExprs.resize(DbgVal->getNumVariableLocationOps());
- for (const auto LocOp : DbgVal->location_ops()) {
+ NewRec->RecoveryExprs.resize(DbgVal.getNumVariableLocationOps());
+ for (const auto LocOp : DbgVal.location_ops()) {
NewRec->SCEVs.push_back(SE.getSCEV(LocOp));
NewRec->LocationOps.push_back(LocOp);
- NewRec->HadLocationArgList = DbgVal->hasArgList();
+ NewRec->HadLocationArgList = DbgVal.hasArgList();
}
SalvageableDVISCEVs.push_back(std::move(NewRec));
- return true;
- };
- for (DbgVariableRecord &DVR : filterDbgVars(I.getDbgRecordRange())) {
- if (DVR.isDbgValue() || DVR.isDbgAssign())
- ProcessDbgValue(&DVR);
}
- auto DVI = dyn_cast<DbgValueInst>(&I);
- if (!DVI)
- continue;
- if (ProcessDbgValue(DVI))
- DVIHandles.insert(DVI);
}
}
}
@@ -7036,8 +7007,7 @@ static bool ReduceLoopStrength(Loop *L, IVUsers &IU, ScalarEvolution &SE,
// Debug preservation - before we start removing anything identify which DVI
// meet the salvageable criteria and store their DIExpression and SCEVs.
SmallVector<std::unique_ptr<DVIRecoveryRec>, 2> SalvageableDVIRecords;
- SmallSet<AssertingVH<DbgValueInst>, 2> DVIHandles;
- DbgGatherSalvagableDVI(L, SE, SalvageableDVIRecords, DVIHandles);
+ DbgGatherSalvagableDVI(L, SE, SalvageableDVIRecords);
bool Changed = false;
std::unique_ptr<MemorySSAUpdater> MSSAU;
@@ -7105,7 +7075,6 @@ static bool ReduceLoopStrength(Loop *L, IVUsers &IU, ScalarEvolution &SE,
for (auto &Rec : SalvageableDVIRecords)
Rec->clear();
SalvageableDVIRecords.clear();
- DVIHandles.clear();
return Changed;
}
diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp
index eacaf42..1d1af42 100644
--- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp
+++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp
@@ -1222,9 +1222,7 @@ static void eraseDebugIntrinsicsWithNonLocalRefs(Function &F) {
SmallVector<DbgVariableIntrinsic *, 4> DbgUsers;
SmallVector<DbgVariableRecord *, 4> DbgVariableRecords;
findDbgUsers(DbgUsers, &I, &DbgVariableRecords);
- for (DbgVariableIntrinsic *DVI : DbgUsers)
- if (DVI->getFunction() != &F)
- DVI->eraseFromParent();
+ assert(DbgUsers.empty());
for (DbgVariableRecord *DVR : DbgVariableRecords)
if (DVR->getFunction() != &F)
DVR->eraseFromParent();
@@ -1289,14 +1287,12 @@ static void fixupDebugInfoPostExtraction(Function &OldFunc, Function &NewFunc,
SmallVector<DbgVariableIntrinsic *, 1> DbgUsers;
SmallVector<DbgVariableRecord *, 1> DPUsers;
findDbgUsers(DbgUsers, Input, &DPUsers);
+ assert(DbgUsers.empty());
DIExpression *Expr = DIB.createExpression();
// Iterate the debud users of the Input values. If they are in the extracted
// function then update their location with the new value. If they are in
// the parent function then create a similar debug record.
- for (auto *DVI : DbgUsers)
- UpdateOrInsertDebugRecord(DVI, Input, NewVal, Expr,
- isa<DbgDeclareInst>(DVI));
for (auto *DVR : DPUsers)
UpdateOrInsertDebugRecord(DVR, Input, NewVal, Expr, DVR->isDbgDeclare());
}
diff --git a/llvm/lib/Transforms/Utils/Debugify.cpp b/llvm/lib/Transforms/Utils/Debugify.cpp
index a1f030a..4210ce6 100644
--- a/llvm/lib/Transforms/Utils/Debugify.cpp
+++ b/llvm/lib/Transforms/Utils/Debugify.cpp
@@ -808,9 +808,6 @@ bool checkDebugifyMetadata(Module &M,
// Find missing lines.
for (Instruction &I : instructions(F)) {
- if (isa<DbgValueInst>(&I))
- continue;
-
auto DL = I.getDebugLoc();
if (DL && DL.getLine() != 0) {
MissingLines.reset(DL.getLine() - 1);
@@ -839,10 +836,6 @@ bool checkDebugifyMetadata(Module &M,
for (DbgVariableRecord &DVR : filterDbgVars(I.getDbgRecordRange()))
if (DVR.isDbgValue() || DVR.isDbgAssign())
CheckForMisSized(&DVR);
- auto *DVI = dyn_cast<DbgValueInst>(&I);
- if (!DVI)
- continue;
- CheckForMisSized(DVI);
}
}
diff --git a/llvm/lib/Transforms/Utils/InlineFunction.cpp b/llvm/lib/Transforms/Utils/InlineFunction.cpp
index 6929d14..ed3dca2 100644
--- a/llvm/lib/Transforms/Utils/InlineFunction.cpp
+++ b/llvm/lib/Transforms/Utils/InlineFunction.cpp
@@ -1978,14 +1978,13 @@ static at::StorageToVarsMap collectEscapedLocals(const DataLayout &DL,
continue;
// Find all local variables associated with the backing storage.
- auto CollectAssignsForStorage = [&](auto *DbgAssign) {
+ auto CollectAssignsForStorage = [&](DbgVariableRecord *DbgAssign) {
// Skip variables from inlined functions - they are not local variables.
if (DbgAssign->getDebugLoc().getInlinedAt())
return;
LLVM_DEBUG(errs() << " > DEF : " << *DbgAssign << "\n");
EscapedLocals[Base].insert(at::VarRecord(DbgAssign));
};
- for_each(at::getAssignmentMarkers(Base), CollectAssignsForStorage);
for_each(at::getDVRAssignmentMarkers(Base), CollectAssignsForStorage);
}
return EscapedLocals;
diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp
index b14bbea..d481ad9 100644
--- a/llvm/lib/Transforms/Utils/Local.cpp
+++ b/llvm/lib/Transforms/Utils/Local.cpp
@@ -613,11 +613,10 @@ bool llvm::replaceDbgUsesWithUndef(Instruction *I) {
SmallVector<DbgVariableIntrinsic *, 1> DbgUsers;
SmallVector<DbgVariableRecord *, 1> DPUsers;
findDbgUsers(DbgUsers, I, &DPUsers);
- for (auto *DII : DbgUsers)
- DII->setKillLocation();
+ assert(DbgUsers.empty());
for (auto *DVR : DPUsers)
DVR->setKillLocation();
- return !DbgUsers.empty() || !DPUsers.empty();
+ return !DPUsers.empty();
}
/// areAllUsesEqual - Check whether the uses of a value are all the same.
@@ -1607,12 +1606,8 @@ static bool PhiHasDebugValue(DILocalVariable *DIVar,
SmallVector<DbgValueInst *, 1> DbgValues;
SmallVector<DbgVariableRecord *, 1> DbgVariableRecords;
findDbgValues(DbgValues, APN, &DbgVariableRecords);
- for (auto *DVI : DbgValues) {
- assert(is_contained(DVI->getValues(), APN));
- if ((DVI->getVariable() == DIVar) && (DVI->getExpression() == DIExpr))
- return true;
- }
- for (auto *DVR : DbgVariableRecords) {
+ assert(DbgValues.empty());
+ for (DbgVariableRecord *DVR : DbgVariableRecords) {
assert(is_contained(DVR->location_ops(), APN));
if ((DVR->getVariable() == DIVar) && (DVR->getExpression() == DIExpr))
return true;
@@ -1971,7 +1966,6 @@ bool llvm::replaceDbgDeclare(Value *Address, Value *NewAddress,
static void updateOneDbgValueForAlloca(const DebugLoc &Loc,
DILocalVariable *DIVar,
DIExpression *DIExpr, Value *NewAddress,
- DbgValueInst *DVI,
DbgVariableRecord *DVR,
DIBuilder &Builder, int Offset) {
assert(DIVar && "Missing variable");
@@ -1987,14 +1981,8 @@ static void updateOneDbgValueForAlloca(const DebugLoc &Loc,
if (Offset)
DIExpr = DIExpression::prepend(DIExpr, 0, Offset);
- if (DVI) {
- DVI->setExpression(DIExpr);
- DVI->replaceVariableLocationOp(0u, NewAddress);
- } else {
- assert(DVR);
- DVR->setExpression(DIExpr);
- DVR->replaceVariableLocationOp(0u, NewAddress);
- }
+ DVR->setExpression(DIExpr);
+ DVR->replaceVariableLocationOp(0u, NewAddress);
}
void llvm::replaceDbgValueForAlloca(AllocaInst *AI, Value *NewAllocaAddress,
@@ -2002,18 +1990,13 @@ void llvm::replaceDbgValueForAlloca(AllocaInst *AI, Value *NewAllocaAddress,
SmallVector<DbgValueInst *, 1> DbgUsers;
SmallVector<DbgVariableRecord *, 1> DPUsers;
findDbgValues(DbgUsers, AI, &DPUsers);
-
- // Attempt to replace dbg.values that use this alloca.
- for (auto *DVI : DbgUsers)
- updateOneDbgValueForAlloca(DVI->getDebugLoc(), DVI->getVariable(),
- DVI->getExpression(), NewAllocaAddress, DVI,
- nullptr, Builder, Offset);
+ assert(DbgUsers.empty());
// Replace any DbgVariableRecords that use this alloca.
for (DbgVariableRecord *DVR : DPUsers)
updateOneDbgValueForAlloca(DVR->getDebugLoc(), DVR->getVariable(),
- DVR->getExpression(), NewAllocaAddress, nullptr,
- DVR, Builder, Offset);
+ DVR->getExpression(), NewAllocaAddress, DVR,
+ Builder, Offset);
}
/// Where possible to salvage debug information for \p I do so.
@@ -2022,6 +2005,7 @@ void llvm::salvageDebugInfo(Instruction &I) {
SmallVector<DbgVariableIntrinsic *, 1> DbgUsers;
SmallVector<DbgVariableRecord *, 1> DPUsers;
findDbgUsers(DbgUsers, &I, &DPUsers);
+ assert(DbgUsers.empty());
salvageDebugInfoForDbgValues(I, DbgUsers, DPUsers);
}
@@ -2070,66 +2054,9 @@ void llvm::salvageDebugInfoForDbgValues(
const unsigned MaxExpressionSize = 128;
bool Salvaged = false;
- for (auto *DII : DbgUsers) {
- if (auto *DAI = dyn_cast<DbgAssignIntrinsic>(DII)) {
- if (DAI->getAddress() == &I) {
- salvageDbgAssignAddress(DAI);
- Salvaged = true;
- }
- if (DAI->getValue() != &I)
- continue;
- }
+ // We should never see debug intrinsics nowadays.
+ assert(DbgUsers.empty());
- // Do not add DW_OP_stack_value for DbgDeclare, because they are implicitly
- // pointing out the value as a DWARF memory location description.
- bool StackValue = isa<DbgValueInst>(DII);
- auto DIILocation = DII->location_ops();
- assert(
- is_contained(DIILocation, &I) &&
- "DbgVariableIntrinsic must use salvaged instruction as its location");
- SmallVector<Value *, 4> AdditionalValues;
- // `I` may appear more than once in DII's location ops, and each use of `I`
- // must be updated in the DIExpression and potentially have additional
- // values added; thus we call salvageDebugInfoImpl for each `I` instance in
- // DIILocation.
- Value *Op0 = nullptr;
- DIExpression *SalvagedExpr = DII->getExpression();
- auto LocItr = find(DIILocation, &I);
- while (SalvagedExpr && LocItr != DIILocation.end()) {
- SmallVector<uint64_t, 16> Ops;
- unsigned LocNo = std::distance(DIILocation.begin(), LocItr);
- uint64_t CurrentLocOps = SalvagedExpr->getNumLocationOperands();
- Op0 = salvageDebugInfoImpl(I, CurrentLocOps, Ops, AdditionalValues);
- if (!Op0)
- break;
- SalvagedExpr =
- DIExpression::appendOpsToArg(SalvagedExpr, Ops, LocNo, StackValue);
- LocItr = std::find(++LocItr, DIILocation.end(), &I);
- }
- // salvageDebugInfoImpl should fail on examining the first element of
- // DbgUsers, or none of them.
- if (!Op0)
- break;
-
- SalvagedExpr = SalvagedExpr->foldConstantMath();
- DII->replaceVariableLocationOp(&I, Op0);
- bool IsValidSalvageExpr = SalvagedExpr->getNumElements() <= MaxExpressionSize;
- if (AdditionalValues.empty() && IsValidSalvageExpr) {
- DII->setExpression(SalvagedExpr);
- } else if (isa<DbgValueInst>(DII) && IsValidSalvageExpr &&
- DII->getNumVariableLocationOps() + AdditionalValues.size() <=
- MaxDebugArgs) {
- DII->addVariableLocationOps(AdditionalValues, SalvagedExpr);
- } else {
- // Do not salvage using DIArgList for dbg.declare, as it is not currently
- // supported in those instructions. Also do not salvage if the resulting
- // DIArgList would contain an unreasonably large number of values.
- DII->setKillLocation();
- }
- LLVM_DEBUG(dbgs() << "SALVAGE: " << *DII << '\n');
- Salvaged = true;
- }
- // Duplicate of above block for DbgVariableRecords.
for (auto *DVR : DPUsers) {
if (DVR->isDbgAssign()) {
if (DVR->getAddress() == &I) {
@@ -2198,9 +2125,6 @@ void llvm::salvageDebugInfoForDbgValues(
if (Salvaged)
return;
- for (auto *DII : DbgUsers)
- DII->setKillLocation();
-
for (auto *DVR : DPUsers)
DVR->setKillLocation();
}
@@ -3429,8 +3353,7 @@ void llvm::dropDebugUsers(Instruction &I) {
SmallVector<DbgVariableIntrinsic *, 1> DbgUsers;
SmallVector<DbgVariableRecord *, 1> DPUsers;
findDbgUsers(DbgUsers, &I, &DPUsers);
- for (auto *DII : DbgUsers)
- DII->eraseFromParent();
+ assert(DbgUsers.empty());
for (auto *DVR : DPUsers)
DVR->eraseFromParent();
}
diff --git a/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp b/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp
index 66d0573..06115e0 100644
--- a/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp
+++ b/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp
@@ -161,29 +161,8 @@ static void RewriteUsesOfClonedInstructions(BasicBlock *OrigHeader,
SmallVector<DbgValueInst *, 1> DbgValues;
SmallVector<DbgVariableRecord *, 1> DbgVariableRecords;
llvm::findDbgValues(DbgValues, OrigHeaderVal, &DbgVariableRecords);
- for (auto &DbgValue : DbgValues) {
- // The original users in the OrigHeader are already using the original
- // definitions.
- BasicBlock *UserBB = DbgValue->getParent();
- if (UserBB == OrigHeader)
- continue;
-
- // Users in the OrigPreHeader need to use the value to which the
- // original definitions are mapped and anything else can be handled by
- // the SSAUpdater. To avoid adding PHINodes, check if the value is
- // available in UserBB, if not substitute poison.
- Value *NewVal;
- if (UserBB == OrigPreheader)
- NewVal = OrigPreHeaderVal;
- else if (SSA.HasValueForBlock(UserBB))
- NewVal = SSA.GetValueInMiddleOfBlock(UserBB);
- else
- NewVal = PoisonValue::get(OrigHeaderVal->getType());
- DbgValue->replaceVariableLocationOp(OrigHeaderVal, NewVal);
- }
+ assert(DbgValues.empty());
- // RemoveDIs: duplicate implementation for non-instruction debug-info
- // storage in DbgVariableRecords.
for (DbgVariableRecord *DVR : DbgVariableRecords) {
// The original users in the OrigHeader are already using the original
// definitions.
diff --git a/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
index ccd7ee3..73b5f48 100644
--- a/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
+++ b/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
@@ -190,7 +190,6 @@ public:
};
struct AllocaInfo {
- using DbgUserVec = SmallVector<DbgVariableIntrinsic *, 1>;
using DPUserVec = SmallVector<DbgVariableRecord *, 1>;
SmallVector<BasicBlock *, 32> DefiningBlocks;
@@ -201,7 +200,6 @@ struct AllocaInfo {
bool OnlyUsedInOneBlock;
/// Debug users of the alloca - does not include dbg.assign intrinsics.
- DbgUserVec DbgUsers;
DPUserVec DPUsers;
/// Helper to update assignment tracking debug info.
AssignmentTrackingInfo AssignmentTracking;
@@ -212,7 +210,6 @@ struct AllocaInfo {
OnlyStore = nullptr;
OnlyBlock = nullptr;
OnlyUsedInOneBlock = true;
- DbgUsers.clear();
DPUsers.clear();
AssignmentTracking.clear();
}
@@ -246,13 +243,10 @@ struct AllocaInfo {
OnlyUsedInOneBlock = false;
}
}
- DbgUserVec AllDbgUsers;
+ SmallVector<DbgVariableIntrinsic *> AllDbgUsers;
SmallVector<DbgVariableRecord *> AllDPUsers;
findDbgUsers(AllDbgUsers, AI, &AllDPUsers);
- std::copy_if(AllDbgUsers.begin(), AllDbgUsers.end(),
- std::back_inserter(DbgUsers), [](DbgVariableIntrinsic *DII) {
- return !isa<DbgAssignIntrinsic>(DII);
- });
+ assert(AllDbgUsers.empty());
std::copy_if(AllDPUsers.begin(), AllDPUsers.end(),
std::back_inserter(DPUsers),
[](DbgVariableRecord *DVR) { return !DVR->isDbgAssign(); });
@@ -380,10 +374,9 @@ struct PromoteMem2Reg {
/// to.
DenseMap<PHINode *, unsigned> PhiToAllocaMap;
- /// For each alloca, we keep track of the dbg.declare intrinsic that
+ /// For each alloca, we keep track of the dbg.declare record that
/// describes it, if any, so that we can convert it to a dbg.value
- /// intrinsic if the alloca gets promoted.
- SmallVector<AllocaInfo::DbgUserVec, 8> AllocaDbgUsers;
+ /// record if the alloca gets promoted.
SmallVector<AllocaInfo::DPUserVec, 8> AllocaDPUsers;
/// For each alloca, keep an instance of a helper class that gives us an easy
@@ -741,14 +734,11 @@ promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info,
AI->eraseFromParent();
// The alloca's debuginfo can be removed as well.
- auto DbgUpdateForAlloca = [&](auto &Container) {
- for (auto *DbgItem : Container)
- if (DbgItem->isAddressOfVariable() ||
- DbgItem->getExpression()->startsWithDeref())
- DbgItem->eraseFromParent();
- };
- DbgUpdateForAlloca(Info.DbgUsers);
- DbgUpdateForAlloca(Info.DPUsers);
+ for (DbgVariableRecord *DbgItem : Info.DPUsers) {
+ if (DbgItem->isAddressOfVariable() ||
+ DbgItem->getExpression()->startsWithDeref())
+ DbgItem->eraseFromParent();
+ }
++NumLocalPromoted;
return true;
@@ -757,7 +747,6 @@ promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info,
void PromoteMem2Reg::run() {
Function &F = *DT.getRoot()->getParent();
- AllocaDbgUsers.resize(Allocas.size());
AllocaATInfo.resize(Allocas.size());
AllocaDPUsers.resize(Allocas.size());
@@ -816,9 +805,7 @@ void PromoteMem2Reg::run() {
if (BBNumPreds.empty())
BBNumPreds.resize(F.getMaxBlockNumber());
- // Remember the dbg.declare intrinsic describing this alloca, if any.
- if (!Info.DbgUsers.empty())
- AllocaDbgUsers[AllocaNum] = Info.DbgUsers;
+ // Remember the dbg.declare record describing this alloca, if any.
if (!Info.AssignmentTracking.empty())
AllocaATInfo[AllocaNum] = Info.AssignmentTracking;
if (!Info.DPUsers.empty())
@@ -894,16 +881,12 @@ void PromoteMem2Reg::run() {
}
// Remove alloca's dbg.declare intrinsics from the function.
- auto RemoveDbgDeclares = [&](auto &Container) {
- for (auto &DbgUsers : Container) {
- for (auto *DbgItem : DbgUsers)
- if (DbgItem->isAddressOfVariable() ||
- DbgItem->getExpression()->startsWithDeref())
- DbgItem->eraseFromParent();
- }
- };
- RemoveDbgDeclares(AllocaDbgUsers);
- RemoveDbgDeclares(AllocaDPUsers);
+ for (auto &DbgUsers : AllocaDPUsers) {
+ for (DbgVariableRecord *DbgItem : DbgUsers)
+ if (DbgItem->isAddressOfVariable() ||
+ DbgItem->getExpression()->startsWithDeref())
+ DbgItem->eraseFromParent();
+ }
// Loop over all of the PHI nodes and see if there are any that we can get
// rid of because they merge all of the same incoming values. This can
diff --git a/llvm/lib/Transforms/Utils/SSAUpdater.cpp b/llvm/lib/Transforms/Utils/SSAUpdater.cpp
index 5db7fc9..561c898 100644
--- a/llvm/lib/Transforms/Utils/SSAUpdater.cpp
+++ b/llvm/lib/Transforms/Utils/SSAUpdater.cpp
@@ -200,11 +200,7 @@ void SSAUpdater::UpdateDebugValues(Instruction *I) {
SmallVector<DbgValueInst *, 4> DbgValues;
SmallVector<DbgVariableRecord *, 4> DbgVariableRecords;
llvm::findDbgValues(DbgValues, I, &DbgVariableRecords);
- for (auto &DbgValue : DbgValues) {
- if (DbgValue->getParent() == I->getParent())
- continue;
- UpdateDebugValue(I, DbgValue);
- }
+ assert(DbgValues.empty());
for (auto &DVR : DbgVariableRecords) {
if (DVR->getParent() == I->getParent())
continue;
@@ -212,13 +208,6 @@ void SSAUpdater::UpdateDebugValues(Instruction *I) {
}
}
-void SSAUpdater::UpdateDebugValues(Instruction *I,
- SmallVectorImpl<DbgValueInst *> &DbgValues) {
- for (auto &DbgValue : DbgValues) {
- UpdateDebugValue(I, DbgValue);
- }
-}
-
void SSAUpdater::UpdateDebugValues(
Instruction *I, SmallVectorImpl<DbgVariableRecord *> &DbgVariableRecords) {
for (auto &DVR : DbgVariableRecords) {
@@ -226,15 +215,6 @@ void SSAUpdater::UpdateDebugValues(
}
}
-void SSAUpdater::UpdateDebugValue(Instruction *I, DbgValueInst *DbgValue) {
- BasicBlock *UserBB = DbgValue->getParent();
- if (HasValueForBlock(UserBB)) {
- Value *NewVal = GetValueAtEndOfBlock(UserBB);
- DbgValue->replaceVariableLocationOp(I, NewVal);
- } else
- DbgValue->setKillLocation();
-}
-
void SSAUpdater::UpdateDebugValue(Instruction *I, DbgVariableRecord *DVR) {
BasicBlock *UserBB = DVR->getParent();
if (HasValueForBlock(UserBB)) {
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index a75f290..75c9650 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -7493,7 +7493,7 @@ bool SimplifyCFGOpt::simplifyDuplicateSwitchArms(SwitchInst *SI,
SmallPtrSet<PHINode *, 8> Phis;
SmallPtrSet<BasicBlock *, 8> Seen;
DenseMap<PHINode *, SmallDenseMap<BasicBlock *, Value *, 8>> PhiPredIVs;
- DenseMap<BasicBlock *, SmallVector<unsigned, 4>> BBToSuccessorIndexes;
+ DenseMap<BasicBlock *, SmallVector<unsigned, 32>> BBToSuccessorIndexes;
SmallVector<SwitchSuccWrapper> Cases;
Cases.reserve(SI->getNumSuccessors());
@@ -7505,12 +7505,6 @@ bool SimplifyCFGOpt::simplifyDuplicateSwitchArms(SwitchInst *SI,
if (BB->size() != 1)
continue;
- // FIXME: This case needs some extra care because the terminators other than
- // SI need to be updated. For now, consider only backedges to the SI.
- if (BB->hasNPredecessorsOrMore(4) ||
- BB->getUniquePredecessor() != SI->getParent())
- continue;
-
// FIXME: Relax that the terminator is a BranchInst by checking for equality
// on other kinds of terminators. We decide to only support unconditional
// branches for now for compile time reasons.
@@ -7518,14 +7512,24 @@ bool SimplifyCFGOpt::simplifyDuplicateSwitchArms(SwitchInst *SI,
if (!BI || BI->isConditional())
continue;
- if (Seen.insert(BB).second) {
- // Keep track of which PHIs we need as keys in PhiPredIVs below.
- for (BasicBlock *Succ : BI->successors())
- Phis.insert_range(llvm::make_pointer_range(Succ->phis()));
- // Add the successor only if not previously visited.
- Cases.emplace_back(SwitchSuccWrapper{BB, &PhiPredIVs});
+ if (!Seen.insert(BB).second) {
+ auto It = BBToSuccessorIndexes.find(BB);
+ if (It != BBToSuccessorIndexes.end())
+ It->second.emplace_back(I);
+ continue;
}
+ // FIXME: This case needs some extra care because the terminators other than
+ // SI need to be updated. For now, consider only backedges to the SI.
+ if (BB->getUniquePredecessor() != SI->getParent())
+ continue;
+
+ // Keep track of which PHIs we need as keys in PhiPredIVs below.
+ for (BasicBlock *Succ : BI->successors())
+ Phis.insert_range(llvm::make_pointer_range(Succ->phis()));
+
+ // Add the successor only if not previously visited.
+ Cases.emplace_back(SwitchSuccWrapper{BB, &PhiPredIVs});
BBToSuccessorIndexes[BB].emplace_back(I);
}
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index ceeabd65..f142e07 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -947,9 +947,8 @@ public:
/// user options, for the given register kind.
bool useMaxBandwidth(TargetTransformInfo::RegisterKind RegKind);
- /// \return True if maximizing vector bandwidth is enabled by the target or
- /// user options, for the given vector factor.
- bool useMaxBandwidth(ElementCount VF);
+ /// \return True if register pressure should be calculated for the given VF.
+ bool shouldCalculateRegPressureForVF(ElementCount VF);
/// \return The size (in bits) of the smallest and widest types in the code
/// that needs to be vectorized. We ignore values that remain scalar such as
@@ -1736,6 +1735,9 @@ public:
/// Whether this loop should be optimized for size based on function attribute
/// or profile information.
bool OptForSize;
+
+ /// The highest VF possible for this loop, without using MaxBandwidth.
+ FixedScalableVFPair MaxPermissibleVFWithoutMaxBW;
};
} // end namespace llvm
@@ -3832,10 +3834,16 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) {
return FixedScalableVFPair::getNone();
}
-bool LoopVectorizationCostModel::useMaxBandwidth(ElementCount VF) {
- return useMaxBandwidth(VF.isScalable()
- ? TargetTransformInfo::RGK_ScalableVector
- : TargetTransformInfo::RGK_FixedWidthVector);
+bool LoopVectorizationCostModel::shouldCalculateRegPressureForVF(
+ ElementCount VF) {
+ if (!useMaxBandwidth(VF.isScalable()
+ ? TargetTransformInfo::RGK_ScalableVector
+ : TargetTransformInfo::RGK_FixedWidthVector))
+ return false;
+ // Only calculate register pressure for VFs enabled by MaxBandwidth.
+ return ElementCount::isKnownGT(
+ VF, VF.isScalable() ? MaxPermissibleVFWithoutMaxBW.ScalableVF
+ : MaxPermissibleVFWithoutMaxBW.FixedVF);
}
bool LoopVectorizationCostModel::useMaxBandwidth(
@@ -3911,6 +3919,12 @@ ElementCount LoopVectorizationCostModel::getMaximizedVFForTarget(
ComputeScalableMaxVF ? TargetTransformInfo::RGK_ScalableVector
: TargetTransformInfo::RGK_FixedWidthVector;
ElementCount MaxVF = MaxVectorElementCount;
+
+ if (MaxVF.isScalable())
+ MaxPermissibleVFWithoutMaxBW.ScalableVF = MaxVF;
+ else
+ MaxPermissibleVFWithoutMaxBW.FixedVF = MaxVF;
+
if (useMaxBandwidth(RegKind)) {
auto MaxVectorElementCountMaxBW = ElementCount::get(
llvm::bit_floor(WidestRegister.getKnownMinValue() / SmallestType),
@@ -4264,9 +4278,11 @@ VectorizationFactor LoopVectorizationPlanner::selectVectorizationFactor() {
if (VF.isScalar())
continue;
- /// Don't consider the VF if it exceeds the number of registers for the
- /// target.
- if (CM.useMaxBandwidth(VF) && RUs[I].exceedsMaxNumRegs(TTI))
+ /// If the register pressure needs to be considered for VF,
+ /// don't consider the VF as valid if it exceeds the number
+ /// of registers for the target.
+ if (CM.shouldCalculateRegPressureForVF(VF) &&
+ RUs[I].exceedsMaxNumRegs(TTI, ForceTargetNumVectorRegs))
continue;
InstructionCost C = CM.expectedCost(VF);
@@ -7044,7 +7060,8 @@ VectorizationFactor LoopVectorizationPlanner::computeBestVF() {
InstructionCost Cost = cost(*P, VF);
VectorizationFactor CurrentFactor(VF, Cost, ScalarCost);
- if (CM.useMaxBandwidth(VF) && RUs[I].exceedsMaxNumRegs(TTI)) {
+ if (CM.shouldCalculateRegPressureForVF(VF) &&
+ RUs[I].exceedsMaxNumRegs(TTI, ForceTargetNumVectorRegs)) {
LLVM_DEBUG(dbgs() << "LV(REG): Not considering vector loop of width "
<< VF << " because it uses too many registers\n");
continue;
diff --git a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
index b27a7ff..ca8729a 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
@@ -404,9 +404,12 @@ static unsigned getVFScaleFactor(VPRecipeBase *R) {
return 1;
}
-bool VPRegisterUsage::exceedsMaxNumRegs(const TargetTransformInfo &TTI) const {
- return any_of(MaxLocalUsers, [&TTI](auto &LU) {
- return LU.second > TTI.getNumberOfRegisters(LU.first);
+bool VPRegisterUsage::exceedsMaxNumRegs(const TargetTransformInfo &TTI,
+ unsigned OverrideMaxNumRegs) const {
+ return any_of(MaxLocalUsers, [&TTI, &OverrideMaxNumRegs](auto &LU) {
+ return LU.second > (OverrideMaxNumRegs > 0
+ ? OverrideMaxNumRegs
+ : TTI.getNumberOfRegisters(LU.first));
});
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.h b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.h
index 7bcf9db..cd86d27 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.h
@@ -85,8 +85,10 @@ struct VPRegisterUsage {
SmallMapVector<unsigned, unsigned, 4> MaxLocalUsers;
/// Check if any of the tracked live intervals exceeds the number of
- /// available registers for the target.
- bool exceedsMaxNumRegs(const TargetTransformInfo &TTI) const;
+ /// available registers for the target. If non-zero, OverrideMaxNumRegs
+ /// is used in place of the target's number of registers.
+ bool exceedsMaxNumRegs(const TargetTransformInfo &TTI,
+ unsigned OverrideMaxNumRegs = 0) const;
};
/// Estimate the register usage for \p Plan and vectorization factors in \p VFs