aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/Analysis/ConstantFolding.cpp11
-rw-r--r--llvm/lib/Analysis/DXILResource.cpp7
-rw-r--r--llvm/lib/Analysis/DependenceAnalysis.cpp17
-rw-r--r--llvm/lib/Analysis/InstructionSimplify.cpp2
-rw-r--r--llvm/lib/CodeGen/AsmPrinter/AIXException.cpp4
-rw-r--r--llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp9
-rw-r--r--llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp4
-rw-r--r--llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp7
-rw-r--r--llvm/lib/CodeGen/BranchFolding.cpp50
-rw-r--r--llvm/lib/CodeGen/CodeGenPrepare.cpp4
-rw-r--r--llvm/lib/CodeGen/InterleavedAccessPass.cpp6
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp54
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h4
-rw-r--r--llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp18
-rw-r--r--llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp21
-rw-r--r--llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp26
-rw-r--r--llvm/lib/IR/DiagnosticInfo.cpp4
-rw-r--r--llvm/lib/MC/CMakeLists.txt6
-rw-r--r--llvm/lib/MC/ELFObjectWriter.cpp14
-rw-r--r--llvm/lib/MC/GOFFObjectWriter.cpp2
-rw-r--r--llvm/lib/MC/MCAsmInfoCOFF.cpp108
-rw-r--r--llvm/lib/MC/MCAsmInfoDarwin.cpp5
-rw-r--r--llvm/lib/MC/MCAsmInfoELF.cpp196
-rw-r--r--llvm/lib/MC/MCAsmInfoGOFF.cpp138
-rw-r--r--llvm/lib/MC/MCAsmInfoWasm.cpp83
-rw-r--r--llvm/lib/MC/MCAsmInfoXCOFF.cpp124
-rw-r--r--llvm/lib/MC/MCAsmStreamer.cpp10
-rw-r--r--llvm/lib/MC/MCContext.cpp4
-rw-r--r--llvm/lib/MC/MCExpr.cpp25
-rw-r--r--llvm/lib/MC/MCFragment.cpp2
-rw-r--r--llvm/lib/MC/MCMachOStreamer.cpp4
-rw-r--r--llvm/lib/MC/MCObjectStreamer.cpp29
-rw-r--r--llvm/lib/MC/MCParser/AsmParser.cpp2
-rw-r--r--llvm/lib/MC/MCParser/ELFAsmParser.cpp4
-rw-r--r--llvm/lib/MC/MCParser/MasmParser.cpp3
-rw-r--r--llvm/lib/MC/MCParser/WasmAsmParser.cpp2
-rw-r--r--llvm/lib/MC/MCSection.cpp5
-rw-r--r--llvm/lib/MC/MCSectionCOFF.cpp117
-rw-r--r--llvm/lib/MC/MCSectionDXContainer.cpp15
-rw-r--r--llvm/lib/MC/MCSectionELF.cpp217
-rw-r--r--llvm/lib/MC/MCSectionGOFF.cpp143
-rw-r--r--llvm/lib/MC/MCSectionMachO.cpp28
-rw-r--r--llvm/lib/MC/MCSectionWasm.cpp101
-rw-r--r--llvm/lib/MC/MCSectionXCOFF.cpp134
-rw-r--r--llvm/lib/MC/MCStreamer.cpp28
-rw-r--r--llvm/lib/MC/MCWasmStreamer.cpp2
-rw-r--r--llvm/lib/MC/MCWinCOFFStreamer.cpp3
-rw-r--r--llvm/lib/MC/MCXCOFFStreamer.cpp2
-rw-r--r--llvm/lib/MC/MachObjectWriter.cpp32
-rw-r--r--llvm/lib/MC/WasmObjectWriter.cpp2
-rw-r--r--llvm/lib/MC/WinCOFFObjectWriter.cpp5
-rw-r--r--llvm/lib/MC/XCOFFObjectWriter.cpp8
-rw-r--r--llvm/lib/Passes/PassRegistry.def1
-rw-r--r--llvm/lib/Support/SpecialCaseList.cpp30
-rw-r--r--llvm/lib/Target/AArch64/AArch64TargetObjectFile.cpp2
-rw-r--r--llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp3
-rw-r--r--llvm/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp3
-rw-r--r--llvm/lib/Target/AMDGPU/GCNRegPressure.cpp6
-rw-r--r--llvm/lib/Target/AMDGPU/GCNRegPressure.h40
-rw-r--r--llvm/lib/Target/AMDGPU/SIFoldOperands.cpp4
-rw-r--r--llvm/lib/Target/AMDGPU/SIISelLowering.cpp2
-rw-r--r--llvm/lib/Target/AMDGPU/SIInstrInfo.cpp13
-rw-r--r--llvm/lib/Target/AMDGPU/SIInstrInfo.td1
-rw-r--r--llvm/lib/Target/AMDGPU/SIInstructions.td10
-rw-r--r--llvm/lib/Target/AMDGPU/SIRegisterInfo.td43
-rw-r--r--llvm/lib/Target/AMDGPU/VOP3PInstructions.td6
-rw-r--r--llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp4
-rw-r--r--llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp3
-rw-r--r--llvm/lib/Target/AVR/AVRAsmPrinter.cpp2
-rw-r--r--llvm/lib/Target/AVR/MCTargetDesc/AVRMCAsmInfo.cpp2
-rw-r--r--llvm/lib/Target/AVR/MCTargetDesc/AVRMCAsmInfo.h4
-rw-r--r--llvm/lib/Target/BPF/BTFDebug.cpp6
-rw-r--r--llvm/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp7
-rw-r--r--llvm/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h5
-rw-r--r--llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp31
-rw-r--r--llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp1
-rw-r--r--llvm/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp2
-rw-r--r--llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp2
-rw-r--r--llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.h2
-rw-r--r--llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXTargetStreamer.cpp5
-rw-r--r--llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp2
-rw-r--r--llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h2
-rw-r--r--llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp2
-rw-r--r--llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp32
-rw-r--r--llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp9
-rw-r--r--llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp111
-rw-r--r--llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp1
-rw-r--r--llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h1
-rw-r--r--llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp2
-rw-r--r--llvm/lib/Target/RISCV/RISCVFrameLowering.cpp3
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfo.cpp3
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfoC.td121
-rw-r--r--llvm/lib/Target/RISCV/RISCVRegisterInfo.td5
-rw-r--r--llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp10
-rw-r--r--llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h4
-rw-r--r--llvm/lib/Target/SystemZ/MCTargetDesc/SystemZHLASMAsmStreamer.cpp4
-rw-r--r--llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp6
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssembly.td15
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp2
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td3
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyInstrRef.td11
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblySubtarget.cpp5
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblySubtarget.h2
-rw-r--r--llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp2
-rw-r--r--llvm/lib/Target/X86/X86PassRegistry.def44
-rw-r--r--llvm/lib/Target/X86/X86TargetTransformInfo.cpp131
-rw-r--r--llvm/lib/TargetParser/RISCVISAInfo.cpp19
-rw-r--r--llvm/lib/TextAPI/SymbolSet.cpp5
-rw-r--r--llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp96
-rw-r--r--llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp7
-rw-r--r--llvm/lib/Transforms/ObjCARC/CMakeLists.txt1
-rw-r--r--llvm/lib/Transforms/ObjCARC/ObjCARCAPElim.cpp156
-rw-r--r--llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp12
-rw-r--r--llvm/lib/Transforms/Scalar/StructurizeCFG.cpp130
-rw-r--r--llvm/lib/Transforms/Vectorize/LoopVectorize.cpp35
-rw-r--r--llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp703
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlan.cpp6
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlan.h19
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp2
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp44
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp5
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp36
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp65
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlanTransforms.h10
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp7
125 files changed, 2250 insertions, 1732 deletions
diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp
index ec78386..759c553 100644
--- a/llvm/lib/Analysis/ConstantFolding.cpp
+++ b/llvm/lib/Analysis/ConstantFolding.cpp
@@ -929,12 +929,11 @@ Constant *SymbolicallyEvaluateGEP(const GEPOperator *GEP,
if (!AllConstantInt)
break;
- // TODO: Try to intersect two inrange attributes?
- if (!InRange) {
- InRange = GEP->getInRange();
- if (InRange)
- // Adjust inrange by offset until now.
- InRange = InRange->sextOrTrunc(BitWidth).subtract(Offset);
+ // Adjust inrange offset and intersect inrange attributes
+ if (auto GEPRange = GEP->getInRange()) {
+ auto AdjustedGEPRange = GEPRange->sextOrTrunc(BitWidth).subtract(Offset);
+ InRange =
+ InRange ? InRange->intersectWith(AdjustedGEPRange) : AdjustedGEPRange;
}
Ptr = cast<Constant>(GEP->getOperand(0));
diff --git a/llvm/lib/Analysis/DXILResource.cpp b/llvm/lib/Analysis/DXILResource.cpp
index 2da6468..1959ab6 100644
--- a/llvm/lib/Analysis/DXILResource.cpp
+++ b/llvm/lib/Analysis/DXILResource.cpp
@@ -1079,15 +1079,16 @@ void DXILResourceBindingInfo::populate(Module &M, DXILResourceTypeMap &DRTM) {
// add new space
S = &BS->Spaces.emplace_back(B.Space);
- // the space is full - set flag to report overlapping binding later
- if (S->FreeRanges.empty()) {
+ // The space is full - there are no free slots left, or the rest of the
+ // slots are taken by an unbounded array. Set flag to report overlapping
+ // binding later.
+ if (S->FreeRanges.empty() || S->FreeRanges.back().UpperBound < UINT32_MAX) {
OverlappingBinding = true;
continue;
}
// adjust the last free range lower bound, split it in two, or remove it
BindingRange &LastFreeRange = S->FreeRanges.back();
- assert(LastFreeRange.UpperBound == UINT32_MAX);
if (LastFreeRange.LowerBound == B.LowerBound) {
if (B.UpperBound < UINT32_MAX)
LastFreeRange.LowerBound = B.UpperBound + 1;
diff --git a/llvm/lib/Analysis/DependenceAnalysis.cpp b/llvm/lib/Analysis/DependenceAnalysis.cpp
index dd9a44b..f1473b2 100644
--- a/llvm/lib/Analysis/DependenceAnalysis.cpp
+++ b/llvm/lib/Analysis/DependenceAnalysis.cpp
@@ -3383,6 +3383,10 @@ bool DependenceInfo::tryDelinearize(Instruction *Src, Instruction *Dst,
SrcSubscripts, DstSubscripts))
return false;
+ assert(isLoopInvariant(SrcBase, SrcLoop) &&
+ isLoopInvariant(DstBase, DstLoop) &&
+ "Expected SrcBase and DstBase to be loop invariant");
+
int Size = SrcSubscripts.size();
LLVM_DEBUG({
dbgs() << "\nSrcSubscripts: ";
@@ -3666,6 +3670,19 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst,
SCEVUnionPredicate(Assume, *SE));
}
+ // Even if the base pointers are the same, they may not be loop-invariant. It
+ // could lead to incorrect results, as we're analyzing loop-carried
+ // dependencies. Src and Dst can be in different loops, so we need to check
+ // the base pointer is invariant in both loops.
+ Loop *SrcLoop = LI->getLoopFor(Src->getParent());
+ Loop *DstLoop = LI->getLoopFor(Dst->getParent());
+ if (!isLoopInvariant(SrcBase, SrcLoop) ||
+ !isLoopInvariant(DstBase, DstLoop)) {
+ LLVM_DEBUG(dbgs() << "The base pointer is not loop invariant.\n");
+ return std::make_unique<Dependence>(Src, Dst,
+ SCEVUnionPredicate(Assume, *SE));
+ }
+
uint64_t EltSize = SrcLoc.Size.toRaw();
const SCEV *SrcEv = SE->getMinusSCEV(SrcSCEV, SrcBase);
const SCEV *DstEv = SE->getMinusSCEV(DstSCEV, DstBase);
diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp
index 82530e7..5907e21 100644
--- a/llvm/lib/Analysis/InstructionSimplify.cpp
+++ b/llvm/lib/Analysis/InstructionSimplify.cpp
@@ -5366,7 +5366,7 @@ static Value *simplifyCastInst(unsigned CastOpc, Value *Op, Type *Ty,
Type *MidTy = CI->getType();
Type *DstTy = Ty;
if (Src->getType() == Ty) {
- auto FirstOp = static_cast<Instruction::CastOps>(CI->getOpcode());
+ auto FirstOp = CI->getOpcode();
auto SecondOp = static_cast<Instruction::CastOps>(CastOpc);
Type *SrcIntPtrTy =
SrcTy->isPtrOrPtrVectorTy() ? Q.DL.getIntPtrType(SrcTy) : nullptr;
diff --git a/llvm/lib/CodeGen/AsmPrinter/AIXException.cpp b/llvm/lib/CodeGen/AsmPrinter/AIXException.cpp
index 5d7c97a..6356d71 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AIXException.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AIXException.cpp
@@ -37,8 +37,8 @@ void AIXException::emitExceptionInfoTable(const MCSymbol *LSDA,
// unsigned long personality; /* Pointer to the personality routine */
// }
- auto *EHInfo =
- cast<MCSectionXCOFF>(Asm->getObjFileLowering().getCompactUnwindSection());
+ auto *EHInfo = static_cast<MCSectionXCOFF *>(
+ Asm->getObjFileLowering().getCompactUnwindSection());
if (Asm->TM.getFunctionSections()) {
// If option -ffunction-sections is on, append the function name to the
// name of EH Info Table csect so that each function has its own EH Info
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index f1d3e96..6166271 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -4221,10 +4221,11 @@ MCSymbol *AsmPrinter::GetCPISymbol(unsigned CPID) const {
SectionKind Kind = CPE.getSectionKind(&DL);
const Constant *C = CPE.Val.ConstVal;
Align Alignment = CPE.Alignment;
- if (const MCSectionCOFF *S = dyn_cast<MCSectionCOFF>(
- getObjFileLowering().getSectionForConstant(DL, Kind, C,
- Alignment))) {
- if (MCSymbol *Sym = S->getCOMDATSymbol()) {
+ auto *S =
+ getObjFileLowering().getSectionForConstant(DL, Kind, C, Alignment);
+ if (S && TM.getTargetTriple().isOSBinFormatCOFF()) {
+ if (MCSymbol *Sym =
+ static_cast<const MCSectionCOFF *>(S)->getCOMDATSymbol()) {
if (Sym->isUndefined())
OutStreamer->emitSymbolAttribute(Sym, MCSA_Global);
return Sym;
diff --git a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
index 8abeb56..c5d6e40 100644
--- a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
@@ -1051,10 +1051,10 @@ void CodeViewDebug::switchToDebugSectionForSymbol(const MCSymbol *GVSym) {
// comdat key. A section may be comdat because of -ffunction-sections or
// because it is comdat in the IR.
MCSectionCOFF *GVSec =
- GVSym ? dyn_cast<MCSectionCOFF>(&GVSym->getSection()) : nullptr;
+ GVSym ? static_cast<MCSectionCOFF *>(&GVSym->getSection()) : nullptr;
const MCSymbol *KeySym = GVSec ? GVSec->getCOMDATSymbol() : nullptr;
- MCSectionCOFF *DebugSec = cast<MCSectionCOFF>(
+ auto *DebugSec = static_cast<MCSectionCOFF *>(
CompilerInfoAsm->getObjFileLowering().getCOFFDebugSymbolsSection());
DebugSec = OS.getContext().getAssociativeCOFFSection(DebugSec, KeySym);
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index 11b8576..7188833 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -972,10 +972,9 @@ void DwarfDebug::constructCallSiteEntryDIEs(const DISubprogram &SP,
// the call graph which could lead to some target function. For tail
// calls, no return PC information is needed, unless tuning for GDB in
// DWARF4 mode in which case we fake a return PC for compatibility.
- const MCSymbol *PCAddr =
- (!IsTail || CU.useGNUAnalogForDwarf5Feature())
- ? const_cast<MCSymbol *>(getLabelAfterInsn(TopLevelCallMI))
- : nullptr;
+ const MCSymbol *PCAddr = (!IsTail || CU.useGNUAnalogForDwarf5Feature())
+ ? getLabelAfterInsn(TopLevelCallMI)
+ : nullptr;
// For tail calls, it's necessary to record the address of the branch
// instruction so that the debugger can show where the tail call occurred.
diff --git a/llvm/lib/CodeGen/BranchFolding.cpp b/llvm/lib/CodeGen/BranchFolding.cpp
index 1cb0a23..3b3e7a4 100644
--- a/llvm/lib/CodeGen/BranchFolding.cpp
+++ b/llvm/lib/CodeGen/BranchFolding.cpp
@@ -2083,59 +2083,22 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) {
if (TBB == FBB) {
MBB->splice(Loc, TBB, TBB->begin(), TIB);
} else {
- // Merge the debug locations, and hoist and kill the debug instructions from
- // both branches. FIXME: We could probably try harder to preserve some debug
- // instructions (but at least this isn't producing wrong locations).
- MachineInstrBuilder MIRBuilder(*MBB->getParent(), Loc);
- auto HoistAndKillDbgInstr = [MBB, Loc](MachineBasicBlock::iterator DI) {
- assert(DI->isDebugInstr() && "Expected a debug instruction");
- if (DI->isDebugRef()) {
- const TargetInstrInfo *TII =
- MBB->getParent()->getSubtarget().getInstrInfo();
- const MCInstrDesc &DBGV = TII->get(TargetOpcode::DBG_VALUE);
- DI = BuildMI(*MBB->getParent(), DI->getDebugLoc(), DBGV, false, 0,
- DI->getDebugVariable(), DI->getDebugExpression());
- MBB->insert(Loc, &*DI);
- return;
- }
- // Deleting a DBG_PHI results in an undef at the referenced DBG_INSTR_REF.
- if (DI->isDebugPHI()) {
- DI->eraseFromParent();
- return;
- }
-
- DI->setDebugValueUndef();
- DI->moveBefore(&*Loc);
- };
-
// TIB and FIB point to the end of the regions to hoist/merge in TBB and
// FBB.
MachineBasicBlock::iterator FE = FIB;
MachineBasicBlock::iterator FI = FBB->begin();
for (MachineBasicBlock::iterator TI :
make_early_inc_range(make_range(TBB->begin(), TIB))) {
- // Hoist and kill debug instructions from FBB. After this loop FI points
- // to the next non-debug instruction to hoist (checked in assert after the
- // TBB debug instruction handling code).
- while (FI->isDebugInstr()) {
- assert(FI != FE && "Unexpected end of FBB range");
- MachineBasicBlock::iterator FINext = std::next(FI);
- HoistAndKillDbgInstr(FI);
- FI = FINext;
- }
-
- // Kill debug instructions before moving.
- if (TI->isDebugInstr()) {
- HoistAndKillDbgInstr(TI);
+ // Move debug instructions and pseudo probes without modifying them.
+ // FIXME: This is the wrong thing to do for debug locations, which
+ // should at least be killed (and hoisted from BOTH blocks).
+ if (TI->isDebugOrPseudoInstr()) {
+ TI->moveBefore(&*Loc);
continue;
}
- // If FI is a debug instruction, skip forward to the next non-debug
- // instruction.
+ // Get the next non-meta instruction in FBB.
FI = skipDebugInstructionsForward(FI, FE, false);
- // Pseudo probes are excluded from the range when identifying foldable
- // instructions, so we don't expect to see one now.
- assert(!TI->isPseudoProbe() && "Unexpected pseudo probe in range");
// NOTE: The loop above checks CheckKillDead but we can't do that here as
// it modifies some kill markers after the check.
assert(TI->isIdenticalTo(*FI, MachineInstr::CheckDefs) &&
@@ -2148,7 +2111,6 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) {
++FI;
}
}
-
FBB->erase(FBB->begin(), FIB);
if (UpdateLiveIns)
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index c21058c..416c56d 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -2095,6 +2095,10 @@ static bool isRemOfLoopIncrementWithLoopInvariant(
if (!L->isLoopInvariant(RemAmt))
return false;
+ // Only works if the AddOffset is a loop invaraint
+ if (AddOffset && !L->isLoopInvariant(AddOffset))
+ return false;
+
// Is the PHI a loop increment?
auto LoopIncrInfo = getIVIncrement(PN, LI);
if (!LoopIncrInfo)
diff --git a/llvm/lib/CodeGen/InterleavedAccessPass.cpp b/llvm/lib/CodeGen/InterleavedAccessPass.cpp
index c2839d4..5e50898 100644
--- a/llvm/lib/CodeGen/InterleavedAccessPass.cpp
+++ b/llvm/lib/CodeGen/InterleavedAccessPass.cpp
@@ -634,6 +634,9 @@ bool InterleavedAccessImpl::lowerDeinterleaveIntrinsic(
<< " and factor = " << Factor << "\n");
} else {
assert(II);
+ if (II->getIntrinsicID() != Intrinsic::masked_load &&
+ II->getIntrinsicID() != Intrinsic::vp_load)
+ return false;
// Check mask operand. Handle both all-true/false and interleaved mask.
Mask = getMask(getMaskOperand(II), Factor, getDeinterleavedVectorType(DI));
@@ -673,6 +676,9 @@ bool InterleavedAccessImpl::lowerInterleaveIntrinsic(
Value *Mask = nullptr;
if (II) {
+ if (II->getIntrinsicID() != Intrinsic::masked_store &&
+ II->getIntrinsicID() != Intrinsic::vp_store)
+ return false;
// Check mask operand. Handle both all-true/false and interleaved mask.
Mask = getMask(getMaskOperand(II), Factor,
cast<VectorType>(InterleaveValues[0]->getType()));
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index e5704c0..583a85a 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -22,6 +22,7 @@
#include "llvm/CodeGen/StackMaps.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/raw_ostream.h"
@@ -357,6 +358,9 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::PATCHPOINT:
Res = PromoteIntRes_PATCHPOINT(N);
break;
+ case ISD::READ_REGISTER:
+ Res = PromoteIntRes_READ_REGISTER(N);
+ break;
}
// If the result is null then the sub-method took care of registering it.
@@ -2076,6 +2080,9 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
case ISD::PATCHPOINT:
Res = PromoteIntOp_PATCHPOINT(N, OpNo);
break;
+ case ISD::WRITE_REGISTER:
+ Res = PromoteIntOp_WRITE_REGISTER(N, OpNo);
+ break;
case ISD::EXPERIMENTAL_VP_STRIDED_LOAD:
case ISD::EXPERIMENTAL_VP_STRIDED_STORE:
Res = PromoteIntOp_VP_STRIDED(N, OpNo);
@@ -2853,6 +2860,15 @@ SDValue DAGTypeLegalizer::PromoteIntOp_PATCHPOINT(SDNode *N, unsigned OpNo) {
return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0);
}
+SDValue DAGTypeLegalizer::PromoteIntOp_WRITE_REGISTER(SDNode *N,
+ unsigned OpNo) {
+ const Function &Fn = DAG.getMachineFunction().getFunction();
+ Fn.getContext().diagnose(DiagnosticInfoLegalizationFailure(
+ "cannot use llvm.write_register with illegal type", Fn,
+ N->getDebugLoc()));
+ return N->getOperand(0);
+}
+
SDValue DAGTypeLegalizer::PromoteIntOp_VP_STRIDED(SDNode *N, unsigned OpNo) {
assert((N->getOpcode() == ISD::EXPERIMENTAL_VP_STRIDED_LOAD && OpNo == 3) ||
(N->getOpcode() == ISD::EXPERIMENTAL_VP_STRIDED_STORE && OpNo == 4));
@@ -3127,6 +3143,10 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::VSCALE:
ExpandIntRes_VSCALE(N, Lo, Hi);
break;
+
+ case ISD::READ_REGISTER:
+ ExpandIntRes_READ_REGISTER(N, Lo, Hi);
+ break;
}
// If Lo/Hi is null, the sub-method took care of registering results etc.
@@ -5471,6 +5491,18 @@ void DAGTypeLegalizer::ExpandIntRes_VSCALE(SDNode *N, SDValue &Lo,
SplitInteger(Res, Lo, Hi);
}
+void DAGTypeLegalizer::ExpandIntRes_READ_REGISTER(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ const Function &Fn = DAG.getMachineFunction().getFunction();
+ Fn.getContext().diagnose(DiagnosticInfoLegalizationFailure(
+ "cannot use llvm.read_register with illegal type", Fn, N->getDebugLoc()));
+ ReplaceValueWith(SDValue(N, 1), N->getOperand(0));
+ EVT LoVT, HiVT;
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
+ Lo = DAG.getPOISON(LoVT);
+ Hi = DAG.getPOISON(HiVT);
+}
+
//===----------------------------------------------------------------------===//
// Integer Operand Expansion
//===----------------------------------------------------------------------===//
@@ -5537,6 +5569,9 @@ bool DAGTypeLegalizer::ExpandIntegerOperand(SDNode *N, unsigned OpNo) {
case ISD::EXPERIMENTAL_VP_STRIDED_STORE:
Res = ExpandIntOp_VP_STRIDED(N, OpNo);
break;
+ case ISD::WRITE_REGISTER:
+ Res = ExpandIntOp_WRITE_REGISTER(N, OpNo);
+ break;
}
// If the result is null, the sub-method took care of registering results etc.
@@ -5935,6 +5970,15 @@ SDValue DAGTypeLegalizer::ExpandIntOp_VP_STRIDED(SDNode *N, unsigned OpNo) {
return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0);
}
+SDValue DAGTypeLegalizer::ExpandIntOp_WRITE_REGISTER(SDNode *N, unsigned OpNo) {
+ const Function &Fn = DAG.getMachineFunction().getFunction();
+ Fn.getContext().diagnose(DiagnosticInfoLegalizationFailure(
+ "cannot use llvm.write_register with illegal type", Fn,
+ N->getDebugLoc()));
+
+ return N->getOperand(0);
+}
+
SDValue DAGTypeLegalizer::PromoteIntRes_VECTOR_SPLICE(SDNode *N) {
SDLoc dl(N);
@@ -6332,6 +6376,16 @@ SDValue DAGTypeLegalizer::PromoteIntRes_PATCHPOINT(SDNode *N) {
return Res.getValue(0);
}
+SDValue DAGTypeLegalizer::PromoteIntRes_READ_REGISTER(SDNode *N) {
+ const Function &Fn = DAG.getMachineFunction().getFunction();
+ Fn.getContext().diagnose(DiagnosticInfoLegalizationFailure(
+ "cannot use llvm.read_register with illegal type", Fn, N->getDebugLoc()));
+
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ ReplaceValueWith(SDValue(N, 1), N->getOperand(0));
+ return DAG.getPOISON(NVT);
+}
+
SDValue DAGTypeLegalizer::PromoteIntOp_EXTRACT_VECTOR_ELT(SDNode *N) {
SDLoc dl(N);
SDValue V0 = GetPromotedInteger(N->getOperand(0));
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 9b53724..2e13b18 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -378,6 +378,7 @@ private:
SDValue PromoteIntRes_VPFunnelShift(SDNode *N);
SDValue PromoteIntRes_IS_FPCLASS(SDNode *N);
SDValue PromoteIntRes_PATCHPOINT(SDNode *N);
+ SDValue PromoteIntRes_READ_REGISTER(SDNode *N);
SDValue PromoteIntRes_VECTOR_FIND_LAST_ACTIVE(SDNode *N);
SDValue PromoteIntRes_GET_ACTIVE_LANE_MASK(SDNode *N);
SDValue PromoteIntRes_PARTIAL_REDUCE_MLA(SDNode *N);
@@ -428,6 +429,7 @@ private:
SDValue PromoteIntOp_SET_ROUNDING(SDNode *N);
SDValue PromoteIntOp_STACKMAP(SDNode *N, unsigned OpNo);
SDValue PromoteIntOp_PATCHPOINT(SDNode *N, unsigned OpNo);
+ SDValue PromoteIntOp_WRITE_REGISTER(SDNode *N, unsigned OpNo);
SDValue PromoteIntOp_VP_STRIDED(SDNode *N, unsigned OpNo);
SDValue PromoteIntOp_VP_SPLICE(SDNode *N, unsigned OpNo);
SDValue PromoteIntOp_VECTOR_HISTOGRAM(SDNode *N, unsigned OpNo);
@@ -511,6 +513,7 @@ private:
void ExpandIntRes_FunnelShift (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_VSCALE (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_READ_REGISTER(SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandShiftByConstant(SDNode *N, const APInt &Amt,
SDValue &Lo, SDValue &Hi);
@@ -534,6 +537,7 @@ private:
SDValue ExpandIntOp_STACKMAP(SDNode *N, unsigned OpNo);
SDValue ExpandIntOp_PATCHPOINT(SDNode *N, unsigned OpNo);
SDValue ExpandIntOp_VP_STRIDED(SDNode *N, unsigned OpNo);
+ SDValue ExpandIntOp_WRITE_REGISTER(SDNode *N, unsigned OpNo);
void IntegerExpandSetCCOperands(SDValue &NewLHS, SDValue &NewRHS,
ISD::CondCode &CCCode, const SDLoc &dl);
diff --git a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index a40ceaa..408d07b 100644
--- a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -995,7 +995,7 @@ MCSection *TargetLoweringObjectFileELF::getSectionForLSDA(
if (!LSDASection || (!F.hasComdat() && !TM.getFunctionSections()))
return LSDASection;
- const auto *LSDA = cast<MCSectionELF>(LSDASection);
+ const auto *LSDA = static_cast<const MCSectionELF *>(LSDASection);
unsigned Flags = LSDA->getFlags();
const MCSymbolELF *LinkedToSym = nullptr;
StringRef Group;
@@ -2054,14 +2054,14 @@ MCSection *TargetLoweringObjectFileCOFF::getStaticCtorSection(
unsigned Priority, const MCSymbol *KeySym) const {
return getCOFFStaticStructorSection(
getContext(), getContext().getTargetTriple(), true, Priority, KeySym,
- cast<MCSectionCOFF>(StaticCtorSection));
+ static_cast<MCSectionCOFF *>(StaticCtorSection));
}
MCSection *TargetLoweringObjectFileCOFF::getStaticDtorSection(
unsigned Priority, const MCSymbol *KeySym) const {
return getCOFFStaticStructorSection(
getContext(), getContext().getTargetTriple(), false, Priority, KeySym,
- cast<MCSectionCOFF>(StaticDtorSection));
+ static_cast<MCSectionCOFF *>(StaticDtorSection));
}
const MCExpr *TargetLoweringObjectFileCOFF::lowerRelativeReference(
@@ -2388,23 +2388,25 @@ TargetLoweringObjectFileXCOFF::getTargetSymbol(const GlobalValue *GV,
// here.
if (const GlobalObject *GO = dyn_cast<GlobalObject>(GV)) {
if (GO->isDeclarationForLinker())
- return cast<MCSectionXCOFF>(getSectionForExternalReference(GO, TM))
+ return static_cast<const MCSectionXCOFF *>(
+ getSectionForExternalReference(GO, TM))
->getQualNameSymbol();
if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
if (GVar->hasAttribute("toc-data"))
- return cast<MCSectionXCOFF>(
+ return static_cast<const MCSectionXCOFF *>(
SectionForGlobal(GVar, SectionKind::getData(), TM))
->getQualNameSymbol();
SectionKind GOKind = getKindForGlobal(GO, TM);
if (GOKind.isText())
- return cast<MCSectionXCOFF>(
+ return static_cast<const MCSectionXCOFF *>(
getSectionForFunctionDescriptor(cast<Function>(GO), TM))
->getQualNameSymbol();
if ((TM.getDataSections() && !GO->hasSection()) || GO->hasCommonLinkage() ||
GOKind.isBSSLocal() || GOKind.isThreadBSSLocal())
- return cast<MCSectionXCOFF>(SectionForGlobal(GO, GOKind, TM))
+ return static_cast<const MCSectionXCOFF *>(
+ SectionForGlobal(GO, GOKind, TM))
->getQualNameSymbol();
}
@@ -2740,7 +2742,7 @@ MCSection *TargetLoweringObjectFileXCOFF::getSectionForTOCEntry(
MCSection *TargetLoweringObjectFileXCOFF::getSectionForLSDA(
const Function &F, const MCSymbol &FnSym, const TargetMachine &TM) const {
- auto *LSDA = cast<MCSectionXCOFF>(LSDASection);
+ auto *LSDA = static_cast<MCSectionXCOFF *>(LSDASection);
if (TM.getFunctionSections()) {
// If option -ffunction-sections is on, append the function name to the
// name of the LSDA csect so that each function has its own LSDA csect.
diff --git a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
index b3798f1..a8559e7 100644
--- a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
+++ b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
@@ -183,7 +183,7 @@ RuntimeDyldImpl::loadObjectImpl(const object::ObjectFile &Obj) {
std::lock_guard<sys::Mutex> locked(lock);
// Save information about our target
- Arch = (Triple::ArchType)Obj.getArch();
+ Arch = Obj.getArch();
IsTargetLittleEndian = Obj.isLittleEndian();
setMipsABI(Obj);
@@ -1361,18 +1361,17 @@ std::unique_ptr<RuntimeDyld::LoadedObjectInfo>
RuntimeDyld::loadObject(const ObjectFile &Obj) {
if (!Dyld) {
if (Obj.isELF())
- Dyld =
- createRuntimeDyldELF(static_cast<Triple::ArchType>(Obj.getArch()),
- MemMgr, Resolver, ProcessAllSections,
- std::move(NotifyStubEmitted));
+ Dyld = createRuntimeDyldELF(Obj.getArch(), MemMgr, Resolver,
+ ProcessAllSections,
+ std::move(NotifyStubEmitted));
else if (Obj.isMachO())
- Dyld = createRuntimeDyldMachO(
- static_cast<Triple::ArchType>(Obj.getArch()), MemMgr, Resolver,
- ProcessAllSections, std::move(NotifyStubEmitted));
+ Dyld = createRuntimeDyldMachO(Obj.getArch(), MemMgr, Resolver,
+ ProcessAllSections,
+ std::move(NotifyStubEmitted));
else if (Obj.isCOFF())
- Dyld = createRuntimeDyldCOFF(
- static_cast<Triple::ArchType>(Obj.getArch()), MemMgr, Resolver,
- ProcessAllSections, std::move(NotifyStubEmitted));
+ Dyld = createRuntimeDyldCOFF(Obj.getArch(), MemMgr, Resolver,
+ ProcessAllSections,
+ std::move(NotifyStubEmitted));
else
report_fatal_error("Incompatible object format!");
}
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 840ca83..7928772 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -2617,7 +2617,7 @@ void OpenMPIRBuilder::emitReductionListCopy(
Expected<Function *> OpenMPIRBuilder::emitInterWarpCopyFunction(
const LocationDescription &Loc, ArrayRef<ReductionInfo> ReductionInfos,
AttributeList FuncAttrs) {
- IRBuilder<>::InsertPointGuard IPG(Builder);
+ InsertPointTy SavedIP = Builder.saveIP();
LLVMContext &Ctx = M.getContext();
FunctionType *FuncTy = FunctionType::get(
Builder.getVoidTy(), {Builder.getPtrTy(), Builder.getInt32Ty()},
@@ -2630,7 +2630,6 @@ Expected<Function *> OpenMPIRBuilder::emitInterWarpCopyFunction(
WcFunc->addParamAttr(1, Attribute::NoUndef);
BasicBlock *EntryBB = BasicBlock::Create(M.getContext(), "entry", WcFunc);
Builder.SetInsertPoint(EntryBB);
- Builder.SetCurrentDebugLocation(llvm::DebugLoc());
// ReduceList: thread local Reduce list.
// At the stage of the computation when this function is called, partially
@@ -2845,6 +2844,7 @@ Expected<Function *> OpenMPIRBuilder::emitInterWarpCopyFunction(
}
Builder.CreateRetVoid();
+ Builder.restoreIP(SavedIP);
return WcFunc;
}
@@ -2853,7 +2853,6 @@ Function *OpenMPIRBuilder::emitShuffleAndReduceFunction(
ArrayRef<ReductionInfo> ReductionInfos, Function *ReduceFn,
AttributeList FuncAttrs) {
LLVMContext &Ctx = M.getContext();
- IRBuilder<>::InsertPointGuard IPG(Builder);
FunctionType *FuncTy =
FunctionType::get(Builder.getVoidTy(),
{Builder.getPtrTy(), Builder.getInt16Ty(),
@@ -2872,7 +2871,6 @@ Function *OpenMPIRBuilder::emitShuffleAndReduceFunction(
SarFunc->addParamAttr(3, Attribute::SExt);
BasicBlock *EntryBB = BasicBlock::Create(M.getContext(), "entry", SarFunc);
Builder.SetInsertPoint(EntryBB);
- Builder.SetCurrentDebugLocation(llvm::DebugLoc());
// Thread local Reduce list used to host the values of data to be reduced.
Argument *ReduceListArg = SarFunc->getArg(0);
@@ -3019,7 +3017,7 @@ Function *OpenMPIRBuilder::emitShuffleAndReduceFunction(
Function *OpenMPIRBuilder::emitListToGlobalCopyFunction(
ArrayRef<ReductionInfo> ReductionInfos, Type *ReductionsBufferTy,
AttributeList FuncAttrs) {
- IRBuilder<>::InsertPointGuard IPG(Builder);
+ OpenMPIRBuilder::InsertPointTy OldIP = Builder.saveIP();
LLVMContext &Ctx = M.getContext();
FunctionType *FuncTy = FunctionType::get(
Builder.getVoidTy(),
@@ -3035,7 +3033,6 @@ Function *OpenMPIRBuilder::emitListToGlobalCopyFunction(
BasicBlock *EntryBlock = BasicBlock::Create(Ctx, "entry", LtGCFunc);
Builder.SetInsertPoint(EntryBlock);
- Builder.SetCurrentDebugLocation(llvm::DebugLoc());
// Buffer: global reduction buffer.
Argument *BufferArg = LtGCFunc->getArg(0);
@@ -3123,13 +3120,14 @@ Function *OpenMPIRBuilder::emitListToGlobalCopyFunction(
}
Builder.CreateRetVoid();
+ Builder.restoreIP(OldIP);
return LtGCFunc;
}
Function *OpenMPIRBuilder::emitListToGlobalReduceFunction(
ArrayRef<ReductionInfo> ReductionInfos, Function *ReduceFn,
Type *ReductionsBufferTy, AttributeList FuncAttrs) {
- IRBuilder<>::InsertPointGuard IPG(Builder);
+ OpenMPIRBuilder::InsertPointTy OldIP = Builder.saveIP();
LLVMContext &Ctx = M.getContext();
FunctionType *FuncTy = FunctionType::get(
Builder.getVoidTy(),
@@ -3145,7 +3143,6 @@ Function *OpenMPIRBuilder::emitListToGlobalReduceFunction(
BasicBlock *EntryBlock = BasicBlock::Create(Ctx, "entry", LtGRFunc);
Builder.SetInsertPoint(EntryBlock);
- Builder.SetCurrentDebugLocation(llvm::DebugLoc());
// Buffer: global reduction buffer.
Argument *BufferArg = LtGRFunc->getArg(0);
@@ -3206,13 +3203,14 @@ Function *OpenMPIRBuilder::emitListToGlobalReduceFunction(
Builder.CreateCall(ReduceFn, {LocalReduceListAddrCast, ReduceList})
->addFnAttr(Attribute::NoUnwind);
Builder.CreateRetVoid();
+ Builder.restoreIP(OldIP);
return LtGRFunc;
}
Function *OpenMPIRBuilder::emitGlobalToListCopyFunction(
ArrayRef<ReductionInfo> ReductionInfos, Type *ReductionsBufferTy,
AttributeList FuncAttrs) {
- IRBuilder<>::InsertPointGuard IPG(Builder);
+ OpenMPIRBuilder::InsertPointTy OldIP = Builder.saveIP();
LLVMContext &Ctx = M.getContext();
FunctionType *FuncTy = FunctionType::get(
Builder.getVoidTy(),
@@ -3228,7 +3226,6 @@ Function *OpenMPIRBuilder::emitGlobalToListCopyFunction(
BasicBlock *EntryBlock = BasicBlock::Create(Ctx, "entry", LtGCFunc);
Builder.SetInsertPoint(EntryBlock);
- Builder.SetCurrentDebugLocation(llvm::DebugLoc());
// Buffer: global reduction buffer.
Argument *BufferArg = LtGCFunc->getArg(0);
@@ -3314,13 +3311,14 @@ Function *OpenMPIRBuilder::emitGlobalToListCopyFunction(
}
Builder.CreateRetVoid();
+ Builder.restoreIP(OldIP);
return LtGCFunc;
}
Function *OpenMPIRBuilder::emitGlobalToListReduceFunction(
ArrayRef<ReductionInfo> ReductionInfos, Function *ReduceFn,
Type *ReductionsBufferTy, AttributeList FuncAttrs) {
- IRBuilder<>::InsertPointGuard IPG(Builder);
+ OpenMPIRBuilder::InsertPointTy OldIP = Builder.saveIP();
LLVMContext &Ctx = M.getContext();
auto *FuncTy = FunctionType::get(
Builder.getVoidTy(),
@@ -3336,7 +3334,6 @@ Function *OpenMPIRBuilder::emitGlobalToListReduceFunction(
BasicBlock *EntryBlock = BasicBlock::Create(Ctx, "entry", LtGRFunc);
Builder.SetInsertPoint(EntryBlock);
- Builder.SetCurrentDebugLocation(llvm::DebugLoc());
// Buffer: global reduction buffer.
Argument *BufferArg = LtGRFunc->getArg(0);
@@ -3397,6 +3394,7 @@ Function *OpenMPIRBuilder::emitGlobalToListReduceFunction(
Builder.CreateCall(ReduceFn, {ReduceList, ReductionList})
->addFnAttr(Attribute::NoUnwind);
Builder.CreateRetVoid();
+ Builder.restoreIP(OldIP);
return LtGRFunc;
}
@@ -3409,7 +3407,6 @@ std::string OpenMPIRBuilder::getReductionFuncName(StringRef Name) const {
Expected<Function *> OpenMPIRBuilder::createReductionFunction(
StringRef ReducerName, ArrayRef<ReductionInfo> ReductionInfos,
ReductionGenCBKind ReductionGenCBKind, AttributeList FuncAttrs) {
- IRBuilder<>::InsertPointGuard IPG(Builder);
auto *FuncTy = FunctionType::get(Builder.getVoidTy(),
{Builder.getPtrTy(), Builder.getPtrTy()},
/* IsVarArg */ false);
@@ -3422,7 +3419,6 @@ Expected<Function *> OpenMPIRBuilder::createReductionFunction(
BasicBlock *EntryBB =
BasicBlock::Create(M.getContext(), "entry", ReductionFunc);
Builder.SetInsertPoint(EntryBB);
- Builder.SetCurrentDebugLocation(llvm::DebugLoc());
// Need to alloca memory here and deal with the pointers before getting
// LHS/RHS pointers out
@@ -3750,12 +3746,10 @@ static Error populateReductionFunction(
Function *ReductionFunc,
ArrayRef<OpenMPIRBuilder::ReductionInfo> ReductionInfos,
IRBuilder<> &Builder, ArrayRef<bool> IsByRef, bool IsGPU) {
- IRBuilder<>::InsertPointGuard IPG(Builder);
Module *Module = ReductionFunc->getParent();
BasicBlock *ReductionFuncBlock =
BasicBlock::Create(Module->getContext(), "", ReductionFunc);
Builder.SetInsertPoint(ReductionFuncBlock);
- Builder.SetCurrentDebugLocation(llvm::DebugLoc());
Value *LHSArrayPtr = nullptr;
Value *RHSArrayPtr = nullptr;
if (IsGPU) {
diff --git a/llvm/lib/IR/DiagnosticInfo.cpp b/llvm/lib/IR/DiagnosticInfo.cpp
index b94dcac..4f37624 100644
--- a/llvm/lib/IR/DiagnosticInfo.cpp
+++ b/llvm/lib/IR/DiagnosticInfo.cpp
@@ -81,6 +81,10 @@ void DiagnosticInfoInlineAsm::print(DiagnosticPrinter &DP) const {
DP << " at line " << getLocCookie();
}
+void DiagnosticInfoLegalizationFailure::print(DiagnosticPrinter &DP) const {
+ DP << getLocationStr() << ": " << getMsgStr();
+}
+
DiagnosticInfoRegAllocFailure::DiagnosticInfoRegAllocFailure(
const Twine &MsgStr, const Function &Fn, const DiagnosticLocation &DL,
DiagnosticSeverity Severity)
diff --git a/llvm/lib/MC/CMakeLists.txt b/llvm/lib/MC/CMakeLists.txt
index d662c42..18a85b3 100644
--- a/llvm/lib/MC/CMakeLists.txt
+++ b/llvm/lib/MC/CMakeLists.txt
@@ -43,13 +43,7 @@ add_llvm_component_library(LLVMMC
MCRegisterInfo.cpp
MCSchedule.cpp
MCSection.cpp
- MCSectionCOFF.cpp
- MCSectionDXContainer.cpp
- MCSectionELF.cpp
- MCSectionGOFF.cpp
MCSectionMachO.cpp
- MCSectionWasm.cpp
- MCSectionXCOFF.cpp
MCStreamer.cpp
MCSPIRVStreamer.cpp
MCSubtargetInfo.cpp
diff --git a/llvm/lib/MC/ELFObjectWriter.cpp b/llvm/lib/MC/ELFObjectWriter.cpp
index 4510d4db..ae8dffc 100644
--- a/llvm/lib/MC/ELFObjectWriter.cpp
+++ b/llvm/lib/MC/ELFObjectWriter.cpp
@@ -1087,7 +1087,8 @@ uint64_t ELFWriter::writeObject() {
// Remember the offset into the file for this section.
const uint64_t SecStart = align(RelSection->getAlign());
- writeRelocations(cast<MCSectionELF>(*RelSection->getLinkedToSection()));
+ writeRelocations(
+ static_cast<const MCSectionELF &>(*RelSection->getLinkedToSection()));
uint64_t SecEnd = W.OS.tell();
RelSection->setOffsets(SecStart, SecEnd);
@@ -1260,7 +1261,7 @@ bool ELFObjectWriter::useSectionSymbol(const MCValue &Val,
// that it pointed to another string and subtracting 42 at runtime will
// produce the wrong value.
if (Sym->isInSection()) {
- auto &Sec = cast<MCSectionELF>(Sym->getSection());
+ auto &Sec = static_cast<const MCSectionELF &>(Sym->getSection());
unsigned Flags = Sec.getFlags();
if (Flags & ELF::SHF_MERGE) {
if (C != 0)
@@ -1312,13 +1313,14 @@ bool ELFObjectWriter::checkRelocation(SMLoc Loc, const MCSectionELF *From,
void ELFObjectWriter::recordRelocation(const MCFragment &F,
const MCFixup &Fixup, MCValue Target,
uint64_t &FixedValue) {
- const MCSectionELF &Section = cast<MCSectionELF>(*F.getParent());
+ auto &Section = static_cast<const MCSectionELF &>(*F.getParent());
MCContext &Ctx = getContext();
const auto *SymA = cast_or_null<MCSymbolELF>(Target.getAddSym());
- const MCSectionELF *SecA = (SymA && SymA->isInSection())
- ? cast<MCSectionELF>(&SymA->getSection())
- : nullptr;
+ const MCSectionELF *SecA =
+ (SymA && SymA->isInSection())
+ ? static_cast<const MCSectionELF *>(&SymA->getSection())
+ : nullptr;
if (DwoOS && !checkRelocation(Fixup.getLoc(), &Section, SecA))
return;
diff --git a/llvm/lib/MC/GOFFObjectWriter.cpp b/llvm/lib/MC/GOFFObjectWriter.cpp
index 1871f5f..88188f3 100644
--- a/llvm/lib/MC/GOFFObjectWriter.cpp
+++ b/llvm/lib/MC/GOFFObjectWriter.cpp
@@ -336,7 +336,7 @@ void GOFFWriter::defineSymbols() {
unsigned Ordinal = 0;
// Process all sections.
for (MCSection &S : Asm) {
- auto &Section = cast<MCSectionGOFF>(S);
+ auto &Section = static_cast<MCSectionGOFF &>(S);
Section.setOrdinal(++Ordinal);
defineSectionSymbols(Section);
}
diff --git a/llvm/lib/MC/MCAsmInfoCOFF.cpp b/llvm/lib/MC/MCAsmInfoCOFF.cpp
index 0b8781c..54717df 100644
--- a/llvm/lib/MC/MCAsmInfoCOFF.cpp
+++ b/llvm/lib/MC/MCAsmInfoCOFF.cpp
@@ -12,7 +12,13 @@
//===----------------------------------------------------------------------===//
#include "llvm/MC/MCAsmInfoCOFF.h"
+#include "llvm/BinaryFormat/COFF.h"
#include "llvm/MC/MCDirectives.h"
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCSectionCOFF.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cassert>
using namespace llvm;
@@ -49,6 +55,10 @@ MCAsmInfoCOFF::MCAsmInfoCOFF() {
HasCOFFComdatConstants = true;
}
+bool MCAsmInfoCOFF::useCodeAlign(const MCSection &Sec) const {
+ return Sec.isText();
+}
+
void MCAsmInfoMicrosoft::anchor() {}
MCAsmInfoMicrosoft::MCAsmInfoMicrosoft() = default;
@@ -64,3 +74,101 @@ MCAsmInfoGNUCOFF::MCAsmInfoGNUCOFF() {
// We don't create constants in comdat sections for MinGW.
HasCOFFComdatConstants = false;
}
+
+bool MCSectionCOFF::shouldOmitSectionDirective(StringRef Name) const {
+ if (COMDATSymbol || isUnique())
+ return false;
+
+ // FIXME: Does .section .bss/.data/.text work everywhere??
+ if (Name == ".text" || Name == ".data" || Name == ".bss")
+ return true;
+
+ return false;
+}
+
+void MCSectionCOFF::setSelection(int Selection) const {
+ assert(Selection != 0 && "invalid COMDAT selection type");
+ this->Selection = Selection;
+ Characteristics |= COFF::IMAGE_SCN_LNK_COMDAT;
+}
+
+void MCAsmInfoCOFF::printSwitchToSection(const MCSection &Section, uint32_t,
+ const Triple &T,
+ raw_ostream &OS) const {
+ auto &Sec = static_cast<const MCSectionCOFF &>(Section);
+ // standard sections don't require the '.section'
+ if (Sec.shouldOmitSectionDirective(Sec.getName())) {
+ OS << '\t' << Sec.getName() << '\n';
+ return;
+ }
+
+ OS << "\t.section\t" << Sec.getName() << ",\"";
+ if (Sec.getCharacteristics() & COFF::IMAGE_SCN_CNT_INITIALIZED_DATA)
+ OS << 'd';
+ if (Sec.getCharacteristics() & COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA)
+ OS << 'b';
+ if (Sec.getCharacteristics() & COFF::IMAGE_SCN_MEM_EXECUTE)
+ OS << 'x';
+ if (Sec.getCharacteristics() & COFF::IMAGE_SCN_MEM_WRITE)
+ OS << 'w';
+ else if (Sec.getCharacteristics() & COFF::IMAGE_SCN_MEM_READ)
+ OS << 'r';
+ else
+ OS << 'y';
+ if (Sec.getCharacteristics() & COFF::IMAGE_SCN_LNK_REMOVE)
+ OS << 'n';
+ if (Sec.getCharacteristics() & COFF::IMAGE_SCN_MEM_SHARED)
+ OS << 's';
+ if ((Sec.getCharacteristics() & COFF::IMAGE_SCN_MEM_DISCARDABLE) &&
+ !Sec.isImplicitlyDiscardable(Sec.getName()))
+ OS << 'D';
+ if (Sec.getCharacteristics() & COFF::IMAGE_SCN_LNK_INFO)
+ OS << 'i';
+ OS << '"';
+
+ // unique should be tail of .section directive.
+ if (Sec.isUnique() && !Sec.COMDATSymbol)
+ OS << ",unique," << Sec.UniqueID;
+
+ if (Sec.getCharacteristics() & COFF::IMAGE_SCN_LNK_COMDAT) {
+ if (Sec.COMDATSymbol)
+ OS << ",";
+ else
+ OS << "\n\t.linkonce\t";
+ switch (Sec.Selection) {
+ case COFF::IMAGE_COMDAT_SELECT_NODUPLICATES:
+ OS << "one_only";
+ break;
+ case COFF::IMAGE_COMDAT_SELECT_ANY:
+ OS << "discard";
+ break;
+ case COFF::IMAGE_COMDAT_SELECT_SAME_SIZE:
+ OS << "same_size";
+ break;
+ case COFF::IMAGE_COMDAT_SELECT_EXACT_MATCH:
+ OS << "same_contents";
+ break;
+ case COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE:
+ OS << "associative";
+ break;
+ case COFF::IMAGE_COMDAT_SELECT_LARGEST:
+ OS << "largest";
+ break;
+ case COFF::IMAGE_COMDAT_SELECT_NEWEST:
+ OS << "newest";
+ break;
+ default:
+ assert(false && "unsupported COFF selection type");
+ break;
+ }
+ if (Sec.COMDATSymbol) {
+ OS << ",";
+ Sec.COMDATSymbol->print(OS, this);
+ }
+ }
+
+ if (Sec.isUnique() && Sec.COMDATSymbol)
+ OS << ",unique," << Sec.UniqueID;
+
+ OS << '\n';
+}
diff --git a/llvm/lib/MC/MCAsmInfoDarwin.cpp b/llvm/lib/MC/MCAsmInfoDarwin.cpp
index 9cba775..e156fa0 100644
--- a/llvm/lib/MC/MCAsmInfoDarwin.cpp
+++ b/llvm/lib/MC/MCAsmInfoDarwin.cpp
@@ -85,3 +85,8 @@ MCAsmInfoDarwin::MCAsmInfoDarwin() {
DwarfUsesRelocationsAcrossSections = false;
SetDirectiveSuppressesReloc = true;
}
+
+bool MCAsmInfoDarwin::useCodeAlign(const MCSection &Sec) const {
+ return static_cast<const MCSectionMachO &>(Sec).hasAttribute(
+ MachO::S_ATTR_PURE_INSTRUCTIONS);
+}
diff --git a/llvm/lib/MC/MCAsmInfoELF.cpp b/llvm/lib/MC/MCAsmInfoELF.cpp
index 7eb89ef..cdae9d7 100644
--- a/llvm/lib/MC/MCAsmInfoELF.cpp
+++ b/llvm/lib/MC/MCAsmInfoELF.cpp
@@ -12,9 +12,16 @@
//===----------------------------------------------------------------------===//
#include "llvm/MC/MCAsmInfoELF.h"
+#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCSectionELF.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/TargetParser/Triple.h"
+#include <cassert>
using namespace llvm;
@@ -28,9 +35,198 @@ MCSection *MCAsmInfoELF::getNonexecutableStackSection(MCContext &Ctx) const {
return Ctx.getELFSection(".note.GNU-stack", ELF::SHT_PROGBITS, 0);
}
+bool MCAsmInfoELF::useCodeAlign(const MCSection &Sec) const {
+ return static_cast<const MCSectionELF &>(Sec).getFlags() & ELF::SHF_EXECINSTR;
+}
+
MCAsmInfoELF::MCAsmInfoELF() {
HasIdentDirective = true;
WeakRefDirective = "\t.weak\t";
PrivateGlobalPrefix = ".L";
PrivateLabelPrefix = ".L";
}
+
+static void printName(raw_ostream &OS, StringRef Name) {
+ if (Name.find_first_not_of("0123456789_."
+ "abcdefghijklmnopqrstuvwxyz"
+ "ABCDEFGHIJKLMNOPQRSTUVWXYZ") == Name.npos) {
+ OS << Name;
+ return;
+ }
+ OS << '"';
+ for (const char *B = Name.begin(), *E = Name.end(); B < E; ++B) {
+ if (*B == '"') // Unquoted "
+ OS << "\\\"";
+ else if (*B != '\\') // Neither " or backslash
+ OS << *B;
+ else if (B + 1 == E) // Trailing backslash
+ OS << "\\\\";
+ else {
+ OS << B[0] << B[1]; // Quoted character
+ ++B;
+ }
+ }
+ OS << '"';
+}
+
+void MCAsmInfoELF::printSwitchToSection(const MCSection &Section,
+ uint32_t Subsection, const Triple &T,
+ raw_ostream &OS) const {
+ auto &Sec = static_cast<const MCSectionELF &>(Section);
+ if (!Sec.isUnique() && shouldOmitSectionDirective(Sec.getName())) {
+ OS << '\t' << Sec.getName();
+ if (Subsection)
+ OS << '\t' << Subsection;
+ OS << '\n';
+ return;
+ }
+
+ OS << "\t.section\t";
+ printName(OS, Sec.getName());
+
+ // Handle the weird solaris syntax if desired.
+ if (usesSunStyleELFSectionSwitchSyntax() && !(Sec.Flags & ELF::SHF_MERGE)) {
+ if (Sec.Flags & ELF::SHF_ALLOC)
+ OS << ",#alloc";
+ if (Sec.Flags & ELF::SHF_EXECINSTR)
+ OS << ",#execinstr";
+ if (Sec.Flags & ELF::SHF_WRITE)
+ OS << ",#write";
+ if (Sec.Flags & ELF::SHF_EXCLUDE)
+ OS << ",#exclude";
+ if (Sec.Flags & ELF::SHF_TLS)
+ OS << ",#tls";
+ OS << '\n';
+ return;
+ }
+
+ OS << ",\"";
+ if (Sec.Flags & ELF::SHF_ALLOC)
+ OS << 'a';
+ if (Sec.Flags & ELF::SHF_EXCLUDE)
+ OS << 'e';
+ if (Sec.Flags & ELF::SHF_EXECINSTR)
+ OS << 'x';
+ if (Sec.Flags & ELF::SHF_WRITE)
+ OS << 'w';
+ if (Sec.Flags & ELF::SHF_MERGE)
+ OS << 'M';
+ if (Sec.Flags & ELF::SHF_STRINGS)
+ OS << 'S';
+ if (Sec.Flags & ELF::SHF_TLS)
+ OS << 'T';
+ if (Sec.Flags & ELF::SHF_LINK_ORDER)
+ OS << 'o';
+ if (Sec.Flags & ELF::SHF_GROUP)
+ OS << 'G';
+ if (Sec.Flags & ELF::SHF_GNU_RETAIN)
+ OS << 'R';
+
+ // If there are os-specific flags, print them.
+ if (T.isOSSolaris())
+ if (Sec.Flags & ELF::SHF_SUNW_NODISCARD)
+ OS << 'R';
+
+ // If there are tarSec.get-specific flags, print them.
+ Triple::ArchType Arch = T.getArch();
+ if (Arch == Triple::xcore) {
+ if (Sec.Flags & ELF::XCORE_SHF_CP_SECTION)
+ OS << 'c';
+ if (Sec.Flags & ELF::XCORE_SHF_DP_SECTION)
+ OS << 'd';
+ } else if (T.isARM() || T.isThumb()) {
+ if (Sec.Flags & ELF::SHF_ARM_PURECODE)
+ OS << 'y';
+ } else if (T.isAArch64()) {
+ if (Sec.Flags & ELF::SHF_AARCH64_PURECODE)
+ OS << 'y';
+ } else if (Arch == Triple::hexagon) {
+ if (Sec.Flags & ELF::SHF_HEX_GPREL)
+ OS << 's';
+ } else if (Arch == Triple::x86_64) {
+ if (Sec.Flags & ELF::SHF_X86_64_LARGE)
+ OS << 'l';
+ }
+
+ OS << '"';
+
+ OS << ',';
+
+ // If comment string is '@', e.g. as on ARM - use '%' instead
+ if (getCommentString()[0] == '@')
+ OS << '%';
+ else
+ OS << '@';
+
+ if (Sec.Type == ELF::SHT_INIT_ARRAY)
+ OS << "init_array";
+ else if (Sec.Type == ELF::SHT_FINI_ARRAY)
+ OS << "fini_array";
+ else if (Sec.Type == ELF::SHT_PREINIT_ARRAY)
+ OS << "preinit_array";
+ else if (Sec.Type == ELF::SHT_NOBITS)
+ OS << "nobits";
+ else if (Sec.Type == ELF::SHT_NOTE)
+ OS << "note";
+ else if (Sec.Type == ELF::SHT_PROGBITS)
+ OS << "progbits";
+ else if (Sec.Type == ELF::SHT_X86_64_UNWIND)
+ OS << "unwind";
+ else if (Sec.Type == ELF::SHT_MIPS_DWARF)
+ // Print hex value of the flag while we do not have
+ // any standard symbolic representation of the flag.
+ OS << "0x7000001e";
+ else if (Sec.Type == ELF::SHT_LLVM_ODRTAB)
+ OS << "llvm_odrtab";
+ else if (Sec.Type == ELF::SHT_LLVM_LINKER_OPTIONS)
+ OS << "llvm_linker_options";
+ else if (Sec.Type == ELF::SHT_LLVM_CALL_GRAPH_PROFILE)
+ OS << "llvm_call_graph_profile";
+ else if (Sec.Type == ELF::SHT_LLVM_DEPENDENT_LIBRARIES)
+ OS << "llvm_dependent_libraries";
+ else if (Sec.Type == ELF::SHT_LLVM_SYMPART)
+ OS << "llvm_sympart";
+ else if (Sec.Type == ELF::SHT_LLVM_BB_ADDR_MAP)
+ OS << "llvm_bb_addr_map";
+ else if (Sec.Type == ELF::SHT_LLVM_OFFLOADING)
+ OS << "llvm_offloading";
+ else if (Sec.Type == ELF::SHT_LLVM_LTO)
+ OS << "llvm_lto";
+ else if (Sec.Type == ELF::SHT_LLVM_JT_SIZES)
+ OS << "llvm_jt_sizes";
+ else if (Sec.Type == ELF::SHT_LLVM_CFI_JUMP_TABLE)
+ OS << "llvm_cfi_jump_table";
+ else
+ OS << "0x" << Twine::utohexstr(Sec.Type);
+
+ if (Sec.EntrySize) {
+ assert((Sec.Flags & ELF::SHF_MERGE) ||
+ Sec.Type == ELF::SHT_LLVM_CFI_JUMP_TABLE);
+ OS << "," << Sec.EntrySize;
+ }
+
+ if (Sec.Flags & ELF::SHF_LINK_ORDER) {
+ OS << ",";
+ if (Sec.LinkedToSym)
+ printName(OS, Sec.LinkedToSym->getName());
+ else
+ OS << '0';
+ }
+
+ if (Sec.Flags & ELF::SHF_GROUP) {
+ OS << ",";
+ printName(OS, Sec.Group.getPointer()->getName());
+ if (Sec.isComdat())
+ OS << ",comdat";
+ }
+
+ if (Sec.isUnique())
+ OS << ",unique," << Sec.UniqueID;
+
+ OS << '\n';
+
+ if (Subsection) {
+ OS << "\t.subsection\t" << Subsection;
+ OS << '\n';
+ }
+}
diff --git a/llvm/lib/MC/MCAsmInfoGOFF.cpp b/llvm/lib/MC/MCAsmInfoGOFF.cpp
index 3c81a46..0a5d1927 100644
--- a/llvm/lib/MC/MCAsmInfoGOFF.cpp
+++ b/llvm/lib/MC/MCAsmInfoGOFF.cpp
@@ -13,11 +13,12 @@
//===----------------------------------------------------------------------===//
#include "llvm/MC/MCAsmInfoGOFF.h"
+#include "llvm/BinaryFormat/GOFF.h"
+#include "llvm/MC/MCSectionGOFF.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
-void MCAsmInfoGOFF::anchor() {}
-
MCAsmInfoGOFF::MCAsmInfoGOFF() {
Data64bitsDirective = "\t.quad\t";
HasDotTypeDotSizeDirective = false;
@@ -25,3 +26,136 @@ MCAsmInfoGOFF::MCAsmInfoGOFF() {
PrivateLabelPrefix = "L#";
ZeroDirective = "\t.space\t";
}
+
+static void emitCATTR(raw_ostream &OS, StringRef Name, GOFF::ESDRmode Rmode,
+ GOFF::ESDAlignment Alignment,
+ GOFF::ESDLoadingBehavior LoadBehavior,
+ GOFF::ESDExecutable Executable, bool IsReadOnly,
+ uint32_t SortKey, uint8_t FillByteValue,
+ StringRef PartName) {
+ OS << Name << " CATTR ";
+ OS << "ALIGN(" << static_cast<unsigned>(Alignment) << "),"
+ << "FILL(" << static_cast<unsigned>(FillByteValue) << ")";
+ switch (LoadBehavior) {
+ case GOFF::ESD_LB_Deferred:
+ OS << ",DEFLOAD";
+ break;
+ case GOFF::ESD_LB_NoLoad:
+ OS << ",NOLOAD";
+ break;
+ default:
+ break;
+ }
+ switch (Executable) {
+ case GOFF::ESD_EXE_CODE:
+ OS << ",EXECUTABLE";
+ break;
+ case GOFF::ESD_EXE_DATA:
+ OS << ",NOTEXECUTABLE";
+ break;
+ default:
+ break;
+ }
+ if (IsReadOnly)
+ OS << ",READONLY";
+ if (Rmode != GOFF::ESD_RMODE_None) {
+ OS << ',';
+ OS << "RMODE(";
+ switch (Rmode) {
+ case GOFF::ESD_RMODE_24:
+ OS << "24";
+ break;
+ case GOFF::ESD_RMODE_31:
+ OS << "31";
+ break;
+ case GOFF::ESD_RMODE_64:
+ OS << "64";
+ break;
+ case GOFF::ESD_RMODE_None:
+ break;
+ }
+ OS << ')';
+ }
+ if (SortKey)
+ OS << ",PRIORITY(" << SortKey << ")";
+ if (!PartName.empty())
+ OS << ",PART(" << PartName << ")";
+ OS << '\n';
+}
+
+static void emitXATTR(raw_ostream &OS, StringRef Name,
+ GOFF::ESDLinkageType Linkage,
+ GOFF::ESDExecutable Executable,
+ GOFF::ESDBindingScope BindingScope) {
+ OS << Name << " XATTR ";
+ OS << "LINKAGE(" << (Linkage == GOFF::ESD_LT_OS ? "OS" : "XPLINK") << "),";
+ if (Executable != GOFF::ESD_EXE_Unspecified)
+ OS << "REFERENCE(" << (Executable == GOFF::ESD_EXE_CODE ? "CODE" : "DATA")
+ << "),";
+ if (BindingScope != GOFF::ESD_BSC_Unspecified) {
+ OS << "SCOPE(";
+ switch (BindingScope) {
+ case GOFF::ESD_BSC_Section:
+ OS << "SECTION";
+ break;
+ case GOFF::ESD_BSC_Module:
+ OS << "MODULE";
+ break;
+ case GOFF::ESD_BSC_Library:
+ OS << "LIBRARY";
+ break;
+ case GOFF::ESD_BSC_ImportExport:
+ OS << "EXPORT";
+ break;
+ default:
+ break;
+ }
+ OS << ')';
+ }
+ OS << '\n';
+}
+
+void MCAsmInfoGOFF::printSwitchToSection(const MCSection &Section,
+ uint32_t Subsection, const Triple &T,
+ raw_ostream &OS) const {
+ auto &Sec =
+ const_cast<MCSectionGOFF &>(static_cast<const MCSectionGOFF &>(Section));
+ switch (Sec.SymbolType) {
+ case GOFF::ESD_ST_SectionDefinition: {
+ OS << Sec.getName() << " CSECT\n";
+ Sec.Emitted = true;
+ break;
+ }
+ case GOFF::ESD_ST_ElementDefinition: {
+ printSwitchToSection(*Sec.getParent(), Subsection, T, OS);
+ if (!Sec.Emitted) {
+ emitCATTR(OS, Sec.getName(), Sec.EDAttributes.Rmode,
+ Sec.EDAttributes.Alignment, Sec.EDAttributes.LoadBehavior,
+ GOFF::ESD_EXE_Unspecified, Sec.EDAttributes.IsReadOnly, 0,
+ Sec.EDAttributes.FillByteValue, StringRef());
+ Sec.Emitted = true;
+ } else
+ OS << Sec.getName() << " CATTR\n";
+ break;
+ }
+ case GOFF::ESD_ST_PartReference: {
+ MCSectionGOFF *ED = Sec.getParent();
+ printSwitchToSection(*ED->getParent(), Subsection, T, OS);
+ if (!Sec.Emitted) {
+ emitCATTR(OS, ED->getName(), ED->getEDAttributes().Rmode,
+ ED->EDAttributes.Alignment, ED->EDAttributes.LoadBehavior,
+ Sec.PRAttributes.Executable, ED->EDAttributes.IsReadOnly,
+ Sec.PRAttributes.SortKey, ED->EDAttributes.FillByteValue,
+ Sec.getName());
+ emitXATTR(OS, Sec.getName(), Sec.PRAttributes.Linkage,
+ Sec.PRAttributes.Executable, Sec.PRAttributes.BindingScope);
+ ED->Emitted = true;
+ Sec.Emitted = true;
+ } else
+ OS << ED->getName() << " CATTR PART(" << Sec.getName() << ")\n";
+ break;
+ }
+ default:
+ llvm_unreachable("Wrong section type");
+ }
+}
diff --git a/llvm/lib/MC/MCAsmInfoWasm.cpp b/llvm/lib/MC/MCAsmInfoWasm.cpp
index ce6ec7e..5e44f48 100644
--- a/llvm/lib/MC/MCAsmInfoWasm.cpp
+++ b/llvm/lib/MC/MCAsmInfoWasm.cpp
@@ -12,9 +12,11 @@
//===----------------------------------------------------------------------===//
#include "llvm/MC/MCAsmInfoWasm.h"
-using namespace llvm;
+#include "llvm/MC/MCSectionWasm.h"
+#include "llvm/MC/MCSymbolWasm.h"
+#include "llvm/Support/raw_ostream.h"
-void MCAsmInfoWasm::anchor() {}
+using namespace llvm;
MCAsmInfoWasm::MCAsmInfoWasm() {
HasIdentDirective = true;
@@ -23,3 +25,80 @@ MCAsmInfoWasm::MCAsmInfoWasm() {
PrivateGlobalPrefix = ".L";
PrivateLabelPrefix = ".L";
}
+
+static void printName(raw_ostream &OS, StringRef Name) {
+ if (Name.find_first_not_of("0123456789_."
+ "abcdefghijklmnopqrstuvwxyz"
+ "ABCDEFGHIJKLMNOPQRSTUVWXYZ") == Name.npos) {
+ OS << Name;
+ return;
+ }
+ OS << '"';
+ for (const char *B = Name.begin(), *E = Name.end(); B < E; ++B) {
+ if (*B == '"') // Unquoted "
+ OS << "\\\"";
+ else if (*B != '\\') // Neither " or backslash
+ OS << *B;
+ else if (B + 1 == E) // Trailing backslash
+ OS << "\\\\";
+ else {
+ OS << B[0] << B[1]; // Quoted character
+ ++B;
+ }
+ }
+ OS << '"';
+}
+
+void MCAsmInfoWasm::printSwitchToSection(const MCSection &Section,
+ uint32_t Subsection, const Triple &T,
+ raw_ostream &OS) const {
+ auto &Sec = static_cast<const MCSectionWasm &>(Section);
+ if (shouldOmitSectionDirective(Sec.getName())) {
+ OS << '\t' << Sec.getName();
+ if (Subsection)
+ OS << '\t' << Subsection;
+ OS << '\n';
+ return;
+ }
+
+ OS << "\t.section\t";
+ printName(OS, Sec.getName());
+ OS << ",\"";
+
+ if (Sec.IsPassive)
+ OS << 'p';
+ if (Sec.Group)
+ OS << 'G';
+ if (Sec.SegmentFlags & wasm::WASM_SEG_FLAG_STRINGS)
+ OS << 'S';
+ if (Sec.SegmentFlags & wasm::WASM_SEG_FLAG_TLS)
+ OS << 'T';
+ if (Sec.SegmentFlags & wasm::WASM_SEG_FLAG_RETAIN)
+ OS << 'R';
+
+ OS << '"';
+
+ OS << ',';
+
+ // If comment string is '@', e.g. as on ARM - use '%' instead
+ if (getCommentString()[0] == '@')
+ OS << '%';
+ else
+ OS << '@';
+
+ // TODO: Print section type.
+
+ if (Sec.Group) {
+ OS << ",";
+ printName(OS, Sec.Group->getName());
+ OS << ",comdat";
+ }
+
+ if (Sec.isUnique())
+ OS << ",unique," << Sec.UniqueID;
+
+ OS << '\n';
+
+ if (Subsection)
+ OS << "\t.subsection\t" << Subsection << '\n';
+}
diff --git a/llvm/lib/MC/MCAsmInfoXCOFF.cpp b/llvm/lib/MC/MCAsmInfoXCOFF.cpp
index 6ef11ba..0403b44 100644
--- a/llvm/lib/MC/MCAsmInfoXCOFF.cpp
+++ b/llvm/lib/MC/MCAsmInfoXCOFF.cpp
@@ -8,7 +8,11 @@
#include "llvm/MC/MCAsmInfoXCOFF.h"
#include "llvm/ADT/StringExtras.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCSectionXCOFF.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -16,8 +20,6 @@ namespace llvm {
extern cl::opt<cl::boolOrDefault> UseLEB128Directives;
}
-void MCAsmInfoXCOFF::anchor() {}
-
MCAsmInfoXCOFF::MCAsmInfoXCOFF() {
IsAIX = true;
IsLittleEndian = false;
@@ -56,3 +58,121 @@ bool MCAsmInfoXCOFF::isAcceptableChar(char C) const {
// any combination of these.
return isAlnum(C) || C == '_' || C == '.';
}
+
+bool MCAsmInfoXCOFF::useCodeAlign(const MCSection &Sec) const {
+ return static_cast<const MCSectionXCOFF &>(Sec).getKind().isText();
+}
+
+MCSectionXCOFF::~MCSectionXCOFF() = default;
+
+void MCSectionXCOFF::printCsectDirective(raw_ostream &OS) const {
+ OS << "\t.csect " << QualName->getName() << "," << Log2(getAlign()) << '\n';
+}
+
+void MCAsmInfoXCOFF::printSwitchToSection(const MCSection &Section, uint32_t,
+ const Triple &T,
+ raw_ostream &OS) const {
+ auto &Sec = static_cast<const MCSectionXCOFF &>(Section);
+ if (Sec.getKind().isText()) {
+ if (Sec.getMappingClass() != XCOFF::XMC_PR)
+ report_fatal_error("Unhandled storage-mapping class for .text csect");
+
+ Sec.printCsectDirective(OS);
+ return;
+ }
+
+ if (Sec.getKind().isReadOnly()) {
+ if (Sec.getMappingClass() != XCOFF::XMC_RO &&
+ Sec.getMappingClass() != XCOFF::XMC_TD)
+ report_fatal_error("Unhandled storage-mapping class for .rodata csect.");
+ Sec.printCsectDirective(OS);
+ return;
+ }
+
+ if (Sec.getKind().isReadOnlyWithRel()) {
+ if (Sec.getMappingClass() != XCOFF::XMC_RW &&
+ Sec.getMappingClass() != XCOFF::XMC_RO &&
+ Sec.getMappingClass() != XCOFF::XMC_TD)
+ report_fatal_error(
+ "Unexepected storage-mapping class for ReadOnlyWithRel kind");
+ Sec.printCsectDirective(OS);
+ return;
+ }
+
+ // Initialized TLS data.
+ if (Sec.getKind().isThreadData()) {
+ // We only expect XMC_TL here for initialized TLS data.
+ if (Sec.getMappingClass() != XCOFF::XMC_TL)
+ report_fatal_error("Unhandled storage-mapping class for .tdata csect.");
+ Sec.printCsectDirective(OS);
+ return;
+ }
+
+ if (Sec.getKind().isData()) {
+ switch (Sec.getMappingClass()) {
+ case XCOFF::XMC_RW:
+ case XCOFF::XMC_DS:
+ case XCOFF::XMC_TD:
+ Sec.printCsectDirective(OS);
+ break;
+ case XCOFF::XMC_TC:
+ case XCOFF::XMC_TE:
+ break;
+ case XCOFF::XMC_TC0:
+ OS << "\t.toc\n";
+ break;
+ default:
+ report_fatal_error("Unhandled storage-mapping class for .data csect.");
+ }
+ return;
+ }
+
+ if (Sec.isCsect() && Sec.getMappingClass() == XCOFF::XMC_TD) {
+ // Common csect type (uninitialized storage) does not have to print
+ // csect directive for section switching unless it is local.
+ if (Sec.getKind().isCommon() && !Sec.getKind().isBSSLocal())
+ return;
+
+ assert(Sec.getKind().isBSS() && "Unexpected section kind for toc-data");
+ Sec.printCsectDirective(OS);
+ return;
+ }
+ // Common csect type (uninitialized storage) does not have to print csect
+ // directive for section switching.
+ if (Sec.isCsect() && Sec.getCSectType() == XCOFF::XTY_CM) {
+ assert((Sec.getMappingClass() == XCOFF::XMC_RW ||
+ Sec.getMappingClass() == XCOFF::XMC_BS ||
+ Sec.getMappingClass() == XCOFF::XMC_UL) &&
+ "Generated a storage-mapping class for a common/bss/tbss csect we "
+ "don't "
+ "understand how to switch to.");
+ // Common symbols and local zero-initialized symbols for TLS and Non-TLS are
+ // eligible for .bss/.tbss csect, getKind().isThreadBSS() is used to
+ // cover TLS common and zero-initialized local symbols since linkage type
+ // (in the GlobalVariable) is not accessible in this class.
+ assert((Sec.getKind().isBSSLocal() || Sec.getKind().isCommon() ||
+ Sec.getKind().isThreadBSS()) &&
+ "wrong symbol type for .bss/.tbss csect");
+ // Don't have to print a directive for switching to section for commons
+ // and zero-initialized TLS data. The '.comm' and '.lcomm' directives of the
+ // variable will create the needed csect.
+ return;
+ }
+
+ // Zero-initialized TLS data with weak or external linkage are not eligible to
+ // be put into common csect.
+ if (Sec.getKind().isThreadBSS()) {
+ Sec.printCsectDirective(OS);
+ return;
+ }
+
+ // XCOFF debug sections.
+ if (Sec.getKind().isMetadata() && Sec.isDwarfSect()) {
+ OS << "\n\t.dwsect " << format("0x%" PRIx32, *Sec.getDwarfSubtypeFlags())
+ << '\n';
+ OS << Sec.getName() << ':' << '\n';
+ return;
+ }
+
+ report_fatal_error("Printing for this SectionKind is unimplemented.");
+}
diff --git a/llvm/lib/MC/MCAsmStreamer.cpp b/llvm/lib/MC/MCAsmStreamer.cpp
index 7119ef4..da51da4 100644
--- a/llvm/lib/MC/MCAsmStreamer.cpp
+++ b/llvm/lib/MC/MCAsmStreamer.cpp
@@ -532,8 +532,8 @@ void MCAsmStreamer::switchSection(MCSection *Section, uint32_t Subsection) {
if (MCTargetStreamer *TS = getTargetStreamer()) {
TS->changeSection(Cur.first, Section, Subsection, OS);
} else {
- Section->printSwitchToSection(*MAI, getContext().getTargetTriple(), OS,
- Subsection);
+ MAI->printSwitchToSection(*Section, Subsection,
+ getContext().getTargetTriple(), OS);
}
}
MCStreamer::switchSection(Section, Subsection);
@@ -543,7 +543,7 @@ bool MCAsmStreamer::popSection() {
if (!MCStreamer::popSection())
return false;
auto [Sec, Subsec] = getCurrentSection();
- Sec->printSwitchToSection(*MAI, getContext().getTargetTriple(), OS, Subsec);
+ MAI->printSwitchToSection(*Sec, Subsec, getContext().getTargetTriple(), OS);
return true;
}
@@ -1105,7 +1105,7 @@ void MCAsmStreamer::emitZerofill(MCSection *Section, MCSymbol *Symbol,
// Note: a .zerofill directive does not switch sections.
OS << ".zerofill ";
- assert(Section->getVariant() == MCSection::SV_MachO &&
+ assert(getContext().getObjectFileType() == MCContext::IsMachO &&
".zerofill is a Mach-O specific directive");
// This is a mach-o specific directive.
@@ -1130,7 +1130,7 @@ void MCAsmStreamer::emitTBSSSymbol(MCSection *Section, MCSymbol *Symbol,
// Instead of using the Section we'll just use the shortcut.
- assert(Section->getVariant() == MCSection::SV_MachO &&
+ assert(getContext().getObjectFileType() == MCContext::IsMachO &&
".zerofill is a Mach-O specific directive");
// This is a mach-o specific directive and section.
diff --git a/llvm/lib/MC/MCContext.cpp b/llvm/lib/MC/MCContext.cpp
index c0448ae..39bf628 100644
--- a/llvm/lib/MC/MCContext.cpp
+++ b/llvm/lib/MC/MCContext.cpp
@@ -767,8 +767,8 @@ MCSectionCOFF *MCContext::getCOFFSection(StringRef Section,
if (Selection != COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE &&
COMDATSymbol->isDefined() &&
(!COMDATSymbol->isInSection() ||
- cast<MCSectionCOFF>(COMDATSymbol->getSection()).getCOMDATSymbol() !=
- COMDATSymbol))
+ static_cast<const MCSectionCOFF &>(COMDATSymbol->getSection())
+ .getCOMDATSymbol() != COMDATSymbol))
reportError(SMLoc(), "invalid symbol redefinition");
}
diff --git a/llvm/lib/MC/MCExpr.cpp b/llvm/lib/MC/MCExpr.cpp
index dbb2fd1..c24c82d 100644
--- a/llvm/lib/MC/MCExpr.cpp
+++ b/llvm/lib/MC/MCExpr.cpp
@@ -346,17 +346,16 @@ static void attemptToFoldSymbolOffsetDifference(const MCAssembler *Asm,
Displacement *= -1;
}
- // Track whether B is before a relaxable instruction and whether A is after
- // a relaxable instruction. If SA and SB are separated by a linker-relaxable
- // instruction, the difference cannot be resolved as it may be changed by
- // the linker.
+ // Track whether B is before a relaxable instruction/alignment and whether A
+ // is after a relaxable instruction/alignment. If SA and SB are separated by
+ // a linker-relaxable instruction/alignment, the difference cannot be
+ // resolved as it may be changed by the linker.
bool BBeforeRelax = false, AAfterRelax = false;
for (auto F = FB; F; F = F->getNext()) {
- auto DF = F->getKind() == MCFragment::FT_Data ? F : nullptr;
- if (DF && DF->isLinkerRelaxable()) {
- if (&*F != FB || SBOffset != DF->getContents().size())
+ if (F && F->isLinkerRelaxable()) {
+ if (&*F != FB || SBOffset != F->getSize())
BBeforeRelax = true;
- if (&*F != FA || SAOffset == DF->getContents().size())
+ if (&*F != FA || SAOffset == F->getSize())
AAfterRelax = true;
if (BBeforeRelax && AAfterRelax)
return;
@@ -370,17 +369,15 @@ static void attemptToFoldSymbolOffsetDifference(const MCAssembler *Asm,
}
int64_t Num;
- if (DF) {
- Displacement += DF->getContents().size();
- } else if (F->getKind() == MCFragment::FT_Relaxable &&
+ if (F->getKind() == MCFragment::FT_Data) {
+ Displacement += F->getFixedSize();
+ } else if ((F->getKind() == MCFragment::FT_Relaxable ||
+ F->getKind() == MCFragment::FT_Align) &&
Asm->hasFinalLayout()) {
// Before finishLayout, a relaxable fragment's size is indeterminate.
// After layout, during relocation generation, it can be treated as a
// data fragment.
Displacement += F->getSize();
- } else if (F->getKind() == MCFragment::FT_Align && Layout &&
- F->isLinkerRelaxable()) {
- Displacement += Asm->computeFragmentSize(*F);
} else if (auto *FF = dyn_cast<MCFillFragment>(F);
FF && FF->getNumValues().evaluateAsAbsolute(Num)) {
Displacement += Num * FF->getValueSize();
diff --git a/llvm/lib/MC/MCFragment.cpp b/llvm/lib/MC/MCFragment.cpp
index 3c395e5..6cbdf74 100644
--- a/llvm/lib/MC/MCFragment.cpp
+++ b/llvm/lib/MC/MCFragment.cpp
@@ -35,7 +35,7 @@ MCFragment::MCFragment(FragmentType Kind, bool HasInstructions)
}
const MCSymbol *MCFragment::getAtom() const {
- return cast<MCSectionMachO>(Parent)->getAtom(LayoutOrder);
+ return static_cast<const MCSectionMachO *>(Parent)->getAtom(LayoutOrder);
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
diff --git a/llvm/lib/MC/MCMachOStreamer.cpp b/llvm/lib/MC/MCMachOStreamer.cpp
index 4934815..1074669 100644
--- a/llvm/lib/MC/MCMachOStreamer.cpp
+++ b/llvm/lib/MC/MCMachOStreamer.cpp
@@ -443,13 +443,13 @@ void MCMachOStreamer::finishImpl() {
// Set the fragment atom associations by tracking the last seen atom defining
// symbol.
for (MCSection &Sec : getAssembler()) {
- cast<MCSectionMachO>(Sec).allocAtoms();
+ static_cast<MCSectionMachO &>(Sec).allocAtoms();
const MCSymbol *CurrentAtom = nullptr;
size_t I = 0;
for (MCFragment &Frag : Sec) {
if (const MCSymbol *Symbol = DefiningSymbolMap.lookup(&Frag))
CurrentAtom = Symbol;
- cast<MCSectionMachO>(Sec).setAtom(I++, CurrentAtom);
+ static_cast<MCSectionMachO &>(Sec).setAtom(I++, CurrentAtom);
}
}
diff --git a/llvm/lib/MC/MCObjectStreamer.cpp b/llvm/lib/MC/MCObjectStreamer.cpp
index 37b6629..9c7b05b 100644
--- a/llvm/lib/MC/MCObjectStreamer.cpp
+++ b/llvm/lib/MC/MCObjectStreamer.cpp
@@ -280,11 +280,6 @@ void MCObjectStreamer::changeSection(MCSection *Section, uint32_t Subsection) {
}
}
-void MCObjectStreamer::switchSectionNoPrint(MCSection *Section) {
- MCStreamer::switchSectionNoPrint(Section);
- changeSection(Section, 0);
-}
-
void MCObjectStreamer::emitAssignment(MCSymbol *Symbol, const MCExpr *Value) {
getAssembler().registerSymbol(*Symbol);
MCStreamer::emitAssignment(Symbol, Value);
@@ -345,31 +340,33 @@ void MCObjectStreamer::emitInstToData(const MCInst &Inst,
MCFragment *F = getCurrentFragment();
// Append the instruction to the data fragment.
- size_t FixupStartIndex = F->getFixups().size();
size_t CodeOffset = F->getContents().size();
SmallVector<MCFixup, 1> Fixups;
getAssembler().getEmitter().encodeInstruction(
Inst, F->getContentsForAppending(), Fixups, STI);
F->doneAppending();
- if (!Fixups.empty())
- F->appendFixups(Fixups);
F->setHasInstructions(STI);
+ if (Fixups.empty())
+ return;
bool MarkedLinkerRelaxable = false;
- for (auto &Fixup : MutableArrayRef(F->getFixups()).slice(FixupStartIndex)) {
+ for (auto &Fixup : Fixups) {
Fixup.setOffset(Fixup.getOffset() + CodeOffset);
- if (!Fixup.isLinkerRelaxable())
+ if (!Fixup.isLinkerRelaxable() || MarkedLinkerRelaxable)
continue;
- F->setLinkerRelaxable();
+ MarkedLinkerRelaxable = true;
+ // Set the fragment's order within the subsection for use by
+ // MCAssembler::relaxAlign.
+ auto *Sec = F->getParent();
+ if (!Sec->isLinkerRelaxable())
+ Sec->setLinkerRelaxable();
// Do not add data after a linker-relaxable instruction. The difference
// between a new label and a label at or before the linker-relaxable
// instruction cannot be resolved at assemble-time.
- if (!MarkedLinkerRelaxable) {
- MarkedLinkerRelaxable = true;
- getCurrentSectionOnly()->setLinkerRelaxable();
- newFragment();
- }
+ F->setLinkerRelaxable();
+ newFragment();
}
+ F->appendFixups(Fixups);
}
void MCObjectStreamer::emitInstToFragment(const MCInst &Inst,
diff --git a/llvm/lib/MC/MCParser/AsmParser.cpp b/llvm/lib/MC/MCParser/AsmParser.cpp
index eda5e8c..9f64a98 100644
--- a/llvm/lib/MC/MCParser/AsmParser.cpp
+++ b/llvm/lib/MC/MCParser/AsmParser.cpp
@@ -3413,7 +3413,7 @@ bool AsmParser::parseDirectiveAlign(bool IsPow2, uint8_t ValueSize) {
// Check whether we should use optimal code alignment for this .align
// directive.
- if (Section->useCodeAlign() && !HasFillExpr) {
+ if (MAI.useCodeAlign(*Section) && !HasFillExpr) {
getStreamer().emitCodeAlignment(
Align(Alignment), &getTargetParser().getSTI(), MaxBytesToFill);
} else {
diff --git a/llvm/lib/MC/MCParser/ELFAsmParser.cpp b/llvm/lib/MC/MCParser/ELFAsmParser.cpp
index c7c3df3..2e251cc 100644
--- a/llvm/lib/MC/MCParser/ELFAsmParser.cpp
+++ b/llvm/lib/MC/MCParser/ELFAsmParser.cpp
@@ -644,8 +644,8 @@ EndStmt:
}
if (UseLastGroup) {
- if (const MCSectionELF *Section =
- cast_or_null<MCSectionELF>(getStreamer().getCurrentSectionOnly()))
+ if (auto *Section = static_cast<const MCSectionELF *>(
+ getStreamer().getCurrentSectionOnly()))
if (const MCSymbol *Group = Section->getGroup()) {
GroupName = Group->getName();
IsComdat = Section->isComdat();
diff --git a/llvm/lib/MC/MCParser/MasmParser.cpp b/llvm/lib/MC/MCParser/MasmParser.cpp
index f4684e6..780289e 100644
--- a/llvm/lib/MC/MCParser/MasmParser.cpp
+++ b/llvm/lib/MC/MCParser/MasmParser.cpp
@@ -4228,8 +4228,7 @@ bool MasmParser::emitAlignTo(int64_t Alignment) {
// Check whether we should use optimal code alignment for this align
// directive.
const MCSection *Section = getStreamer().getCurrentSectionOnly();
- assert(Section && "must have section to emit alignment");
- if (Section->useCodeAlign()) {
+ if (MAI.useCodeAlign(*Section)) {
getStreamer().emitCodeAlignment(Align(Alignment),
&getTargetParser().getSTI(),
/*MaxBytesToEmit=*/0);
diff --git a/llvm/lib/MC/MCParser/WasmAsmParser.cpp b/llvm/lib/MC/MCParser/WasmAsmParser.cpp
index 1f824b8..d97f4f5 100644
--- a/llvm/lib/MC/MCParser/WasmAsmParser.cpp
+++ b/llvm/lib/MC/MCParser/WasmAsmParser.cpp
@@ -252,7 +252,7 @@ public:
if (TypeName == "function") {
WasmSym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION);
auto *Current =
- cast<MCSectionWasm>(getStreamer().getCurrentSectionOnly());
+ static_cast<MCSectionWasm *>(getStreamer().getCurrentSectionOnly());
if (Current->getGroup())
WasmSym->setComdat(true);
} else if (TypeName == "global")
diff --git a/llvm/lib/MC/MCSection.cpp b/llvm/lib/MC/MCSection.cpp
index e738a22..4f28267 100644
--- a/llvm/lib/MC/MCSection.cpp
+++ b/llvm/lib/MC/MCSection.cpp
@@ -18,10 +18,9 @@
using namespace llvm;
-MCSection::MCSection(SectionVariant V, StringRef Name, bool IsText, bool IsBss,
- MCSymbol *Begin)
+MCSection::MCSection(StringRef Name, bool IsText, bool IsBss, MCSymbol *Begin)
: Begin(Begin), HasInstructions(false), IsRegistered(false), IsText(IsText),
- IsBss(IsBss), LinkerRelaxable(false), Name(Name), Variant(V) {
+ IsBss(IsBss), LinkerRelaxable(false), Name(Name) {
DummyFragment.setParent(this);
}
diff --git a/llvm/lib/MC/MCSectionCOFF.cpp b/llvm/lib/MC/MCSectionCOFF.cpp
deleted file mode 100644
index 5bf1473..0000000
--- a/llvm/lib/MC/MCSectionCOFF.cpp
+++ /dev/null
@@ -1,117 +0,0 @@
-//===- lib/MC/MCSectionCOFF.cpp - COFF Code Section Representation --------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/MC/MCSectionCOFF.h"
-#include "llvm/BinaryFormat/COFF.h"
-#include "llvm/MC/MCSymbol.h"
-#include "llvm/Support/raw_ostream.h"
-#include <cassert>
-
-using namespace llvm;
-
-// shouldOmitSectionDirective - Decides whether a '.section' directive
-// should be printed before the section name
-bool MCSectionCOFF::shouldOmitSectionDirective(StringRef Name,
- const MCAsmInfo &MAI) const {
- if (COMDATSymbol || isUnique())
- return false;
-
- // FIXME: Does .section .bss/.data/.text work everywhere??
- if (Name == ".text" || Name == ".data" || Name == ".bss")
- return true;
-
- return false;
-}
-
-void MCSectionCOFF::setSelection(int Selection) const {
- assert(Selection != 0 && "invalid COMDAT selection type");
- this->Selection = Selection;
- Characteristics |= COFF::IMAGE_SCN_LNK_COMDAT;
-}
-
-void MCSectionCOFF::printSwitchToSection(const MCAsmInfo &MAI, const Triple &T,
- raw_ostream &OS,
- uint32_t Subsection) const {
- // standard sections don't require the '.section'
- if (shouldOmitSectionDirective(getName(), MAI)) {
- OS << '\t' << getName() << '\n';
- return;
- }
-
- OS << "\t.section\t" << getName() << ",\"";
- if (getCharacteristics() & COFF::IMAGE_SCN_CNT_INITIALIZED_DATA)
- OS << 'd';
- if (getCharacteristics() & COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA)
- OS << 'b';
- if (getCharacteristics() & COFF::IMAGE_SCN_MEM_EXECUTE)
- OS << 'x';
- if (getCharacteristics() & COFF::IMAGE_SCN_MEM_WRITE)
- OS << 'w';
- else if (getCharacteristics() & COFF::IMAGE_SCN_MEM_READ)
- OS << 'r';
- else
- OS << 'y';
- if (getCharacteristics() & COFF::IMAGE_SCN_LNK_REMOVE)
- OS << 'n';
- if (getCharacteristics() & COFF::IMAGE_SCN_MEM_SHARED)
- OS << 's';
- if ((getCharacteristics() & COFF::IMAGE_SCN_MEM_DISCARDABLE) &&
- !isImplicitlyDiscardable(getName()))
- OS << 'D';
- if (getCharacteristics() & COFF::IMAGE_SCN_LNK_INFO)
- OS << 'i';
- OS << '"';
-
- // unique should be tail of .section directive.
- if (isUnique() && !COMDATSymbol)
- OS << ",unique," << UniqueID;
-
- if (getCharacteristics() & COFF::IMAGE_SCN_LNK_COMDAT) {
- if (COMDATSymbol)
- OS << ",";
- else
- OS << "\n\t.linkonce\t";
- switch (Selection) {
- case COFF::IMAGE_COMDAT_SELECT_NODUPLICATES:
- OS << "one_only";
- break;
- case COFF::IMAGE_COMDAT_SELECT_ANY:
- OS << "discard";
- break;
- case COFF::IMAGE_COMDAT_SELECT_SAME_SIZE:
- OS << "same_size";
- break;
- case COFF::IMAGE_COMDAT_SELECT_EXACT_MATCH:
- OS << "same_contents";
- break;
- case COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE:
- OS << "associative";
- break;
- case COFF::IMAGE_COMDAT_SELECT_LARGEST:
- OS << "largest";
- break;
- case COFF::IMAGE_COMDAT_SELECT_NEWEST:
- OS << "newest";
- break;
- default:
- assert(false && "unsupported COFF selection type");
- break;
- }
- if (COMDATSymbol) {
- OS << ",";
- COMDATSymbol->print(OS, &MAI);
- }
- }
-
- if (isUnique() && COMDATSymbol)
- OS << ",unique," << UniqueID;
-
- OS << '\n';
-}
-
-bool MCSectionCOFF::useCodeAlign() const { return isText(); }
diff --git a/llvm/lib/MC/MCSectionDXContainer.cpp b/llvm/lib/MC/MCSectionDXContainer.cpp
deleted file mode 100644
index 7eee59d..0000000
--- a/llvm/lib/MC/MCSectionDXContainer.cpp
+++ /dev/null
@@ -1,15 +0,0 @@
-//===- lib/MC/MCSectionDXContainer.cpp - DXContainer Section --------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/MC/MCSectionDXContainer.h"
-
-using namespace llvm;
-
-void MCSectionDXContainer::printSwitchToSection(const MCAsmInfo &,
- const Triple &, raw_ostream &,
- uint32_t) const {}
diff --git a/llvm/lib/MC/MCSectionELF.cpp b/llvm/lib/MC/MCSectionELF.cpp
deleted file mode 100644
index ef33f9c..0000000
--- a/llvm/lib/MC/MCSectionELF.cpp
+++ /dev/null
@@ -1,217 +0,0 @@
-//===- lib/MC/MCSectionELF.cpp - ELF Code Section Representation ----------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/MC/MCSectionELF.h"
-#include "llvm/ADT/Twine.h"
-#include "llvm/BinaryFormat/ELF.h"
-#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/MC/MCExpr.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/TargetParser/Triple.h"
-#include <cassert>
-
-using namespace llvm;
-
-// Decides whether a '.section' directive
-// should be printed before the section name.
-bool MCSectionELF::shouldOmitSectionDirective(StringRef Name,
- const MCAsmInfo &MAI) const {
- if (isUnique())
- return false;
-
- return MAI.shouldOmitSectionDirective(Name);
-}
-
-static void printName(raw_ostream &OS, StringRef Name) {
- if (Name.find_first_not_of("0123456789_."
- "abcdefghijklmnopqrstuvwxyz"
- "ABCDEFGHIJKLMNOPQRSTUVWXYZ") == Name.npos) {
- OS << Name;
- return;
- }
- OS << '"';
- for (const char *B = Name.begin(), *E = Name.end(); B < E; ++B) {
- if (*B == '"') // Unquoted "
- OS << "\\\"";
- else if (*B != '\\') // Neither " or backslash
- OS << *B;
- else if (B + 1 == E) // Trailing backslash
- OS << "\\\\";
- else {
- OS << B[0] << B[1]; // Quoted character
- ++B;
- }
- }
- OS << '"';
-}
-
-void MCSectionELF::printSwitchToSection(const MCAsmInfo &MAI, const Triple &T,
- raw_ostream &OS,
- uint32_t Subsection) const {
- if (shouldOmitSectionDirective(getName(), MAI)) {
- OS << '\t' << getName();
- if (Subsection)
- OS << '\t' << Subsection;
- OS << '\n';
- return;
- }
-
- OS << "\t.section\t";
- printName(OS, getName());
-
- // Handle the weird solaris syntax if desired.
- if (MAI.usesSunStyleELFSectionSwitchSyntax() &&
- !(Flags & ELF::SHF_MERGE)) {
- if (Flags & ELF::SHF_ALLOC)
- OS << ",#alloc";
- if (Flags & ELF::SHF_EXECINSTR)
- OS << ",#execinstr";
- if (Flags & ELF::SHF_WRITE)
- OS << ",#write";
- if (Flags & ELF::SHF_EXCLUDE)
- OS << ",#exclude";
- if (Flags & ELF::SHF_TLS)
- OS << ",#tls";
- OS << '\n';
- return;
- }
-
- OS << ",\"";
- if (Flags & ELF::SHF_ALLOC)
- OS << 'a';
- if (Flags & ELF::SHF_EXCLUDE)
- OS << 'e';
- if (Flags & ELF::SHF_EXECINSTR)
- OS << 'x';
- if (Flags & ELF::SHF_WRITE)
- OS << 'w';
- if (Flags & ELF::SHF_MERGE)
- OS << 'M';
- if (Flags & ELF::SHF_STRINGS)
- OS << 'S';
- if (Flags & ELF::SHF_TLS)
- OS << 'T';
- if (Flags & ELF::SHF_LINK_ORDER)
- OS << 'o';
- if (Flags & ELF::SHF_GROUP)
- OS << 'G';
- if (Flags & ELF::SHF_GNU_RETAIN)
- OS << 'R';
-
- // If there are os-specific flags, print them.
- if (T.isOSSolaris())
- if (Flags & ELF::SHF_SUNW_NODISCARD)
- OS << 'R';
-
- // If there are target-specific flags, print them.
- Triple::ArchType Arch = T.getArch();
- if (Arch == Triple::xcore) {
- if (Flags & ELF::XCORE_SHF_CP_SECTION)
- OS << 'c';
- if (Flags & ELF::XCORE_SHF_DP_SECTION)
- OS << 'd';
- } else if (T.isARM() || T.isThumb()) {
- if (Flags & ELF::SHF_ARM_PURECODE)
- OS << 'y';
- } else if (T.isAArch64()) {
- if (Flags & ELF::SHF_AARCH64_PURECODE)
- OS << 'y';
- } else if (Arch == Triple::hexagon) {
- if (Flags & ELF::SHF_HEX_GPREL)
- OS << 's';
- } else if (Arch == Triple::x86_64) {
- if (Flags & ELF::SHF_X86_64_LARGE)
- OS << 'l';
- }
-
- OS << '"';
-
- OS << ',';
-
- // If comment string is '@', e.g. as on ARM - use '%' instead
- if (MAI.getCommentString()[0] == '@')
- OS << '%';
- else
- OS << '@';
-
- if (Type == ELF::SHT_INIT_ARRAY)
- OS << "init_array";
- else if (Type == ELF::SHT_FINI_ARRAY)
- OS << "fini_array";
- else if (Type == ELF::SHT_PREINIT_ARRAY)
- OS << "preinit_array";
- else if (Type == ELF::SHT_NOBITS)
- OS << "nobits";
- else if (Type == ELF::SHT_NOTE)
- OS << "note";
- else if (Type == ELF::SHT_PROGBITS)
- OS << "progbits";
- else if (Type == ELF::SHT_X86_64_UNWIND)
- OS << "unwind";
- else if (Type == ELF::SHT_MIPS_DWARF)
- // Print hex value of the flag while we do not have
- // any standard symbolic representation of the flag.
- OS << "0x7000001e";
- else if (Type == ELF::SHT_LLVM_ODRTAB)
- OS << "llvm_odrtab";
- else if (Type == ELF::SHT_LLVM_LINKER_OPTIONS)
- OS << "llvm_linker_options";
- else if (Type == ELF::SHT_LLVM_CALL_GRAPH_PROFILE)
- OS << "llvm_call_graph_profile";
- else if (Type == ELF::SHT_LLVM_DEPENDENT_LIBRARIES)
- OS << "llvm_dependent_libraries";
- else if (Type == ELF::SHT_LLVM_SYMPART)
- OS << "llvm_sympart";
- else if (Type == ELF::SHT_LLVM_BB_ADDR_MAP)
- OS << "llvm_bb_addr_map";
- else if (Type == ELF::SHT_LLVM_OFFLOADING)
- OS << "llvm_offloading";
- else if (Type == ELF::SHT_LLVM_LTO)
- OS << "llvm_lto";
- else if (Type == ELF::SHT_LLVM_JT_SIZES)
- OS << "llvm_jt_sizes";
- else if (Type == ELF::SHT_LLVM_CFI_JUMP_TABLE)
- OS << "llvm_cfi_jump_table";
- else
- OS << "0x" << Twine::utohexstr(Type);
-
- if (EntrySize) {
- assert((Flags & ELF::SHF_MERGE) || Type == ELF::SHT_LLVM_CFI_JUMP_TABLE);
- OS << "," << EntrySize;
- }
-
- if (Flags & ELF::SHF_LINK_ORDER) {
- OS << ",";
- if (LinkedToSym)
- printName(OS, LinkedToSym->getName());
- else
- OS << '0';
- }
-
- if (Flags & ELF::SHF_GROUP) {
- OS << ",";
- printName(OS, Group.getPointer()->getName());
- if (isComdat())
- OS << ",comdat";
- }
-
- if (isUnique())
- OS << ",unique," << UniqueID;
-
- OS << '\n';
-
- if (Subsection) {
- OS << "\t.subsection\t" << Subsection;
- OS << '\n';
- }
-}
-
-bool MCSectionELF::useCodeAlign() const {
- return getFlags() & ELF::SHF_EXECINSTR;
-}
diff --git a/llvm/lib/MC/MCSectionGOFF.cpp b/llvm/lib/MC/MCSectionGOFF.cpp
deleted file mode 100644
index 8163e5b..0000000
--- a/llvm/lib/MC/MCSectionGOFF.cpp
+++ /dev/null
@@ -1,143 +0,0 @@
-//===- MCSectionGOFF.cpp - GOFF Code Section Representation ---------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/MC/MCSectionGOFF.h"
-#include "llvm/BinaryFormat/GOFF.h"
-#include "llvm/Support/raw_ostream.h"
-
-using namespace llvm;
-
-static void emitCATTR(raw_ostream &OS, StringRef Name, GOFF::ESDRmode Rmode,
- GOFF::ESDAlignment Alignment,
- GOFF::ESDLoadingBehavior LoadBehavior,
- GOFF::ESDExecutable Executable, bool IsReadOnly,
- uint32_t SortKey, uint8_t FillByteValue,
- StringRef PartName) {
- OS << Name << " CATTR ";
- OS << "ALIGN(" << static_cast<unsigned>(Alignment) << "),"
- << "FILL(" << static_cast<unsigned>(FillByteValue) << ")";
- switch (LoadBehavior) {
- case GOFF::ESD_LB_Deferred:
- OS << ",DEFLOAD";
- break;
- case GOFF::ESD_LB_NoLoad:
- OS << ",NOLOAD";
- break;
- default:
- break;
- }
- switch (Executable) {
- case GOFF::ESD_EXE_CODE:
- OS << ",EXECUTABLE";
- break;
- case GOFF::ESD_EXE_DATA:
- OS << ",NOTEXECUTABLE";
- break;
- default:
- break;
- }
- if (IsReadOnly)
- OS << ",READONLY";
- if (Rmode != GOFF::ESD_RMODE_None) {
- OS << ',';
- OS << "RMODE(";
- switch (Rmode) {
- case GOFF::ESD_RMODE_24:
- OS << "24";
- break;
- case GOFF::ESD_RMODE_31:
- OS << "31";
- break;
- case GOFF::ESD_RMODE_64:
- OS << "64";
- break;
- case GOFF::ESD_RMODE_None:
- break;
- }
- OS << ')';
- }
- if (SortKey)
- OS << ",PRIORITY(" << SortKey << ")";
- if (!PartName.empty())
- OS << ",PART(" << PartName << ")";
- OS << '\n';
-}
-
-static void emitXATTR(raw_ostream &OS, StringRef Name,
- GOFF::ESDLinkageType Linkage,
- GOFF::ESDExecutable Executable,
- GOFF::ESDBindingScope BindingScope) {
- OS << Name << " XATTR ";
- OS << "LINKAGE(" << (Linkage == GOFF::ESD_LT_OS ? "OS" : "XPLINK") << "),";
- if (Executable != GOFF::ESD_EXE_Unspecified)
- OS << "REFERENCE(" << (Executable == GOFF::ESD_EXE_CODE ? "CODE" : "DATA")
- << "),";
- if (BindingScope != GOFF::ESD_BSC_Unspecified) {
- OS << "SCOPE(";
- switch (BindingScope) {
- case GOFF::ESD_BSC_Section:
- OS << "SECTION";
- break;
- case GOFF::ESD_BSC_Module:
- OS << "MODULE";
- break;
- case GOFF::ESD_BSC_Library:
- OS << "LIBRARY";
- break;
- case GOFF::ESD_BSC_ImportExport:
- OS << "EXPORT";
- break;
- default:
- break;
- }
- OS << ')';
- }
- OS << '\n';
-}
-
-void MCSectionGOFF::printSwitchToSection(const MCAsmInfo &MAI, const Triple &T,
- raw_ostream &OS,
- uint32_t Subsection) const {
- switch (SymbolType) {
- case GOFF::ESD_ST_SectionDefinition: {
- OS << Name << " CSECT\n";
- Emitted = true;
- break;
- }
- case GOFF::ESD_ST_ElementDefinition: {
- getParent()->printSwitchToSection(MAI, T, OS, Subsection);
- if (!Emitted) {
- emitCATTR(OS, Name, EDAttributes.Rmode, EDAttributes.Alignment,
- EDAttributes.LoadBehavior, GOFF::ESD_EXE_Unspecified,
- EDAttributes.IsReadOnly, 0, EDAttributes.FillByteValue,
- StringRef());
- Emitted = true;
- } else
- OS << Name << " CATTR\n";
- break;
- }
- case GOFF::ESD_ST_PartReference: {
- MCSectionGOFF *ED = getParent();
- ED->getParent()->printSwitchToSection(MAI, T, OS, Subsection);
- if (!Emitted) {
- emitCATTR(OS, ED->getName(), ED->getEDAttributes().Rmode,
- ED->EDAttributes.Alignment, ED->EDAttributes.LoadBehavior,
- PRAttributes.Executable, ED->EDAttributes.IsReadOnly,
- PRAttributes.SortKey, ED->EDAttributes.FillByteValue, Name);
- emitXATTR(OS, Name, PRAttributes.Linkage, PRAttributes.Executable,
- PRAttributes.BindingScope);
- ED->Emitted = true;
- Emitted = true;
- } else
- OS << ED->getName() << " CATTR PART(" << Name << ")\n";
- break;
- }
- default:
- llvm_unreachable("Wrong section type");
- }
-} \ No newline at end of file
diff --git a/llvm/lib/MC/MCSectionMachO.cpp b/llvm/lib/MC/MCSectionMachO.cpp
index 67453ce..67c8235 100644
--- a/llvm/lib/MC/MCSectionMachO.cpp
+++ b/llvm/lib/MC/MCSectionMachO.cpp
@@ -7,6 +7,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCAsmInfoDarwin.h"
#include "llvm/MC/SectionKind.h"
#include "llvm/Support/raw_ostream.h"
@@ -92,7 +93,7 @@ ENTRY("" /*FIXME*/, S_ATTR_LOC_RELOC)
MCSectionMachO::MCSectionMachO(StringRef Segment, StringRef Section,
unsigned TAA, unsigned reserved2, SectionKind K,
MCSymbol *Begin)
- : MCSection(SV_MachO, Section, K.isText(),
+ : MCSection(Section, K.isText(),
MachO::isVirtualSection(TAA & MachO::SECTION_TYPE), Begin),
TypeAndAttributes(TAA), Reserved2(reserved2) {
assert(Segment.size() <= 16 && Section.size() <= 16 &&
@@ -105,19 +106,20 @@ MCSectionMachO::MCSectionMachO(StringRef Segment, StringRef Section,
}
}
-void MCSectionMachO::printSwitchToSection(const MCAsmInfo &MAI, const Triple &T,
- raw_ostream &OS,
- uint32_t Subsection) const {
- OS << "\t.section\t" << getSegmentName() << ',' << getName();
+void MCAsmInfoDarwin::printSwitchToSection(const MCSection &Section, uint32_t,
+ const Triple &T,
+ raw_ostream &OS) const {
+ auto &Sec = static_cast<const MCSectionMachO &>(Section);
+ OS << "\t.section\t" << Sec.getSegmentName() << ',' << Sec.getName();
// Get the section type and attributes.
- unsigned TAA = getTypeAndAttributes();
+ unsigned TAA = Sec.getTypeAndAttributes();
if (TAA == 0) {
OS << '\n';
return;
}
- MachO::SectionType SectionType = getType();
+ MachO::SectionType SectionType = Sec.getType();
assert(SectionType <= MachO::LAST_KNOWN_SECTION_TYPE &&
"Invalid SectionType specified!");
@@ -135,8 +137,8 @@ void MCSectionMachO::printSwitchToSection(const MCAsmInfo &MAI, const Triple &T,
if (SectionAttrs == 0) {
// If we have a S_SYMBOL_STUBS size specified, print it along with 'none' as
// the attribute specifier.
- if (Reserved2 != 0)
- OS << ",none," << Reserved2;
+ if (Sec.Reserved2 != 0)
+ OS << ",none," << Sec.Reserved2;
OS << '\n';
return;
}
@@ -164,15 +166,11 @@ void MCSectionMachO::printSwitchToSection(const MCAsmInfo &MAI, const Triple &T,
assert(SectionAttrs == 0 && "Unknown section attributes!");
// If we have a S_SYMBOL_STUBS size specified, print it.
- if (Reserved2 != 0)
- OS << ',' << Reserved2;
+ if (Sec.Reserved2 != 0)
+ OS << ',' << Sec.Reserved2;
OS << '\n';
}
-bool MCSectionMachO::useCodeAlign() const {
- return hasAttribute(MachO::S_ATTR_PURE_INSTRUCTIONS);
-}
-
/// ParseSectionSpecifier - Parse the section specifier indicated by "Spec".
/// This is a string that can appear after a .section directive in a mach-o
/// flavored .s file. If successful, this fills in the specified Out
diff --git a/llvm/lib/MC/MCSectionWasm.cpp b/llvm/lib/MC/MCSectionWasm.cpp
deleted file mode 100644
index e25af1c..0000000
--- a/llvm/lib/MC/MCSectionWasm.cpp
+++ /dev/null
@@ -1,101 +0,0 @@
-//===- lib/MC/MCSectionWasm.cpp - Wasm Code Section Representation --------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/MC/MCSectionWasm.h"
-#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/MC/MCExpr.h"
-#include "llvm/MC/MCSymbolWasm.h"
-#include "llvm/Support/raw_ostream.h"
-
-using namespace llvm;
-
-// Decides whether a '.section' directive
-// should be printed before the section name.
-bool MCSectionWasm::shouldOmitSectionDirective(StringRef Name,
- const MCAsmInfo &MAI) const {
- return MAI.shouldOmitSectionDirective(Name);
-}
-
-static void printName(raw_ostream &OS, StringRef Name) {
- if (Name.find_first_not_of("0123456789_."
- "abcdefghijklmnopqrstuvwxyz"
- "ABCDEFGHIJKLMNOPQRSTUVWXYZ") == Name.npos) {
- OS << Name;
- return;
- }
- OS << '"';
- for (const char *B = Name.begin(), *E = Name.end(); B < E; ++B) {
- if (*B == '"') // Unquoted "
- OS << "\\\"";
- else if (*B != '\\') // Neither " or backslash
- OS << *B;
- else if (B + 1 == E) // Trailing backslash
- OS << "\\\\";
- else {
- OS << B[0] << B[1]; // Quoted character
- ++B;
- }
- }
- OS << '"';
-}
-
-void MCSectionWasm::printSwitchToSection(const MCAsmInfo &MAI, const Triple &T,
- raw_ostream &OS,
- uint32_t Subsection) const {
-
- if (shouldOmitSectionDirective(getName(), MAI)) {
- OS << '\t' << getName();
- if (Subsection)
- OS << '\t' << Subsection;
- OS << '\n';
- return;
- }
-
- OS << "\t.section\t";
- printName(OS, getName());
- OS << ",\"";
-
- if (IsPassive)
- OS << 'p';
- if (Group)
- OS << 'G';
- if (SegmentFlags & wasm::WASM_SEG_FLAG_STRINGS)
- OS << 'S';
- if (SegmentFlags & wasm::WASM_SEG_FLAG_TLS)
- OS << 'T';
- if (SegmentFlags & wasm::WASM_SEG_FLAG_RETAIN)
- OS << 'R';
-
- OS << '"';
-
- OS << ',';
-
- // If comment string is '@', e.g. as on ARM - use '%' instead
- if (MAI.getCommentString()[0] == '@')
- OS << '%';
- else
- OS << '@';
-
- // TODO: Print section type.
-
- if (Group) {
- OS << ",";
- printName(OS, Group->getName());
- OS << ",comdat";
- }
-
- if (isUnique())
- OS << ",unique," << UniqueID;
-
- OS << '\n';
-
- if (Subsection)
- OS << "\t.subsection\t" << Subsection << '\n';
-}
-
-bool MCSectionWasm::useCodeAlign() const { return false; }
diff --git a/llvm/lib/MC/MCSectionXCOFF.cpp b/llvm/lib/MC/MCSectionXCOFF.cpp
deleted file mode 100644
index 41043b2..0000000
--- a/llvm/lib/MC/MCSectionXCOFF.cpp
+++ /dev/null
@@ -1,134 +0,0 @@
-//===- lib/MC/MCSectionXCOFF.cpp - XCOFF Code Section Representation ------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/MC/MCSectionXCOFF.h"
-#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/Support/Format.h"
-#include "llvm/Support/raw_ostream.h"
-namespace llvm {
-class MCExpr;
-class Triple;
-} // namespace llvm
-
-using namespace llvm;
-
-MCSectionXCOFF::~MCSectionXCOFF() = default;
-
-void MCSectionXCOFF::printCsectDirective(raw_ostream &OS) const {
- OS << "\t.csect " << QualName->getName() << "," << Log2(getAlign()) << '\n';
-}
-
-void MCSectionXCOFF::printSwitchToSection(const MCAsmInfo &MAI, const Triple &T,
- raw_ostream &OS,
- uint32_t Subsection) const {
- if (getKind().isText()) {
- if (getMappingClass() != XCOFF::XMC_PR)
- report_fatal_error("Unhandled storage-mapping class for .text csect");
-
- printCsectDirective(OS);
- return;
- }
-
- if (getKind().isReadOnly()) {
- if (getMappingClass() != XCOFF::XMC_RO &&
- getMappingClass() != XCOFF::XMC_TD)
- report_fatal_error("Unhandled storage-mapping class for .rodata csect.");
- printCsectDirective(OS);
- return;
- }
-
- if (getKind().isReadOnlyWithRel()) {
- if (getMappingClass() != XCOFF::XMC_RW &&
- getMappingClass() != XCOFF::XMC_RO &&
- getMappingClass() != XCOFF::XMC_TD)
- report_fatal_error(
- "Unexepected storage-mapping class for ReadOnlyWithRel kind");
- printCsectDirective(OS);
- return;
- }
-
- // Initialized TLS data.
- if (getKind().isThreadData()) {
- // We only expect XMC_TL here for initialized TLS data.
- if (getMappingClass() != XCOFF::XMC_TL)
- report_fatal_error("Unhandled storage-mapping class for .tdata csect.");
- printCsectDirective(OS);
- return;
- }
-
- if (getKind().isData()) {
- switch (getMappingClass()) {
- case XCOFF::XMC_RW:
- case XCOFF::XMC_DS:
- case XCOFF::XMC_TD:
- printCsectDirective(OS);
- break;
- case XCOFF::XMC_TC:
- case XCOFF::XMC_TE:
- break;
- case XCOFF::XMC_TC0:
- OS << "\t.toc\n";
- break;
- default:
- report_fatal_error(
- "Unhandled storage-mapping class for .data csect.");
- }
- return;
- }
-
- if (isCsect() && getMappingClass() == XCOFF::XMC_TD) {
- // Common csect type (uninitialized storage) does not have to print csect
- // directive for section switching unless it is local.
- if (getKind().isCommon() && !getKind().isBSSLocal())
- return;
-
- assert(getKind().isBSS() && "Unexpected section kind for toc-data");
- printCsectDirective(OS);
- return;
- }
- // Common csect type (uninitialized storage) does not have to print csect
- // directive for section switching.
- if (isCsect() && getCSectType() == XCOFF::XTY_CM) {
- assert((getMappingClass() == XCOFF::XMC_RW ||
- getMappingClass() == XCOFF::XMC_BS ||
- getMappingClass() == XCOFF::XMC_UL) &&
- "Generated a storage-mapping class for a common/bss/tbss csect we "
- "don't "
- "understand how to switch to.");
- // Common symbols and local zero-initialized symbols for TLS and Non-TLS are
- // eligible for .bss/.tbss csect, getKind().isThreadBSS() is used to cover
- // TLS common and zero-initialized local symbols since linkage type (in the
- // GlobalVariable) is not accessible in this class.
- assert((getKind().isBSSLocal() || getKind().isCommon() ||
- getKind().isThreadBSS()) &&
- "wrong symbol type for .bss/.tbss csect");
- // Don't have to print a directive for switching to section for commons and
- // zero-initialized TLS data. The '.comm' and '.lcomm' directives of the
- // variable will create the needed csect.
- return;
- }
-
- // Zero-initialized TLS data with weak or external linkage are not eligible to
- // be put into common csect.
- if (getKind().isThreadBSS()) {
- printCsectDirective(OS);
- return;
- }
-
- // XCOFF debug sections.
- if (getKind().isMetadata() && isDwarfSect()) {
- OS << "\n\t.dwsect " << format("0x%" PRIx32, *getDwarfSubtypeFlags())
- << '\n';
- OS << getName() << ':' << '\n';
- return;
- }
-
- report_fatal_error("Printing for this SectionKind is unimplemented.");
-}
-
-bool MCSectionXCOFF::useCodeAlign() const { return getKind().isText(); }
diff --git a/llvm/lib/MC/MCStreamer.cpp b/llvm/lib/MC/MCStreamer.cpp
index 899a7df..bc73981 100644
--- a/llvm/lib/MC/MCStreamer.cpp
+++ b/llvm/lib/MC/MCStreamer.cpp
@@ -56,12 +56,11 @@ void MCTargetStreamer::finish() {}
void MCTargetStreamer::emitConstantPools() {}
-void MCTargetStreamer::changeSection(const MCSection *CurSection,
- MCSection *Section, uint32_t Subsection,
- raw_ostream &OS) {
- Section->printSwitchToSection(*Streamer.getContext().getAsmInfo(),
- Streamer.getContext().getTargetTriple(), OS,
- Subsection);
+void MCTargetStreamer::changeSection(const MCSection *, MCSection *Sec,
+ uint32_t Subsection, raw_ostream &OS) {
+ auto &MAI = *Streamer.getContext().getAsmInfo();
+ MAI.printSwitchToSection(*Sec, Subsection,
+ Streamer.getContext().getTargetTriple(), OS);
}
void MCTargetStreamer::emitDwarfFileDirective(StringRef Directive) {
@@ -838,8 +837,8 @@ static MCSection *getWinCFISection(MCContext &Context, unsigned *NextWinCFIID,
if (TextSec == Context.getObjectFileInfo()->getTextSection())
return MainCFISec;
- const auto *TextSecCOFF = cast<MCSectionCOFF>(TextSec);
- auto *MainCFISecCOFF = cast<MCSectionCOFF>(MainCFISec);
+ const auto *TextSecCOFF = static_cast<const MCSectionCOFF *>(TextSec);
+ auto *MainCFISecCOFF = static_cast<MCSectionCOFF *>(MainCFISec);
unsigned UniqueID = TextSecCOFF->getOrAssignWinCFISectionID(NextWinCFIID);
// If this section is COMDAT, this unwind section should be COMDAT associative
@@ -1314,11 +1313,20 @@ void MCStreamer::emitZerofill(MCSection *, MCSymbol *, uint64_t, Align, SMLoc) {
}
void MCStreamer::emitTBSSSymbol(MCSection *Section, MCSymbol *Symbol,
uint64_t Size, Align ByteAlignment) {}
+
void MCStreamer::changeSection(MCSection *Sec, uint32_t) {
CurFrag = &Sec->getDummyFragment();
- if (auto *Sym = Sec->getBeginSymbol())
- Sym->setFragment(&Sec->getDummyFragment());
+ auto *Sym = Sec->getBeginSymbol();
+ if (!Sym || !Sym->isUndefined())
+ return;
+ // In Mach-O, DWARF sections use Begin as a temporary label, requiring a label
+ // definition, unlike section symbols in other file formats.
+ if (getContext().getObjectFileType() == MCContext::IsMachO)
+ emitLabel(Sym);
+ else
+ Sym->setFragment(CurFrag);
}
+
void MCStreamer::emitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol) {}
void MCStreamer::emitBytes(StringRef Data) {}
void MCStreamer::emitBinaryData(StringRef Data) { emitBytes(Data); }
diff --git a/llvm/lib/MC/MCWasmStreamer.cpp b/llvm/lib/MC/MCWasmStreamer.cpp
index 5891420c..e3ef111 100644
--- a/llvm/lib/MC/MCWasmStreamer.cpp
+++ b/llvm/lib/MC/MCWasmStreamer.cpp
@@ -58,7 +58,7 @@ void MCWasmStreamer::emitLabelAtPos(MCSymbol *S, SMLoc Loc, MCFragment &F,
void MCWasmStreamer::changeSection(MCSection *Section, uint32_t Subsection) {
MCAssembler &Asm = getAssembler();
- auto *SectionWasm = cast<MCSectionWasm>(Section);
+ auto *SectionWasm = static_cast<const MCSectionWasm *>(Section);
const MCSymbol *Grp = SectionWasm->getGroup();
if (Grp)
Asm.registerSymbol(*Grp);
diff --git a/llvm/lib/MC/MCWinCOFFStreamer.cpp b/llvm/lib/MC/MCWinCOFFStreamer.cpp
index 9369bea..1ffe25c 100644
--- a/llvm/lib/MC/MCWinCOFFStreamer.cpp
+++ b/llvm/lib/MC/MCWinCOFFStreamer.cpp
@@ -157,7 +157,8 @@ void MCWinCOFFStreamer::changeSection(MCSection *Section, uint32_t Subsection) {
// Ensure that the first and the second symbols relative to the section are
// the section symbol and the COMDAT symbol.
getAssembler().registerSymbol(*Section->getBeginSymbol());
- if (auto *Sym = cast<MCSectionCOFF>(Section)->getCOMDATSymbol())
+ if (auto *Sym =
+ static_cast<const MCSectionCOFF *>(Section)->getCOMDATSymbol())
getAssembler().registerSymbol(*Sym);
}
diff --git a/llvm/lib/MC/MCXCOFFStreamer.cpp b/llvm/lib/MC/MCXCOFFStreamer.cpp
index 78e4b95..898ac5d 100644
--- a/llvm/lib/MC/MCXCOFFStreamer.cpp
+++ b/llvm/lib/MC/MCXCOFFStreamer.cpp
@@ -38,7 +38,7 @@ XCOFFObjectWriter &MCXCOFFStreamer::getWriter() {
void MCXCOFFStreamer::changeSection(MCSection *Section, uint32_t Subsection) {
MCObjectStreamer::changeSection(Section, Subsection);
- auto *Sec = cast<MCSectionXCOFF>(Section);
+ auto *Sec = static_cast<const MCSectionXCOFF *>(Section);
// We might miss calculating the symbols difference as absolute value before
// adding fixups when symbol_A without the fragment set is the csect itself
// and symbol_B is in it.
diff --git a/llvm/lib/MC/MachObjectWriter.cpp b/llvm/lib/MC/MachObjectWriter.cpp
index 48d2fc6..7b5c3c0 100644
--- a/llvm/lib/MC/MachObjectWriter.cpp
+++ b/llvm/lib/MC/MachObjectWriter.cpp
@@ -126,7 +126,8 @@ uint64_t MachObjectWriter::getSymbolAddress(const MCSymbol &S) const {
uint64_t MachObjectWriter::getPaddingSize(const MCAssembler &Asm,
const MCSection *Sec) const {
uint64_t EndAddr = getSectionAddress(Sec) + Asm.getSectionAddressSize(*Sec);
- unsigned Next = cast<MCSectionMachO>(Sec)->getLayoutOrder() + 1;
+ unsigned Next =
+ static_cast<const MCSectionMachO *>(Sec)->getLayoutOrder() + 1;
if (Next >= SectionOrder.size())
return 0;
@@ -259,15 +260,12 @@ void MachObjectWriter::writeSegmentLoadCommand(
}
void MachObjectWriter::writeSection(const MCAssembler &Asm,
- const MCSection &Sec, uint64_t VMAddr,
+ const MCSectionMachO &Sec, uint64_t VMAddr,
uint64_t FileOffset, unsigned Flags,
uint64_t RelocationsStart,
unsigned NumRelocations) {
- uint64_t SectionSize = Asm.getSectionAddressSize(Sec);
- const MCSectionMachO &Section = cast<MCSectionMachO>(Sec);
-
// The offset is unused for virtual sections.
- if (Section.isBssSection()) {
+ if (Sec.isBssSection()) {
assert(Asm.getSectionFileSize(Sec) == 0 && "Invalid file size!");
FileOffset = 0;
}
@@ -275,11 +273,11 @@ void MachObjectWriter::writeSection(const MCAssembler &Asm,
// struct section (68 bytes) or
// struct section_64 (80 bytes)
+ uint64_t SectionSize = Asm.getSectionAddressSize(Sec);
uint64_t Start = W.OS.tell();
(void) Start;
-
- writeWithPadding(Section.getName(), 16);
- writeWithPadding(Section.getSegmentName(), 16);
+ writeWithPadding(Sec.getName(), 16);
+ writeWithPadding(Sec.getSegmentName(), 16);
if (is64Bit()) {
W.write<uint64_t>(VMAddr); // address
W.write<uint64_t>(SectionSize); // size
@@ -290,14 +288,14 @@ void MachObjectWriter::writeSection(const MCAssembler &Asm,
assert(isUInt<32>(FileOffset) && "Cannot encode offset of section");
W.write<uint32_t>(FileOffset);
- W.write<uint32_t>(Log2(Section.getAlign()));
+ W.write<uint32_t>(Log2(Sec.getAlign()));
assert((!NumRelocations || isUInt<32>(RelocationsStart)) &&
"Cannot encode offset of relocations");
W.write<uint32_t>(NumRelocations ? RelocationsStart : 0);
W.write<uint32_t>(NumRelocations);
W.write<uint32_t>(Flags);
W.write<uint32_t>(IndirectSymBase.lookup(&Sec)); // reserved1
- W.write<uint32_t>(Section.getStubSize()); // reserved2
+ W.write<uint32_t>(Sec.getStubSize()); // reserved2
if (is64Bit())
W.write<uint32_t>(0); // reserved3
@@ -531,7 +529,7 @@ void MachObjectWriter::bindIndirectSymbols(MCAssembler &Asm) {
// Report errors for use of .indirect_symbol not in a symbol pointer section
// or stub section.
for (IndirectSymbolData &ISD : IndirectSymbols) {
- const MCSectionMachO &Section = cast<MCSectionMachO>(*ISD.Section);
+ const MCSectionMachO &Section = static_cast<MCSectionMachO &>(*ISD.Section);
if (Section.getType() != MachO::S_NON_LAZY_SYMBOL_POINTERS &&
Section.getType() != MachO::S_LAZY_SYMBOL_POINTERS &&
@@ -545,7 +543,7 @@ void MachObjectWriter::bindIndirectSymbols(MCAssembler &Asm) {
// Bind non-lazy symbol pointers first.
for (auto [IndirectIndex, ISD] : enumerate(IndirectSymbols)) {
- const auto &Section = cast<MCSectionMachO>(*ISD.Section);
+ const auto &Section = static_cast<MCSectionMachO &>(*ISD.Section);
if (Section.getType() != MachO::S_NON_LAZY_SYMBOL_POINTERS &&
Section.getType() != MachO::S_THREAD_LOCAL_VARIABLE_POINTERS)
@@ -559,7 +557,7 @@ void MachObjectWriter::bindIndirectSymbols(MCAssembler &Asm) {
// Then lazy symbol pointers and symbol stubs.
for (auto [IndirectIndex, ISD] : enumerate(IndirectSymbols)) {
- const auto &Section = cast<MCSectionMachO>(*ISD.Section);
+ const auto &Section = static_cast<MCSectionMachO &>(*ISD.Section);
if (Section.getType() != MachO::S_LAZY_SYMBOL_POINTERS &&
Section.getType() != MachO::S_SYMBOL_STUBS)
@@ -684,13 +682,13 @@ void MachObjectWriter::computeSectionAddresses(const MCAssembler &Asm) {
for (MCSection &Sec : Asm) {
if (!Sec.isBssSection()) {
SectionOrder.push_back(&Sec);
- cast<MCSectionMachO>(Sec).setLayoutOrder(i++);
+ static_cast<MCSectionMachO &>(Sec).setLayoutOrder(i++);
}
}
for (MCSection &Sec : Asm) {
if (Sec.isBssSection()) {
SectionOrder.push_back(&Sec);
- cast<MCSectionMachO>(Sec).setLayoutOrder(i++);
+ static_cast<MCSectionMachO &>(Sec).setLayoutOrder(i++);
}
}
@@ -907,7 +905,7 @@ uint64_t MachObjectWriter::writeObject() {
// ... and then the section headers.
uint64_t RelocTableEnd = SectionDataStart + SectionDataFileSize;
for (const MCSection &Section : Asm) {
- const auto &Sec = cast<MCSectionMachO>(Section);
+ const auto &Sec = static_cast<const MCSectionMachO &>(Section);
std::vector<RelAndSymbol> &Relocs = Relocations[&Sec];
unsigned NumRelocs = Relocs.size();
uint64_t SectionStart = SectionDataStart + getSectionAddress(&Sec);
diff --git a/llvm/lib/MC/WasmObjectWriter.cpp b/llvm/lib/MC/WasmObjectWriter.cpp
index 3b99af4..bfd6334 100644
--- a/llvm/lib/MC/WasmObjectWriter.cpp
+++ b/llvm/lib/MC/WasmObjectWriter.cpp
@@ -480,7 +480,7 @@ void WasmObjectWriter::recordRelocation(const MCFragment &F,
// The WebAssembly backend should never generate FKF_IsPCRel fixups
assert(!Fixup.isPCRel());
- const auto &FixupSection = cast<MCSectionWasm>(*F.getParent());
+ const auto &FixupSection = static_cast<MCSectionWasm &>(*F.getParent());
uint64_t C = Target.getConstant();
uint64_t FixupOffset = Asm->getFragmentOffset(F) + Fixup.getOffset();
MCContext &Ctx = getContext();
diff --git a/llvm/lib/MC/WinCOFFObjectWriter.cpp b/llvm/lib/MC/WinCOFFObjectWriter.cpp
index 6ad4334..856850d 100644
--- a/llvm/lib/MC/WinCOFFObjectWriter.cpp
+++ b/llvm/lib/MC/WinCOFFObjectWriter.cpp
@@ -373,7 +373,7 @@ void WinCOFFWriter::defineSymbol(const MCSymbol &MCSym) {
COFFSection *Sec = nullptr;
MCSectionCOFF *MCSec = nullptr;
if (Base && Base->getFragment()) {
- MCSec = cast<MCSectionCOFF>(Base->getFragment()->getParent());
+ MCSec = static_cast<MCSectionCOFF *>(Base->getFragment()->getParent());
Sec = SectionMap[MCSec];
}
@@ -1057,7 +1057,8 @@ uint64_t WinCOFFWriter::writeObject() {
continue;
}
- const auto *AssocMCSec = cast<MCSectionCOFF>(&AssocMCSym->getSection());
+ const auto *AssocMCSec =
+ static_cast<const MCSectionCOFF *>(&AssocMCSym->getSection());
assert(SectionMap.count(AssocMCSec));
COFFSection *AssocSec = SectionMap[AssocMCSec];
diff --git a/llvm/lib/MC/XCOFFObjectWriter.cpp b/llvm/lib/MC/XCOFFObjectWriter.cpp
index 2f6785f..65f543b 100644
--- a/llvm/lib/MC/XCOFFObjectWriter.cpp
+++ b/llvm/lib/MC/XCOFFObjectWriter.cpp
@@ -550,13 +550,13 @@ CsectGroup &XCOFFWriter::getCsectGroup(const MCSectionXCOFF *MCSec) {
static MCSectionXCOFF *getContainingCsect(const MCSymbolXCOFF *XSym) {
if (XSym->isDefined())
- return cast<MCSectionXCOFF>(XSym->getFragment()->getParent());
+ return static_cast<MCSectionXCOFF *>(XSym->getFragment()->getParent());
return XSym->getRepresentedCsect();
}
void XCOFFWriter::executePostLayoutBinding() {
for (const auto &S : *Asm) {
- const auto *MCSec = cast<const MCSectionXCOFF>(&S);
+ auto *MCSec = static_cast<const MCSectionXCOFF *>(&S);
assert(!SectionMap.contains(MCSec) && "Cannot add a section twice.");
// If the name does not fit in the storage provided in the symbol table
@@ -747,7 +747,7 @@ void XCOFFWriter::recordRelocation(const MCFragment &F, const MCFixup &Fixup,
FixedValue = TOCEntryOffset;
}
} else if (Type == XCOFF::RelocationType::R_RBR) {
- MCSectionXCOFF *ParentSec = cast<MCSectionXCOFF>(F.getParent());
+ auto *ParentSec = static_cast<MCSectionXCOFF *>(F.getParent());
assert((SymASec->getMappingClass() == XCOFF::XMC_PR &&
ParentSec->getMappingClass() == XCOFF::XMC_PR) &&
"Only XMC_PR csect may have the R_RBR relocation.");
@@ -768,7 +768,7 @@ void XCOFFWriter::recordRelocation(const MCFragment &F, const MCFixup &Fixup,
}
XCOFFRelocation Reloc = {Index, FixupOffsetInCsect, SignAndSize, Type};
- MCSectionXCOFF *RelocationSec = cast<MCSectionXCOFF>(F.getParent());
+ auto *RelocationSec = static_cast<MCSectionXCOFF *>(F.getParent());
assert(SectionMap.contains(RelocationSec) &&
"Expected containing csect to exist in map.");
SectionMap[RelocationSec]->Relocations.push_back(Reloc);
diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def
index bb7ccdb..fd89583 100644
--- a/llvm/lib/Passes/PassRegistry.def
+++ b/llvm/lib/Passes/PassRegistry.def
@@ -119,7 +119,6 @@ MODULE_PASS("module-inline", ModuleInlinerPass())
MODULE_PASS("name-anon-globals", NameAnonGlobalPass())
MODULE_PASS("no-op-module", NoOpModulePass())
MODULE_PASS("nsan", NumericalStabilitySanitizerPass())
-MODULE_PASS("objc-arc-apelim", ObjCARCAPElimPass())
MODULE_PASS("openmp-opt", OpenMPOptPass())
MODULE_PASS("openmp-opt-postlink",
OpenMPOptPass(ThinOrFullLTOPhase::FullLTOPostLink))
diff --git a/llvm/lib/Support/SpecialCaseList.cpp b/llvm/lib/Support/SpecialCaseList.cpp
index 2d91afb..8d4e043 100644
--- a/llvm/lib/Support/SpecialCaseList.cpp
+++ b/llvm/lib/Support/SpecialCaseList.cpp
@@ -79,28 +79,13 @@ unsigned SpecialCaseList::Matcher::match(StringRef Query) const {
// TODO: Refactor this to return Expected<...>
std::unique_ptr<SpecialCaseList>
SpecialCaseList::create(const std::vector<std::string> &Paths,
- llvm::vfs::FileSystem &FS,
- std::pair<unsigned, std::string> &Error) {
+ llvm::vfs::FileSystem &FS, std::string &Error) {
std::unique_ptr<SpecialCaseList> SCL(new SpecialCaseList());
if (SCL->createInternal(Paths, FS, Error))
return SCL;
return nullptr;
}
-std::unique_ptr<SpecialCaseList>
-SpecialCaseList::create(const std::vector<std::string> &Paths,
- llvm::vfs::FileSystem &FS, std::string &Error) {
- std::pair<unsigned, std::string> Err;
- std::unique_ptr<SpecialCaseList> SCL = create(Paths, FS, Err);
- if (!SCL) {
- assert(Err.first == 0 || Err.first == 1 && "Unexpected error kind");
- const char *Prefix =
- Err.first == 0 ? "can't open file " : "error parsing file ";
- Error = (Twine(Prefix) + Err.second).str();
- }
- return SCL;
-}
-
std::unique_ptr<SpecialCaseList> SpecialCaseList::create(const MemoryBuffer *MB,
std::string &Error) {
std::unique_ptr<SpecialCaseList> SCL(new SpecialCaseList());
@@ -112,28 +97,25 @@ std::unique_ptr<SpecialCaseList> SpecialCaseList::create(const MemoryBuffer *MB,
std::unique_ptr<SpecialCaseList>
SpecialCaseList::createOrDie(const std::vector<std::string> &Paths,
llvm::vfs::FileSystem &FS) {
- std::pair<unsigned, std::string> Error;
+ std::string Error;
if (auto SCL = create(Paths, FS, Error))
return SCL;
- report_fatal_error(Twine(Error.second));
+ report_fatal_error(Twine(Error));
}
bool SpecialCaseList::createInternal(const std::vector<std::string> &Paths,
- vfs::FileSystem &VFS,
- std::pair<unsigned, std::string> &Error) {
+ vfs::FileSystem &VFS, std::string &Error) {
for (size_t i = 0; i < Paths.size(); ++i) {
const auto &Path = Paths[i];
ErrorOr<std::unique_ptr<MemoryBuffer>> FileOrErr =
VFS.getBufferForFile(Path);
if (std::error_code EC = FileOrErr.getError()) {
- Error.first = 0 /* open failure */;
- Error.second = (Twine("'") + Path + "': " + EC.message()).str();
+ Error = (Twine("can't open file '") + Path + "': " + EC.message()).str();
return false;
}
std::string ParseError;
if (!parse(i, FileOrErr.get().get(), ParseError)) {
- Error.first = 1 /* parse failure */;
- Error.second = (Twine("'") + Path + "': " + ParseError).str();
+ Error = (Twine("error parsing file '") + Path + "': " + ParseError).str();
return false;
}
}
diff --git a/llvm/lib/Target/AArch64/AArch64TargetObjectFile.cpp b/llvm/lib/Target/AArch64/AArch64TargetObjectFile.cpp
index c218831..85de2d5 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetObjectFile.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetObjectFile.cpp
@@ -36,7 +36,7 @@ void AArch64_ELFTargetObjectFile::Initialize(MCContext &Ctx,
// SHF_AARCH64_PURECODE flag set if the "+execute-only" target feature is
// present.
if (TM.getMCSubtargetInfo()->hasFeature(AArch64::FeatureExecuteOnly)) {
- auto *Text = cast<MCSectionELF>(TextSection);
+ auto *Text = static_cast<MCSectionELF *>(TextSection);
Text->setFlags(Text->getFlags() | ELF::SHF_AARCH64_PURECODE);
}
}
diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
index 08f547a..6257e99 100644
--- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
+++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
@@ -523,7 +523,8 @@ void AArch64TargetELFStreamer::finish() {
// mark it execute-only if it is empty and there is at least one
// execute-only section in the object.
if (any_of(Asm, [](const MCSection &Sec) {
- return cast<MCSectionELF>(Sec).getFlags() & ELF::SHF_AARCH64_PURECODE;
+ return static_cast<const MCSectionELF &>(Sec).getFlags() &
+ ELF::SHF_AARCH64_PURECODE;
})) {
auto *Text =
static_cast<MCSectionELF *>(Ctx.getObjectFileInfo()->getTextSection());
diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp
index 1ac340a..a22a17a 100644
--- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp
+++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp
@@ -132,7 +132,8 @@ static bool canUseLocalRelocation(const MCSectionMachO &Section,
// But only if they don't point to a few forbidden sections.
if (!Symbol.isInSection())
return true;
- const MCSectionMachO &RefSec = cast<MCSectionMachO>(Symbol.getSection());
+ const MCSectionMachO &RefSec =
+ static_cast<MCSectionMachO &>(Symbol.getSection());
if (RefSec.getType() == MachO::S_CSTRING_LITERALS)
return false;
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
index 7d6723a..334afd3 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
@@ -38,7 +38,11 @@ bool llvm::isEqual(const GCNRPTracker::LiveRegSet &S1,
unsigned GCNRegPressure::getRegKind(const TargetRegisterClass *RC,
const SIRegisterInfo *STI) {
- return STI->isSGPRClass(RC) ? SGPR : (STI->isAGPRClass(RC) ? AGPR : VGPR);
+ return STI->isSGPRClass(RC)
+ ? SGPR
+ : (STI->isAGPRClass(RC)
+ ? AGPR
+ : (STI->isVectorSuperClass(RC) ? AVGPR : VGPR));
}
void GCNRegPressure::inc(unsigned Reg,
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.h b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
index 3749b6d..ea33a22 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.h
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
@@ -29,43 +29,57 @@ class raw_ostream;
class SlotIndex;
struct GCNRegPressure {
- enum RegKind { SGPR, VGPR, AGPR, TOTAL_KINDS };
+ enum RegKind { SGPR, VGPR, AGPR, AVGPR, TOTAL_KINDS };
GCNRegPressure() {
clear();
}
- bool empty() const { return !Value[SGPR] && !Value[VGPR] && !Value[AGPR]; }
+ bool empty() const {
+ return !Value[SGPR] && !Value[VGPR] && !Value[AGPR] && !Value[AVGPR];
+ }
void clear() { std::fill(&Value[0], &Value[ValueArraySize], 0); }
/// \returns the SGPR32 pressure
unsigned getSGPRNum() const { return Value[SGPR]; }
- /// \returns the aggregated ArchVGPR32, AccVGPR32 pressure dependent upon \p
- /// UnifiedVGPRFile
+ /// \returns the aggregated ArchVGPR32, AccVGPR32, and Pseudo AVGPR pressure
+ /// dependent upon \p UnifiedVGPRFile
unsigned getVGPRNum(bool UnifiedVGPRFile) const {
if (UnifiedVGPRFile) {
- return Value[AGPR] ? getUnifiedVGPRNum(Value[VGPR], Value[AGPR])
- : Value[VGPR];
+ return Value[AGPR]
+ ? getUnifiedVGPRNum(Value[VGPR], Value[AGPR], Value[AVGPR])
+ : Value[VGPR] + Value[AVGPR];
}
- return std::max(Value[VGPR], Value[AGPR]);
+ // AVGPR assignment priority is based on the width of the register. Account
+ // AVGPR pressure as VGPR.
+ return std::max(Value[VGPR] + Value[AVGPR], Value[AGPR]);
}
/// Returns the aggregated VGPR pressure, assuming \p NumArchVGPRs ArchVGPRs
- /// and \p NumAGPRs AGPRS, for a target with a unified VGPR file.
+ /// \p NumAGPRs AGPRS, and \p NumAVGPRs AVGPRs for a target with a unified
+ /// VGPR file.
inline static unsigned getUnifiedVGPRNum(unsigned NumArchVGPRs,
- unsigned NumAGPRs) {
- return alignTo(NumArchVGPRs, AMDGPU::IsaInfo::getArchVGPRAllocGranule()) +
+ unsigned NumAGPRs,
+ unsigned NumAVGPRs) {
+
+ // Assume AVGPRs will be assigned as VGPRs.
+ return alignTo(NumArchVGPRs + NumAVGPRs,
+ AMDGPU::IsaInfo::getArchVGPRAllocGranule()) +
NumAGPRs;
}
- /// \returns the ArchVGPR32 pressure
- unsigned getArchVGPRNum() const { return Value[VGPR]; }
+ /// \returns the ArchVGPR32 pressure, plus the AVGPRS which we assume will be
+ /// allocated as VGPR
+ unsigned getArchVGPRNum() const { return Value[VGPR] + Value[AVGPR]; }
/// \returns the AccVGPR32 pressure
unsigned getAGPRNum() const { return Value[AGPR]; }
+ /// \returns the AVGPR32 pressure
+ unsigned getAVGPRNum() const { return Value[AVGPR]; }
unsigned getVGPRTuplesWeight() const {
- return std::max(Value[TOTAL_KINDS + VGPR], Value[TOTAL_KINDS + AGPR]);
+ return std::max(Value[TOTAL_KINDS + VGPR] + Value[TOTAL_KINDS + AVGPR],
+ Value[TOTAL_KINDS + AGPR]);
}
unsigned getSGPRTuplesWeight() const { return Value[TOTAL_KINDS + SGPR]; }
diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index e5d1eaa..b77da4d 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -1062,9 +1062,13 @@ bool SIFoldOperandsImpl::tryFoldRegSeqSplat(
switch (OpTy) {
case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
+ case AMDGPU::OPERAND_REG_INLINE_C_INT32:
+ case AMDGPU::OPERAND_REG_INLINE_C_FP32:
OpRC = TRI->getSubRegisterClass(OpRC, AMDGPU::sub0);
break;
case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
+ case AMDGPU::OPERAND_REG_INLINE_C_FP64:
+ case AMDGPU::OPERAND_REG_INLINE_C_INT64:
OpRC = TRI->getSubRegisterClass(OpRC, AMDGPU::sub0_sub1);
break;
default:
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 0eee7ad..8d51ec6 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -14179,6 +14179,8 @@ SDValue SITargetLowering::performMinMaxCombine(SDNode *N,
Op0.getOpcode() == AMDGPUISD::FMAX_LEGACY)) &&
(VT == MVT::f32 || VT == MVT::f64 ||
(VT == MVT::f16 && Subtarget->has16BitInsts()) ||
+ (VT == MVT::bf16 && Subtarget->hasBF16PackedInsts()) ||
+ (VT == MVT::v2bf16 && Subtarget->hasBF16PackedInsts()) ||
(VT == MVT::v2f16 && Subtarget->hasVOP3PInsts())) &&
Op0.hasOneUse()) {
if (SDValue Res = performFPMed3ImmCombine(DAG, SDLoc(N), Op0, Op1))
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index d04aafb..8d6c1d0 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -2508,7 +2508,20 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
.addReg(DstHi);
}
break;
+
+ case AMDGPU::V_MAX_BF16_PSEUDO_e64:
+ assert(ST.hasBF16PackedInsts());
+ MI.setDesc(get(AMDGPU::V_PK_MAX_NUM_BF16));
+ MI.addOperand(MachineOperand::CreateImm(0)); // op_sel
+ MI.addOperand(MachineOperand::CreateImm(0)); // neg_lo
+ MI.addOperand(MachineOperand::CreateImm(0)); // neg_hi
+ auto Op0 = getNamedOperand(MI, AMDGPU::OpName::src0_modifiers);
+ Op0->setImm(Op0->getImm() | SISrcMods::OP_SEL_1);
+ auto Op1 = getNamedOperand(MI, AMDGPU::OpName::src1_modifiers);
+ Op1->setImm(Op1->getImm() | SISrcMods::OP_SEL_1);
+ break;
}
+
return true;
}
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index b0be3f86..83b0490 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -2865,6 +2865,7 @@ def VOP_F16_F16_I16 : VOPProfile <[f16, f16, i16, untyped]>;
def VOP_F16_F16_I32 : VOPProfile <[f16, f16, i32, untyped]>;
def VOP_I16_I16_I16 : VOPProfile <[i16, i16, i16, untyped]>;
def VOP_I16_I16_I16_ARITH : VOPProfile <[i16, i16, i16, untyped], /*EnableClamp=*/1>;
+def VOP_BF16_BF16_BF16 : VOPProfile <[bf16, bf16, bf16, untyped]>;
def VOP_I16_I16_I16_I16 : VOPProfile <[i16, i16, i16, i16, untyped]>;
def VOP_F16_F16_F16_F16 : VOPProfile <[f16, f16, f16, f16, untyped]>;
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index d05be8f..54fa192 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -1894,6 +1894,9 @@ let SubtargetPredicate = UseRealTrue16Insts in
def : ClampPat<V_MAX_F16_t16_e64, f16>;
let SubtargetPredicate = UseFakeTrue16Insts in
def : ClampPat<V_MAX_F16_fake16_e64, f16>;
+// FIXME-TRUE16: Pseudo expansion of this won't work with True16.
+let True16Predicate = UseFakeTrue16Insts in
+def : ClampPat<V_MAX_BF16_PSEUDO_e64, bf16>;
let SubtargetPredicate = HasVOP3PInsts in {
def : GCNPat <
@@ -1903,6 +1906,13 @@ def : GCNPat <
>;
}
+let SubtargetPredicate = HasBF16PackedInsts in {
+def : GCNPat <
+ (v2bf16 (AMDGPUclamp (VOP3PMods v2bf16:$src0, i32:$src0_modifiers))),
+ (V_PK_MAX_NUM_BF16 $src0_modifiers, $src0,
+ $src0_modifiers, $src0, DSTCLAMP.ENABLE)
+>;
+} // End SubtargetPredicate = HasBF16PackedInsts
/********** ================================ **********/
/********** Floating point absolute/negative **********/
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
index 0039d2f..218841d 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -109,6 +109,23 @@ class SIRegisterClass <string n, list<ValueType> rTypes, int Align, dag rList>
let TSFlags{2} = HasVGPR;
let TSFlags{3} = HasAGPR;
let TSFlags{4} = HasSGPR;
+
+ // RA will use RegisterClass AllocationPriority amongst other info (e.g. ordering in the basic block)
+ // to decide which registers to try to assign first. Usually, this RegisterClass priority is given
+ // very high priority, if not the highest priority, when considering which VirtReg to allocate next.
+ //
+ // We have 5 bits to assign AllocationPriorities to RegisterClasses. Generally, it is beneficial to
+ // assign more constrained RegisterClasses first. As a result, we prioritize register classes with
+ // more 32 bit tuples (e.g. VReg_512) over registers with fewer tuples (e.g. VGPR_32).
+ //
+ // The interesting case is the vector register case on architectures which have ARegs, VRegs, AVRegs.
+ // In this case, we would like to assign ARegs and VRegs before AVRegs, as AVRegs are less constrained
+ // and can be assigned to both AGPRs and VGPRs. We use the 5th bit to encode this into the
+ // RegisterClass AllocationPriority. BaseClassPriority is used to turn the bit on, and BaseClassScaleFactor
+ // is used for scaling of the bit (i.e. 1 << 4).
+ field int BaseClassPriority = 1;
+ field int BaseClassScaleFactor = 16;
+
}
multiclass SIRegLoHi16 <string n, bits<8> regIdx, bit ArtificialHigh = 1,
@@ -575,7 +592,7 @@ let HasVGPR = 1 in {
def VGPR_16 : SIRegisterClass<"AMDGPU", Reg16Types.types, 16,
(add (interleave (sequence "VGPR%u_LO16", 0, 255),
(sequence "VGPR%u_HI16", 0, 255)))> {
- let AllocationPriority = 2;
+ let AllocationPriority = !add(2, !mul(BaseClassPriority, BaseClassScaleFactor));
let Size = 16;
let GeneratePressureSet = 0;
@@ -601,7 +618,7 @@ def VGPR_16_Lo128 : SIRegisterClass<"AMDGPU", Reg16Types.types, 16,
// i16/f16 only on VI+
def VGPR_32 : SIRegisterClass<"AMDGPU", !listconcat(Reg32Types.types, Reg16Types.types), 32,
(add (sequence "VGPR%u", 0, 255))> {
- let AllocationPriority = 0;
+ let AllocationPriority = !add(0, !mul(BaseClassPriority, BaseClassScaleFactor));
let Size = 32;
let Weight = 1;
let BaseClassOrder = 32;
@@ -610,7 +627,7 @@ def VGPR_32 : SIRegisterClass<"AMDGPU", !listconcat(Reg32Types.types, Reg16Types
// Identical to VGPR_32 except it only contains the low 128 (Lo128) registers.
def VGPR_32_Lo128 : SIRegisterClass<"AMDGPU", !listconcat(Reg32Types.types, Reg16Types.types), 32,
(add (sequence "VGPR%u", 0, 127))> {
- let AllocationPriority = 0;
+ let AllocationPriority = !add(0, !mul(BaseClassPriority, BaseClassScaleFactor));
let GeneratePressureSet = 0;
let Size = 32;
let Weight = 1;
@@ -668,7 +685,7 @@ def AGPR_LO16 : SIRegisterClass<"AMDGPU", Reg16Types.types, 16,
// AccVGPR 32-bit registers
def AGPR_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, bf16, v2i16, v2f16, v2bf16], 32,
(add (sequence "AGPR%u", 0, 255))> {
- let AllocationPriority = 0;
+ let AllocationPriority = !add(0, !mul(BaseClassPriority, BaseClassScaleFactor));
let Size = 32;
let Weight = 1;
let BaseClassOrder = 32;
@@ -940,14 +957,23 @@ class VRegClassBase<int numRegs, list<ValueType> regTypes, dag regList> :
// Requires n v_mov_b32 to copy
let CopyCost = numRegs;
- let AllocationPriority = !sub(numRegs, 1);
+
+ // Since we only have 5 bits for the RegisterClass Allocation Priorty, and since we use the
+ // 5th bit for BaseClassPriority, we need to encode the SizePriority into 4 bits. As a result
+ // of this encoding, for registers with numRegs 15 or 16, we give SizePriority of 14, and for
+ // regsters with numRegs 17+ we give SizePriority of 15. In practice, there is only one
+ // RegClass per Vector Register type in each of these groups (i.e. numRegs = 15,16 : {VReg_512},
+ // and numRegs = 17+ : {VReg_1024}). Therefore, we have not lost any info by compressing.
+ defvar SizePrioriity = !if(!le(numRegs, 14), !sub(numRegs, 1), !if(!le(numRegs, 16), 14, 15));
+
+ let AllocationPriority = !add(SizePrioriity, !mul(BaseClassPriority, BaseClassScaleFactor));
let Weight = numRegs;
}
// Define a register tuple class, along with one requiring an even
// aligned base register.
multiclass VRegClass<int numRegs, list<ValueType> regTypes, dag regList> {
- let HasVGPR = 1 in {
+ let HasVGPR = 1, BaseClassPriority = 1 in {
// Define the regular class.
def "" : VRegClassBase<numRegs, regTypes, regList> {
let BaseClassOrder = !mul(numRegs, 32);
@@ -981,7 +1007,7 @@ defm VReg_1024 : VRegClass<32, Reg1024Types.types, (add VGPR_1024)>;
}
multiclass ARegClass<int numRegs, list<ValueType> regTypes, dag regList> {
- let CopyCost = !add(numRegs, numRegs, 1), HasAGPR = 1 in {
+ let CopyCost = !add(numRegs, numRegs, 1), HasAGPR = 1, BaseClassPriority = 1 in {
// Define the regular class.
def "" : VRegClassBase<numRegs, regTypes, regList> {
let BaseClassOrder = !mul(numRegs, 32);
@@ -1066,6 +1092,7 @@ def VS_64 : SIRegisterClass<"AMDGPU", VReg_64.RegTypes, 32, (add VReg_64, SReg_6
def AV_32 : SIRegisterClass<"AMDGPU", VGPR_32.RegTypes, 32, (add VGPR_32, AGPR_32)> {
let HasVGPR = 1;
let HasAGPR = 1;
+ let BaseClassPriority = 0;
let Size = 32;
}
} // End GeneratePressureSet = 0
@@ -1074,7 +1101,7 @@ def AV_32 : SIRegisterClass<"AMDGPU", VGPR_32.RegTypes, 32, (add VGPR_32, AGPR_3
// aligned base register.
multiclass AVRegClass<int numRegs, list<ValueType> regTypes,
dag vregList, dag aregList> {
- let HasVGPR = 1, HasAGPR = 1 in {
+ let HasVGPR = 1, HasAGPR = 1, BaseClassPriority = 0 in {
// Define the regular class.
def "" : VRegClassBase<numRegs, regTypes, (add vregList, aregList)>;
diff --git a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
index c812dc9..95fcd4a 100644
--- a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
@@ -1236,6 +1236,12 @@ let isCommutable = 1, isReMaterializable = 1 in {
defm V_PK_MIN_NUM_BF16 : VOP3PInst<"v_pk_min_num_bf16", VOP3P_Profile<VOP_V2BF16_V2BF16_V2BF16, VOP3_PACKED>, fminnum_like>;
defm V_PK_MAX_NUM_BF16 : VOP3PInst<"v_pk_max_num_bf16", VOP3P_Profile<VOP_V2BF16_V2BF16_V2BF16, VOP3_PACKED>, fmaxnum_like>;
defm V_PK_FMA_BF16 : VOP3PInst<"v_pk_fma_bf16", VOP3P_Profile<VOP_V2BF16_V2BF16_V2BF16_V2BF16, VOP3_PACKED>, any_fma>;
+
+ // Scalar pseudo used to emulate AMDGPUClamp.
+ // Expanded to V_PK_MAX_NUM_BF16 with unused high half.
+ // FIXME-TRUE16: Pseudo expansion of this won't work with True16.
+ let True16Predicate = UseFakeTrue16Insts in
+ defm V_MAX_BF16_PSEUDO : VOP3Inst <"v_max_bf16", VOP_BF16_BF16_BF16>;
}
} // End isCommutable = 1, isReMaterializable = 1
diff --git a/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index ec6f4e2..ece6c10 100644
--- a/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -12327,7 +12327,7 @@ bool ARMAsmParser::parseDirectiveEven(SMLoc L) {
}
assert(Section && "must have section to emit alignment");
- if (Section->useCodeAlign())
+ if (getContext().getAsmInfo()->useCodeAlign(*Section))
getStreamer().emitCodeAlignment(Align(2), &getSTI());
else
getStreamer().emitValueToAlignment(Align(2));
@@ -12525,7 +12525,7 @@ bool ARMAsmParser::parseDirectiveAlign(SMLoc L) {
// '.align' is target specifically handled to mean 2**2 byte alignment.
const MCSection *Section = getStreamer().getCurrentSectionOnly();
assert(Section && "must have section to emit alignment");
- if (Section->useCodeAlign())
+ if (getContext().getAsmInfo()->useCodeAlign(*Section))
getStreamer().emitCodeAlignment(Align(4), &getSTI(), 0);
else
getStreamer().emitValueToAlignment(Align(4), 0, 1, 0);
diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
index f64a223..868556b 100644
--- a/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
+++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
@@ -1138,7 +1138,8 @@ void ARMTargetELFStreamer::finish() {
MCContext &Ctx = getContext();
auto &Asm = getStreamer().getAssembler();
if (any_of(Asm, [](const MCSection &Sec) {
- return cast<MCSectionELF>(Sec).getFlags() & ELF::SHF_ARM_PURECODE;
+ return static_cast<const MCSectionELF &>(Sec).getFlags() &
+ ELF::SHF_ARM_PURECODE;
})) {
auto *Text =
static_cast<MCSectionELF *>(Ctx.getObjectFileInfo()->getTextSection());
diff --git a/llvm/lib/Target/AVR/AVRAsmPrinter.cpp b/llvm/lib/Target/AVR/AVRAsmPrinter.cpp
index ad8aa571..0fb33cd 100644
--- a/llvm/lib/Target/AVR/AVRAsmPrinter.cpp
+++ b/llvm/lib/Target/AVR/AVRAsmPrinter.cpp
@@ -260,7 +260,7 @@ bool AVRAsmPrinter::doFinalization(Module &M) {
continue;
}
- auto *Section = cast<MCSectionELF>(TLOF.SectionForGlobal(&GO, TM));
+ auto *Section = static_cast<MCSectionELF *>(TLOF.SectionForGlobal(&GO, TM));
if (Section->getName().starts_with(".data"))
NeedsCopyData = true;
else if (Section->getName().starts_with(".rodata") && SubTM->hasLPM())
diff --git a/llvm/lib/Target/AVR/MCTargetDesc/AVRMCAsmInfo.cpp b/llvm/lib/Target/AVR/MCTargetDesc/AVRMCAsmInfo.cpp
index c2c1bb4..0615ec9 100644
--- a/llvm/lib/Target/AVR/MCTargetDesc/AVRMCAsmInfo.cpp
+++ b/llvm/lib/Target/AVR/MCTargetDesc/AVRMCAsmInfo.cpp
@@ -24,8 +24,6 @@ AVRMCAsmInfo::AVRMCAsmInfo(const Triple &TT, const MCTargetOptions &Options) {
CalleeSaveStackSlotSize = 2;
CommentString = ";";
SeparatorString = "$";
- PrivateGlobalPrefix = ".L";
- PrivateLabelPrefix = ".L";
UsesELFSectionDirectiveForBSS = true;
SupportsDebugInformation = true;
}
diff --git a/llvm/lib/Target/AVR/MCTargetDesc/AVRMCAsmInfo.h b/llvm/lib/Target/AVR/MCTargetDesc/AVRMCAsmInfo.h
index fab2713..1915fa8 100644
--- a/llvm/lib/Target/AVR/MCTargetDesc/AVRMCAsmInfo.h
+++ b/llvm/lib/Target/AVR/MCTargetDesc/AVRMCAsmInfo.h
@@ -14,7 +14,7 @@
#define LLVM_AVR_ASM_INFO_H
#include "MCTargetDesc/AVRMCExpr.h"
-#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCAsmInfoELF.h"
#include "llvm/MC/MCExpr.h"
namespace llvm {
@@ -22,7 +22,7 @@ namespace llvm {
class Triple;
/// Specifies the format of AVR assembly files.
-class AVRMCAsmInfo : public MCAsmInfo {
+class AVRMCAsmInfo : public MCAsmInfoELF {
public:
explicit AVRMCAsmInfo(const Triple &TT, const MCTargetOptions &Options);
void printSpecifierExpr(raw_ostream &OS,
diff --git a/llvm/lib/Target/BPF/BTFDebug.cpp b/llvm/lib/Target/BPF/BTFDebug.cpp
index 1e29a0f..a87b9a2 100644
--- a/llvm/lib/Target/BPF/BTFDebug.cpp
+++ b/llvm/lib/Target/BPF/BTFDebug.cpp
@@ -1255,10 +1255,8 @@ void BTFDebug::beginFunctionImpl(const MachineFunction *MF) {
FuncInfo.Label = FuncLabel;
FuncInfo.TypeId = FuncTypeId;
if (FuncLabel->isInSection()) {
- MCSection &Section = FuncLabel->getSection();
- const MCSectionELF *SectionELF = dyn_cast<MCSectionELF>(&Section);
- assert(SectionELF && "Null section for Function Label");
- SecNameOff = addString(SectionELF->getName());
+ auto &Sec = static_cast<const MCSectionELF &>(FuncLabel->getSection());
+ SecNameOff = addString(Sec.getName());
} else {
SecNameOff = addString(".text");
}
diff --git a/llvm/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp b/llvm/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp
index 827e928..bb74f6a 100644
--- a/llvm/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp
+++ b/llvm/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp
@@ -54,11 +54,8 @@ unsigned BPFELFObjectWriter::getRelocType(const MCFixup &Fixup,
const MCSymbol &Sym = *A;
if (Sym.isDefined()) {
- MCSection &Section = Sym.getSection();
- const MCSectionELF *SectionELF = dyn_cast<MCSectionELF>(&Section);
- assert(SectionELF && "Null section for reloc symbol");
-
- unsigned Flags = SectionELF->getFlags();
+ auto &Section = static_cast<const MCSectionELF &>(Sym.getSection());
+ unsigned Flags = Section.getFlags();
if (Sym.isTemporary()) {
// .BTF.ext generates FK_Data_4 relocations for
diff --git a/llvm/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h b/llvm/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h
index 7b21684..63d6e6f 100644
--- a/llvm/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h
+++ b/llvm/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h
@@ -13,18 +13,19 @@
#ifndef LLVM_LIB_TARGET_BPF_MCTARGETDESC_BPFMCASMINFO_H
#define LLVM_LIB_TARGET_BPF_MCTARGETDESC_BPFMCASMINFO_H
-#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCAsmInfoELF.h"
#include "llvm/TargetParser/Triple.h"
namespace llvm {
-class BPFMCAsmInfo : public MCAsmInfo {
+class BPFMCAsmInfo : public MCAsmInfoELF {
public:
explicit BPFMCAsmInfo(const Triple &TT, const MCTargetOptions &Options) {
if (TT.getArch() == Triple::bpfeb)
IsLittleEndian = false;
PrivateGlobalPrefix = ".L";
+ PrivateLabelPrefix = "L";
WeakRefDirective = "\t.weak\t";
UsesELFSectionDirectiveForBSS = true;
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index e915a3c4..613cfb5 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -2385,19 +2385,9 @@ SDValue LoongArchTargetLowering::lowerBF16_TO_FP(SDValue Op,
return Res;
}
-static bool isConstantOrUndef(const SDValue Op) {
- if (Op->isUndef())
- return true;
- if (isa<ConstantSDNode>(Op))
- return true;
- if (isa<ConstantFPSDNode>(Op))
- return true;
- return false;
-}
-
-static bool isConstantOrUndefBUILD_VECTOR(const BuildVectorSDNode *Op) {
+static bool isConstantBUILD_VECTOR(const BuildVectorSDNode *Op) {
for (unsigned i = 0; i < Op->getNumOperands(); ++i)
- if (isConstantOrUndef(Op->getOperand(i)))
+ if (isIntOrFPConstant(Op->getOperand(i)))
return true;
return false;
}
@@ -2505,20 +2495,23 @@ SDValue LoongArchTargetLowering::lowerBUILD_VECTOR(SDValue Op,
if (DAG.isSplatValue(Op, /*AllowUndefs=*/false))
return Op;
- if (!isConstantOrUndefBUILD_VECTOR(Node)) {
+ if (!isConstantBUILD_VECTOR(Node)) {
// Use INSERT_VECTOR_ELT operations rather than expand to stores.
// The resulting code is the same length as the expansion, but it doesn't
// use memory operations.
- EVT ResTy = Node->getValueType(0);
-
assert(ResTy.isVector());
unsigned NumElts = ResTy.getVectorNumElements();
- SDValue Vector =
- DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, ResTy, Node->getOperand(0));
+ SDValue Op0 = Node->getOperand(0);
+ SDValue Vector = DAG.getUNDEF(ResTy);
+
+ if (!Op0.isUndef())
+ Vector = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, ResTy, Op0);
for (unsigned i = 1; i < NumElts; ++i) {
- Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Vector,
- Node->getOperand(i),
+ SDValue Opi = Node->getOperand(i);
+ if (Opi.isUndef())
+ continue;
+ Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Vector, Opi,
DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
}
return Vector;
diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp
index 8fa72bc..d9ea88c 100644
--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp
+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp
@@ -254,6 +254,7 @@ bool LoongArchAsmBackend::relaxAlign(MCFragment &F, unsigned &Size) {
MCFixup Fixup =
MCFixup::create(0, Expr, FirstLiteralRelocationKind + ELF::R_LARCH_ALIGN);
F.setVarFixups({Fixup});
+ F.setLinkerRelaxable();
F.getParent()->setLinkerRelaxable();
return true;
}
diff --git a/llvm/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp b/llvm/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp
index feb4eb3..d9680c7 100644
--- a/llvm/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp
+++ b/llvm/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp
@@ -969,7 +969,7 @@ void MipsTargetELFStreamer::finish() {
Align Alignment = Section.getAlign();
S.switchSection(&Section);
- if (Section.useCodeAlign())
+ if (getContext().getAsmInfo()->useCodeAlign(Section))
S.emitCodeAlignment(Alignment, &STI, Alignment.value());
else
S.emitValueToAlignment(Alignment, 0, 1, Alignment.value());
diff --git a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp
index 614b321..ce9cd12 100644
--- a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp
+++ b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp
@@ -15,8 +15,6 @@
using namespace llvm;
-void NVPTXMCAsmInfo::anchor() {}
-
NVPTXMCAsmInfo::NVPTXMCAsmInfo(const Triple &TheTriple,
const MCTargetOptions &Options) {
if (TheTriple.getArch() == Triple::nvptx64) {
diff --git a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.h b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.h
index 77c4dae..f071406 100644
--- a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.h
+++ b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.h
@@ -19,8 +19,6 @@ namespace llvm {
class Triple;
class NVPTXMCAsmInfo : public MCAsmInfo {
- virtual void anchor();
-
public:
explicit NVPTXMCAsmInfo(const Triple &TheTriple,
const MCTargetOptions &Options);
diff --git a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXTargetStreamer.cpp b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXTargetStreamer.cpp
index 9f91143..329e3b5 100644
--- a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXTargetStreamer.cpp
+++ b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXTargetStreamer.cpp
@@ -97,10 +97,7 @@ void NVPTXTargetStreamer::changeSection(const MCSection *CurSection,
if (isDwarfSection(FI, Section)) {
// Emit DWARF .file directives in the outermost scope.
outputDwarfFileDirectives();
- OS << "\t.section";
- Section->printSwitchToSection(*getStreamer().getContext().getAsmInfo(),
- getStreamer().getContext().getTargetTriple(),
- OS, SubSection);
+ OS << "\t.section\t" << Section->getName() << '\n';
// DWARF sections are enclosed into braces - emit the open one.
OS << "\t{\n";
HasSections = true;
diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
index 8baf866..1af2f9c 100644
--- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
+++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
@@ -220,8 +220,6 @@ bool PPCELFMCAsmInfo::evaluateAsRelocatableImpl(const MCSpecifierExpr &Expr,
return evaluateAsRelocatable(Expr, Res, Asm);
}
-void PPCXCOFFMCAsmInfo::anchor() {}
-
PPCXCOFFMCAsmInfo::PPCXCOFFMCAsmInfo(bool Is64Bit, const Triple &T) {
if (T.getArch() == Triple::ppc64le || T.getArch() == Triple::ppcle)
report_fatal_error("XCOFF is not supported for little-endian targets");
diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h
index 0f945b3..6af1bd7 100644
--- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h
+++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h
@@ -33,8 +33,6 @@ public:
};
class PPCXCOFFMCAsmInfo : public MCAsmInfoXCOFF {
- void anchor() override;
-
public:
explicit PPCXCOFFMCAsmInfo(bool is64Bit, const Triple &);
void printSpecifierExpr(raw_ostream &OS,
diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
index 54497d9..3dad0e8 100644
--- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
+++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
@@ -213,7 +213,7 @@ public:
void emitTCEntry(const MCSymbol &S, PPCMCExpr::Specifier Kind) override {
if (const MCSymbolXCOFF *XSym = dyn_cast<MCSymbolXCOFF>(&S)) {
MCSymbolXCOFF *TCSym =
- cast<MCSectionXCOFF>(Streamer.getCurrentSectionOnly())
+ static_cast<const MCSectionXCOFF *>(Streamer.getCurrentSectionOnly())
->getQualNameSymbol();
// On AIX, we have TLS variable offsets (symbol@({gd|ie|le|ld}) depending
// on the TLS access method (or model). For the general-dynamic access
diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
index a091b21..ce1d51a 100644
--- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -2274,9 +2274,9 @@ void PPCAIXAsmPrinter::emitLinkage(const GlobalValue *GV,
void PPCAIXAsmPrinter::SetupMachineFunction(MachineFunction &MF) {
// Setup CurrentFnDescSym and its containing csect.
- MCSectionXCOFF *FnDescSec =
- cast<MCSectionXCOFF>(getObjFileLowering().getSectionForFunctionDescriptor(
- &MF.getFunction(), TM));
+ auto *FnDescSec = static_cast<MCSectionXCOFF *>(
+ getObjFileLowering().getSectionForFunctionDescriptor(&MF.getFunction(),
+ TM));
FnDescSec->setAlignment(Align(Subtarget->isPPC64() ? 8 : 4));
CurrentFnDescSym = FnDescSec->getQualNameSymbol();
@@ -2669,9 +2669,9 @@ void PPCAIXAsmPrinter::emitTracebackTable() {
MCSymbol *EHInfoSym =
TargetLoweringObjectFileXCOFF::getEHInfoTableSymbol(MF);
MCSymbol *TOCEntry = lookUpOrCreateTOCEntry(EHInfoSym, TOCType_EHBlock);
- const MCSymbol *TOCBaseSym =
- cast<MCSectionXCOFF>(getObjFileLowering().getTOCBaseSection())
- ->getQualNameSymbol();
+ const MCSymbol *TOCBaseSym = static_cast<const MCSectionXCOFF *>(
+ getObjFileLowering().getTOCBaseSection())
+ ->getQualNameSymbol();
const MCExpr *Exp =
MCBinaryExpr::createSub(MCSymbolRefExpr::create(TOCEntry, Ctx),
MCSymbolRefExpr::create(TOCBaseSym, Ctx), Ctx);
@@ -2788,7 +2788,7 @@ void PPCAIXAsmPrinter::emitGlobalVariableHelper(const GlobalVariable *GV) {
}
}
- MCSectionXCOFF *Csect = cast<MCSectionXCOFF>(
+ auto *Csect = static_cast<MCSectionXCOFF *>(
getObjFileLowering().SectionForGlobal(GV, GVKind, TM));
// Switch to the containing csect.
@@ -2869,9 +2869,9 @@ void PPCAIXAsmPrinter::emitFunctionDescriptor() {
OutStreamer->emitValue(MCSymbolRefExpr::create(CurrentFnSym, OutContext),
PointerSize);
// Emit TOC base address.
- const MCSymbol *TOCBaseSym =
- cast<MCSectionXCOFF>(getObjFileLowering().getTOCBaseSection())
- ->getQualNameSymbol();
+ const MCSymbol *TOCBaseSym = static_cast<const MCSectionXCOFF *>(
+ getObjFileLowering().getTOCBaseSection())
+ ->getQualNameSymbol();
OutStreamer->emitValue(MCSymbolRefExpr::create(TOCBaseSym, OutContext),
PointerSize);
// Emit a null environment pointer.
@@ -2996,10 +2996,10 @@ void PPCAIXAsmPrinter::emitEndOfAsmFile(Module &M) {
Name += Prefix;
Name += cast<MCSymbolXCOFF>(I.first.first)->getSymbolTableName();
MCSymbol *S = OutContext.getOrCreateSymbol(Name);
- TCEntry = cast<MCSectionXCOFF>(
+ TCEntry = static_cast<MCSectionXCOFF *>(
getObjFileLowering().getSectionForTOCEntry(S, TM));
} else {
- TCEntry = cast<MCSectionXCOFF>(
+ TCEntry = static_cast<MCSectionXCOFF *>(
getObjFileLowering().getSectionForTOCEntry(I.first.first, TM));
}
OutStreamer->switchSection(TCEntry);
@@ -3054,7 +3054,7 @@ bool PPCAIXAsmPrinter::doInitialization(Module &M) {
return;
SectionKind GOKind = getObjFileLowering().getKindForGlobal(GO, TM);
- MCSectionXCOFF *Csect = cast<MCSectionXCOFF>(
+ auto *Csect = static_cast<MCSectionXCOFF *>(
getObjFileLowering().SectionForGlobal(GO, GOKind, TM));
Align GOAlign = getGVAlignment(GO, GO->getDataLayout());
@@ -3316,9 +3316,9 @@ void PPCAIXAsmPrinter::emitTTypeReference(const GlobalValue *GV,
GlobalType = TOCType_GlobalExternal;
MCSymbol *TypeInfoSym = TM.getSymbol(GV);
MCSymbol *TOCEntry = lookUpOrCreateTOCEntry(TypeInfoSym, GlobalType);
- const MCSymbol *TOCBaseSym =
- cast<MCSectionXCOFF>(getObjFileLowering().getTOCBaseSection())
- ->getQualNameSymbol();
+ const MCSymbol *TOCBaseSym = static_cast<const MCSectionXCOFF *>(
+ getObjFileLowering().getTOCBaseSection())
+ ->getQualNameSymbol();
auto &Ctx = OutStreamer->getContext();
const MCExpr *Exp =
MCBinaryExpr::createSub(MCSymbolRefExpr::create(TOCEntry, Ctx),
diff --git a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
index a143d85..d71c42c 100644
--- a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
+++ b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
@@ -3849,9 +3849,14 @@ bool RISCVAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
switch (Inst.getOpcode()) {
default:
break;
- case RISCV::PseudoC_ADDI_NOP:
- emitToStreamer(Out, MCInstBuilder(RISCV::C_NOP));
+ case RISCV::PseudoC_ADDI_NOP: {
+ if (Inst.getOperand(2).getImm() == 0)
+ emitToStreamer(Out, MCInstBuilder(RISCV::C_NOP));
+ else
+ emitToStreamer(
+ Out, MCInstBuilder(RISCV::C_NOP_HINT).addOperand(Inst.getOperand(2)));
return false;
+ }
case RISCV::PseudoLLAImm:
case RISCV::PseudoLAImm:
case RISCV::PseudoLI: {
diff --git a/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp b/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
index fa7bcfa..5e54b82 100644
--- a/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
+++ b/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
@@ -193,21 +193,19 @@ static DecodeStatus DecodeFPR128RegisterClass(MCInst &Inst, uint32_t RegNo,
static DecodeStatus DecodeGPRNoX0RegisterClass(MCInst &Inst, uint32_t RegNo,
uint64_t Address,
const MCDisassembler *Decoder) {
- if (RegNo == 0) {
+ if (RegNo == 0)
return MCDisassembler::Fail;
- }
return DecodeGPRRegisterClass(Inst, RegNo, Address, Decoder);
}
-static DecodeStatus
-DecodeGPRNoX0X2RegisterClass(MCInst &Inst, uint64_t RegNo, uint32_t Address,
- const MCDisassembler *Decoder) {
- if (RegNo == 2) {
+static DecodeStatus DecodeGPRNoX2RegisterClass(MCInst &Inst, uint64_t RegNo,
+ uint32_t Address,
+ const MCDisassembler *Decoder) {
+ if (RegNo == 2)
return MCDisassembler::Fail;
- }
- return DecodeGPRNoX0RegisterClass(Inst, RegNo, Address, Decoder);
+ return DecodeGPRRegisterClass(Inst, RegNo, Address, Decoder);
}
static DecodeStatus DecodeGPRNoX31RegisterClass(MCInst &Inst, uint32_t RegNo,
@@ -536,31 +534,6 @@ static DecodeStatus decodeRTZArg(MCInst &Inst, uint32_t Imm, int64_t Address,
return MCDisassembler::Success;
}
-static DecodeStatus decodeRVCInstrRdRs1ImmZero(MCInst &Inst, uint32_t Insn,
- uint64_t Address,
- const MCDisassembler *Decoder);
-
-static DecodeStatus decodeRVCInstrRdSImm6(MCInst &Inst, uint32_t Insn,
- uint64_t Address,
- const MCDisassembler *Decoder);
-
-static DecodeStatus decodeRVCInstrRdCLUIImm(MCInst &Inst, uint32_t Insn,
- uint64_t Address,
- const MCDisassembler *Decoder);
-
-static DecodeStatus
-decodeRVCInstrRdRs1UImmLog2XLenNonZero(MCInst &Inst, uint32_t Insn,
- uint64_t Address,
- const MCDisassembler *Decoder);
-
-static DecodeStatus decodeRVCInstrRdRs2(MCInst &Inst, uint32_t Insn,
- uint64_t Address,
- const MCDisassembler *Decoder);
-
-static DecodeStatus decodeRVCInstrRdRs1Rs2(MCInst &Inst, uint32_t Insn,
- uint64_t Address,
- const MCDisassembler *Decoder);
-
static DecodeStatus decodeXTHeadMemPair(MCInst &Inst, uint32_t Insn,
uint64_t Address,
const MCDisassembler *Decoder);
@@ -579,18 +552,6 @@ static DecodeStatus decodeCSSPushPopchk(MCInst &Inst, uint32_t Insn,
#include "RISCVGenDisassemblerTables.inc"
-static DecodeStatus decodeRVCInstrRdRs1ImmZero(MCInst &Inst, uint32_t Insn,
- uint64_t Address,
- const MCDisassembler *Decoder) {
- DecodeStatus S = MCDisassembler::Success;
- uint32_t Rd = fieldFromInstruction(Insn, 7, 5);
- if (!Check(S, DecodeGPRNoX0RegisterClass(Inst, Rd, Address, Decoder)))
- return MCDisassembler::Fail;
- Inst.addOperand(Inst.getOperand(0));
- Inst.addOperand(MCOperand::createImm(0));
- return S;
-}
-
static DecodeStatus decodeCSSPushPopchk(MCInst &Inst, uint32_t Insn,
uint64_t Address,
const MCDisassembler *Decoder) {
@@ -601,66 +562,6 @@ static DecodeStatus decodeCSSPushPopchk(MCInst &Inst, uint32_t Insn,
return MCDisassembler::Success;
}
-static DecodeStatus decodeRVCInstrRdSImm6(MCInst &Inst, uint32_t Insn,
- uint64_t Address,
- const MCDisassembler *Decoder) {
- Inst.addOperand(MCOperand::createReg(RISCV::X0));
- uint32_t Imm =
- fieldFromInstruction(Insn, 12, 1) << 5 | fieldFromInstruction(Insn, 2, 5);
- [[maybe_unused]] DecodeStatus Result =
- decodeSImmOperand<6>(Inst, Imm, Address, Decoder);
- assert(Result == MCDisassembler::Success && "Invalid immediate");
- return MCDisassembler::Success;
-}
-
-static DecodeStatus decodeRVCInstrRdCLUIImm(MCInst &Inst, uint32_t Insn,
- uint64_t Address,
- const MCDisassembler *Decoder) {
- Inst.addOperand(MCOperand::createReg(RISCV::X0));
- uint32_t Imm =
- fieldFromInstruction(Insn, 12, 1) << 5 | fieldFromInstruction(Insn, 2, 5);
- return decodeCLUIImmOperand(Inst, Imm, Address, Decoder);
-}
-
-static DecodeStatus
-decodeRVCInstrRdRs1UImmLog2XLenNonZero(MCInst &Inst, uint32_t Insn,
- uint64_t Address,
- const MCDisassembler *Decoder) {
- Inst.addOperand(MCOperand::createReg(RISCV::X0));
- Inst.addOperand(Inst.getOperand(0));
-
- uint32_t UImm6 =
- fieldFromInstruction(Insn, 12, 1) << 5 | fieldFromInstruction(Insn, 2, 5);
- return decodeUImmLog2XLenNonZeroOperand(Inst, UImm6, Address, Decoder);
-}
-
-static DecodeStatus decodeRVCInstrRdRs2(MCInst &Inst, uint32_t Insn,
- uint64_t Address,
- const MCDisassembler *Decoder) {
- DecodeStatus S = MCDisassembler::Success;
- uint32_t Rd = fieldFromInstruction(Insn, 7, 5);
- uint32_t Rs2 = fieldFromInstruction(Insn, 2, 5);
- if (!Check(S, DecodeGPRRegisterClass(Inst, Rd, Address, Decoder)))
- return MCDisassembler::Fail;
- if (!Check(S, DecodeGPRRegisterClass(Inst, Rs2, Address, Decoder)))
- return MCDisassembler::Fail;
- return S;
-}
-
-static DecodeStatus decodeRVCInstrRdRs1Rs2(MCInst &Inst, uint32_t Insn,
- uint64_t Address,
- const MCDisassembler *Decoder) {
- DecodeStatus S = MCDisassembler::Success;
- uint32_t Rd = fieldFromInstruction(Insn, 7, 5);
- uint32_t Rs2 = fieldFromInstruction(Insn, 2, 5);
- if (!Check(S, DecodeGPRRegisterClass(Inst, Rd, Address, Decoder)))
- return MCDisassembler::Fail;
- Inst.addOperand(Inst.getOperand(0));
- if (!Check(S, DecodeGPRRegisterClass(Inst, Rs2, Address, Decoder)))
- return MCDisassembler::Fail;
- return S;
-}
-
static DecodeStatus decodeXTHeadMemPair(MCInst &Inst, uint32_t Insn,
uint64_t Address,
const MCDisassembler *Decoder) {
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp
index 2c37c3b..82e3b5c 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp
@@ -320,6 +320,7 @@ bool RISCVAsmBackend::relaxAlign(MCFragment &F, unsigned &Size) {
MCFixup Fixup =
MCFixup::create(0, Expr, FirstLiteralRelocationKind + ELF::R_RISCV_ALIGN);
F.setVarFixups({Fixup});
+ F.setLinkerRelaxable();
F.getParent()->setLinkerRelaxable();
return true;
}
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h
index 7ad5d5f..bddea43 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h
@@ -330,7 +330,6 @@ enum OperandType : unsigned {
OPERAND_UIMM32,
OPERAND_UIMM48,
OPERAND_UIMM64,
- OPERAND_ZERO,
OPERAND_THREE,
OPERAND_FOUR,
OPERAND_SIMM5,
diff --git a/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp b/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp
index d4f5d8f..2f32e2a 100644
--- a/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp
+++ b/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp
@@ -293,7 +293,7 @@ void RISCVAsmPrinter::emitNTLHint(const MachineInstr *MI) {
MCInst Hint;
if (STI->hasStdExtZca())
- Hint.setOpcode(RISCV::C_ADD_HINT);
+ Hint.setOpcode(RISCV::C_ADD);
else
Hint.setOpcode(RISCV::ADD);
diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
index b1ab76a..9fc0d81 100644
--- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
@@ -1581,7 +1581,8 @@ void RISCVFrameLowering::determineCalleeSaves(MachineFunction &MF,
// Set the register and all its subregisters.
if (!MRI.def_empty(CSReg) || MRI.getUsedPhysRegsMask().test(CSReg)) {
SavedRegs.set(CSReg);
- llvm::for_each(SubRegs, [&](unsigned Reg) { return SavedRegs.set(Reg); });
+ for (unsigned Reg : SubRegs)
+ SavedRegs.set(Reg);
}
// Combine to super register if all of its subregisters are marked.
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index 64f9e3e..085064e 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -2859,9 +2859,6 @@ bool RISCVInstrInfo::verifyInstruction(const MachineInstr &MI,
case RISCVOp::OPERAND_UIMM16_NONZERO:
Ok = isUInt<16>(Imm) && (Imm != 0);
break;
- case RISCVOp::OPERAND_ZERO:
- Ok = Imm == 0;
- break;
case RISCVOp::OPERAND_THREE:
Ok = Imm == 3;
break;
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoC.td b/llvm/lib/Target/RISCV/RISCVInstrInfoC.td
index 8252a9b..c5551fb 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoC.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoC.td
@@ -57,12 +57,6 @@ def simm6nonzero : RISCVOp,
}];
}
-def immzero : RISCVOp,
- ImmLeaf<XLenVT, [{return (Imm == 0);}]> {
- let ParserMatchClass = ImmZeroAsmOperand;
- let OperandType = "OPERAND_ZERO";
-}
-
def CLUIImmAsmOperand : AsmOperandClass {
let Name = "CLUIImm";
let RenderMethod = "addImmOperands";
@@ -272,7 +266,7 @@ class Bcz<bits<3> funct3, string OpcodeStr>
let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
class Shift_right<bits<2> funct2, string OpcodeStr>
: RVInst16CB<0b100, 0b01, (outs GPRC:$rd),
- (ins GPRC:$rs1, uimmlog2xlennonzero:$imm),
+ (ins GPRC:$rs1, uimmlog2xlen:$imm),
OpcodeStr, "$rs1, $imm"> {
let Constraints = "$rs1 = $rd";
let Inst{12} = imm{5};
@@ -402,17 +396,19 @@ def C_NOP : RVInst16CI<0b000, 0b01, (outs), (ins), "c.nop", "">,
let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
def C_ADDI : RVInst16CI<0b000, 0b01, (outs GPRNoX0:$rd_wb),
- (ins GPRNoX0:$rd, simm6nonzero:$imm),
+ (ins GPRNoX0:$rd, simm6:$imm),
"c.addi", "$rd, $imm">,
Sched<[WriteIALU, ReadIALU]> {
let Constraints = "$rd = $rd_wb";
}
-// Alternate syntax for c.nop. Converted to C_NOP by the assembler.
+// Alternate syntax for c.nop. Converted to C_NOP/C_NOP_HINT by the assembler.
let hasSideEffects = 0, mayLoad = 0, mayStore = 0, isCodeGenOnly = 0,
isAsmParserOnly = 1 in
-def PseudoC_ADDI_NOP : Pseudo<(outs GPRX0:$rd), (ins GPRX0:$rs1, immzero:$imm),
- [], "c.addi", "$rd, $imm">;
+def PseudoC_ADDI_NOP : Pseudo<(outs GPRX0:$rd), (ins GPRX0:$rs1, simm6:$imm),
+ [], "c.addi", "$rd, $imm"> {
+ let Constraints = "$rs1 = $rd";
+}
let hasSideEffects = 0, mayLoad = 0, mayStore = 0, isCall = 1,
DecoderNamespace = "RV32Only", Defs = [X1],
@@ -430,7 +426,7 @@ def C_ADDIW : RVInst16CI<0b001, 0b01, (outs GPRNoX0:$rd_wb),
}
let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
-def C_LI : RVInst16CI<0b010, 0b01, (outs GPRNoX0:$rd), (ins simm6:$imm),
+def C_LI : RVInst16CI<0b010, 0b01, (outs GPR:$rd), (ins simm6:$imm),
"c.li", "$rd, $imm">,
Sched<[WriteIALU]>;
@@ -449,7 +445,7 @@ def C_ADDI16SP : RVInst16CI<0b011, 0b01, (outs SP:$rd_wb),
}
let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
-def C_LUI : RVInst16CI<0b011, 0b01, (outs GPRNoX0X2:$rd),
+def C_LUI : RVInst16CI<0b011, 0b01, (outs GPRNoX2:$rd),
(ins c_lui_imm:$imm),
"c.lui", "$rd, $imm">,
Sched<[WriteIALU]>;
@@ -497,8 +493,8 @@ def C_BEQZ : Bcz<0b110, "c.beqz">, Sched<[WriteJmp, ReadJmp]>;
def C_BNEZ : Bcz<0b111, "c.bnez">, Sched<[WriteJmp, ReadJmp]>;
let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
-def C_SLLI : RVInst16CI<0b000, 0b10, (outs GPRNoX0:$rd_wb),
- (ins GPRNoX0:$rd, uimmlog2xlennonzero:$imm),
+def C_SLLI : RVInst16CI<0b000, 0b10, (outs GPR:$rd_wb),
+ (ins GPR:$rd, uimmlog2xlen:$imm),
"c.slli", "$rd, $imm">,
Sched<[WriteShiftImm, ReadShiftImm]> {
let Constraints = "$rd = $rd_wb";
@@ -544,7 +540,7 @@ def C_JR : RVInst16CR<0b1000, 0b10, (outs), (ins GPRNoX0:$rs1),
let hasSideEffects = 0, mayLoad = 0, mayStore = 0, isMoveReg = 1,
isAsCheapAsAMove = 1 in
-def C_MV : RVInst16CR<0b1000, 0b10, (outs GPRNoX0:$rs1), (ins GPRNoX0:$rs2),
+def C_MV : RVInst16CR<0b1000, 0b10, (outs GPR:$rs1), (ins GPRNoX0:$rs2),
"c.mv", "$rs1, $rs2">,
Sched<[WriteIALU, ReadIALU]>;
@@ -557,8 +553,8 @@ def C_JALR : RVInst16CR<0b1001, 0b10, (outs), (ins GPRNoX0:$rs1),
"c.jalr", "$rs1">, Sched<[WriteJalr, ReadJalr]>;
let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
-def C_ADD : RVInst16CR<0b1001, 0b10, (outs GPRNoX0:$rd),
- (ins GPRNoX0:$rs1, GPRNoX0:$rs2),
+def C_ADD : RVInst16CR<0b1001, 0b10, (outs GPR:$rd),
+ (ins GPR:$rs1, GPRNoX0:$rs2),
"c.add", "$rs1, $rs2">,
Sched<[WriteIALU, ReadIALU, ReadIALU]> {
let Constraints = "$rs1 = $rd";
@@ -616,81 +612,6 @@ def C_NOP_HINT : RVInst16CI<0b000, 0b01, (outs), (ins simm6nonzero:$imm),
let rd = 0;
}
-def C_ADDI_HINT_IMM_ZERO : RVInst16CI<0b000, 0b01, (outs GPRNoX0:$rd_wb),
- (ins GPRNoX0:$rd, immzero:$imm),
- "c.addi", "$rd, $imm">,
- Sched<[WriteIALU, ReadIALU]> {
- let Constraints = "$rd = $rd_wb";
- let imm = 0;
- let DecoderMethod = "decodeRVCInstrRdRs1ImmZero";
-}
-
-def C_LI_HINT : RVInst16CI<0b010, 0b01, (outs GPRX0:$rd), (ins simm6:$imm),
- "c.li", "$rd, $imm">,
- Sched<[WriteIALU]> {
- let Inst{11-7} = 0;
- let DecoderMethod = "decodeRVCInstrRdSImm6";
-}
-
-def C_LUI_HINT : RVInst16CI<0b011, 0b01, (outs GPRX0:$rd),
- (ins c_lui_imm:$imm),
- "c.lui", "$rd, $imm">,
- Sched<[WriteIALU]> {
- let Inst{11-7} = 0;
- let DecoderMethod = "decodeRVCInstrRdCLUIImm";
-}
-
-def C_MV_HINT : RVInst16CR<0b1000, 0b10, (outs GPRX0:$rs1), (ins GPRNoX0:$rs2),
- "c.mv", "$rs1, $rs2">, Sched<[WriteIALU, ReadIALU]> {
- let Inst{11-7} = 0;
- let DecoderMethod = "decodeRVCInstrRdRs2";
-}
-
-def C_ADD_HINT : RVInst16CR<0b1001, 0b10, (outs GPRX0:$rd),
- (ins GPRX0:$rs1, GPRNoX0:$rs2),
- "c.add", "$rs1, $rs2">,
- Sched<[WriteIALU, ReadIALU, ReadIALU]> {
- let Constraints = "$rs1 = $rd";
- let Inst{11-7} = 0;
- let DecoderMethod = "decodeRVCInstrRdRs1Rs2";
-}
-
-def C_SLLI_HINT : RVInst16CI<0b000, 0b10, (outs GPRX0:$rd_wb),
- (ins GPRX0:$rd, uimmlog2xlennonzero:$imm),
- "c.slli", "$rd, $imm">,
- Sched<[WriteShiftImm, ReadShiftImm]> {
- let Constraints = "$rd = $rd_wb";
- let Inst{11-7} = 0;
- let DecoderMethod = "decodeRVCInstrRdRs1UImmLog2XLenNonZero";
-}
-
-def C_SLLI64_HINT : RVInst16CI<0b000, 0b10, (outs GPR:$rd_wb), (ins GPR:$rd),
- "c.slli64", "$rd">,
- Sched<[WriteShiftImm, ReadShiftImm]> {
- let Constraints = "$rd = $rd_wb";
- let imm = 0;
-}
-
-def C_SRLI64_HINT : RVInst16CB<0b100, 0b01, (outs GPRC:$rd),
- (ins GPRC:$rs1),
- "c.srli64", "$rs1">,
- Sched<[WriteShiftImm, ReadShiftImm]> {
- let Constraints = "$rs1 = $rd";
- let Inst{6-2} = 0;
- let Inst{11-10} = 0b00;
- let Inst{12} = 0;
-}
-
-def C_SRAI64_HINT : RVInst16CB<0b100, 0b01, (outs GPRC:$rd),
- (ins GPRC:$rs1),
- "c.srai64", "$rs1">,
- Sched<[WriteShiftImm, ReadShiftImm]> {
- let Constraints = "$rs1 = $rd";
- let Inst{6-2} = 0;
- let Inst{11-10} = 0b01;
- let Inst{12} = 0;
-}
-
} // Predicates = [HasStdExtZca], hasSideEffects = 0, mayLoad = 0,
// mayStore = 0
@@ -699,15 +620,17 @@ def C_SRAI64_HINT : RVInst16CB<0b100, 0b01, (outs GPRC:$rd),
//===----------------------------------------------------------------------===//
let Predicates = [HasStdExtZca] in {
-// Just a different syntax for the c.nop hint: c.addi x0, simm6 vs c.nop simm6.
-def : InstAlias<"c.addi x0, $imm", (C_NOP_HINT simm6nonzero:$imm), 0>;
+// Legacy aliases.
+def : InstAlias<"c.slli64 $rd", (C_SLLI GPR:$rd, 0), 0>;
+def : InstAlias<"c.srli64 $rs1", (C_SRLI GPRC:$rs1, 0), 0>;
+def : InstAlias<"c.srai64 $rs1", (C_SRAI GPRC:$rs1, 0), 0>;
}
let Predicates = [HasStdExtC, HasStdExtZihintntl] in {
-def : InstAlias<"c.ntl.p1", (C_ADD_HINT X0, X2)>;
-def : InstAlias<"c.ntl.pall", (C_ADD_HINT X0, X3)>;
-def : InstAlias<"c.ntl.s1", (C_ADD_HINT X0, X4)>;
-def : InstAlias<"c.ntl.all", (C_ADD_HINT X0, X5)>;
+def : InstAlias<"c.ntl.p1", (C_ADD X0, X2)>;
+def : InstAlias<"c.ntl.pall", (C_ADD X0, X3)>;
+def : InstAlias<"c.ntl.s1", (C_ADD X0, X4)>;
+def : InstAlias<"c.ntl.all", (C_ADD X0, X5)>;
} // Predicates = [HasStdExtC, HasStdExtZihintntl]
let EmitPriority = 0 in {
diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
index e87f452..ccb39e8 100644
--- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
@@ -268,6 +268,11 @@ def GPRNoX0 : GPRRegisterClass<(sub GPR, X0)> {
let DiagnosticString = "register must be a GPR excluding zero (x0)";
}
+def GPRNoX2 : GPRRegisterClass<(sub GPR, X2)> {
+ let DiagnosticType = "InvalidRegClassGPRNoX2";
+ let DiagnosticString = "register must be a GPR excluding sp (x2)";
+}
+
def GPRNoX0X2 : GPRRegisterClass<(sub GPR, X0, X2)> {
let DiagnosticType = "InvalidRegClassGPRNoX0X2";
let DiagnosticString = "register must be a GPR excluding zero (x0) and sp (x2)";
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index 1624f12a..fd634b5 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -979,12 +979,10 @@ InstructionCost RISCVTTIImpl::getInterleavedMemoryOpCost(
Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
bool UseMaskForCond, bool UseMaskForGaps) const {
- // The interleaved memory access pass will lower (de)interleave ops combined
- // with an adjacent appropriate memory to vlseg/vsseg intrinsics. vlseg/vsseg
- // only support masking per-iteration (i.e. condition), not per-segment (i.e.
- // gap).
- // TODO: Support masked interleaved access for fixed length vector.
- if ((isa<ScalableVectorType>(VecTy) || !UseMaskForCond) && !UseMaskForGaps &&
+ // The interleaved memory access pass will lower interleaved memory ops (i.e
+ // a load and store followed by a specific shuffle) to vlseg/vsseg
+ // intrinsics.
+ if (!UseMaskForCond && !UseMaskForGaps &&
Factor <= TLI->getMaxSupportedInterleaveFactor()) {
auto *VTy = cast<VectorType>(VecTy);
std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(VTy);
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
index 05d504c..d62d99c 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
@@ -398,10 +398,6 @@ public:
bool enableInterleavedAccessVectorization() const override { return true; }
- bool enableMaskedInterleavedAccessVectorization() const override {
- return ST->hasVInstructions();
- }
-
unsigned getMinTripCountTailFoldingThreshold() const override;
enum RISCVRegisterClass { GPRRC, FPRRC, VRRC };
diff --git a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZHLASMAsmStreamer.cpp b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZHLASMAsmStreamer.cpp
index 3ef6030..72bb372 100644
--- a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZHLASMAsmStreamer.cpp
+++ b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZHLASMAsmStreamer.cpp
@@ -69,8 +69,8 @@ void SystemZHLASMAsmStreamer::EmitEOL() {
void SystemZHLASMAsmStreamer::changeSection(MCSection *Section,
uint32_t Subsection) {
- Section->printSwitchToSection(*MAI, getContext().getTargetTriple(), OS,
- Subsection);
+ MAI->printSwitchToSection(*Section, Subsection,
+ getContext().getTargetTriple(), OS);
MCStreamer::changeSection(Section, Subsection);
}
diff --git a/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp b/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp
index 19c9e9c..6ae69a4 100644
--- a/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp
+++ b/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp
@@ -900,7 +900,8 @@ public:
bool checkDataSection() {
if (CurrentState != DataSection) {
- auto *WS = cast<MCSectionWasm>(getStreamer().getCurrentSectionOnly());
+ auto *WS = static_cast<const MCSectionWasm *>(
+ getStreamer().getCurrentSectionOnly());
if (WS && WS->isText())
return error("data directive must occur in a data segment: ",
Lexer.getTok());
@@ -1218,7 +1219,8 @@ public:
void doBeforeLabelEmit(MCSymbol *Symbol, SMLoc IDLoc) override {
// Code below only applies to labels in text sections.
- auto *CWS = cast<MCSectionWasm>(getStreamer().getCurrentSectionOnly());
+ auto *CWS = static_cast<const MCSectionWasm *>(
+ getStreamer().getCurrentSectionOnly());
if (!CWS->isText())
return;
diff --git a/llvm/lib/Target/WebAssembly/WebAssembly.td b/llvm/lib/Target/WebAssembly/WebAssembly.td
index 13603f8..a606209 100644
--- a/llvm/lib/Target/WebAssembly/WebAssembly.td
+++ b/llvm/lib/Target/WebAssembly/WebAssembly.td
@@ -71,6 +71,7 @@ def FeatureReferenceTypes :
SubtargetFeature<"reference-types", "HasReferenceTypes", "true",
"Enable reference types">;
+def FeatureGC : SubtargetFeature<"gc", "HasGC", "true", "Enable wasm gc">;
def FeatureRelaxedSIMD :
SubtargetFeature<"relaxed-simd", "SIMDLevel", "RelaxedSIMD",
"Enable relaxed-simd instructions">;
@@ -136,13 +137,13 @@ def : ProcessorModel<"lime1", NoSchedModel,
// Latest and greatest experimental version of WebAssembly. Bugs included!
def : ProcessorModel<"bleeding-edge", NoSchedModel,
- [FeatureAtomics, FeatureBulkMemory, FeatureBulkMemoryOpt,
- FeatureCallIndirectOverlong, FeatureExceptionHandling,
- FeatureExtendedConst, FeatureFP16, FeatureMultiMemory,
- FeatureMultivalue, FeatureMutableGlobals,
- FeatureNontrappingFPToInt, FeatureRelaxedSIMD,
- FeatureReferenceTypes, FeatureSIMD128, FeatureSignExt,
- FeatureTailCall]>;
+ [FeatureAtomics, FeatureBulkMemory, FeatureBulkMemoryOpt,
+ FeatureCallIndirectOverlong, FeatureExceptionHandling,
+ FeatureExtendedConst, FeatureFP16, FeatureMultiMemory,
+ FeatureMultivalue, FeatureMutableGlobals,
+ FeatureNontrappingFPToInt, FeatureRelaxedSIMD,
+ FeatureReferenceTypes, FeatureGC, FeatureSIMD128,
+ FeatureSignExt, FeatureTailCall]>;
//===----------------------------------------------------------------------===//
// Target Declaration
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index 11936a3..cd434f7 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -288,7 +288,7 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
// Expand float operations supported for scalars but not SIMD
for (auto Op : {ISD::FCOPYSIGN, ISD::FLOG, ISD::FLOG2, ISD::FLOG10,
- ISD::FEXP, ISD::FEXP2})
+ ISD::FEXP, ISD::FEXP2, ISD::FEXP10})
for (auto T : {MVT::v4f32, MVT::v2f64})
setOperationAction(Op, T, Expand);
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td
index b5e723e..2b632fd 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td
@@ -76,6 +76,9 @@ def HasReferenceTypes :
Predicate<"Subtarget->hasReferenceTypes()">,
AssemblerPredicate<(all_of FeatureReferenceTypes), "reference-types">;
+def HasGC : Predicate<"Subtarget->hasGC()">,
+ AssemblerPredicate<(all_of FeatureGC), "gc">;
+
def HasRelaxedSIMD :
Predicate<"Subtarget->hasRelaxedSIMD()">,
AssemblerPredicate<(all_of FeatureRelaxedSIMD), "relaxed-simd">;
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrRef.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrRef.td
index 40b87a0..fc82e5b 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrRef.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrRef.td
@@ -36,13 +36,10 @@ multiclass REF_I<WebAssemblyRegClass rc, ValueType vt, string ht> {
Requires<[HasReferenceTypes]>;
}
-defm REF_TEST_FUNCREF :
- I<(outs I32: $res),
- (ins TypeIndex:$type, FUNCREF: $ref),
- (outs),
- (ins TypeIndex:$type),
- [],
- "ref.test\t$type, $ref", "ref.test $type", 0xfb14>;
+defm REF_TEST_FUNCREF : I<(outs I32:$res), (ins TypeIndex:$type, FUNCREF:$ref),
+ (outs), (ins TypeIndex:$type), [],
+ "ref.test\t$type, $ref", "ref.test $type", 0xfb14>,
+ Requires<[HasGC]>;
defm "" : REF_I<FUNCREF, funcref, "func">;
defm "" : REF_I<EXTERNREF, externref, "extern">;
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.cpp b/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.cpp
index 40ea48a..a3ce40f 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.cpp
@@ -43,6 +43,11 @@ WebAssemblySubtarget::initializeSubtargetDependencies(StringRef CPU,
Bits.set(WebAssembly::FeatureBulkMemoryOpt);
}
+ // gc implies reference-types
+ if (HasGC) {
+ HasReferenceTypes = true;
+ }
+
// reference-types implies call-indirect-overlong
if (HasReferenceTypes) {
HasCallIndirectOverlong = true;
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.h b/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.h
index 591ce256..f814274 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.h
+++ b/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.h
@@ -51,6 +51,7 @@ class WebAssemblySubtarget final : public WebAssemblyGenSubtargetInfo {
bool HasMutableGlobals = false;
bool HasNontrappingFPToInt = false;
bool HasReferenceTypes = false;
+ bool HasGC = false;
bool HasSignExt = false;
bool HasTailCall = false;
bool HasWideArithmetic = false;
@@ -107,6 +108,7 @@ public:
bool hasMutableGlobals() const { return HasMutableGlobals; }
bool hasNontrappingFPToInt() const { return HasNontrappingFPToInt; }
bool hasReferenceTypes() const { return HasReferenceTypes; }
+ bool hasGC() const { return HasGC; }
bool hasRelaxedSIMD() const { return SIMDLevel >= RelaxedSIMD; }
bool hasSignExt() const { return HasSignExt; }
bool hasSIMD128() const { return SIMDLevel >= SIMD128; }
diff --git a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
index 8213e51..d7671ed 100644
--- a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -4803,7 +4803,7 @@ bool X86AsmParser::parseDirectiveEven(SMLoc L) {
getStreamer().initSections(false, getSTI());
Section = getStreamer().getCurrentSectionOnly();
}
- if (Section->useCodeAlign())
+ if (getContext().getAsmInfo()->useCodeAlign(*Section))
getStreamer().emitCodeAlignment(Align(2), &getSTI(), 0);
else
getStreamer().emitValueToAlignment(Align(2), 0, 1, 0);
diff --git a/llvm/lib/Target/X86/X86PassRegistry.def b/llvm/lib/Target/X86/X86PassRegistry.def
index 620526ff..3f2a433 100644
--- a/llvm/lib/Target/X86/X86PassRegistry.def
+++ b/llvm/lib/Target/X86/X86PassRegistry.def
@@ -12,8 +12,52 @@
// NOTE: NO INCLUDE GUARD DESIRED!
+#ifndef DUMMY_FUNCTION_PASS
+#define DUMMY_FUNCTION_PASS(NAME, CREATE_PASS)
+#endif
+DUMMY_FUNCTION_PASS("lower-amx-intrinsics", X86LowerAMXIntrinsics(*this))
+DUMMY_FUNCTION_PASS("lower-amx-type", X86LowerAMXTypePass(*this))
+DUMMY_FUNCTION_PASS("x86-partial-reduction", X86PartialReduction())
+DUMMY_FUNCTION_PASS("x86-winehstate", WinEHStatePass())
+#undef DUMMY_FUNCTION_PASS
+
#ifndef MACHINE_FUNCTION_PASS
#define MACHINE_FUNCTION_PASS(NAME, CREATE_PASS)
#endif
MACHINE_FUNCTION_PASS("x86-isel", X86ISelDAGToDAGPass(*this))
#undef MACHINE_FUNCTION_PASS
+
+#ifndef DUMMY_MACHINE_FUNCTION_PASS
+#define DUMMY_MACHINE_FUNCTION_PASS(NAME, PASS_NAME)
+#endif
+DUMMY_MACHINE_FUNCTION_PASS("x86-avoid-SFB", X86AvoidSFBPass())
+DUMMY_MACHINE_FUNCTION_PASS("x86-avoid-trailing-call", X86AvoidTrailingCallPass())
+DUMMY_MACHINE_FUNCTION_PASS("x86-cf-opt", X86CallFrameOptimization())
+DUMMY_MACHINE_FUNCTION_PASS("x86-cmov-conversion", X86CmovConverterPass())
+DUMMY_MACHINE_FUNCTION_PASS("x86-codege", FPS())
+DUMMY_MACHINE_FUNCTION_PASS("x86-compress-evex", CompressEVEXPass())
+DUMMY_MACHINE_FUNCTION_PASS("x86-domain-reassignment", X86DomainReassignment())
+DUMMY_MACHINE_FUNCTION_PASS("x86-dyn-alloca-expander", X86DynAllocaExpander())
+DUMMY_MACHINE_FUNCTION_PASS("x86-execution-domain-fix", X86ExecutionDomainFix())
+DUMMY_MACHINE_FUNCTION_PASS("fastpretileconfig", X86FastPreTileConfig())
+DUMMY_MACHINE_FUNCTION_PASS("fasttileconfig", X86FastTileConfig())
+DUMMY_MACHINE_FUNCTION_PASS("x86-fixup-LEAs", FixupLEAPass())
+DUMMY_MACHINE_FUNCTION_PASS("x86-fixup-bw-inst", FixupBWInstPass())
+DUMMY_MACHINE_FUNCTION_PASS("x86-fixup-inst-tuning", X86FixupInstTuningPass())
+DUMMY_MACHINE_FUNCTION_PASS("x86-fixup-setcc", X86FixupSetCCPass())
+DUMMY_MACHINE_FUNCTION_PASS("x86-fixup-vector-constants", X86FixupVectorConstantsPass())
+DUMMY_MACHINE_FUNCTION_PASS("x86-flags-copy-lowering", X86FlagsCopyLoweringPass())
+DUMMY_MACHINE_FUNCTION_PASS("x86-lower-tile-copy", X86LowerTileCopy())
+DUMMY_MACHINE_FUNCTION_PASS("x86-lvi-load", X86LoadValueInjectionLoadHardeningPass())
+DUMMY_MACHINE_FUNCTION_PASS("x86-lvi-ret", X86LoadValueInjectionRetHardeningPass())
+DUMMY_MACHINE_FUNCTION_PASS("x86-optimize-LEAs", X86OptimizeLEAPass())
+DUMMY_MACHINE_FUNCTION_PASS("x86-pseudo", X86ExpandPseudo())
+DUMMY_MACHINE_FUNCTION_PASS("x86-return-thunks", X86ReturnThunks())
+DUMMY_MACHINE_FUNCTION_PASS("x86-seses", X86SpeculativeExecutionSideEffectSuppression())
+DUMMY_MACHINE_FUNCTION_PASS("x86-slh", X86SpeculativeLoadHardeningPass())
+DUMMY_MACHINE_FUNCTION_PASS("x86-suppress-apx-for-relocation", X86SuppressAPXForRelocationPass())
+DUMMY_MACHINE_FUNCTION_PASS("tile-pre-config", X86PreTileConfig())
+DUMMY_MACHINE_FUNCTION_PASS("tileconfig", X86TileConfig())
+DUMMY_MACHINE_FUNCTION_PASS("x86-wineh-unwindv2", X86WinEHUnwindV2())
+DUMMY_MACHINE_FUNCTION_PASS("x86argumentstackrebase", X86ArgumentStackSlotPass())
+#undef DUMMY_MACHINE_FUNCTION_PASS
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index 37a7b37..90791fc 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -1838,14 +1838,15 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
return LT.first * *KindCost;
static const CostKindTblEntry AVX512BWShuffleTbl[] = {
- { TTI::SK_Broadcast, MVT::v32i16, { 1, 1, 1, 1 } }, // vpbroadcastw
- { TTI::SK_Broadcast, MVT::v32f16, { 1, 1, 1, 1 } }, // vpbroadcastw
- { TTI::SK_Broadcast, MVT::v64i8, { 1, 1, 1, 1 } }, // vpbroadcastb
+ { TTI::SK_Broadcast, MVT::v32i16, { 1, 3, 1, 1 } }, // vpbroadcastw
+ { TTI::SK_Broadcast, MVT::v32f16, { 1, 3, 1, 1 } }, // vpbroadcastw
+ { TTI::SK_Broadcast, MVT::v64i8, { 1, 3, 1, 1 } }, // vpbroadcastb
- { TTI::SK_Reverse, MVT::v32i16, { 2, 2, 2, 2 } }, // vpermw
- { TTI::SK_Reverse, MVT::v32f16, { 2, 2, 2, 2 } }, // vpermw
+ { TTI::SK_Reverse, MVT::v32i16, { 2, 6, 2, 4 } }, // vpermw
+ { TTI::SK_Reverse, MVT::v32f16, { 2, 6, 2, 4 } }, // vpermw
{ TTI::SK_Reverse, MVT::v16i16, { 2, 2, 2, 2 } }, // vpermw
- { TTI::SK_Reverse, MVT::v64i8, { 2, 2, 2, 2 } }, // pshufb + vshufi64x2
+ { TTI::SK_Reverse, MVT::v16f16, { 2, 2, 2, 2 } }, // vpermw
+ { TTI::SK_Reverse, MVT::v64i8, { 2, 9, 2, 3 } }, // pshufb + vshufi64x2
{ TTI::SK_PermuteSingleSrc, MVT::v32i16, { 2, 2, 2, 2 } }, // vpermw
{ TTI::SK_PermuteSingleSrc, MVT::v32f16, { 2, 2, 2, 2 } }, // vpermw
@@ -1874,18 +1875,25 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
return LT.first * *KindCost;
static const CostKindTblEntry AVX512ShuffleTbl[] = {
- {TTI::SK_Broadcast, MVT::v8f64, { 1, 1, 1, 1 } }, // vbroadcastsd
- {TTI::SK_Broadcast, MVT::v16f32, { 1, 1, 1, 1 } }, // vbroadcastss
- {TTI::SK_Broadcast, MVT::v8i64, { 1, 1, 1, 1 } }, // vpbroadcastq
- {TTI::SK_Broadcast, MVT::v16i32, { 1, 1, 1, 1 } }, // vpbroadcastd
- {TTI::SK_Broadcast, MVT::v32i16, { 1, 1, 1, 1 } }, // vpbroadcastw
- {TTI::SK_Broadcast, MVT::v32f16, { 1, 1, 1, 1 } }, // vpbroadcastw
- {TTI::SK_Broadcast, MVT::v64i8, { 1, 1, 1, 1 } }, // vpbroadcastb
-
- {TTI::SK_Reverse, MVT::v8f64, { 1, 3, 1, 1 } }, // vpermpd
- {TTI::SK_Reverse, MVT::v16f32, { 1, 3, 1, 1 } }, // vpermps
- {TTI::SK_Reverse, MVT::v8i64, { 1, 3, 1, 1 } }, // vpermq
- {TTI::SK_Reverse, MVT::v16i32, { 1, 3, 1, 1 } }, // vpermd
+ {TTI::SK_Broadcast, MVT::v8f64, { 1, 3, 1, 1 } }, // vbroadcastsd
+ {TTI::SK_Broadcast, MVT::v4f64, { 1, 3, 1, 1 } }, // vbroadcastsd
+ {TTI::SK_Broadcast, MVT::v16f32, { 1, 3, 1, 1 } }, // vbroadcastss
+ {TTI::SK_Broadcast, MVT::v8f32, { 1, 3, 1, 1 } }, // vbroadcastss
+ {TTI::SK_Broadcast, MVT::v8i64, { 1, 3, 1, 1 } }, // vpbroadcastq
+ {TTI::SK_Broadcast, MVT::v4i64, { 1, 3, 1, 1 } }, // vpbroadcastq
+ {TTI::SK_Broadcast, MVT::v16i32, { 1, 3, 1, 1 } }, // vpbroadcastd
+ {TTI::SK_Broadcast, MVT::v8i32, { 1, 3, 1, 1 } }, // vpbroadcastd
+ {TTI::SK_Broadcast, MVT::v32i16, { 1, 3, 1, 1 } }, // vpbroadcastw
+ {TTI::SK_Broadcast, MVT::v16i16, { 1, 3, 1, 1 } }, // vpbroadcastw
+ {TTI::SK_Broadcast, MVT::v32f16, { 1, 3, 1, 1 } }, // vpbroadcastw
+ {TTI::SK_Broadcast, MVT::v16f16, { 1, 3, 1, 1 } }, // vpbroadcastw
+ {TTI::SK_Broadcast, MVT::v64i8, { 1, 3, 1, 1 } }, // vpbroadcastb
+ {TTI::SK_Broadcast, MVT::v32i8, { 1, 3, 1, 1 }}, // vpbroadcastb
+
+ {TTI::SK_Reverse, MVT::v8f64, { 1, 5, 2, 3 } }, // vpermpd
+ {TTI::SK_Reverse, MVT::v16f32, { 1, 3, 2, 3 } }, // vpermps
+ {TTI::SK_Reverse, MVT::v8i64, { 1, 5, 2, 3 } }, // vpermq
+ {TTI::SK_Reverse, MVT::v16i32, { 1, 3, 2, 3 } }, // vpermd
{TTI::SK_Reverse, MVT::v32i16, { 7, 7, 7, 7 } }, // per mca
{TTI::SK_Reverse, MVT::v32f16, { 7, 7, 7, 7 } }, // per mca
{TTI::SK_Reverse, MVT::v64i8, { 7, 7, 7, 7 } }, // per mca
@@ -1973,21 +1981,24 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
return LT.first * *KindCost;
static const CostKindTblEntry AVX2ShuffleTbl[] = {
- { TTI::SK_Broadcast, MVT::v4f64, { 1, 1, 1, 1 } }, // vbroadcastpd
- { TTI::SK_Broadcast, MVT::v8f32, { 1, 1, 1, 1 } }, // vbroadcastps
- { TTI::SK_Broadcast, MVT::v4i64, { 1, 1, 1, 1 } }, // vpbroadcastq
- { TTI::SK_Broadcast, MVT::v8i32, { 1, 1, 1, 1 } }, // vpbroadcastd
- { TTI::SK_Broadcast, MVT::v16i16, { 1, 1, 1, 1 } }, // vpbroadcastw
- { TTI::SK_Broadcast, MVT::v16f16, { 1, 1, 1, 1 } }, // vpbroadcastw
- { TTI::SK_Broadcast, MVT::v32i8, { 1, 1, 1, 1 } }, // vpbroadcastb
-
- { TTI::SK_Reverse, MVT::v4f64, { 1, 1, 1, 1 } }, // vpermpd
- { TTI::SK_Reverse, MVT::v8f32, { 1, 1, 1, 1 } }, // vpermps
- { TTI::SK_Reverse, MVT::v4i64, { 1, 1, 1, 1 } }, // vpermq
- { TTI::SK_Reverse, MVT::v8i32, { 1, 1, 1, 1 } }, // vpermd
- { TTI::SK_Reverse, MVT::v16i16, { 2, 2, 2, 2 } }, // vperm2i128 + pshufb
- { TTI::SK_Reverse, MVT::v16f16, { 2, 2, 2, 2 } }, // vperm2i128 + pshufb
- { TTI::SK_Reverse, MVT::v32i8, { 2, 2, 2, 2 } }, // vperm2i128 + pshufb
+ { TTI::SK_Broadcast, MVT::v4f64, { 1, 3, 1, 2 } }, // vbroadcastpd
+ { TTI::SK_Broadcast, MVT::v8f32, { 1, 3, 1, 2 } }, // vbroadcastps
+ { TTI::SK_Broadcast, MVT::v4i64, { 1, 3, 1, 2 } }, // vpbroadcastq
+ { TTI::SK_Broadcast, MVT::v8i32, { 1, 3, 1, 2 } }, // vpbroadcastd
+ { TTI::SK_Broadcast, MVT::v16i16, { 1, 3, 1, 2 } }, // vpbroadcastw
+ { TTI::SK_Broadcast, MVT::v8i16, { 1, 3, 1, 1 } }, // vpbroadcastw
+ { TTI::SK_Broadcast, MVT::v16f16, { 1, 3, 1, 2 } }, // vpbroadcastw
+ { TTI::SK_Broadcast, MVT::v8f16, { 1, 3, 1, 1 } }, // vpbroadcastw
+ { TTI::SK_Broadcast, MVT::v32i8, { 1, 3, 1, 2 } }, // vpbroadcastb
+ { TTI::SK_Broadcast, MVT::v16i8, { 1, 3, 1, 1 } }, // vpbroadcastb
+
+ { TTI::SK_Reverse, MVT::v4f64, { 1, 6, 1, 2 } }, // vpermpd
+ { TTI::SK_Reverse, MVT::v8f32, { 2, 7, 2, 4 } }, // vpermps
+ { TTI::SK_Reverse, MVT::v4i64, { 1, 6, 1, 2 } }, // vpermq
+ { TTI::SK_Reverse, MVT::v8i32, { 2, 7, 2, 4 } }, // vpermd
+ { TTI::SK_Reverse, MVT::v16i16, { 2, 9, 2, 4 } }, // vperm2i128 + pshufb
+ { TTI::SK_Reverse, MVT::v16f16, { 2, 9, 2, 4 } }, // vperm2i128 + pshufb
+ { TTI::SK_Reverse, MVT::v32i8, { 2, 9, 2, 4 } }, // vperm2i128 + pshufb
{ TTI::SK_Select, MVT::v16i16, { 1, 1, 1, 1 } }, // vpblendvb
{ TTI::SK_Select, MVT::v16f16, { 1, 1, 1, 1 } }, // vpblendvb
@@ -2077,23 +2088,23 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
return LT.first * *KindCost;
static const CostKindTblEntry AVX1ShuffleTbl[] = {
- {TTI::SK_Broadcast, MVT::v4f64, {2,2,2,2}}, // vperm2f128 + vpermilpd
- {TTI::SK_Broadcast, MVT::v8f32, {2,2,2,2}}, // vperm2f128 + vpermilps
- {TTI::SK_Broadcast, MVT::v4i64, {2,2,2,2}}, // vperm2f128 + vpermilpd
- {TTI::SK_Broadcast, MVT::v8i32, {2,2,2,2}}, // vperm2f128 + vpermilps
- {TTI::SK_Broadcast, MVT::v16i16, {3,3,3,3}}, // vpshuflw + vpshufd + vinsertf128
- {TTI::SK_Broadcast, MVT::v16f16, {3,3,3,3}}, // vpshuflw + vpshufd + vinsertf128
- {TTI::SK_Broadcast, MVT::v32i8, {2,2,2,2}}, // vpshufb + vinsertf128
-
- {TTI::SK_Reverse, MVT::v4f64, {2,2,2,2}}, // vperm2f128 + vpermilpd
- {TTI::SK_Reverse, MVT::v8f32, {2,2,2,2}}, // vperm2f128 + vpermilps
- {TTI::SK_Reverse, MVT::v4i64, {2,2,2,2}}, // vperm2f128 + vpermilpd
- {TTI::SK_Reverse, MVT::v8i32, {2,2,2,2}}, // vperm2f128 + vpermilps
- {TTI::SK_Reverse, MVT::v16i16, {4,4,4,4}}, // vextractf128 + 2*pshufb
+ {TTI::SK_Broadcast, MVT::v4f64, {2,3,2,3}}, // vperm2f128 + vpermilpd
+ {TTI::SK_Broadcast, MVT::v8f32, {2,3,2,3}}, // vperm2f128 + vpermilps
+ {TTI::SK_Broadcast, MVT::v4i64, {2,3,2,3}}, // vperm2f128 + vpermilpd
+ {TTI::SK_Broadcast, MVT::v8i32, {2,3,2,3}}, // vperm2f128 + vpermilps
+ {TTI::SK_Broadcast, MVT::v16i16, {2,3,3,4}}, // vpshuflw + vpshufd + vinsertf128
+ {TTI::SK_Broadcast, MVT::v16f16, {2,3,3,4}}, // vpshuflw + vpshufd + vinsertf128
+ {TTI::SK_Broadcast, MVT::v32i8, {3,4,3,6}}, // vpshufb + vinsertf128
+
+ {TTI::SK_Reverse, MVT::v4f64, {2,6,2,2}}, // vperm2f128 + vpermilpd
+ {TTI::SK_Reverse, MVT::v8f32, {2,7,2,4}}, // vperm2f128 + vpermilps
+ {TTI::SK_Reverse, MVT::v4i64, {2,6,2,2}}, // vperm2f128 + vpermilpd
+ {TTI::SK_Reverse, MVT::v8i32, {2,7,2,4}}, // vperm2f128 + vpermilps
+ {TTI::SK_Reverse, MVT::v16i16, {2,9,5,5}}, // vextractf128 + 2*pshufb
// + vinsertf128
- {TTI::SK_Reverse, MVT::v16f16, {4,4,4,4}}, // vextractf128 + 2*pshufb
+ {TTI::SK_Reverse, MVT::v16f16, {2,9,5,5}}, // vextractf128 + 2*pshufb
// + vinsertf128
- {TTI::SK_Reverse, MVT::v32i8, {4,4,4,4}}, // vextractf128 + 2*pshufb
+ {TTI::SK_Reverse, MVT::v32i8, {2,9,5,5}}, // vextractf128 + 2*pshufb
// + vinsertf128
{TTI::SK_Select, MVT::v4i64, {1,1,1,1}}, // vblendpd
@@ -2156,13 +2167,13 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
return LT.first * *KindCost;
static const CostKindTblEntry SSSE3ShuffleTbl[] = {
- {TTI::SK_Broadcast, MVT::v8i16, {1, 1, 1, 1}}, // pshufb
- {TTI::SK_Broadcast, MVT::v8f16, {1, 1, 1, 1}}, // pshufb
- {TTI::SK_Broadcast, MVT::v16i8, {1, 1, 1, 1}}, // pshufb
+ {TTI::SK_Broadcast, MVT::v8i16, {1, 3, 2, 2}}, // pshufb
+ {TTI::SK_Broadcast, MVT::v8f16, {1, 3, 2, 2}}, // pshufb
+ {TTI::SK_Broadcast, MVT::v16i8, {1, 3, 2, 2}}, // pshufb
- {TTI::SK_Reverse, MVT::v8i16, {1, 1, 1, 1}}, // pshufb
- {TTI::SK_Reverse, MVT::v8f16, {1, 1, 1, 1}}, // pshufb
- {TTI::SK_Reverse, MVT::v16i8, {1, 1, 1, 1}}, // pshufb
+ {TTI::SK_Reverse, MVT::v8i16, {1, 2, 1, 2}}, // pshufb
+ {TTI::SK_Reverse, MVT::v8f16, {1, 2, 1, 2}}, // pshufb
+ {TTI::SK_Reverse, MVT::v16i8, {1, 2, 1, 2}}, // pshufb
{TTI::SK_Select, MVT::v8i16, {3, 3, 3, 3}}, // 2*pshufb + por
{TTI::SK_Select, MVT::v8f16, {3, 3, 3, 3}}, // 2*pshufb + por
@@ -2192,16 +2203,16 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
{TTI::SK_Broadcast, MVT::v2f64, {1, 1, 1, 1}}, // shufpd
{TTI::SK_Broadcast, MVT::v2i64, {1, 1, 1, 1}}, // pshufd
{TTI::SK_Broadcast, MVT::v4i32, {1, 1, 1, 1}}, // pshufd
- {TTI::SK_Broadcast, MVT::v8i16, {2, 2, 2, 2}}, // pshuflw + pshufd
- {TTI::SK_Broadcast, MVT::v8f16, {2, 2, 2, 2}}, // pshuflw + pshufd
- {TTI::SK_Broadcast, MVT::v16i8, {3, 3, 3, 3}}, // unpck + pshuflw + pshufd
+ {TTI::SK_Broadcast, MVT::v8i16, {1, 2, 2, 2}}, // pshuflw + pshufd
+ {TTI::SK_Broadcast, MVT::v8f16, {1, 2, 2, 2}}, // pshuflw + pshufd
+ {TTI::SK_Broadcast, MVT::v16i8, {2, 3, 3, 4}}, // unpck + pshuflw + pshufd
{TTI::SK_Reverse, MVT::v2f64, {1, 1, 1, 1}}, // shufpd
{TTI::SK_Reverse, MVT::v2i64, {1, 1, 1, 1}}, // pshufd
{TTI::SK_Reverse, MVT::v4i32, {1, 1, 1, 1}}, // pshufd
- {TTI::SK_Reverse, MVT::v8i16, {3, 3, 3, 3}}, // pshuflw + pshufhw + pshufd
- {TTI::SK_Reverse, MVT::v8f16, {3, 3, 3, 3}}, // pshuflw + pshufhw + pshufd
- {TTI::SK_Reverse, MVT::v16i8, {9, 9, 9, 9}}, // 2*pshuflw + 2*pshufhw
+ {TTI::SK_Reverse, MVT::v8i16, {2, 3, 3, 3}}, // pshuflw + pshufhw + pshufd
+ {TTI::SK_Reverse, MVT::v8f16, {2, 3, 3, 3}}, // pshuflw + pshufhw + pshufd
+ {TTI::SK_Reverse, MVT::v16i8, {5, 6,11,11}}, // 2*pshuflw + 2*pshufhw
// + 2*pshufd + 2*unpck + packus
{TTI::SK_Select, MVT::v2i64, {1, 1, 1, 1}}, // movsd
diff --git a/llvm/lib/TargetParser/RISCVISAInfo.cpp b/llvm/lib/TargetParser/RISCVISAInfo.cpp
index 17c9833..d6afb8a 100644
--- a/llvm/lib/TargetParser/RISCVISAInfo.cpp
+++ b/llvm/lib/TargetParser/RISCVISAInfo.cpp
@@ -858,16 +858,15 @@ void RISCVISAInfo::updateImplication() {
StringRef ExtName = WorkList.pop_back_val();
auto Range = std::equal_range(std::begin(ImpliedExts),
std::end(ImpliedExts), ExtName);
- std::for_each(Range.first, Range.second,
- [&](const ImpliedExtsEntry &Implied) {
- const char *ImpliedExt = Implied.ImpliedExt;
- auto [It, Inserted] = Exts.try_emplace(ImpliedExt);
- if (!Inserted)
- return;
- auto Version = findDefaultVersion(ImpliedExt);
- It->second = *Version;
- WorkList.push_back(ImpliedExt);
- });
+ for (const ImpliedExtsEntry &Implied : llvm::make_range(Range)) {
+ const char *ImpliedExt = Implied.ImpliedExt;
+ auto [It, Inserted] = Exts.try_emplace(ImpliedExt);
+ if (!Inserted)
+ continue;
+ auto Version = findDefaultVersion(ImpliedExt);
+ It->second = *Version;
+ WorkList.push_back(ImpliedExt);
+ }
}
// Add Zcd if C and D are enabled.
diff --git a/llvm/lib/TextAPI/SymbolSet.cpp b/llvm/lib/TextAPI/SymbolSet.cpp
index 2e0b416..f21a061 100644
--- a/llvm/lib/TextAPI/SymbolSet.cpp
+++ b/llvm/lib/TextAPI/SymbolSet.cpp
@@ -11,6 +11,11 @@
using namespace llvm;
using namespace llvm::MachO;
+SymbolSet::~SymbolSet() {
+ for (auto &[Key, Sym] : Symbols)
+ Sym->~Symbol();
+}
+
Symbol *SymbolSet::addGlobalImpl(EncodeKind Kind, StringRef Name,
SymbolFlags Flags) {
Name = copyString(Name);
diff --git a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
index 0164fcd..81de0a5 100644
--- a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
+++ b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
@@ -97,6 +97,8 @@ STATISTIC(MissingAllocForContextId,
"Number of missing alloc nodes for context ids");
STATISTIC(SkippedCallsCloning,
"Number of calls skipped during cloning due to unexpected operand");
+STATISTIC(MismatchedCloneAssignments,
+ "Number of callsites assigned to call multiple non-matching clones");
static cl::opt<std::string> DotFilePathPrefix(
"memprof-dot-file-path-prefix", cl::init(""), cl::Hidden,
@@ -2060,6 +2062,20 @@ static bool isMemProfClone(const Function &F) {
return F.getName().contains(MemProfCloneSuffix);
}
+// Return the clone number of the given function by extracting it from the
+// memprof suffix. Assumes the caller has already confirmed it is a memprof
+// clone.
+static unsigned getMemProfCloneNum(const Function &F) {
+ assert(isMemProfClone(F));
+ auto Pos = F.getName().find_last_of('.');
+ assert(Pos > 0);
+ unsigned CloneNo;
+ bool Err = F.getName().drop_front(Pos + 1).getAsInteger(10, CloneNo);
+ assert(!Err);
+ (void)Err;
+ return CloneNo;
+}
+
std::string ModuleCallsiteContextGraph::getLabel(const Function *Func,
const Instruction *Call,
unsigned CloneNo) const {
@@ -3979,7 +3995,22 @@ IndexCallsiteContextGraph::getAllocationCallType(const CallInfo &Call) const {
void ModuleCallsiteContextGraph::updateCall(CallInfo &CallerCall,
FuncInfo CalleeFunc) {
- if (CalleeFunc.cloneNo() > 0)
+ auto *CurF = cast<CallBase>(CallerCall.call())->getCalledFunction();
+ auto NewCalleeCloneNo = CalleeFunc.cloneNo();
+ if (isMemProfClone(*CurF)) {
+ // If we already assigned this callsite to call a specific non-default
+ // clone (i.e. not the original function which is clone 0), ensure that we
+ // aren't trying to now update it to call a different clone, which is
+ // indicative of a bug in the graph or function assignment.
+ auto CurCalleeCloneNo = getMemProfCloneNum(*CurF);
+ if (CurCalleeCloneNo != NewCalleeCloneNo) {
+ LLVM_DEBUG(dbgs() << "Mismatch in call clone assignment: was "
+ << CurCalleeCloneNo << " now " << NewCalleeCloneNo
+ << "\n");
+ MismatchedCloneAssignments++;
+ }
+ }
+ if (NewCalleeCloneNo > 0)
cast<CallBase>(CallerCall.call())->setCalledFunction(CalleeFunc.func());
OREGetter(CallerCall.call()->getFunction())
.emit(OptimizationRemark(DEBUG_TYPE, "MemprofCall", CallerCall.call())
@@ -3995,7 +4026,19 @@ void IndexCallsiteContextGraph::updateCall(CallInfo &CallerCall,
assert(CI &&
"Caller cannot be an allocation which should not have profiled calls");
assert(CI->Clones.size() > CallerCall.cloneNo());
- CI->Clones[CallerCall.cloneNo()] = CalleeFunc.cloneNo();
+ auto NewCalleeCloneNo = CalleeFunc.cloneNo();
+ auto &CurCalleeCloneNo = CI->Clones[CallerCall.cloneNo()];
+ // If we already assigned this callsite to call a specific non-default
+ // clone (i.e. not the original function which is clone 0), ensure that we
+ // aren't trying to now update it to call a different clone, which is
+ // indicative of a bug in the graph or function assignment.
+ if (CurCalleeCloneNo != 0 && CurCalleeCloneNo != NewCalleeCloneNo) {
+ LLVM_DEBUG(dbgs() << "Mismatch in call clone assignment: was "
+ << CurCalleeCloneNo << " now " << NewCalleeCloneNo
+ << "\n");
+ MismatchedCloneAssignments++;
+ }
+ CurCalleeCloneNo = NewCalleeCloneNo;
}
// Update the debug information attached to NewFunc to use the clone Name. Note
@@ -4703,6 +4746,19 @@ bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() {
// where the callers were assigned to different clones of a function.
}
+ auto FindFirstAvailFuncClone = [&]() {
+ // Find first function in FuncClonesToCallMap without an assigned
+ // clone of this callsite Node. We should always have one
+ // available at this point due to the earlier cloning when the
+ // FuncClonesToCallMap size was smaller than the clone number.
+ for (auto &CF : FuncClonesToCallMap) {
+ if (!FuncCloneToCurNodeCloneMap.count(CF.first))
+ return CF.first;
+ }
+ assert(false &&
+ "Expected an available func clone for this callsite clone");
+ };
+
// See if we can use existing function clone. Walk through
// all caller edges to see if any have already been assigned to
// a clone of this callsite's function. If we can use it, do so. If not,
@@ -4819,16 +4875,7 @@ bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() {
// clone of OrigFunc for another caller during this iteration over
// its caller edges.
if (!FuncCloneAssignedToCurCallsiteClone) {
- // Find first function in FuncClonesToCallMap without an assigned
- // clone of this callsite Node. We should always have one
- // available at this point due to the earlier cloning when the
- // FuncClonesToCallMap size was smaller than the clone number.
- for (auto &CF : FuncClonesToCallMap) {
- if (!FuncCloneToCurNodeCloneMap.count(CF.first)) {
- FuncCloneAssignedToCurCallsiteClone = CF.first;
- break;
- }
- }
+ FuncCloneAssignedToCurCallsiteClone = FindFirstAvailFuncClone();
assert(FuncCloneAssignedToCurCallsiteClone);
// Assign Clone to FuncCloneAssignedToCurCallsiteClone
AssignCallsiteCloneToFuncClone(
@@ -4842,6 +4889,31 @@ bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() {
FuncCloneAssignedToCurCallsiteClone);
}
}
+ // If we didn't assign a function clone to this callsite clone yet, e.g.
+ // none of its callers has a non-null call, do the assignment here.
+ // We want to ensure that every callsite clone is assigned to some
+ // function clone, so that the call updates below work as expected.
+ // In particular if this is the original callsite, we want to ensure it
+ // is assigned to the original function, otherwise the original function
+ // will appear available for assignment to other callsite clones,
+ // leading to unintended effects. For one, the unknown and not updated
+ // callers will call into cloned paths leading to the wrong hints,
+ // because they still call the original function (clone 0). Also,
+ // because all callsites start out as being clone 0 by default, we can't
+ // easily distinguish between callsites explicitly assigned to clone 0
+ // vs those never assigned, which can lead to multiple updates of the
+ // calls when invoking updateCall below, with mismatched clone values.
+ // TODO: Add a flag to the callsite nodes or some other mechanism to
+ // better distinguish and identify callsite clones that are not getting
+ // assigned to function clones as expected.
+ if (!FuncCloneAssignedToCurCallsiteClone) {
+ FuncCloneAssignedToCurCallsiteClone = FindFirstAvailFuncClone();
+ assert(FuncCloneAssignedToCurCallsiteClone &&
+ "No available func clone for this callsite clone");
+ AssignCallsiteCloneToFuncClone(
+ FuncCloneAssignedToCurCallsiteClone, Call, Clone,
+ /*IsAlloc=*/AllocationCallToContextNodeMap.contains(Call));
+ }
}
if (VerifyCCG) {
checkNode<DerivedCCG, FuncTy, CallTy>(Node);
diff --git a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
index fe3315e..4e5a8d1 100644
--- a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
@@ -1459,8 +1459,6 @@ void HWAddressSanitizer::instrumentStack(memtag::StackInfo &SInfo,
size_t Size = memtag::getAllocaSizeInBytes(*AI);
size_t AlignedSize = alignTo(Size, Mapping.getObjectAlignment());
- Value *AICast = IRB.CreatePointerCast(AI, PtrTy);
-
auto HandleLifetime = [&](IntrinsicInst *II) {
// Set the lifetime intrinsic to cover the whole alloca. This reduces the
// set of assumptions we need to make about the lifetime. Without this we
@@ -1473,14 +1471,13 @@ void HWAddressSanitizer::instrumentStack(memtag::StackInfo &SInfo,
// one set of start / end in any execution (i.e. the ends are not
// reachable from each other), so this will not cause any problems.
II->setArgOperand(0, ConstantInt::get(Int64Ty, AlignedSize));
- II->setArgOperand(1, AICast);
};
llvm::for_each(Info.LifetimeStart, HandleLifetime);
llvm::for_each(Info.LifetimeEnd, HandleLifetime);
- AI->replaceUsesWithIf(Replacement, [AICast, AILong](const Use &U) {
+ AI->replaceUsesWithIf(Replacement, [AILong](const Use &U) {
auto *User = U.getUser();
- return User != AILong && User != AICast && !isa<LifetimeIntrinsic>(User);
+ return User != AILong && !isa<LifetimeIntrinsic>(User);
});
memtag::annotateDebugRecords(Info, retagMask(N));
diff --git a/llvm/lib/Transforms/ObjCARC/CMakeLists.txt b/llvm/lib/Transforms/ObjCARC/CMakeLists.txt
index 80867db..4274667 100644
--- a/llvm/lib/Transforms/ObjCARC/CMakeLists.txt
+++ b/llvm/lib/Transforms/ObjCARC/CMakeLists.txt
@@ -2,7 +2,6 @@ add_llvm_component_library(LLVMObjCARCOpts
ObjCARC.cpp
ObjCARCOpts.cpp
ObjCARCExpand.cpp
- ObjCARCAPElim.cpp
ObjCARCContract.cpp
DependencyAnalysis.cpp
ProvenanceAnalysis.cpp
diff --git a/llvm/lib/Transforms/ObjCARC/ObjCARCAPElim.cpp b/llvm/lib/Transforms/ObjCARC/ObjCARCAPElim.cpp
deleted file mode 100644
index dceb2eb..0000000
--- a/llvm/lib/Transforms/ObjCARC/ObjCARCAPElim.cpp
+++ /dev/null
@@ -1,156 +0,0 @@
-//===- ObjCARCAPElim.cpp - ObjC ARC Optimization --------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-/// \file
-///
-/// This file defines ObjC ARC optimizations. ARC stands for Automatic
-/// Reference Counting and is a system for managing reference counts for objects
-/// in Objective C.
-///
-/// This specific file implements optimizations which remove extraneous
-/// autorelease pools.
-///
-/// WARNING: This file knows about certain library functions. It recognizes them
-/// by name, and hardwires knowledge of their semantics.
-///
-/// WARNING: This file knows about how certain Objective-C library functions are
-/// used. Naive LLVM IR transformations which would otherwise be
-/// behavior-preserving may break these assumptions.
-///
-//===----------------------------------------------------------------------===//
-
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/Analysis/ObjCARCAnalysisUtils.h"
-#include "llvm/Analysis/ObjCARCInstKind.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/InstrTypes.h"
-#include "llvm/IR/PassManager.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/ObjCARC.h"
-
-using namespace llvm;
-using namespace llvm::objcarc;
-
-#define DEBUG_TYPE "objc-arc-ap-elim"
-
-namespace {
-
-/// Interprocedurally determine if calls made by the given call site can
-/// possibly produce autoreleases.
-bool MayAutorelease(const CallBase &CB, unsigned Depth = 0) {
- if (const Function *Callee = CB.getCalledFunction()) {
- if (!Callee->hasExactDefinition())
- return true;
- for (const BasicBlock &BB : *Callee) {
- for (const Instruction &I : BB)
- if (const CallBase *JCB = dyn_cast<CallBase>(&I))
- // This recursion depth limit is arbitrary. It's just great
- // enough to cover known interesting testcases.
- if (Depth < 3 && !JCB->onlyReadsMemory() &&
- MayAutorelease(*JCB, Depth + 1))
- return true;
- }
- return false;
- }
-
- return true;
-}
-
-bool OptimizeBB(BasicBlock *BB) {
- bool Changed = false;
-
- Instruction *Push = nullptr;
- for (Instruction &Inst : llvm::make_early_inc_range(*BB)) {
- switch (GetBasicARCInstKind(&Inst)) {
- case ARCInstKind::AutoreleasepoolPush:
- Push = &Inst;
- break;
- case ARCInstKind::AutoreleasepoolPop:
- // If this pop matches a push and nothing in between can autorelease,
- // zap the pair.
- if (Push && cast<CallInst>(&Inst)->getArgOperand(0) == Push) {
- Changed = true;
- LLVM_DEBUG(dbgs() << "ObjCARCAPElim::OptimizeBB: Zapping push pop "
- "autorelease pair:\n"
- " Pop: "
- << Inst << "\n"
- << " Push: " << *Push
- << "\n");
- Inst.eraseFromParent();
- Push->eraseFromParent();
- }
- Push = nullptr;
- break;
- case ARCInstKind::CallOrUser:
- if (MayAutorelease(cast<CallBase>(Inst)))
- Push = nullptr;
- break;
- default:
- break;
- }
- }
-
- return Changed;
-}
-
-bool runImpl(Module &M) {
- if (!EnableARCOpts)
- return false;
-
- // If nothing in the Module uses ARC, don't do anything.
- if (!ModuleHasARC(M))
- return false;
- // Find the llvm.global_ctors variable, as the first step in
- // identifying the global constructors. In theory, unnecessary autorelease
- // pools could occur anywhere, but in practice it's pretty rare. Global
- // ctors are a place where autorelease pools get inserted automatically,
- // so it's pretty common for them to be unnecessary, and it's pretty
- // profitable to eliminate them.
- GlobalVariable *GV = M.getGlobalVariable("llvm.global_ctors");
- if (!GV)
- return false;
-
- assert(GV->hasDefinitiveInitializer() &&
- "llvm.global_ctors is uncooperative!");
-
- bool Changed = false;
-
- // Dig the constructor functions out of GV's initializer.
- ConstantArray *Init = cast<ConstantArray>(GV->getInitializer());
- for (User::op_iterator OI = Init->op_begin(), OE = Init->op_end();
- OI != OE; ++OI) {
- Value *Op = *OI;
- // llvm.global_ctors is an array of three-field structs where the second
- // members are constructor functions.
- Function *F = dyn_cast<Function>(cast<ConstantStruct>(Op)->getOperand(1));
- // If the user used a constructor function with the wrong signature and
- // it got bitcasted or whatever, look the other way.
- if (!F)
- continue;
- // Only look at function definitions.
- if (F->isDeclaration())
- continue;
- // Only look at functions with one basic block.
- if (std::next(F->begin()) != F->end())
- continue;
- // Ok, a single-block constructor function definition. Try to optimize it.
- Changed |= OptimizeBB(&F->front());
- }
-
- return Changed;
-}
-
-} // namespace
-
-PreservedAnalyses ObjCARCAPElimPass::run(Module &M, ModuleAnalysisManager &AM) {
- if (!runImpl(M))
- return PreservedAnalyses::all();
- PreservedAnalyses PA;
- PA.preserveSet<CFGAnalyses>();
- return PA;
-}
diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 84d1c0b..9220abb 100644
--- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -1593,11 +1593,8 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
// since both llvm.lifetime.start and llvm.lifetime.end intrinsics
// practically fill all the bytes of the alloca with an undefined
// value, although conceptually marked as alive/dead.
- int64_t Size = cast<ConstantInt>(UI->getOperand(0))->getSExtValue();
- if (Size < 0 || Size == DestSize) {
- LifetimeMarkers.push_back(UI);
- continue;
- }
+ LifetimeMarkers.push_back(UI);
+ continue;
}
AAMetadataInstrs.insert(UI);
@@ -1614,9 +1611,8 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
return true;
};
- // Check that dest has no Mod/Ref, from the alloca to the Store, except full
- // size lifetime intrinsics. And collect modref inst for the reachability
- // check.
+ // Check that dest has no Mod/Ref, from the alloca to the Store. And collect
+ // modref inst for the reachability check.
ModRefInfo DestModRef = ModRefInfo::NoModRef;
MemoryLocation DestLoc(DestAlloca, LocationSize::precise(Size));
SmallVector<BasicBlock *, 8> ReachabilityWorklist;
diff --git a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
index a69d649..44e63a0 100644
--- a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
+++ b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
@@ -19,6 +19,7 @@
#include "llvm/Analysis/RegionInfo.h"
#include "llvm/Analysis/RegionIterator.h"
#include "llvm/Analysis/RegionPass.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/UniformityAnalysis.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
@@ -128,6 +129,7 @@ struct PredInfo {
using BBPredicates = DenseMap<BasicBlock *, PredInfo>;
using PredMap = DenseMap<BasicBlock *, BBPredicates>;
using BB2BBMap = DenseMap<BasicBlock *, BasicBlock *>;
+using Val2BBMap = DenseMap<Value *, BasicBlock *>;
// A traits type that is intended to be used in graph algorithms. The graph
// traits starts at an entry node, and traverses the RegionNodes that are in
@@ -279,7 +281,7 @@ class StructurizeCFG {
ConstantInt *BoolTrue;
ConstantInt *BoolFalse;
Value *BoolPoison;
-
+ const TargetTransformInfo *TTI;
Function *Func;
Region *ParentRegion;
@@ -301,8 +303,12 @@ class StructurizeCFG {
PredMap LoopPreds;
BranchVector LoopConds;
+ Val2BBMap HoistedValues;
+
RegionNode *PrevNode;
+ void hoistZeroCostElseBlockPhiValues(BasicBlock *ElseBB, BasicBlock *ThenBB);
+
void orderNodes();
void analyzeLoops(RegionNode *N);
@@ -332,6 +338,8 @@ class StructurizeCFG {
void simplifyAffectedPhis();
+ void simplifyHoistedPhis();
+
DebugLoc killTerminator(BasicBlock *BB);
void changeExit(RegionNode *Node, BasicBlock *NewExit,
@@ -359,7 +367,7 @@ class StructurizeCFG {
public:
void init(Region *R);
- bool run(Region *R, DominatorTree *DT);
+ bool run(Region *R, DominatorTree *DT, const TargetTransformInfo *TTI);
bool makeUniformRegion(Region *R, UniformityInfo &UA);
};
@@ -385,8 +393,11 @@ public:
if (SCFG.makeUniformRegion(R, UA))
return false;
}
+ Function *F = R->getEntry()->getParent();
+ const TargetTransformInfo *TTI =
+ &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(*F);
DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- return SCFG.run(R, DT);
+ return SCFG.run(R, DT, TTI);
}
StringRef getPassName() const override { return "Structurize control flow"; }
@@ -394,7 +405,9 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override {
if (SkipUniformRegions)
AU.addRequired<UniformityInfoWrapperPass>();
+ AU.addRequired<TargetTransformInfoWrapperPass>();
AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addRequired<TargetTransformInfoWrapperPass>();
AU.addPreserved<DominatorTreeWrapperPass>();
RegionPass::getAnalysisUsage(AU);
@@ -403,6 +416,34 @@ public:
} // end anonymous namespace
+/// Checks whether an instruction is zero cost instruction and checks if the
+/// operands are from different BB. If so, this instruction can be coalesced
+/// if its hoisted to predecessor block. So, this returns true.
+static bool isHoistableInstruction(Instruction *I, BasicBlock *BB,
+ const TargetTransformInfo *TTI) {
+ if (I->getParent() != BB || isa<PHINode>(I))
+ return false;
+
+ // If the instruction is not a zero cost instruction, return false.
+ auto Cost = TTI->getInstructionCost(I, TargetTransformInfo::TCK_Latency);
+ InstructionCost::CostType CostVal =
+ Cost.isValid()
+ ? Cost.getValue()
+ : (InstructionCost::CostType)TargetTransformInfo::TCC_Expensive;
+ if (CostVal != 0)
+ return false;
+
+ // Check if any operands are instructions defined in the same block.
+ for (auto &Op : I->operands()) {
+ if (auto *OpI = dyn_cast<Instruction>(Op)) {
+ if (OpI->getParent() == BB)
+ return false;
+ }
+ }
+
+ return true;
+}
+
char StructurizeCFGLegacyPass::ID = 0;
INITIALIZE_PASS_BEGIN(StructurizeCFGLegacyPass, "structurizecfg",
@@ -413,6 +454,39 @@ INITIALIZE_PASS_DEPENDENCY(RegionInfoPass)
INITIALIZE_PASS_END(StructurizeCFGLegacyPass, "structurizecfg",
"Structurize the CFG", false, false)
+/// Structurization can introduce unnecessary VGPR copies due to register
+/// coalescing interference. For example, if the Else block has a zero-cost
+/// instruction and the Then block modifies the VGPR value, only one value is
+/// live at a time in merge block before structurization. After structurization,
+/// the coalescer may incorrectly treat the Then value as live in the Else block
+/// (via the path Then → Flow → Else), leading to unnecessary VGPR copies.
+///
+/// This function examines phi nodes whose incoming values are zero-cost
+/// instructions in the Else block. It identifies such values that can be safely
+/// hoisted and moves them to the nearest common dominator of Then and Else
+/// blocks. A follow-up function after setting PhiNodes assigns the hoisted
+/// value to poison phi nodes along the if→flow edge, aiding register coalescing
+/// and minimizing unnecessary live ranges.
+void StructurizeCFG::hoistZeroCostElseBlockPhiValues(BasicBlock *ElseBB,
+ BasicBlock *ThenBB) {
+
+ BasicBlock *ElseSucc = ElseBB->getSingleSuccessor();
+ BasicBlock *CommonDominator = DT->findNearestCommonDominator(ElseBB, ThenBB);
+
+ if (!ElseSucc || !CommonDominator)
+ return;
+ Instruction *Term = CommonDominator->getTerminator();
+ for (PHINode &Phi : ElseSucc->phis()) {
+ Value *ElseVal = Phi.getIncomingValueForBlock(ElseBB);
+ auto *Inst = dyn_cast<Instruction>(ElseVal);
+ if (!Inst || !isHoistableInstruction(Inst, ElseBB, TTI))
+ continue;
+ Inst->removeFromParent();
+ Inst->insertInto(CommonDominator, Term->getIterator());
+ HoistedValues[Inst] = CommonDominator;
+ }
+}
+
/// Build up the general order of nodes, by performing a topological sort of the
/// parent region's nodes, while ensuring that there is no outer cycle node
/// between any two inner cycle nodes.
@@ -535,7 +609,7 @@ void StructurizeCFG::gatherPredicates(RegionNode *N) {
BasicBlock *Other = Term->getSuccessor(!i);
if (Visited.count(Other) && !Loops.count(Other) &&
!Pred.count(Other) && !Pred.count(P)) {
-
+ hoistZeroCostElseBlockPhiValues(Succ, Other);
Pred[Other] = {BoolFalse, std::nullopt};
Pred[P] = {BoolTrue, std::nullopt};
continue;
@@ -891,6 +965,44 @@ void StructurizeCFG::setPhiValues() {
AffectedPhis.append(InsertedPhis.begin(), InsertedPhis.end());
}
+/// Updates PHI nodes after hoisted zero cost instructions by replacing poison
+/// entries on Flow nodes with the appropriate hoisted values
+void StructurizeCFG::simplifyHoistedPhis() {
+ for (WeakVH VH : AffectedPhis) {
+ PHINode *Phi = dyn_cast_or_null<PHINode>(VH);
+ if (!Phi || Phi->getNumIncomingValues() != 2)
+ continue;
+
+ for (int i = 0; i < 2; i++) {
+ Value *V = Phi->getIncomingValue(i);
+ auto BBIt = HoistedValues.find(V);
+
+ if (BBIt == HoistedValues.end())
+ continue;
+
+ Value *OtherV = Phi->getIncomingValue(!i);
+ PHINode *OtherPhi = dyn_cast<PHINode>(OtherV);
+ if (!OtherPhi)
+ continue;
+
+ int PoisonValBBIdx = -1;
+ for (size_t i = 0; i < OtherPhi->getNumIncomingValues(); i++) {
+ if (!isa<PoisonValue>(OtherPhi->getIncomingValue(i)))
+ continue;
+ PoisonValBBIdx = i;
+ break;
+ }
+ if (PoisonValBBIdx == -1 ||
+ !DT->dominates(BBIt->second,
+ OtherPhi->getIncomingBlock(PoisonValBBIdx)))
+ continue;
+
+ OtherPhi->setIncomingValue(PoisonValBBIdx, V);
+ Phi->setIncomingValue(i, OtherV);
+ }
+ }
+}
+
void StructurizeCFG::simplifyAffectedPhis() {
bool Changed;
do {
@@ -1283,12 +1395,13 @@ bool StructurizeCFG::makeUniformRegion(Region *R, UniformityInfo &UA) {
}
/// Run the transformation for each region found
-bool StructurizeCFG::run(Region *R, DominatorTree *DT) {
+bool StructurizeCFG::run(Region *R, DominatorTree *DT,
+ const TargetTransformInfo *TTI) {
if (R->isTopLevelRegion())
return false;
this->DT = DT;
-
+ this->TTI = TTI;
Func = R->getEntry()->getParent();
assert(hasOnlySimpleTerminator(*Func) && "Unsupported block terminator.");
@@ -1300,6 +1413,7 @@ bool StructurizeCFG::run(Region *R, DominatorTree *DT) {
insertConditions(false);
insertConditions(true);
setPhiValues();
+ simplifyHoistedPhis();
simplifyConditions();
simplifyAffectedPhis();
rebuildSSA();
@@ -1349,7 +1463,7 @@ PreservedAnalyses StructurizeCFGPass::run(Function &F,
bool Changed = false;
DominatorTree *DT = &AM.getResult<DominatorTreeAnalysis>(F);
auto &RI = AM.getResult<RegionInfoAnalysis>(F);
-
+ TargetTransformInfo *TTI = &AM.getResult<TargetIRAnalysis>(F);
UniformityInfo *UI = nullptr;
if (SkipUniformRegions)
UI = &AM.getResult<UniformityInfoAnalysis>(F);
@@ -1368,7 +1482,7 @@ PreservedAnalyses StructurizeCFGPass::run(Function &F,
continue;
}
- Changed |= SCFG.run(R, DT);
+ Changed |= SCFG.run(R, DT, TTI);
}
if (!Changed)
return PreservedAnalyses::all();
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 9c9201c..6616e61f 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -1359,9 +1359,7 @@ public:
return;
// Override EVL styles if needed.
// FIXME: Investigate opportunity for fixed vector factor.
- // FIXME: Support interleave accesses.
bool EVLIsLegal = UserIC <= 1 && IsScalableVF &&
- !InterleaveInfo.hasGroups() &&
TTI.hasActiveVectorLength() && !EnableVPlanNativePath;
if (EVLIsLegal)
return;
@@ -2023,6 +2021,9 @@ public:
/// Retrieves the MemCheckCond and MemCheckBlock that were generated as IR
/// outside VPlan.
std::pair<Value *, BasicBlock *> getMemRuntimeChecks() {
+ using namespace llvm::PatternMatch;
+ if (MemRuntimeCheckCond && match(MemRuntimeCheckCond, m_ZeroInt()))
+ return {nullptr, nullptr};
return {MemRuntimeCheckCond, MemCheckBlock};
}
@@ -7278,6 +7279,7 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
VPBasicBlock *VectorPH = cast<VPBasicBlock>(BestVPlan.getVectorPreheader());
VPlanTransforms::optimizeForVFAndUF(BestVPlan, BestVF, BestUF, PSE);
VPlanTransforms::simplifyRecipes(BestVPlan, *Legal->getWidestInductionType());
+ VPlanTransforms::removeBranchOnConst(BestVPlan);
VPlanTransforms::narrowInterleaveGroups(
BestVPlan, BestVF,
TTI.getRegisterBitWidth(TargetTransformInfo::RGK_FixedWidthVector));
@@ -10074,12 +10076,6 @@ bool LoopVectorizePass::processLoop(Loop *L) {
// Get user vectorization factor and interleave count.
ElementCount UserVF = Hints.getWidth();
unsigned UserIC = Hints.getInterleave();
- if (LVL.hasUncountableEarlyExit() && UserIC != 1) {
- UserIC = 1;
- reportVectorizationInfo("Interleaving not supported for loops "
- "with uncountable early exits",
- "InterleaveEarlyExitDisabled", ORE, L);
- }
// Plan how to best vectorize.
LVP.plan(UserVF, UserIC);
@@ -10097,9 +10093,20 @@ bool LoopVectorizePass::processLoop(Loop *L) {
unsigned SelectedIC = std::max(IC, UserIC);
// Optimistically generate runtime checks if they are needed. Drop them if
// they turn out to not be profitable.
- if (VF.Width.isVector() || SelectedIC > 1)
+ if (VF.Width.isVector() || SelectedIC > 1) {
Checks.create(L, *LVL.getLAI(), PSE.getPredicate(), VF.Width, SelectedIC);
+ // Bail out early if either the SCEV or memory runtime checks are known to
+ // fail. In that case, the vector loop would never execute.
+ using namespace llvm::PatternMatch;
+ if (Checks.getSCEVChecks().first &&
+ match(Checks.getSCEVChecks().first, m_One()))
+ return false;
+ if (Checks.getMemRuntimeChecks().first &&
+ match(Checks.getMemRuntimeChecks().first, m_One()))
+ return false;
+ }
+
// Check if it is profitable to vectorize with runtime checks.
bool ForceVectorization =
Hints.getForce() == LoopVectorizeHints::FK_Enabled;
@@ -10230,6 +10237,11 @@ bool LoopVectorizePass::processLoop(Loop *L) {
L, PSE, LI, DT, TLI, TTI, AC, ORE, ElementCount::getFixed(1),
ElementCount::getFixed(1), IC, &CM, BFI, PSI, Checks, BestPlan);
+ // TODO: Move to general VPlan pipeline once epilogue loops are also
+ // supported.
+ VPlanTransforms::runPass(VPlanTransforms::materializeVectorTripCount,
+ BestPlan, VF.Width, IC, PSE);
+
LVP.executePlan(VF.Width, IC, BestPlan, Unroller, DT, false);
ORE->emit([&]() {
@@ -10297,6 +10309,11 @@ bool LoopVectorizePass::processLoop(Loop *L) {
InnerLoopVectorizer LB(L, PSE, LI, DT, TLI, TTI, AC, ORE, VF.Width,
VF.MinProfitableTripCount, IC, &CM, BFI, PSI,
Checks, BestPlan);
+ // TODO: Move to general VPlan pipeline once epilogue loops are also
+ // supported.
+ VPlanTransforms::runPass(VPlanTransforms::materializeVectorTripCount,
+ BestPlan, VF.Width, IC, PSE);
+
LVP.executePlan(VF.Width, IC, BestPlan, LB, DT, false);
++LoopsVectorized;
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 0d0b342..593868f 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -206,6 +206,12 @@ static cl::opt<bool> VectorizeNonPowerOf2(
"slp-vectorize-non-power-of-2", cl::init(false), cl::Hidden,
cl::desc("Try to vectorize with non-power-of-2 number of elements."));
+/// Enables vectorization of copyable elements.
+static cl::opt<bool> VectorizeCopyableElements(
+ "slp-copyable-elements", cl::init(true), cl::Hidden,
+ cl::desc("Try to replace values with the idempotent instructions for "
+ "better vectorization."));
+
// Limit the number of alias checks. The limit is chosen so that
// it has no negative effect on the llvm benchmarks.
static const unsigned AliasedCheckLimit = 10;
@@ -855,6 +861,13 @@ static std::optional<unsigned> getExtractIndex(const Instruction *E) {
return *EI->idx_begin();
}
+namespace llvm {
+/// Checks if the specified value does not require scheduling. It does not
+/// require scheduling if all operands and all users do not need to be scheduled
+/// in the current basic block.
+static bool doesNotNeedToBeScheduled(Value *V);
+} // namespace llvm
+
namespace {
/// \returns true if \p Opcode is allowed as part of the main/alternate
/// instruction for SLP vectorization.
@@ -957,6 +970,33 @@ class BinOpSameOpcodeHelper {
return Instruction::Xor;
llvm_unreachable("Cannot find interchangeable instruction.");
}
+
+ /// Return true if the instruction can be converted to \p Opcode.
+ bool hasCandidateOpcode(unsigned Opcode) const {
+ MaskType Candidate = Mask & SeenBefore;
+ switch (Opcode) {
+ case Instruction::Shl:
+ return Candidate & ShlBIT;
+ case Instruction::AShr:
+ return Candidate & AShrBIT;
+ case Instruction::Mul:
+ return Candidate & MulBIT;
+ case Instruction::Add:
+ return Candidate & AddBIT;
+ case Instruction::Sub:
+ return Candidate & SubBIT;
+ case Instruction::And:
+ return Candidate & AndBIT;
+ case Instruction::Or:
+ return Candidate & OrBIT;
+ case Instruction::Xor:
+ return Candidate & XorBIT;
+ default:
+ break;
+ }
+ llvm_unreachable("Cannot find interchangeable instruction.");
+ }
+
SmallVector<Value *> getOperand(const Instruction *To) const {
unsigned ToOpcode = To->getOpcode();
unsigned FromOpcode = I->getOpcode();
@@ -1117,6 +1157,10 @@ public:
AltOp.trySet(OpcodeInMaskForm, InterchangeableMask));
}
unsigned getMainOpcode() const { return MainOp.getOpcode(); }
+ /// Checks if the list of potential opcodes includes \p Opcode.
+ bool hasCandidateOpcode(unsigned Opcode) const {
+ return MainOp.hasCandidateOpcode(Opcode);
+ }
bool hasAltOp() const { return AltOp.I; }
unsigned getAltOpcode() const {
return hasAltOp() ? AltOp.getOpcode() : getMainOpcode();
@@ -1152,6 +1196,8 @@ class InstructionsState {
/// GetVectorCost.
Instruction *MainOp = nullptr;
Instruction *AltOp = nullptr;
+ /// Wether the instruction state represents copyable instructions.
+ bool HasCopyables = false;
public:
Instruction *getMainOp() const {
@@ -1190,9 +1236,11 @@ public:
if (!I->isBinaryOp())
return nullptr;
BinOpSameOpcodeHelper Converter(MainOp);
- if (Converter.add(I) && Converter.add(MainOp) && !Converter.hasAltOp())
- return MainOp;
- return AltOp;
+ if (!Converter.add(I) || !Converter.add(MainOp))
+ return nullptr;
+ if (Converter.hasAltOp() && !isAltShuffle())
+ return nullptr;
+ return Converter.hasAltOp() ? AltOp : MainOp;
}
/// Checks if main/alt instructions are shift operations.
@@ -1237,9 +1285,63 @@ public:
explicit operator bool() const { return valid(); }
InstructionsState() = delete;
- InstructionsState(Instruction *MainOp, Instruction *AltOp)
- : MainOp(MainOp), AltOp(AltOp) {}
+ InstructionsState(Instruction *MainOp, Instruction *AltOp,
+ bool HasCopyables = false)
+ : MainOp(MainOp), AltOp(AltOp), HasCopyables(HasCopyables) {}
static InstructionsState invalid() { return {nullptr, nullptr}; }
+
+ bool isCopyableElement(Value *V) const {
+ assert(valid() && "InstructionsState is invalid.");
+ if (!HasCopyables)
+ return false;
+ if (isAltShuffle() || getOpcode() == Instruction::GetElementPtr)
+ return false;
+ auto *I = dyn_cast<Instruction>(V);
+ if (!I)
+ return !isa<PoisonValue>(V);
+ if (I->getParent() != MainOp->getParent() &&
+ (!isVectorLikeInstWithConstOps(I) ||
+ !isVectorLikeInstWithConstOps(MainOp)))
+ return true;
+ if (I->getOpcode() == MainOp->getOpcode())
+ return false;
+ if (!I->isBinaryOp())
+ return true;
+ BinOpSameOpcodeHelper Converter(MainOp);
+ return !Converter.add(I) || !Converter.add(MainOp) ||
+ Converter.hasAltOp() || !Converter.hasCandidateOpcode(getOpcode());
+ }
+
+ /// Checks if the value is non-schedulable.
+ bool isNonSchedulable(Value *V) const {
+ assert(valid() && "InstructionsState is invalid.");
+ auto *I = dyn_cast<Instruction>(V);
+ if (!HasCopyables)
+ return !I || isa<PHINode>(I) || isVectorLikeInstWithConstOps(I) ||
+ doesNotNeedToBeScheduled(V);
+ // MainOp for copyables always schedulable to correctly identify
+ // non-schedulable copyables.
+ if (isCopyableElement(V)) {
+ auto IsNonSchedulableCopyableElement = [this](Value *V) {
+ auto *I = dyn_cast<Instruction>(V);
+ return !I || isa<PHINode>(I) || I->getParent() != MainOp->getParent() ||
+ (doesNotNeedToBeScheduled(I) &&
+ // If the copyable instructions comes after MainOp
+ // (non-schedulable, but used in the block) - cannot vectorize
+ // it, will possibly generate use before def.
+ (isVectorLikeInstWithConstOps(I) || !MainOp->comesBefore(I)));
+ };
+
+ return IsNonSchedulableCopyableElement(V);
+ }
+ return !I || isa<PHINode>(I) || isVectorLikeInstWithConstOps(I) ||
+ doesNotNeedToBeScheduled(V);
+ }
+
+ bool areInstructionsWithCopyableElements() const {
+ assert(valid() && "InstructionsState is invalid.");
+ return HasCopyables;
+ }
};
std::pair<Instruction *, SmallVector<Value *>>
@@ -1917,6 +2019,7 @@ public:
CompressEntryToData.clear();
ExternalUses.clear();
ExternalUsesAsOriginalScalar.clear();
+ ExternalUsesWithNonUsers.clear();
for (auto &Iter : BlocksSchedules) {
BlockScheduling *BS = Iter.second.get();
BS->clear();
@@ -2899,9 +3002,6 @@ public:
for (OperandDataVec &Ops : OpsVec)
Ops.resize(NumLanes);
for (unsigned Lane : seq<unsigned>(NumLanes)) {
- Value *V = VL[Lane];
- assert((isa<Instruction>(V) || isa<PoisonValue>(V)) &&
- "Expected instruction or poison value");
// Our tree has just 3 nodes: the root and two operands.
// It is therefore trivial to get the APO. We only need to check the
// opcode of V and whether the operand at OpIdx is the LHS or RHS
@@ -2912,17 +3012,24 @@ public:
// Since operand reordering is performed on groups of commutative
// operations or alternating sequences (e.g., +, -), we can safely tell
// the inverse operations by checking commutativity.
- if (isa<PoisonValue>(V)) {
+ auto *I = dyn_cast<Instruction>(VL[Lane]);
+ if (!I && isa<PoisonValue>(VL[Lane])) {
for (unsigned OpIdx : seq<unsigned>(NumOperands))
OpsVec[OpIdx][Lane] = {Operands[OpIdx][Lane], true, false};
continue;
}
- auto [SelectedOp, Ops] = convertTo(cast<Instruction>(V), S);
- // We cannot check commutativity by the converted instruction
- // (SelectedOp) because isCommutative also examines def-use
- // relationships.
- bool IsInverseOperation =
- !isCommutative(SelectedOp, cast<Instruction>(V));
+ bool IsInverseOperation = false;
+ if (S.isCopyableElement(VL[Lane])) {
+ // The value is a copyable element.
+ IsInverseOperation = !isCommutative(MainOp);
+ } else {
+ assert(I && "Expected instruction");
+ auto [SelectedOp, Ops] = convertTo(I, S);
+ // We cannot check commutativity by the converted instruction
+ // (SelectedOp) because isCommutative also examines def-use
+ // relationships.
+ IsInverseOperation = !isCommutative(SelectedOp, I);
+ }
for (unsigned OpIdx : seq<unsigned>(ArgSize)) {
bool APO = (OpIdx == 0) ? false : IsInverseOperation;
OpsVec[OpIdx][Lane] = {Operands[OpIdx][Lane], APO, false};
@@ -3792,6 +3899,9 @@ private:
/// reordering of operands during buildTreeRec() and vectorizeTree().
SmallVector<ValueList, 2> Operands;
+ /// Copyable elements of the entry node.
+ SmallPtrSet<const Value *, 4> CopyableElements;
+
/// MainOp and AltOp are recorded inside. S should be obtained from
/// newTreeEntry.
InstructionsState S = InstructionsState::invalid();
@@ -3820,11 +3930,7 @@ private:
void setInterleave(unsigned Factor) { InterleaveFactor = Factor; }
/// Marks the node as one that does not require scheduling.
- void setDoesNotNeedToSchedule() {
- assert(::doesNotNeedToSchedule(Scalars) &&
- "Expected to not need scheduling");
- DoesNotNeedToSchedule = true;
- }
+ void setDoesNotNeedToSchedule() { DoesNotNeedToSchedule = true; }
/// Returns true if the node is marked as one that does not require
/// scheduling.
bool doesNotNeedToSchedule() const { return DoesNotNeedToSchedule; }
@@ -3896,6 +4002,20 @@ private:
bool hasState() const { return S.valid(); }
+ /// Add \p V to the list of copyable elements.
+ void addCopyableElement(Value *V) {
+ assert(S.isCopyableElement(V) && "Not a copyable element.");
+ CopyableElements.insert(V);
+ }
+
+ /// Returns true if \p V is a copyable element.
+ bool isCopyableElement(Value *V) const {
+ return CopyableElements.contains(V);
+ }
+
+ /// Returns true if any scalar in the list is a copyable element.
+ bool hasCopyableElements() const { return !CopyableElements.empty(); }
+
/// When ReuseReorderShuffleIndices is empty it just returns position of \p
/// V within vector of Scalars. Otherwise, try to remap on its reuse index.
unsigned findLaneForValue(Value *V) const {
@@ -3968,6 +4088,8 @@ private:
for (Value *V : Scalars)
dbgs().indent(2) << *V << "\n";
dbgs() << "State: ";
+ if (S && hasCopyableElements())
+ dbgs() << "[[Copyable]] ";
switch (State) {
case Vectorize:
if (InterleaveFactor > 0) {
@@ -4145,12 +4267,20 @@ private:
}
}
} else if (!Last->isGather()) {
- if (doesNotNeedToSchedule(VL))
+ if (isa<PHINode>(S.getMainOp()) ||
+ isVectorLikeInstWithConstOps(S.getMainOp()) ||
+ (!S.areInstructionsWithCopyableElements() &&
+ doesNotNeedToSchedule(VL)) ||
+ all_of(VL, [&](Value *V) { return S.isNonSchedulable(V); }))
Last->setDoesNotNeedToSchedule();
SmallPtrSet<Value *, 4> Processed;
for (Value *V : VL) {
if (isa<PoisonValue>(V))
continue;
+ if (S.isCopyableElement(V)) {
+ Last->addCopyableElement(V);
+ continue;
+ }
auto It = ScalarToTreeEntries.find(V);
if (It == ScalarToTreeEntries.end()) {
ScalarToTreeEntries.try_emplace(V).first->getSecond().push_back(Last);
@@ -4162,16 +4292,14 @@ private:
}
}
// Update the scheduler bundle to point to this TreeEntry.
- assert((!Bundle.getBundle().empty() || isa<PHINode>(S.getMainOp()) ||
- isVectorLikeInstWithConstOps(S.getMainOp()) ||
- Last->doesNotNeedToSchedule()) &&
+ assert((!Bundle.getBundle().empty() || Last->doesNotNeedToSchedule()) &&
"Bundle and VL out of sync");
if (!Bundle.getBundle().empty()) {
#if !defined(NDEBUG) || defined(EXPENSIVE_CHECKS)
auto *BundleMember = Bundle.getBundle().begin();
SmallPtrSet<Value *, 4> Processed;
for (Value *V : VL) {
- if (doesNotNeedToBeScheduled(V) || !Processed.insert(V).second)
+ if (S.isNonSchedulable(V) || !Processed.insert(V).second)
continue;
++BundleMember;
}
@@ -4280,7 +4408,8 @@ private:
/// in general.
ScalarsVectorizationLegality
getScalarsVectorizationLegality(ArrayRef<Value *> VL, unsigned Depth,
- const EdgeInfo &UserTreeIdx) const;
+ const EdgeInfo &UserTreeIdx,
+ bool TryCopyableElementsVectorization) const;
/// Checks if the specified list of the instructions/values can be vectorized
/// and fills required data before actual scheduling of the instructions.
@@ -4420,6 +4549,10 @@ private:
/// extractelement instructions.
SmallPtrSet<Value *, 4> ExternalUsesAsOriginalScalar;
+ /// A list of scalar to be extracted without specific user necause of too many
+ /// uses.
+ SmallPtrSet<Value *, 4> ExternalUsesWithNonUsers;
+
/// Values used only by @llvm.assume calls.
SmallPtrSet<const Value *, 32> EphValues;
@@ -4996,7 +5129,8 @@ private:
/// Build a bundle from the ScheduleData nodes corresponding to the
/// scalar instruction for each lane.
- ScheduleBundle &buildBundle(ArrayRef<Value *> VL);
+ ScheduleBundle &buildBundle(ArrayRef<Value *> VL,
+ const InstructionsState &S);
/// Checks if a bundle of instructions can be scheduled, i.e. has no
/// cyclic dependencies. This is only a dry-run, no instructions are
@@ -6727,7 +6861,7 @@ BoUpSLP::getReorderingData(const TreeEntry &TE, bool TopToBottom,
return std::move(ResOrder);
}
if (TE.State == TreeEntry::StridedVectorize && !TopToBottom &&
- (!TE.UserTreeIndex ||
+ (!TE.UserTreeIndex || !TE.UserTreeIndex.UserTE->hasState() ||
!Instruction::isBinaryOp(TE.UserTreeIndex.UserTE->getOpcode())) &&
(TE.ReorderIndices.empty() || isReverseOrder(TE.ReorderIndices)))
return std::nullopt;
@@ -7038,10 +7172,11 @@ bool BoUpSLP::isProfitableToReorder() const {
VectorizableTree.front()->getOpcode() == Instruction::ICmp))) &&
VectorizableTree.front()->ReorderIndices.empty()) {
// Check if the tree has only single store and single (unordered) load node,
- // other nodes are phis or geps/binops, combined with phis, and/orsingle
+ // other nodes are phis or geps/binops, combined with phis, and/or single
// gather load node
bool HasPhis = false;
- if (VectorizableTree.front()->getOpcode() == Instruction::PHI &&
+ if (VectorizableTree.front()->hasState() &&
+ VectorizableTree.front()->getOpcode() == Instruction::PHI &&
VectorizableTree.front()->Scalars.size() == TinyVF &&
VectorizableTree.front()->getNumOperands() > PhiOpsLimit)
return false;
@@ -7049,6 +7184,8 @@ bool BoUpSLP::isProfitableToReorder() const {
unsigned GatherLoads = 0;
for (const std::unique_ptr<TreeEntry> &TE :
ArrayRef(VectorizableTree).drop_front()) {
+ if (TE->State == TreeEntry::SplitVectorize)
+ continue;
if (!TE->hasState()) {
if (all_of(TE->Scalars, IsaPred<Constant, PHINode>) ||
all_of(TE->Scalars, IsaPred<BinaryOperator, PHINode>))
@@ -7072,7 +7209,10 @@ bool BoUpSLP::isProfitableToReorder() const {
if (TE->getOpcode() == Instruction::GetElementPtr ||
Instruction::isBinaryOp(TE->getOpcode()))
continue;
- if (TE->getOpcode() != Instruction::PHI)
+ if (TE->getOpcode() != Instruction::PHI &&
+ (!TE->hasCopyableElements() ||
+ static_cast<unsigned>(count_if(TE->Scalars, IsaPred<PHINode>)) <
+ TE->Scalars.size() / 2))
return true;
if (VectorizableTree.front()->Scalars.size() == TinyVF &&
TE->getNumOperands() > PhiOpsLimit)
@@ -7860,7 +8000,8 @@ void BoUpSLP::reorderBottomToTop(bool IgnoreReorder) {
}
Instruction *BoUpSLP::getRootEntryInstruction(const TreeEntry &Entry) const {
- if ((Entry.getOpcode() == Instruction::Store ||
+ if (Entry.hasState() &&
+ (Entry.getOpcode() == Instruction::Store ||
Entry.getOpcode() == Instruction::Load) &&
Entry.State == TreeEntry::StridedVectorize &&
!Entry.ReorderIndices.empty() && isReverseOrder(Entry.ReorderIndices))
@@ -7870,7 +8011,9 @@ Instruction *BoUpSLP::getRootEntryInstruction(const TreeEntry &Entry) const {
void BoUpSLP::buildExternalUses(
const ExtraValueToDebugLocsMap &ExternallyUsedValues) {
+ const size_t NumVectScalars = ScalarToTreeEntries.size() + 1;
DenseMap<Value *, unsigned> ScalarToExtUses;
+ SmallPtrSet<Value *, 4> ExternalUsers;
// Collect the values that we need to extract from the tree.
for (auto &TEPtr : VectorizableTree) {
TreeEntry *Entry = TEPtr.get();
@@ -7882,13 +8025,24 @@ void BoUpSLP::buildExternalUses(
// For each lane:
for (int Lane = 0, LE = Entry->Scalars.size(); Lane != LE; ++Lane) {
Value *Scalar = Entry->Scalars[Lane];
- if (!isa<Instruction>(Scalar))
+ if (!isa<Instruction>(Scalar) || Entry->isCopyableElement(Scalar))
continue;
+
// All uses must be replaced already? No need to do it again.
auto It = ScalarToExtUses.find(Scalar);
if (It != ScalarToExtUses.end() && !ExternalUses[It->second].User)
continue;
+ if (Scalar->hasNUsesOrMore(NumVectScalars)) {
+ unsigned FoundLane = Entry->findLaneForValue(Scalar);
+ LLVM_DEBUG(dbgs() << "SLP: Need to extract from lane " << FoundLane
+ << " from " << *Scalar << "for many users.\n");
+ It = ScalarToExtUses.try_emplace(Scalar, ExternalUses.size()).first;
+ ExternalUses.emplace_back(Scalar, nullptr, *Entry, FoundLane);
+ ExternalUsesWithNonUsers.insert(Scalar);
+ continue;
+ }
+
// Check if the scalar is externally used as an extra arg.
const auto ExtI = ExternallyUsedValues.find(Scalar);
if (ExtI != ExternallyUsedValues.end()) {
@@ -7916,7 +8070,10 @@ void BoUpSLP::buildExternalUses(
// Some in-tree scalars will remain as scalar in vectorized
// instructions. If that is the case, the one in FoundLane will
// be used.
- if (all_of(UseEntries, [&](TreeEntry *UseEntry) {
+ if (!((Scalar->getType()->getScalarType()->isPointerTy() &&
+ isa<LoadInst, StoreInst>(UserInst)) ||
+ isa<CallInst>(UserInst)) ||
+ all_of(UseEntries, [&](TreeEntry *UseEntry) {
return UseEntry->State == TreeEntry::ScatterVectorize ||
!doesInTreeUserNeedToExtract(
Scalar, getRootEntryInstruction(*UseEntry), TLI,
@@ -7946,6 +8103,7 @@ void BoUpSLP::buildExternalUses(
<< ".\n");
It = ScalarToExtUses.try_emplace(Scalar, ExternalUses.size()).first;
ExternalUses.emplace_back(Scalar, U, *Entry, FoundLane);
+ ExternalUsesWithNonUsers.insert(Scalar);
if (!U)
break;
}
@@ -9612,7 +9770,8 @@ static bool tryToFindDuplicates(SmallVectorImpl<Value *> &VL,
PoisonValue::get(UniqueValues.front()->getType()));
// Check that extended with poisons operations are still valid for
// vectorization (div/rem are not allowed).
- if (!getSameOpcode(PaddedUniqueValues, TLI).valid()) {
+ if (!S.areInstructionsWithCopyableElements() &&
+ !getSameOpcode(PaddedUniqueValues, TLI).valid()) {
LLVM_DEBUG(dbgs() << "SLP: Scalar used twice in bundle.\n");
ReuseShuffleIndices.clear();
return false;
@@ -9761,13 +9920,95 @@ bool BoUpSLP::canBuildSplitNode(ArrayRef<Value *> VL,
}
namespace {
-/// Class accepts incoming list of values and generates the list of values
-/// for scheduling and list of operands for the new nodes.
+/// Class accepts incoming list of values, checks if it is able to model
+/// "copyable" values as compatible operations, and generates the list of values
+/// for scheduling and list of operands doe the new nodes.
class InstructionsCompatibilityAnalysis {
DominatorTree &DT;
const DataLayout &DL;
const TargetTransformInfo &TTI;
const TargetLibraryInfo &TLI;
+ unsigned MainOpcode = 0;
+ Instruction *MainOp = nullptr;
+
+ /// Identifies the best candidate value, which represents main opcode
+ /// operation.
+ /// Currently the best candidate is the Add instruction with the parent
+ /// block with the highest DFS incoming number (block, that dominates other).
+ void findAndSetMainInstruction(ArrayRef<Value *> VL) {
+ BasicBlock *Parent = nullptr;
+ // Checks if the instruction has supported opcode.
+ auto IsSupportedOpcode = [](Instruction *I) {
+ return I && I->getOpcode() == Instruction::Add;
+ };
+ SmallDenseSet<Value *, 8> Operands;
+ for (Value *V : VL) {
+ auto *I = dyn_cast<Instruction>(V);
+ if (!I)
+ continue;
+ if (!DT.isReachableFromEntry(I->getParent()))
+ continue;
+ if (!MainOp) {
+ MainOp = I;
+ Parent = I->getParent();
+ Operands.insert(I->op_begin(), I->op_end());
+ continue;
+ }
+ if (Parent == I->getParent()) {
+ if (!IsSupportedOpcode(MainOp))
+ MainOp = I;
+ if (MainOp->getOpcode() == I->getOpcode() &&
+ doesNotNeedToBeScheduled(MainOp) && !doesNotNeedToBeScheduled(I))
+ MainOp = I;
+ Operands.insert(I->op_begin(), I->op_end());
+ continue;
+ }
+ auto *NodeA = DT.getNode(Parent);
+ auto *NodeB = DT.getNode(I->getParent());
+ assert(NodeA && "Should only process reachable instructions");
+ assert(NodeB && "Should only process reachable instructions");
+ assert((NodeA == NodeB) ==
+ (NodeA->getDFSNumIn() == NodeB->getDFSNumIn()) &&
+ "Different nodes should have different DFS numbers");
+ if (NodeA->getDFSNumIn() < NodeB->getDFSNumIn()) {
+ MainOp = I;
+ Parent = I->getParent();
+ Operands.clear();
+ Operands.insert(I->op_begin(), I->op_end());
+ }
+ }
+ if (!IsSupportedOpcode(MainOp) || Operands.contains(MainOp)) {
+ MainOp = nullptr;
+ return;
+ }
+ MainOpcode = MainOp->getOpcode();
+ }
+
+ /// Returns the idempotent value for the \p MainOp with the detected \p
+ /// MainOpcode. For Add, returns 0. For Or, it should choose between false and
+ /// the operand itself, since V or V == V.
+ Value *selectBestIdempotentValue() const {
+ assert(MainOpcode == Instruction::Add && "Unsupported opcode");
+ return ConstantExpr::getBinOpIdentity(MainOpcode, MainOp->getType(),
+ !MainOp->isCommutative());
+ }
+
+ /// Returns the value and operands for the \p V, considering if it is original
+ /// instruction and its actual operands should be returned, or it is a
+ /// copyable element and its should be represented as idempotent instruction.
+ SmallVector<Value *> getOperands(const InstructionsState &S, Value *V) const {
+ if (isa<PoisonValue>(V))
+ return {V, V};
+ if (!S.isCopyableElement(V))
+ return convertTo(cast<Instruction>(V), S).second;
+ switch (MainOpcode) {
+ case Instruction::Add:
+ return {V, selectBestIdempotentValue()};
+ default:
+ break;
+ }
+ llvm_unreachable("Unsupported opcode");
+ }
/// Builds operands for the original instructions.
void
@@ -9928,22 +10169,165 @@ public:
const TargetLibraryInfo &TLI)
: DT(DT), DL(DL), TTI(TTI), TLI(TLI) {}
+ InstructionsState
+ buildInstructionsState(ArrayRef<Value *> VL, const BoUpSLP &R,
+ bool TryCopyableElementsVectorization,
+ bool WithProfitabilityCheck = false,
+ bool SkipSameCodeCheck = false) {
+ InstructionsState S = (SkipSameCodeCheck || !allSameBlock(VL))
+ ? InstructionsState::invalid()
+ : getSameOpcode(VL, TLI);
+ if (S)
+ return S;
+ if (!VectorizeCopyableElements || !TryCopyableElementsVectorization)
+ return S;
+ findAndSetMainInstruction(VL);
+ if (!MainOp)
+ return InstructionsState::invalid();
+ S = InstructionsState(MainOp, MainOp, /*HasCopyables=*/true);
+ // TODO: Remove this check once support for schulable copyables is landed.
+ if (any_of(VL, [&](Value *V) {
+ return S.isCopyableElement(V) && !S.isNonSchedulable(V);
+ }))
+ return InstructionsState::invalid();
+
+ if (!WithProfitabilityCheck)
+ return S;
+ // Check if it is profitable to vectorize the instruction.
+ SmallVector<BoUpSLP::ValueList> Operands = buildOperands(S, VL);
+ auto BuildCandidates =
+ [](SmallVectorImpl<std::pair<Value *, Value *>> &Candidates, Value *V1,
+ Value *V2) {
+ if (V1 != V2 && isa<PHINode>(V1))
+ return;
+ auto *I1 = dyn_cast<Instruction>(V1);
+ auto *I2 = dyn_cast<Instruction>(V2);
+ if (I1 && I2 && I1->getOpcode() == I2->getOpcode() &&
+ I1->getParent() != I2->getParent())
+ return;
+ Candidates.emplace_back(V1, (I1 || I2) ? V2 : V1);
+ };
+ if (VL.size() == 2) {
+ // Check if the operands allow better vectorization.
+ SmallVector<std::pair<Value *, Value *>, 4> Candidates1, Candidates2;
+ BuildCandidates(Candidates1, Operands[0][0], Operands[0][1]);
+ BuildCandidates(Candidates2, Operands[1][0], Operands[1][1]);
+ bool Res = !Candidates1.empty() && !Candidates2.empty() &&
+ R.findBestRootPair(Candidates1) &&
+ R.findBestRootPair(Candidates2);
+ if (!Res && isCommutative(MainOp)) {
+ Candidates1.clear();
+ Candidates2.clear();
+ BuildCandidates(Candidates1, Operands[0][0], Operands[1][1]);
+ BuildCandidates(Candidates2, Operands[1][0], Operands[0][1]);
+ Res = !Candidates1.empty() && !Candidates2.empty() &&
+ R.findBestRootPair(Candidates1) &&
+ R.findBestRootPair(Candidates2);
+ }
+ if (!Res)
+ return InstructionsState::invalid();
+ return S;
+ }
+ assert(Operands.size() == 2 && "Unexpected number of operands!");
+ unsigned CopyableNum =
+ count_if(VL, [&](Value *V) { return S.isCopyableElement(V); });
+ if (CopyableNum < VL.size() / 2)
+ return S;
+ // Too many phi copyables - exit.
+ const unsigned Limit = VL.size() / 24;
+ if ((CopyableNum >= VL.size() - Limit ||
+ (CopyableNum >= VL.size() - 1 && VL.size() > 4) ||
+ CopyableNum >= MaxPHINumOperands) &&
+ all_of(VL, [&](Value *V) {
+ return isa<PHINode>(V) || !S.isCopyableElement(V);
+ }))
+ return InstructionsState::invalid();
+ // Check profitability if number of copyables > VL.size() / 2.
+ // 1. Reorder operands for better matching.
+ if (isCommutative(MainOp)) {
+ for (auto &Ops : Operands) {
+ // Make instructions the first operands.
+ if (!isa<Instruction>(Ops.front()) && isa<Instruction>(Ops.back())) {
+ std::swap(Ops.front(), Ops.back());
+ continue;
+ }
+ // Make constants the second operands.
+ if (isa<Constant>(Ops.front())) {
+ std::swap(Ops.front(), Ops.back());
+ continue;
+ }
+ }
+ }
+ // 2. Check, if operands can be vectorized.
+ if (count_if(Operands.back(), IsaPred<Instruction>) > 1)
+ return InstructionsState::invalid();
+ auto CheckOperand = [&](ArrayRef<Value *> Ops) {
+ if (allConstant(Ops) || isSplat(Ops))
+ return true;
+ // Check if it is "almost" splat, i.e. has >= 4 elements and only single
+ // one is different.
+ constexpr unsigned Limit = 4;
+ if (Operands.front().size() >= Limit) {
+ SmallDenseMap<const Value *, unsigned> Counters;
+ for (Value *V : Ops) {
+ if (isa<UndefValue>(V))
+ continue;
+ ++Counters[V];
+ }
+ if (Counters.size() == 2 &&
+ any_of(Counters, [&](const std::pair<const Value *, unsigned> &C) {
+ return C.second == 1;
+ }))
+ return true;
+ }
+ // First operand not a constant or splat? Last attempt - check for
+ // potential vectorization.
+ InstructionsCompatibilityAnalysis Analysis(DT, DL, TTI, TLI);
+ InstructionsState OpS = Analysis.buildInstructionsState(
+ Ops, R, /*TryCopyableElementsVectorization=*/true);
+ if (!OpS || (OpS.getOpcode() == Instruction::PHI && !allSameBlock(Ops)))
+ return false;
+ unsigned CopyableNum =
+ count_if(Ops, [&](Value *V) { return OpS.isCopyableElement(V); });
+ return CopyableNum <= VL.size() / 2;
+ };
+ if (!CheckOperand(Operands.front()))
+ return InstructionsState::invalid();
+
+ return S;
+ }
+
SmallVector<BoUpSLP::ValueList> buildOperands(const InstructionsState &S,
ArrayRef<Value *> VL) {
assert(S && "Invalid state!");
SmallVector<BoUpSLP::ValueList> Operands;
- buildOriginalOperands(S, VL, Operands);
+ if (S.areInstructionsWithCopyableElements()) {
+ MainOp = S.getMainOp();
+ MainOpcode = S.getOpcode();
+ Operands.assign(MainOp->getNumOperands(),
+ BoUpSLP::ValueList(VL.size(), nullptr));
+ for (auto [Idx, V] : enumerate(VL)) {
+ SmallVector<Value *> OperandsForValue = getOperands(S, V);
+ for (auto [OperandIdx, Operand] : enumerate(OperandsForValue))
+ Operands[OperandIdx][Idx] = Operand;
+ }
+ } else {
+ buildOriginalOperands(S, VL, Operands);
+ }
return Operands;
}
};
} // namespace
-BoUpSLP::ScalarsVectorizationLegality
-BoUpSLP::getScalarsVectorizationLegality(ArrayRef<Value *> VL, unsigned Depth,
- const EdgeInfo &UserTreeIdx) const {
+BoUpSLP::ScalarsVectorizationLegality BoUpSLP::getScalarsVectorizationLegality(
+ ArrayRef<Value *> VL, unsigned Depth, const EdgeInfo &UserTreeIdx,
+ bool TryCopyableElementsVectorization) const {
assert((allConstant(VL) || allSameType(VL)) && "Invalid types!");
- InstructionsState S = getSameOpcode(VL, *TLI);
+ InstructionsCompatibilityAnalysis Analysis(*DT, *DL, *TTI, *TLI);
+ InstructionsState S = Analysis.buildInstructionsState(
+ VL, *this, TryCopyableElementsVectorization,
+ /*WithProfitabilityCheck=*/true, TryCopyableElementsVectorization);
// Don't go into catchswitch blocks, which can happen with PHIs.
// Such blocks can only have PHIs and the catchswitch. There is no
@@ -10066,7 +10450,7 @@ BoUpSLP::getScalarsVectorizationLegality(ArrayRef<Value *> VL, unsigned Depth,
bool IsScatterVectorizeUserTE =
UserTreeIdx.UserTE &&
UserTreeIdx.UserTE->State == TreeEntry::ScatterVectorize;
- bool AreAllSameBlock = S && allSameBlock(VL);
+ bool AreAllSameBlock = S.valid();
bool AreScatterAllGEPSameBlock =
(IsScatterVectorizeUserTE && VL.front()->getType()->isPointerTy() &&
VL.size() > 2 &&
@@ -10091,12 +10475,18 @@ BoUpSLP::getScalarsVectorizationLegality(ArrayRef<Value *> VL, unsigned Depth,
NotProfitableForVectorization(VL)) {
if (!S) {
LLVM_DEBUG(dbgs() << "SLP: Try split and if failed, gathering due to "
- "C,S,B,O, small shuffle. \n");
+ "C,S,B,O, small shuffle. \n";
+ dbgs() << "[";
+ interleaveComma(VL, dbgs(), [&](Value *V) { dbgs() << *V; });
+ dbgs() << "]\n");
return ScalarsVectorizationLegality(S, /*IsLegal=*/false,
/*TryToFindDuplicates=*/true,
/*TrySplitVectorize=*/true);
}
- LLVM_DEBUG(dbgs() << "SLP: Gathering due to C,S,B,O, small shuffle. \n");
+ LLVM_DEBUG(dbgs() << "SLP: Gathering due to C,S,B,O, small shuffle. \n";
+ dbgs() << "[";
+ interleaveComma(VL, dbgs(), [&](Value *V) { dbgs() << *V; });
+ dbgs() << "]\n");
return ScalarsVectorizationLegality(S, /*IsLegal=*/false);
}
@@ -10242,9 +10632,29 @@ void BoUpSLP::buildTreeRec(ArrayRef<Value *> VLRef, unsigned Depth,
return true;
};
- ScalarsVectorizationLegality Legality =
- getScalarsVectorizationLegality(VL, Depth, UserTreeIdx);
- const InstructionsState &S = Legality.getInstructionsState();
+ auto AreOnlyConstsWithPHIs = [](ArrayRef<Value *> VL) {
+ bool AreConsts = false;
+ for (Value *V : VL) {
+ if (isa<PoisonValue>(V))
+ continue;
+ if (isa<Constant>(V)) {
+ AreConsts = true;
+ continue;
+ }
+ if (!isa<PHINode>(V))
+ return false;
+ }
+ return AreConsts;
+ };
+ if (AreOnlyConstsWithPHIs(VL)) {
+ LLVM_DEBUG(dbgs() << "SLP: Gathering due to all constants and PHIs.\n");
+ newGatherTreeEntry(VL, InstructionsState::invalid(), UserTreeIdx);
+ return;
+ }
+
+ ScalarsVectorizationLegality Legality = getScalarsVectorizationLegality(
+ VL, Depth, UserTreeIdx, /*TryCopyableElementsVectorization=*/false);
+ InstructionsState S = Legality.getInstructionsState();
if (!Legality.isLegal()) {
if (Legality.trySplitVectorize()) {
auto [MainOp, AltOp] = getMainAltOpsNoStateVL(VL);
@@ -10252,11 +10662,18 @@ void BoUpSLP::buildTreeRec(ArrayRef<Value *> VLRef, unsigned Depth,
if (MainOp && AltOp && TrySplitNode(InstructionsState(MainOp, AltOp)))
return;
}
- if (Legality.tryToFindDuplicates())
- tryToFindDuplicates(VL, ReuseShuffleIndices, *TTI, *TLI, S, UserTreeIdx);
+ if (!S)
+ Legality = getScalarsVectorizationLegality(
+ VL, Depth, UserTreeIdx, /*TryCopyableElementsVectorization=*/true);
+ if (!Legality.isLegal()) {
+ if (Legality.tryToFindDuplicates())
+ tryToFindDuplicates(VL, ReuseShuffleIndices, *TTI, *TLI, S,
+ UserTreeIdx);
- newGatherTreeEntry(VL, S, UserTreeIdx, ReuseShuffleIndices);
- return;
+ newGatherTreeEntry(VL, S, UserTreeIdx, ReuseShuffleIndices);
+ return;
+ }
+ S = Legality.getInstructionsState();
}
// FIXME: investigate if there are profitable cases for VL.size() <= 4.
@@ -13024,7 +13441,8 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
assert(E->getOpcode() &&
((allSameType(VL) && allSameBlock(VL)) ||
(E->getOpcode() == Instruction::GetElementPtr &&
- E->getMainOp()->getType()->isPointerTy())) &&
+ E->getMainOp()->getType()->isPointerTy()) ||
+ E->hasCopyableElements()) &&
"Invalid VL");
Instruction *VL0 = E->getMainOp();
unsigned ShuffleOrOp =
@@ -13036,6 +13454,7 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
SmallBitVector UsedScalars(Sz, false);
for (unsigned I = 0; I < Sz; ++I) {
if (isa<Instruction>(UniqueValues[I]) &&
+ !E->isCopyableElement(UniqueValues[I]) &&
getTreeEntries(UniqueValues[I]).front() == E)
continue;
UsedScalars.set(I);
@@ -14075,15 +14494,45 @@ bool BoUpSLP::isTreeTinyAndNotFullyVectorizable(bool ForReduction) const {
// If the tree contains only phis, buildvectors, split nodes and
// small nodes with reuses, we can skip it.
+ SmallVector<const TreeEntry *> StoreLoadNodes;
+ unsigned NumGathers = 0;
+ constexpr int LimitTreeSize = 36;
if (!ForReduction && !SLPCostThreshold.getNumOccurrences() &&
- all_of(VectorizableTree, [](const std::unique_ptr<TreeEntry> &TE) {
- return TE->State == TreeEntry::SplitVectorize ||
- (TE->isGather() &&
- none_of(TE->Scalars, IsaPred<ExtractElementInst>)) ||
- (TE->hasState() && (TE->getOpcode() == Instruction::PHI ||
- (!TE->ReuseShuffleIndices.empty() &&
- TE->Scalars.size() == 2)));
- }))
+ all_of(VectorizableTree,
+ [&](const std::unique_ptr<TreeEntry> &TE) {
+ if (!TE->isGather() && TE->hasState() &&
+ (TE->getOpcode() == Instruction::Load ||
+ TE->getOpcode() == Instruction::Store)) {
+ StoreLoadNodes.push_back(TE.get());
+ return true;
+ }
+ if (TE->isGather())
+ ++NumGathers;
+ return TE->State == TreeEntry::SplitVectorize ||
+ (TE->Idx == 0 && TE->Scalars.size() == 2 &&
+ TE->hasState() && TE->getOpcode() == Instruction::ICmp &&
+ VectorizableTree.size() > LimitTreeSize) ||
+ (TE->isGather() &&
+ none_of(TE->Scalars, IsaPred<ExtractElementInst>)) ||
+ (TE->hasState() &&
+ (TE->getOpcode() == Instruction::PHI ||
+ (TE->hasCopyableElements() &&
+ static_cast<unsigned>(count_if(
+ TE->Scalars, IsaPred<PHINode, Constant>)) >=
+ TE->Scalars.size() / 2) ||
+ ((!TE->ReuseShuffleIndices.empty() ||
+ !TE->ReorderIndices.empty() || TE->isAltShuffle()) &&
+ TE->Scalars.size() == 2)));
+ }) &&
+ (StoreLoadNodes.empty() ||
+ (VectorizableTree.size() > LimitTreeSize * StoreLoadNodes.size() &&
+ (NumGathers > 0 || none_of(StoreLoadNodes, [&](const TreeEntry *TE) {
+ return TE->getOpcode() == Instruction::Store ||
+ all_of(TE->Scalars, [&](Value *V) {
+ return !isa<LoadInst>(V) ||
+ areAllUsersVectorized(cast<Instruction>(V));
+ });
+ })))))
return true;
// We can vectorize the tree if its size is greater than or equal to the
@@ -14826,6 +15275,7 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals,
bool IsProfitablePHIUser =
(KeepScalar || (ScalarCost - ExtraCost <= TTI::TCC_Basic &&
VectorizableTree.front()->Scalars.size() > 2)) &&
+ VectorizableTree.front()->hasState() &&
VectorizableTree.front()->getOpcode() == Instruction::PHI &&
!Inst->hasNUsesOrMore(UsesLimit) &&
none_of(Inst->users(),
@@ -15276,7 +15726,8 @@ BoUpSLP::isGatherShuffledSingleRegisterEntry(
const BasicBlock *TEInsertBlock = nullptr;
// Main node of PHI entries keeps the correct order of operands/incoming
// blocks.
- if (auto *PHI = dyn_cast<PHINode>(TEUseEI.UserTE->getMainOp());
+ if (auto *PHI = dyn_cast_or_null<PHINode>(
+ TEUseEI.UserTE->hasState() ? TEUseEI.UserTE->getMainOp() : nullptr);
PHI && TEUseEI.UserTE->State != TreeEntry::SplitVectorize) {
TEInsertBlock = PHI->getIncomingBlock(TEUseEI.EdgeIdx);
TEInsertPt = TEInsertBlock->getTerminator();
@@ -15375,7 +15826,8 @@ BoUpSLP::isGatherShuffledSingleRegisterEntry(
"Expected only single user of a gather node.");
const EdgeInfo &UseEI = TEPtr->UserTreeIndex;
- PHINode *UserPHI = UseEI.UserTE->State != TreeEntry::SplitVectorize
+ PHINode *UserPHI = (UseEI.UserTE->State != TreeEntry::SplitVectorize &&
+ UseEI.UserTE->hasState())
? dyn_cast<PHINode>(UseEI.UserTE->getMainOp())
: nullptr;
Instruction *InsertPt =
@@ -15388,7 +15840,8 @@ BoUpSLP::isGatherShuffledSingleRegisterEntry(
TEUseEI.UserTE->isAltShuffle()) &&
all_of(TEUseEI.UserTE->Scalars, isUsedOutsideBlock)) {
if (UseEI.UserTE->State != TreeEntry::Vectorize ||
- (UseEI.UserTE->getOpcode() == Instruction::PHI &&
+ (UseEI.UserTE->hasState() &&
+ UseEI.UserTE->getOpcode() == Instruction::PHI &&
!UseEI.UserTE->isAltShuffle()) ||
!all_of(UseEI.UserTE->Scalars, isUsedOutsideBlock))
continue;
@@ -16009,25 +16462,32 @@ Instruction &BoUpSLP::getLastInstructionInBundle(const TreeEntry *E) {
Instruction *Res = nullptr;
// Get the basic block this bundle is in. All instructions in the bundle
// should be in this block (except for extractelement-like instructions with
- // constant indices or gathered loads).
- auto *Front = E->getMainOp();
+ // constant indices or gathered loads or copyables).
+ Instruction *Front;
+ unsigned Opcode;
+ if (E->hasState()) {
+ Front = E->getMainOp();
+ Opcode = E->getOpcode();
+ } else {
+ Front = cast<Instruction>(*find_if(E->Scalars, IsaPred<Instruction>));
+ Opcode = Front->getOpcode();
+ }
auto *BB = Front->getParent();
- assert(((GatheredLoadsEntriesFirst.has_value() &&
- E->getOpcode() == Instruction::Load && E->isGather() &&
- E->Idx < *GatheredLoadsEntriesFirst) ||
- E->State == TreeEntry::SplitVectorize ||
- all_of(E->Scalars,
- [=](Value *V) -> bool {
- if (E->getOpcode() == Instruction::GetElementPtr &&
- !isa<GetElementPtrInst>(V))
- return true;
- auto *I = dyn_cast<Instruction>(V);
- return !I || !E->getMatchingMainOpOrAltOp(I) ||
- I->getParent() == BB ||
- isVectorLikeInstWithConstOps(I);
- })) &&
- "Expected gathered loads or GEPs or instructions from same basic "
- "block.");
+ assert(
+ ((GatheredLoadsEntriesFirst.has_value() && Opcode == Instruction::Load &&
+ E->isGather() && E->Idx < *GatheredLoadsEntriesFirst) ||
+ E->State == TreeEntry::SplitVectorize || E->hasCopyableElements() ||
+ all_of(E->Scalars,
+ [=](Value *V) -> bool {
+ if (Opcode == Instruction::GetElementPtr &&
+ !isa<GetElementPtrInst>(V))
+ return true;
+ auto *I = dyn_cast<Instruction>(V);
+ return !I || !E->getMatchingMainOpOrAltOp(I) ||
+ I->getParent() == BB || isVectorLikeInstWithConstOps(I);
+ })) &&
+ "Expected gathered loads or GEPs or instructions from same basic "
+ "block.");
auto FindLastInst = [&]() {
Instruction *LastInst = Front;
@@ -16035,18 +16495,20 @@ Instruction &BoUpSLP::getLastInstructionInBundle(const TreeEntry *E) {
auto *I = dyn_cast<Instruction>(V);
if (!I)
continue;
+ if (E->isCopyableElement(I))
+ continue;
if (LastInst->getParent() == I->getParent()) {
if (LastInst->comesBefore(I))
LastInst = I;
continue;
}
- assert(((E->getOpcode() == Instruction::GetElementPtr &&
+ assert(((Opcode == Instruction::GetElementPtr &&
!isa<GetElementPtrInst>(I)) ||
E->State == TreeEntry::SplitVectorize ||
(isVectorLikeInstWithConstOps(LastInst) &&
isVectorLikeInstWithConstOps(I)) ||
(GatheredLoadsEntriesFirst.has_value() &&
- E->getOpcode() == Instruction::Load && E->isGather() &&
+ Opcode == Instruction::Load && E->isGather() &&
E->Idx < *GatheredLoadsEntriesFirst)) &&
"Expected vector-like or non-GEP in GEP node insts only.");
if (!DT->isReachableFromEntry(LastInst->getParent())) {
@@ -16075,16 +16537,18 @@ Instruction &BoUpSLP::getLastInstructionInBundle(const TreeEntry *E) {
auto *I = dyn_cast<Instruction>(V);
if (!I)
continue;
+ if (E->isCopyableElement(I))
+ continue;
if (FirstInst->getParent() == I->getParent()) {
if (I->comesBefore(FirstInst))
FirstInst = I;
continue;
}
- assert(((E->getOpcode() == Instruction::GetElementPtr &&
- !isa<GetElementPtrInst>(I)) ||
- (isVectorLikeInstWithConstOps(FirstInst) &&
- isVectorLikeInstWithConstOps(I))) &&
- "Expected vector-like or non-GEP in GEP node insts only.");
+ assert(((Opcode == Instruction::GetElementPtr &&
+ !isa<GetElementPtrInst>(I)) ||
+ (isVectorLikeInstWithConstOps(FirstInst) &&
+ isVectorLikeInstWithConstOps(I))) &&
+ "Expected vector-like or non-GEP in GEP node insts only.");
if (!DT->isReachableFromEntry(FirstInst->getParent())) {
FirstInst = I;
continue;
@@ -16122,7 +16586,7 @@ Instruction &BoUpSLP::getLastInstructionInBundle(const TreeEntry *E) {
// Set insertpoint for gathered loads to the very first load.
if (GatheredLoadsEntriesFirst.has_value() &&
E->Idx >= *GatheredLoadsEntriesFirst && !E->isGather() &&
- E->getOpcode() == Instruction::Load) {
+ Opcode == Instruction::Load) {
Res = FindFirstInst();
EntryToLastInstruction.try_emplace(E, Res);
return *Res;
@@ -16139,7 +16603,8 @@ Instruction &BoUpSLP::getLastInstructionInBundle(const TreeEntry *E) {
return nullptr;
for (Value *V : E->Scalars) {
auto *I = dyn_cast<Instruction>(V);
- if (!I || isa<PHINode>(I) || doesNotNeedToBeScheduled(I))
+ if (!I || isa<PHINode>(I) ||
+ (!E->isCopyableElement(I) && doesNotNeedToBeScheduled(I)))
continue;
ArrayRef<ScheduleBundle *> Bundles = It->second->getScheduleBundles(I);
if (Bundles.empty())
@@ -16153,13 +16618,13 @@ Instruction &BoUpSLP::getLastInstructionInBundle(const TreeEntry *E) {
};
const ScheduleBundle *Bundle = FindScheduleBundle(E);
if (!E->isGather() && !Bundle) {
- if ((E->getOpcode() == Instruction::GetElementPtr &&
+ if ((Opcode == Instruction::GetElementPtr &&
any_of(E->Scalars,
[](Value *V) {
return !isa<GetElementPtrInst>(V) && isa<Instruction>(V);
})) ||
- all_of(E->Scalars, [](Value *V) {
- return isa<PoisonValue>(V) ||
+ all_of(E->Scalars, [&](Value *V) {
+ return isa<PoisonValue>(V) || E->isCopyableElement(V) ||
(!isVectorLikeInstWithConstOps(V) && isUsedOutsideBlock(V));
}))
Res = FindLastInst();
@@ -18640,6 +19105,7 @@ Value *BoUpSLP::vectorizeTree(
TE->UserTreeIndex.UserTE->State == TreeEntry::Vectorize &&
(TE->UserTreeIndex.UserTE->getOpcode() != Instruction::PHI ||
TE->UserTreeIndex.UserTE->isAltShuffle()) &&
+ !TE->UserTreeIndex.UserTE->hasCopyableElements() &&
all_of(TE->UserTreeIndex.UserTE->Scalars,
[](Value *V) { return isUsedOutsideBlock(V); })) {
Instruction &LastInst =
@@ -18903,7 +19369,7 @@ Value *BoUpSLP::vectorizeTree(
continue;
assert(
(ExternallyUsedValues.count(Scalar) ||
- Scalar->hasNUsesOrMore(UsesLimit) ||
+ ExternalUsesWithNonUsers.count(Scalar) ||
ExternalUsesAsOriginalScalar.contains(Scalar) ||
any_of(
Scalar->users(),
@@ -19182,7 +19648,7 @@ Value *BoUpSLP::vectorizeTree(
if (auto *EE = dyn_cast<ExtractElementInst>(Scalar);
EE && IgnoredExtracts.contains(EE))
continue;
- if (isa<PoisonValue>(Scalar))
+ if (!isa<Instruction>(Scalar) || Entry->isCopyableElement(Scalar))
continue;
#ifndef NDEBUG
Type *Ty = Scalar->getType();
@@ -19424,12 +19890,15 @@ void BoUpSLP::optimizeGatherSequence() {
}
BoUpSLP::ScheduleBundle &
-BoUpSLP::BlockScheduling::buildBundle(ArrayRef<Value *> VL) {
+BoUpSLP::BlockScheduling::buildBundle(ArrayRef<Value *> VL,
+ const InstructionsState &S) {
auto &BundlePtr =
ScheduledBundlesList.emplace_back(std::make_unique<ScheduleBundle>());
for (Value *V : VL) {
if (doesNotNeedToBeScheduled(V))
continue;
+ if (S.isCopyableElement(V))
+ continue;
ScheduleData *BundleMember = getScheduleData(V);
assert(BundleMember && "no ScheduleData for bundle member "
"(maybe not in same basic block)");
@@ -19450,10 +19919,19 @@ BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL, BoUpSLP *SLP,
const InstructionsState &S) {
// No need to schedule PHIs, insertelement, extractelement and extractvalue
// instructions.
+ bool HasCopyables = S.areInstructionsWithCopyableElements();
if (isa<PHINode>(S.getMainOp()) ||
- isVectorLikeInstWithConstOps(S.getMainOp()) || doesNotNeedToSchedule(VL))
+ isVectorLikeInstWithConstOps(S.getMainOp()) ||
+ (!HasCopyables && doesNotNeedToSchedule(VL)) ||
+ all_of(VL, [&](Value *V) { return S.isNonSchedulable(V); }))
return nullptr;
+ // TODO Remove once full support for copyables is landed.
+ assert(all_of(VL,
+ [&](Value *V) {
+ return !S.isCopyableElement(V) || S.isNonSchedulable(V);
+ }) &&
+ "Copyable elements should not be schedulable");
// Initialize the instruction bundle.
Instruction *OldScheduleEnd = ScheduleEnd;
LLVM_DEBUG(dbgs() << "SLP: bundle: " << *S.getMainOp() << "\n");
@@ -19499,7 +19977,7 @@ BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL, BoUpSLP *SLP,
// Make sure that the scheduling region contains all
// instructions of the bundle.
for (Value *V : VL) {
- if (doesNotNeedToBeScheduled(V))
+ if (doesNotNeedToBeScheduled(V) || S.isCopyableElement(V))
continue;
if (!extendSchedulingRegion(V, S)) {
// If the scheduling region got new instructions at the lower end (or it
@@ -19516,7 +19994,7 @@ BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL, BoUpSLP *SLP,
bool ReSchedule = false;
for (Value *V : VL) {
- if (doesNotNeedToBeScheduled(V))
+ if (doesNotNeedToBeScheduled(V) || S.isCopyableElement(V))
continue;
ScheduleData *BundleMember = getScheduleData(V);
assert(BundleMember &&
@@ -19541,7 +20019,7 @@ BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL, BoUpSLP *SLP,
ReSchedule = true;
}
- ScheduleBundle &Bundle = buildBundle(VL);
+ ScheduleBundle &Bundle = buildBundle(VL, S);
TryScheduleBundleImpl(ReSchedule, Bundle);
if (!Bundle.isReady()) {
for (ScheduleData *BD : Bundle.getBundle()) {
@@ -19558,7 +20036,7 @@ BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL, BoUpSLP *SLP,
}
ScheduledBundlesList.pop_back();
for (Value *V : VL) {
- if (doesNotNeedToBeScheduled(V))
+ if (doesNotNeedToBeScheduled(V) || S.isCopyableElement(V))
continue;
ScheduledBundles.find(cast<Instruction>(V))->getSecond().pop_back();
}
@@ -20187,7 +20665,7 @@ bool BoUpSLP::collectValuesToDemote(
};
if (E.isGather() || !Visited.insert(&E).second ||
any_of(E.Scalars, [&](Value *V) {
- return !isa<PoisonValue>(V) && all_of(V->users(), [&](User *U) {
+ return !isa<Constant>(V) && all_of(V->users(), [&](User *U) {
return isa<InsertElementInst>(U) && !isVectorized(U);
});
}))
@@ -20555,9 +21033,10 @@ void BoUpSLP::computeMinimumValueSizes() {
if (!isa<CastInst, BinaryOperator, FreezeInst, PHINode,
SelectInst>(U) ||
isa<SIToFPInst, UIToFPInst>(U) ||
- !isa<CastInst, BinaryOperator, FreezeInst, PHINode,
- SelectInst>(UserTE->getMainOp()) ||
- isa<SIToFPInst, UIToFPInst>(UserTE->getMainOp()))
+ (UserTE->hasState() &&
+ (!isa<CastInst, BinaryOperator, FreezeInst, PHINode,
+ SelectInst>(UserTE->getMainOp()) ||
+ isa<SIToFPInst, UIToFPInst>(UserTE->getMainOp()))))
return true;
unsigned UserTESz = DL->getTypeSizeInBits(
UserTE->Scalars.front()->getType());
@@ -20653,7 +21132,12 @@ void BoUpSLP::computeMinimumValueSizes() {
if (!IsKnownPositive)
++BitWidth1;
- APInt Mask = DB->getDemandedBits(cast<Instruction>(Root));
+ auto *I = dyn_cast<Instruction>(Root);
+ if (!I) {
+ MaxBitWidth = std::max(BitWidth1, MaxBitWidth);
+ continue;
+ }
+ APInt Mask = DB->getDemandedBits(I);
unsigned BitWidth2 = Mask.getBitWidth() - Mask.countl_zero();
MaxBitWidth =
std::max<unsigned>(std::min(BitWidth1, BitWidth2), MaxBitWidth);
@@ -20802,6 +21286,7 @@ void BoUpSLP::computeMinimumValueSizes() {
NodeIdx < VectorizableTree.size() &&
VectorizableTree[NodeIdx]->UserTreeIndex &&
VectorizableTree[NodeIdx]->UserTreeIndex.EdgeIdx == 0 &&
+ VectorizableTree[NodeIdx]->UserTreeIndex.UserTE->hasState() &&
VectorizableTree[NodeIdx]->UserTreeIndex.UserTE->getOpcode() ==
Instruction::Trunc &&
!VectorizableTree[NodeIdx]->UserTreeIndex.UserTE->isAltShuffle();
@@ -20982,7 +21467,9 @@ SLPVectorizerPass::vectorizeStoreChain(ArrayRef<Value *> Chain, BoUpSLP &R,
for (Value *V : Chain)
ValOps.insert(cast<StoreInst>(V)->getValueOperand());
// Operands are not same/alt opcodes or non-power-of-2 uniques - exit.
- InstructionsState S = getSameOpcode(ValOps.getArrayRef(), *TLI);
+ InstructionsCompatibilityAnalysis Analysis(*DT, *DL, *TTI, *TLI);
+ InstructionsState S = Analysis.buildInstructionsState(
+ ValOps.getArrayRef(), R, /*TryCopyableElementsVectorization=*/true);
if (all_of(ValOps, IsaPred<Instruction>) && ValOps.size() > 1) {
DenseSet<Value *> Stores(Chain.begin(), Chain.end());
bool IsAllowedSize =
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index 40a5565..25b9616 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -962,7 +962,11 @@ void VPlan::prepareToExecute(Value *TripCountV, Value *VectorTripCountV,
BackedgeTakenCount->setUnderlyingValue(TCMO);
}
- VectorTripCount.setUnderlyingValue(VectorTripCountV);
+ if (!VectorTripCount.getUnderlyingValue())
+ VectorTripCount.setUnderlyingValue(VectorTripCountV);
+ else
+ assert(VectorTripCount.getUnderlyingValue() == VectorTripCountV &&
+ "VectorTripCount set earlier must much VectorTripCountV");
IRBuilder<> Builder(State.CFG.PrevBB->getTerminator());
// FIXME: Model VF * UF computation completely in VPlan.
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 6655149..a5de593 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -1012,6 +1012,10 @@ public:
ReductionStartVector,
// Creates a step vector starting from 0 to VF with a step of 1.
StepVector,
+ /// Extracts a single lane (first operand) from a set of vector operands.
+ /// The lane specifies an index into a vector formed by combining all vector
+ /// operands (all operands after the first one).
+ ExtractLane,
};
@@ -1837,6 +1841,10 @@ public:
getGEPNoWrapFlags(), getDebugLoc());
}
+ /// Return true if this VPVectorPointerRecipe corresponds to part 0. Note that
+ /// this is only accurate after the VPlan has been unrolled.
+ bool isFirstPart() const { return getUnrollPart(*this) == 0; }
+
/// Return the cost of this VPHeaderPHIRecipe.
InstructionCost computeCost(ElementCount VF,
VPCostContext &Ctx) const override {
@@ -2304,14 +2312,15 @@ public:
/// respective masks, ordered [I0, M0, I1, M1, I2, M2, ...]. Note that M0 can
/// be omitted (implied by passing an odd number of operands) in which case
/// all other incoming values are merged into it.
- VPBlendRecipe(PHINode *Phi, ArrayRef<VPValue *> Operands)
- : VPSingleDefRecipe(VPDef::VPBlendSC, Operands, Phi, Phi->getDebugLoc()) {
+ VPBlendRecipe(PHINode *Phi, ArrayRef<VPValue *> Operands, DebugLoc DL)
+ : VPSingleDefRecipe(VPDef::VPBlendSC, Operands, Phi, DL) {
assert(Operands.size() > 0 && "Expected at least one operand!");
}
VPBlendRecipe *clone() override {
SmallVector<VPValue *> Ops(operands());
- return new VPBlendRecipe(cast<PHINode>(getUnderlyingValue()), Ops);
+ return new VPBlendRecipe(cast_or_null<PHINode>(getUnderlyingValue()), Ops,
+ getDebugLoc());
}
VP_CLASSOF_IMPL(VPDef::VPBlendSC)
@@ -4056,6 +4065,10 @@ public:
/// Returns VF * UF of the vector loop region.
VPValue &getVFxUF() { return VFxUF; }
+ LLVMContext &getContext() const {
+ return getScalarHeader()->getIRBasicBlock()->getContext();
+ }
+
void addVF(ElementCount VF) { VFs.insert(VF); }
void setVF(ElementCount VF) {
diff --git a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
index 3499e65..16072f2 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
@@ -110,6 +110,8 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) {
case VPInstruction::BuildStructVector:
case VPInstruction::BuildVector:
return SetResultTyFromOp();
+ case VPInstruction::ExtractLane:
+ return inferScalarType(R->getOperand(1));
case VPInstruction::FirstActiveLane:
return Type::getIntNTy(Ctx, 64);
case VPInstruction::ExtractLastElement:
diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
index 194874a..6c1f53b 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
@@ -437,9 +437,12 @@ static void addCanonicalIVRecipes(VPlan &Plan, VPBasicBlock *HeaderVPBB,
// We are about to replace the branch to exit the region. Remove the original
// BranchOnCond, if there is any.
+ DebugLoc LatchDL = DL;
if (!LatchVPBB->empty() &&
- match(&LatchVPBB->back(), m_BranchOnCond(m_VPValue())))
+ match(&LatchVPBB->back(), m_BranchOnCond(m_VPValue()))) {
+ LatchDL = LatchVPBB->getTerminator()->getDebugLoc();
LatchVPBB->getTerminator()->eraseFromParent();
+ }
VPBuilder Builder(LatchVPBB);
// Add a VPInstruction to increment the scalar canonical IV by VF * UF.
@@ -452,7 +455,8 @@ static void addCanonicalIVRecipes(VPlan &Plan, VPBasicBlock *HeaderVPBB,
// Add the BranchOnCount VPInstruction to the latch.
Builder.createNaryOp(VPInstruction::BranchOnCount,
- {CanonicalIVIncrement, &Plan.getVectorTripCount()}, DL);
+ {CanonicalIVIncrement, &Plan.getVectorTripCount()},
+ LatchDL);
}
void VPlanTransforms::prepareForVectorization(
@@ -462,28 +466,27 @@ void VPlanTransforms::prepareForVectorization(
VPDominatorTree VPDT;
VPDT.recalculate(Plan);
- VPBlockBase *HeaderVPB = Plan.getEntry()->getSingleSuccessor();
- canonicalHeaderAndLatch(HeaderVPB, VPDT);
- VPBlockBase *LatchVPB = HeaderVPB->getPredecessors()[1];
+ auto *HeaderVPBB = cast<VPBasicBlock>(Plan.getEntry()->getSingleSuccessor());
+ canonicalHeaderAndLatch(HeaderVPBB, VPDT);
+ auto *LatchVPBB = cast<VPBasicBlock>(HeaderVPBB->getPredecessors()[1]);
VPBasicBlock *VecPreheader = Plan.createVPBasicBlock("vector.ph");
VPBlockUtils::insertBlockAfter(VecPreheader, Plan.getEntry());
VPBasicBlock *MiddleVPBB = Plan.createVPBasicBlock("middle.block");
- // The canonical LatchVPB has the header block as last successor. If it has
+ // The canonical LatchVPBB has the header block as last successor. If it has
// another successor, this successor is an exit block - insert middle block on
// its edge. Otherwise, add middle block as another successor retaining header
// as last.
- if (LatchVPB->getNumSuccessors() == 2) {
- VPBlockBase *LatchExitVPB = LatchVPB->getSuccessors()[0];
- VPBlockUtils::insertOnEdge(LatchVPB, LatchExitVPB, MiddleVPBB);
+ if (LatchVPBB->getNumSuccessors() == 2) {
+ VPBlockBase *LatchExitVPB = LatchVPBB->getSuccessors()[0];
+ VPBlockUtils::insertOnEdge(LatchVPBB, LatchExitVPB, MiddleVPBB);
} else {
- VPBlockUtils::connectBlocks(LatchVPB, MiddleVPBB);
- LatchVPB->swapSuccessors();
+ VPBlockUtils::connectBlocks(LatchVPBB, MiddleVPBB);
+ LatchVPBB->swapSuccessors();
}
- addCanonicalIVRecipes(Plan, cast<VPBasicBlock>(HeaderVPB),
- cast<VPBasicBlock>(LatchVPB), InductionTy, IVDL);
+ addCanonicalIVRecipes(Plan, HeaderVPBB, LatchVPBB, InductionTy, IVDL);
[[maybe_unused]] bool HandledUncountableEarlyExit = false;
// Disconnect all early exits from the loop leaving it with a single exit from
@@ -499,8 +502,7 @@ void VPlanTransforms::prepareForVectorization(
assert(!HandledUncountableEarlyExit &&
"can handle exactly one uncountable early exit");
handleUncountableEarlyExit(cast<VPBasicBlock>(Pred), EB, Plan,
- cast<VPBasicBlock>(HeaderVPB),
- cast<VPBasicBlock>(LatchVPB), Range);
+ HeaderVPBB, LatchVPBB, Range);
HandledUncountableEarlyExit = true;
} else {
for (VPRecipeBase &R : EB->phis())
@@ -564,15 +566,15 @@ void VPlanTransforms::prepareForVectorization(
// the corresponding compare because they may have ended up with different
// line numbers and we want to avoid awkward line stepping while debugging.
// E.g., if the compare has got a line number inside the loop.
- DebugLoc LatchDL = TheLoop->getLoopLatch()->getTerminator()->getDebugLoc();
+ DebugLoc LatchDL = LatchVPBB->getTerminator()->getDebugLoc();
VPBuilder Builder(MiddleVPBB);
VPValue *Cmp;
if (!RequiresScalarEpilogueCheck)
- Cmp = Plan.getOrAddLiveIn(ConstantInt::getFalse(
- IntegerType::getInt1Ty(TripCount->getType()->getContext())));
+ Cmp = Plan.getOrAddLiveIn(
+ ConstantInt::getFalse(IntegerType::getInt1Ty(Plan.getContext())));
else if (TailFolded)
- Cmp = Plan.getOrAddLiveIn(ConstantInt::getTrue(
- IntegerType::getInt1Ty(TripCount->getType()->getContext())));
+ Cmp = Plan.getOrAddLiveIn(
+ ConstantInt::getTrue(IntegerType::getInt1Ty(Plan.getContext())));
else
Cmp = Builder.createICmp(CmpInst::ICMP_EQ, Plan.getTripCount(),
&Plan.getVectorTripCount(), LatchDL, "cmp.n");
@@ -646,7 +648,7 @@ void VPlanTransforms::attachCheckBlock(VPlan &Plan, Value *Cond,
.createNaryOp(VPInstruction::BranchOnCond, {CondVPV},
Plan.getCanonicalIV()->getDebugLoc());
if (AddBranchWeights) {
- MDBuilder MDB(Plan.getScalarHeader()->getIRBasicBlock()->getContext());
+ MDBuilder MDB(Plan.getContext());
MDNode *BranchWeights =
MDB.createBranchWeights(CheckBypassWeights, /*IsExpected=*/false);
Term->addMetadata(LLVMContext::MD_prof, BranchWeights);
diff --git a/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp b/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp
index fc8458c..3b3bbc3 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp
@@ -251,8 +251,9 @@ void VPPredicator::convertPhisToBlends(VPBasicBlock *VPBB) {
}
OperandsWithMask.push_back(EdgeMask);
}
- PHINode *IRPhi = cast<PHINode>(PhiR->getUnderlyingValue());
- auto *Blend = new VPBlendRecipe(IRPhi, OperandsWithMask);
+ PHINode *IRPhi = cast_or_null<PHINode>(PhiR->getUnderlyingValue());
+ auto *Blend =
+ new VPBlendRecipe(IRPhi, OperandsWithMask, PhiR->getDebugLoc());
Builder.insert(Blend);
PhiR->replaceAllUsesWith(Blend);
PhiR->eraseFromParent();
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 0d6152b..225658b 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -427,6 +427,7 @@ unsigned VPUnrollPartAccessor<PartOpIdx>::getUnrollPart(const VPUser &U) const {
}
namespace llvm {
+template class VPUnrollPartAccessor<1>;
template class VPUnrollPartAccessor<2>;
template class VPUnrollPartAccessor<3>;
}
@@ -863,6 +864,31 @@ Value *VPInstruction::generate(VPTransformState &State) {
Res = Builder.CreateOr(Res, State.get(Op));
return State.VF.isScalar() ? Res : Builder.CreateOrReduce(Res);
}
+ case VPInstruction::ExtractLane: {
+ Value *LaneToExtract = State.get(getOperand(0), true);
+ Type *IdxTy = State.TypeAnalysis.inferScalarType(getOperand(0));
+ Value *Res = nullptr;
+ Value *RuntimeVF = getRuntimeVF(State.Builder, IdxTy, State.VF);
+
+ for (unsigned Idx = 1; Idx != getNumOperands(); ++Idx) {
+ Value *VectorStart =
+ Builder.CreateMul(RuntimeVF, ConstantInt::get(IdxTy, Idx - 1));
+ Value *VectorIdx = Idx == 1
+ ? LaneToExtract
+ : Builder.CreateSub(LaneToExtract, VectorStart);
+ Value *Ext = State.VF.isScalar()
+ ? State.get(getOperand(Idx))
+ : Builder.CreateExtractElement(
+ State.get(getOperand(Idx)), VectorIdx);
+ if (Res) {
+ Value *Cmp = Builder.CreateICmpUGE(LaneToExtract, VectorStart);
+ Res = Builder.CreateSelect(Cmp, Ext, Res);
+ } else {
+ Res = Ext;
+ }
+ }
+ return Res;
+ }
case VPInstruction::FirstActiveLane: {
if (getNumOperands() == 1) {
Value *Mask = State.get(getOperand(0));
@@ -921,7 +947,8 @@ InstructionCost VPInstruction::computeCost(ElementCount VF,
}
switch (getOpcode()) {
- case Instruction::ExtractElement: {
+ case Instruction::ExtractElement:
+ case VPInstruction::ExtractLane: {
// Add on the cost of extracting the element.
auto *VecTy = toVectorTy(Ctx.Types.inferScalarType(getOperand(0)), VF);
return Ctx.TTI.getVectorInstrCost(Instruction::ExtractElement, VecTy,
@@ -983,6 +1010,7 @@ bool VPInstruction::isVectorToScalar() const {
return getOpcode() == VPInstruction::ExtractLastElement ||
getOpcode() == VPInstruction::ExtractPenultimateElement ||
getOpcode() == Instruction::ExtractElement ||
+ getOpcode() == VPInstruction::ExtractLane ||
getOpcode() == VPInstruction::FirstActiveLane ||
getOpcode() == VPInstruction::ComputeAnyOfResult ||
getOpcode() == VPInstruction::ComputeFindIVResult ||
@@ -1048,6 +1076,7 @@ bool VPInstruction::opcodeMayReadOrWriteFromMemory() const {
case VPInstruction::BuildVector:
case VPInstruction::CalculateTripCountMinusVF:
case VPInstruction::CanonicalIVIncrementForPart:
+ case VPInstruction::ExtractLane:
case VPInstruction::ExtractLastElement:
case VPInstruction::ExtractPenultimateElement:
case VPInstruction::FirstActiveLane:
@@ -1097,6 +1126,8 @@ bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const {
case VPInstruction::ComputeAnyOfResult:
case VPInstruction::ComputeFindIVResult:
return Op == getOperand(1);
+ case VPInstruction::ExtractLane:
+ return Op == getOperand(0);
};
llvm_unreachable("switch should return");
}
@@ -1176,6 +1207,9 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent,
case VPInstruction::BuildVector:
O << "buildvector";
break;
+ case VPInstruction::ExtractLane:
+ O << "extract-lane";
+ break;
case VPInstruction::ExtractLastElement:
O << "extract-last-element";
break;
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 3372bcc..935a4e4 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -774,10 +774,10 @@ static VPValue *optimizeEarlyExitInductionUser(VPlan &Plan,
using namespace VPlanPatternMatch;
VPValue *Incoming, *Mask;
- if (!match(Op, m_VPInstruction<Instruction::ExtractElement>(
- m_VPValue(Incoming),
+ if (!match(Op, m_VPInstruction<VPInstruction::ExtractLane>(
m_VPInstruction<VPInstruction::FirstActiveLane>(
- m_VPValue(Mask)))))
+ m_VPValue(Mask)),
+ m_VPValue(Incoming))))
return nullptr;
auto *WideIV = getOptimizableIVOf(Incoming);
@@ -1172,6 +1172,14 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
if (!Plan->isUnrolled())
return;
+ // VPVectorPointer for part 0 can be replaced by their start pointer.
+ if (auto *VecPtr = dyn_cast<VPVectorPointerRecipe>(&R)) {
+ if (VecPtr->isFirstPart()) {
+ VecPtr->replaceAllUsesWith(VecPtr->getOperand(0));
+ return;
+ }
+ }
+
// VPScalarIVSteps for part 0 can be replaced by their start value, if only
// the first lane is demanded.
if (auto *Steps = dyn_cast<VPScalarIVStepsRecipe>(Def)) {
@@ -1307,8 +1315,9 @@ static void simplifyBlends(VPlan &Plan) {
OperandsWithMask.push_back(Blend->getMask(I));
}
- auto *NewBlend = new VPBlendRecipe(
- cast<PHINode>(Blend->getUnderlyingValue()), OperandsWithMask);
+ auto *NewBlend =
+ new VPBlendRecipe(cast_or_null<PHINode>(Blend->getUnderlyingValue()),
+ OperandsWithMask, Blend->getDebugLoc());
NewBlend->insertBefore(&R);
VPValue *DeadMask = Blend->getMask(StartIndex);
@@ -1361,7 +1370,7 @@ static bool optimizeVectorInductionWidthForTCAndVFUF(VPlan &Plan,
unsigned NewBitWidth =
ComputeBitWidth(TC->getValue(), BestVF.getKnownMinValue() * BestUF);
- LLVMContext &Ctx = Plan.getCanonicalIV()->getScalarType()->getContext();
+ LLVMContext &Ctx = Plan.getContext();
auto *NewIVTy = IntegerType::get(Ctx, NewBitWidth);
bool MadeChange = false;
@@ -1883,9 +1892,7 @@ void VPlanTransforms::truncateToMinimalBitwidths(
}
}
-/// Remove BranchOnCond recipes with true or false conditions together with
-/// removing dead edges to their successors.
-static void removeBranchOnConst(VPlan &Plan) {
+void VPlanTransforms::removeBranchOnConst(VPlan &Plan) {
using namespace llvm::VPlanPatternMatch;
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
vp_depth_first_shallow(Plan.getEntry()))) {
@@ -1908,12 +1915,9 @@ static void removeBranchOnConst(VPlan &Plan) {
"There must be a single edge between VPBB and its successor");
// Values coming from VPBB into phi recipes of RemoveSucc are removed from
// these recipes.
- for (VPRecipeBase &R : RemovedSucc->phis()) {
- auto *Phi = cast<VPPhiAccessors>(&R);
- assert((!isa<VPIRPhi>(&R) || RemovedSucc->getNumPredecessors() == 1) &&
- "VPIRPhis must have a single predecessor");
- Phi->removeIncomingValueFor(VPBB);
- }
+ for (VPRecipeBase &R : RemovedSucc->phis())
+ cast<VPPhiAccessors>(&R)->removeIncomingValueFor(VPBB);
+
// Disconnect blocks and remove the terminator. RemovedSucc will be deleted
// automatically on VPlan destruction if it becomes unreachable.
VPBlockUtils::disconnectBlocks(VPBB, RemovedSucc);
@@ -2515,8 +2519,8 @@ void VPlanTransforms::createInterleaveGroups(
DL.getTypeAllocSize(getLoadStoreType(IRInsertPos)) *
IG->getIndex(IRInsertPos),
/*IsSigned=*/true);
- VPValue *OffsetVPV = Plan.getOrAddLiveIn(
- ConstantInt::get(IRInsertPos->getParent()->getContext(), -Offset));
+ VPValue *OffsetVPV =
+ Plan.getOrAddLiveIn(ConstantInt::get(Plan.getContext(), -Offset));
VPBuilder B(InsertPos);
Addr = InBounds ? B.createInBoundsPtrAdd(InsertPos->getAddr(), OffsetVPV)
: B.createPtrAdd(InsertPos->getAddr(), OffsetVPV);
@@ -2842,7 +2846,7 @@ void VPlanTransforms::handleUncountableEarlyExit(
VPInstruction::FirstActiveLane, {CondToEarlyExit}, nullptr,
"first.active.lane");
IncomingFromEarlyExit = EarlyExitB.createNaryOp(
- Instruction::ExtractElement, {IncomingFromEarlyExit, FirstActiveLane},
+ VPInstruction::ExtractLane, {FirstActiveLane, IncomingFromEarlyExit},
nullptr, "early.exit.value");
ExitIRI->setOperand(EarlyExitIdx, IncomingFromEarlyExit);
}
@@ -3093,6 +3097,29 @@ void VPlanTransforms::materializeBroadcasts(VPlan &Plan) {
}
}
+void VPlanTransforms::materializeVectorTripCount(
+ VPlan &Plan, ElementCount BestVF, unsigned BestUF,
+ PredicatedScalarEvolution &PSE) {
+ assert(Plan.hasVF(BestVF) && "BestVF is not available in Plan");
+ assert(Plan.hasUF(BestUF) && "BestUF is not available in Plan");
+
+ VPValue *TC = Plan.getTripCount();
+ // Skip cases for which the trip count may be non-trivial to materialize.
+ if (!Plan.hasScalarTail() ||
+ Plan.getMiddleBlock()->getSingleSuccessor() ==
+ Plan.getScalarPreheader() ||
+ !TC->isLiveIn())
+ return;
+ // Materialize vector trip counts for constants early if it can simply
+ // be computed as (Original TC / VF * UF) * VF * UF.
+ ScalarEvolution &SE = *PSE.getSE();
+ auto *TCScev = SE.getSCEV(TC->getLiveInIRValue());
+ const SCEV *VFxUF = SE.getElementCount(TCScev->getType(), BestVF * BestUF);
+ auto VecTCScev = SE.getMulExpr(SE.getUDivExpr(TCScev, VFxUF), VFxUF);
+ if (auto *NewC = dyn_cast<SCEVConstant>(VecTCScev))
+ Plan.getVectorTripCount().setUnderlyingValue(NewC->getValue());
+}
+
/// Returns true if \p V is VPWidenLoadRecipe or VPInterleaveRecipe that can be
/// converted to a narrower recipe. \p V is used by a wide recipe that feeds a
/// store interleave group at index \p Idx, \p WideMember0 is the recipe feeding
@@ -3350,7 +3377,7 @@ void VPlanTransforms::addBranchWeightToMiddleTerminator(
if (VF.isScalable() && VScaleForTuning.has_value())
VectorStep *= *VScaleForTuning;
assert(VectorStep > 0 && "trip count should not be zero");
- MDBuilder MDB(Plan.getScalarHeader()->getIRBasicBlock()->getContext());
+ MDBuilder MDB(Plan.getContext());
MDNode *BranchWeights =
MDB.createBranchWeights({1, VectorStep - 1}, /*IsExpected=*/false);
MiddleTerm->addMetadata(LLVMContext::MD_prof, BranchWeights);
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
index ab189f6..d5af6cd 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
@@ -224,6 +224,10 @@ struct VPlanTransforms {
/// CanonicalIVTy as type for all un-typed live-ins in VPTypeAnalysis.
static void simplifyRecipes(VPlan &Plan, Type &CanonicalIVTy);
+ /// Remove BranchOnCond recipes with true or false conditions together with
+ /// removing dead edges to their successors.
+ static void removeBranchOnConst(VPlan &Plan);
+
/// If there's a single exit block, optimize its phi recipes that use exiting
/// IV values by feeding them precomputed end values instead, possibly taken
/// one step backwards.
@@ -234,6 +238,12 @@ struct VPlanTransforms {
/// Add explicit broadcasts for live-ins and VPValues defined in \p Plan's entry block if they are used as vectors.
static void materializeBroadcasts(VPlan &Plan);
+ // Materialize vector trip counts for constants early if it can simply be
+ // computed as (Original TC / VF * UF) * VF * UF.
+ static void materializeVectorTripCount(VPlan &Plan, ElementCount BestVF,
+ unsigned BestUF,
+ PredicatedScalarEvolution &PSE);
+
/// Try to convert a plan with interleave groups with VF elements to a plan
/// with the interleave groups replaced by wide loads and stores processing VF
/// elements, if all transformed interleave groups access the full vector
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
index b89cd21..871e37e 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
@@ -363,6 +363,13 @@ void UnrollState::unrollBlock(VPBlockBase *VPB) {
continue;
}
VPValue *Op0;
+ if (match(&R, m_VPInstruction<VPInstruction::ExtractLane>(
+ m_VPValue(Op0), m_VPValue(Op1)))) {
+ addUniformForAllParts(cast<VPInstruction>(&R));
+ for (unsigned Part = 1; Part != UF; ++Part)
+ R.addOperand(getValueForPart(Op1, Part));
+ continue;
+ }
if (match(&R, m_VPInstruction<VPInstruction::ExtractLastElement>(
m_VPValue(Op0))) ||
match(&R, m_VPInstruction<VPInstruction::ExtractPenultimateElement>(