aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/Analysis/DependenceAnalysis.cpp19
-rw-r--r--llvm/lib/Analysis/RegionPrinter.cpp11
-rw-r--r--llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp11
-rw-r--r--llvm/lib/CodeGen/GlobalISel/CallLowering.cpp7
-rw-r--r--llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp69
-rw-r--r--llvm/lib/CodeGen/MIRParser/MIParser.cpp2
-rw-r--r--llvm/lib/CodeGen/MachineInstr.cpp8
-rw-r--r--llvm/lib/CodeGen/MachineInstrBundle.cpp6
-rw-r--r--llvm/lib/CodeGen/SafeStack.cpp2
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp166
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h11
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp2
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp8
-rw-r--r--llvm/lib/CodeGen/TwoAddressInstructionPass.cpp11
-rw-r--r--llvm/lib/IR/DebugInfoMetadata.cpp21
-rw-r--r--llvm/lib/IR/Verifier.cpp6
-rw-r--r--llvm/lib/ProfileData/InstrProf.cpp5
-rw-r--r--llvm/lib/ProfileData/InstrProfWriter.cpp2
-rw-r--r--llvm/lib/Target/AArch64/AArch64ISelLowering.cpp41
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp3
-rw-r--r--llvm/lib/Target/AMDGPU/SIInstrInfo.cpp3
-rw-r--r--llvm/lib/Target/BPF/BPFAbstractMemberAccess.cpp1
-rw-r--r--llvm/lib/Target/BPF/BPFISelLowering.cpp20
-rw-r--r--llvm/lib/Target/BPF/BPFISelLowering.h14
-rw-r--r--llvm/lib/Target/BPF/BPFInstrInfo.td6
-rw-r--r--llvm/lib/Target/BPF/BPFPreserveDIType.cpp4
-rw-r--r--llvm/lib/Target/BPF/BPFSelectionDAGInfo.cpp18
-rw-r--r--llvm/lib/Target/BPF/BPFSelectionDAGInfo.h10
-rw-r--r--llvm/lib/Target/BPF/CMakeLists.txt1
-rw-r--r--llvm/lib/Target/DirectX/DXILDataScalarization.cpp68
-rw-r--r--llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp4
-rw-r--r--llvm/lib/Target/DirectX/DXILOpLowering.cpp2
-rw-r--r--llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp13
-rw-r--r--llvm/lib/Target/Hexagon/HexagonInstrInfo.h1
-rw-r--r--llvm/lib/Target/Hexagon/HexagonQFPOptimizer.cpp145
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp3
-rw-r--r--llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp4
-rw-r--r--llvm/lib/Target/RISCV/RISCVFrameLowering.cpp6
-rw-r--r--llvm/lib/Target/RISCV/RISCVISelLowering.cpp45
-rw-r--r--llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp7
-rw-r--r--llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td66
-rw-r--r--llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp11
-rw-r--r--llvm/lib/Target/SPIRV/SPIRVBuiltins.td5
-rw-r--r--llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp4
-rw-r--r--llvm/lib/Target/SPIRV/SPIRVInstrInfo.td6
-rw-r--r--llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp7
-rw-r--r--llvm/lib/Target/SPIRV/SPIRVPrepareFunctions.cpp16
-rw-r--r--llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td3
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.cpp11
-rw-r--r--llvm/lib/TargetParser/TargetParser.cpp4
-rw-r--r--llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp2
-rw-r--r--llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp51
-rw-r--r--llvm/lib/Transforms/Utils/BypassSlowDivision.cpp32
-rw-r--r--llvm/lib/Transforms/Utils/LoopSimplify.cpp60
-rw-r--r--llvm/lib/Transforms/Vectorize/LoopVectorize.cpp33
-rw-r--r--llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp9
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlan.h65
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp10
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp2
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp2
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlanUtils.cpp4
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlanValue.h6
62 files changed, 828 insertions, 367 deletions
diff --git a/llvm/lib/Analysis/DependenceAnalysis.cpp b/llvm/lib/Analysis/DependenceAnalysis.cpp
index 11d8294..e45d1f7 100644
--- a/llvm/lib/Analysis/DependenceAnalysis.cpp
+++ b/llvm/lib/Analysis/DependenceAnalysis.cpp
@@ -1587,6 +1587,15 @@ static const SCEV *minusSCEVNoSignedOverflow(const SCEV *A, const SCEV *B,
return nullptr;
}
+/// Returns \p A * \p B if it is guaranteed not to signed wrap. Otherwise
+/// returns nullptr. \p A and \p B must have the same integer type.
+static const SCEV *mulSCEVNoSignedOverflow(const SCEV *A, const SCEV *B,
+ ScalarEvolution &SE) {
+ if (SE.willNotOverflow(Instruction::Mul, /*Signed=*/true, A, B))
+ return SE.getMulExpr(A, B);
+ return nullptr;
+}
+
/// Returns the absolute value of \p A. In the context of dependence analysis,
/// we need an absolute value in a mathematical sense. If \p A is the signed
/// minimum value, we cannot represent it unless extending the original type.
@@ -1686,7 +1695,11 @@ bool DependenceInfo::strongSIVtest(const SCEV *Coeff, const SCEV *SrcConst,
assert(0 < Level && Level <= CommonLevels && "level out of range");
Level--;
- const SCEV *Delta = SE->getMinusSCEV(SrcConst, DstConst);
+ const SCEV *Delta = minusSCEVNoSignedOverflow(SrcConst, DstConst, *SE);
+ if (!Delta) {
+ Result.Consistent = false;
+ return false;
+ }
LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta);
LLVM_DEBUG(dbgs() << ", " << *Delta->getType() << "\n");
@@ -1702,7 +1715,9 @@ bool DependenceInfo::strongSIVtest(const SCEV *Coeff, const SCEV *SrcConst,
const SCEV *AbsCoeff = absSCEVNoSignedOverflow(Coeff, *SE);
if (!AbsDelta || !AbsCoeff)
return false;
- const SCEV *Product = SE->getMulExpr(UpperBound, AbsCoeff);
+ const SCEV *Product = mulSCEVNoSignedOverflow(UpperBound, AbsCoeff, *SE);
+ if (!Product)
+ return false;
return isKnownPredicate(CmpInst::ICMP_SGT, AbsDelta, Product);
}();
if (IsDeltaLarge) {
diff --git a/llvm/lib/Analysis/RegionPrinter.cpp b/llvm/lib/Analysis/RegionPrinter.cpp
index a83af4e..33e073b 100644
--- a/llvm/lib/Analysis/RegionPrinter.cpp
+++ b/llvm/lib/Analysis/RegionPrinter.cpp
@@ -29,10 +29,9 @@ onlySimpleRegions("only-simple-regions",
cl::Hidden,
cl::init(false));
-namespace llvm {
-
-std::string DOTGraphTraits<RegionNode *>::getNodeLabel(RegionNode *Node,
- RegionNode *Graph) {
+std::string
+llvm::DOTGraphTraits<RegionNode *>::getNodeLabel(RegionNode *Node,
+ RegionNode *Graph) {
if (!Node->isSubRegion()) {
BasicBlock *BB = Node->getNodeAs<BasicBlock>();
@@ -46,7 +45,8 @@ std::string DOTGraphTraits<RegionNode *>::getNodeLabel(RegionNode *Node,
}
template <>
-struct DOTGraphTraits<RegionInfo *> : public DOTGraphTraits<RegionNode *> {
+struct llvm::DOTGraphTraits<RegionInfo *>
+ : public llvm::DOTGraphTraits<RegionNode *> {
DOTGraphTraits (bool isSimple = false)
: DOTGraphTraits<RegionNode*>(isSimple) {}
@@ -125,7 +125,6 @@ struct DOTGraphTraits<RegionInfo *> : public DOTGraphTraits<RegionNode *> {
printRegionCluster(*G->getTopLevelRegion(), GW, 4);
}
};
-} // end namespace llvm
namespace {
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 713277d..3aa245b 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -2087,6 +2087,17 @@ void AsmPrinter::emitFunctionBody() {
// This is only used to influence register allocation behavior, no
// actual initialization is needed.
break;
+ case TargetOpcode::RELOC_NONE: {
+ // Generate a temporary label for the current PC.
+ MCSymbol *Sym = OutContext.createTempSymbol("reloc_none");
+ OutStreamer->emitLabel(Sym);
+ const MCExpr *Dot = MCSymbolRefExpr::create(Sym, OutContext);
+ const MCExpr *Value = MCSymbolRefExpr::create(
+ OutContext.getOrCreateSymbol(MI.getOperand(0).getSymbolName()),
+ OutContext);
+ OutStreamer->emitRelocDirective(*Dot, "BFD_RELOC_NONE", Value, SMLoc());
+ break;
+ }
default:
emitInstruction(&MI);
diff --git a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
index b3c3125..7be7468 100644
--- a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -292,7 +292,8 @@ void CallLowering::splitToValueTypes(const ArgInfo &OrigArg,
LLVMContext &Ctx = OrigArg.Ty->getContext();
SmallVector<EVT, 4> SplitVTs;
- ComputeValueVTs(*TLI, DL, OrigArg.Ty, SplitVTs, Offsets, 0);
+ ComputeValueVTs(*TLI, DL, OrigArg.Ty, SplitVTs, /*MemVTs=*/nullptr, Offsets,
+ 0);
if (SplitVTs.size() == 0)
return;
@@ -996,7 +997,7 @@ void CallLowering::insertSRetLoads(MachineIRBuilder &MIRBuilder, Type *RetTy,
SmallVector<EVT, 4> SplitVTs;
SmallVector<uint64_t, 4> Offsets;
- ComputeValueVTs(*TLI, DL, RetTy, SplitVTs, &Offsets, 0);
+ ComputeValueVTs(*TLI, DL, RetTy, SplitVTs, /*MemVTs=*/nullptr, &Offsets, 0);
assert(VRegs.size() == SplitVTs.size());
@@ -1028,7 +1029,7 @@ void CallLowering::insertSRetStores(MachineIRBuilder &MIRBuilder, Type *RetTy,
SmallVector<EVT, 4> SplitVTs;
SmallVector<uint64_t, 4> Offsets;
- ComputeValueVTs(*TLI, DL, RetTy, SplitVTs, &Offsets, 0);
+ ComputeValueVTs(*TLI, DL, RetTy, SplitVTs, /*MemVTs=*/nullptr, &Offsets, 0);
assert(VRegs.size() == SplitVTs.size());
diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
index 4fd2204..4f6a19f 100644
--- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -2686,6 +2686,13 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
case Intrinsic::experimental_convergence_entry:
case Intrinsic::experimental_convergence_loop:
return translateConvergenceControlIntrinsic(CI, ID, MIRBuilder);
+ case Intrinsic::reloc_none: {
+ Metadata *MD = cast<MetadataAsValue>(CI.getArgOperand(0))->getMetadata();
+ StringRef SymbolName = cast<MDString>(MD)->getString();
+ MIRBuilder.buildInstr(TargetOpcode::RELOC_NONE)
+ .addExternalSymbol(SymbolName.data());
+ return true;
+ }
}
return false;
}
@@ -2821,20 +2828,34 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) {
if (translateKnownIntrinsic(CI, ID, MIRBuilder))
return true;
+ TargetLowering::IntrinsicInfo Info;
+ bool IsTgtMemIntrinsic = TLI->getTgtMemIntrinsic(Info, CI, *MF, ID);
+
+ return translateIntrinsic(CI, ID, MIRBuilder,
+ IsTgtMemIntrinsic ? &Info : nullptr);
+}
+
+/// Translate a call to an intrinsic.
+/// If TLI->getTgtMemIntrinsic() returned true for this call,
+/// \p TgtMemIntrinsicInfo points to the IntrinsicInfo object it populated.
+/// Otherwise, this pointer is null.
+bool IRTranslator::translateIntrinsic(
+ const CallBase &CB, Intrinsic::ID ID, MachineIRBuilder &MIRBuilder,
+ const TargetLowering::IntrinsicInfo *TgtMemIntrinsicInfo) {
ArrayRef<Register> ResultRegs;
- if (!CI.getType()->isVoidTy())
- ResultRegs = getOrCreateVRegs(CI);
+ if (!CB.getType()->isVoidTy())
+ ResultRegs = getOrCreateVRegs(CB);
// Ignore the callsite attributes. Backend code is most likely not expecting
// an intrinsic to sometimes have side effects and sometimes not.
MachineInstrBuilder MIB = MIRBuilder.buildIntrinsic(ID, ResultRegs);
- if (isa<FPMathOperator>(CI))
- MIB->copyIRFlags(CI);
+ if (isa<FPMathOperator>(CB))
+ MIB->copyIRFlags(CB);
- for (const auto &Arg : enumerate(CI.args())) {
+ for (const auto &Arg : enumerate(CB.args())) {
// If this is required to be an immediate, don't materialize it in a
// register.
- if (CI.paramHasAttr(Arg.index(), Attribute::ImmArg)) {
+ if (CB.paramHasAttr(Arg.index(), Attribute::ImmArg)) {
if (ConstantInt *CI = dyn_cast<ConstantInt>(Arg.value())) {
// imm arguments are more convenient than cimm (and realistically
// probably sufficient), so use them.
@@ -2863,29 +2884,33 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) {
}
// Add a MachineMemOperand if it is a target mem intrinsic.
- TargetLowering::IntrinsicInfo Info;
- // TODO: Add a GlobalISel version of getTgtMemIntrinsic.
- if (TLI->getTgtMemIntrinsic(Info, CI, *MF, ID)) {
- Align Alignment = Info.align.value_or(
- DL->getABITypeAlign(Info.memVT.getTypeForEVT(F->getContext())));
- LLT MemTy = Info.memVT.isSimple()
- ? getLLTForMVT(Info.memVT.getSimpleVT())
- : LLT::scalar(Info.memVT.getStoreSizeInBits());
+ if (TgtMemIntrinsicInfo) {
+ const Function *F = CB.getCalledFunction();
+
+ Align Alignment = TgtMemIntrinsicInfo->align.value_or(DL->getABITypeAlign(
+ TgtMemIntrinsicInfo->memVT.getTypeForEVT(F->getContext())));
+ LLT MemTy =
+ TgtMemIntrinsicInfo->memVT.isSimple()
+ ? getLLTForMVT(TgtMemIntrinsicInfo->memVT.getSimpleVT())
+ : LLT::scalar(TgtMemIntrinsicInfo->memVT.getStoreSizeInBits());
// TODO: We currently just fallback to address space 0 if getTgtMemIntrinsic
// didn't yield anything useful.
MachinePointerInfo MPI;
- if (Info.ptrVal)
- MPI = MachinePointerInfo(Info.ptrVal, Info.offset);
- else if (Info.fallbackAddressSpace)
- MPI = MachinePointerInfo(*Info.fallbackAddressSpace);
+ if (TgtMemIntrinsicInfo->ptrVal) {
+ MPI = MachinePointerInfo(TgtMemIntrinsicInfo->ptrVal,
+ TgtMemIntrinsicInfo->offset);
+ } else if (TgtMemIntrinsicInfo->fallbackAddressSpace) {
+ MPI = MachinePointerInfo(*TgtMemIntrinsicInfo->fallbackAddressSpace);
+ }
MIB.addMemOperand(MF->getMachineMemOperand(
- MPI, Info.flags, MemTy, Alignment, CI.getAAMetadata(),
- /*Ranges=*/nullptr, Info.ssid, Info.order, Info.failureOrder));
+ MPI, TgtMemIntrinsicInfo->flags, MemTy, Alignment, CB.getAAMetadata(),
+ /*Ranges=*/nullptr, TgtMemIntrinsicInfo->ssid,
+ TgtMemIntrinsicInfo->order, TgtMemIntrinsicInfo->failureOrder));
}
- if (CI.isConvergent()) {
- if (auto Bundle = CI.getOperandBundle(LLVMContext::OB_convergencectrl)) {
+ if (CB.isConvergent()) {
+ if (auto Bundle = CB.getOperandBundle(LLVMContext::OB_convergencectrl)) {
auto *Token = Bundle->Inputs[0].get();
Register TokenReg = getOrCreateVReg(*Token);
MIB.addUse(TokenReg, RegState::Implicit);
diff --git a/llvm/lib/CodeGen/MIRParser/MIParser.cpp b/llvm/lib/CodeGen/MIRParser/MIParser.cpp
index 4795d81..434a579 100644
--- a/llvm/lib/CodeGen/MIRParser/MIParser.cpp
+++ b/llvm/lib/CodeGen/MIRParser/MIParser.cpp
@@ -1161,6 +1161,8 @@ bool MIParser::parse(MachineInstr *&MI) {
MemOperands.push_back(MemOp);
if (Token.isNewlineOrEOF())
break;
+ if (OpCode == TargetOpcode::BUNDLE && Token.is(MIToken::lbrace))
+ break;
if (Token.isNot(MIToken::comma))
return error("expected ',' before the next machine memory operand");
lex();
diff --git a/llvm/lib/CodeGen/MachineInstr.cpp b/llvm/lib/CodeGen/MachineInstr.cpp
index 8ad9245..37e5c51 100644
--- a/llvm/lib/CodeGen/MachineInstr.cpp
+++ b/llvm/lib/CodeGen/MachineInstr.cpp
@@ -1547,10 +1547,14 @@ bool MachineInstr::mayAlias(BatchAAResults *AA, const MachineInstr &Other,
// Check each pair of memory operands from both instructions, which can't
// alias only if all pairs won't alias.
- for (auto *MMOa : memoperands())
- for (auto *MMOb : Other.memoperands())
+ for (auto *MMOa : memoperands()) {
+ for (auto *MMOb : Other.memoperands()) {
+ if (!MMOa->isStore() && !MMOb->isStore())
+ continue;
if (MemOperandsHaveAlias(MFI, AA, UseTBAA, MMOa, MMOb))
return true;
+ }
+ }
return false;
}
diff --git a/llvm/lib/CodeGen/MachineInstrBundle.cpp b/llvm/lib/CodeGen/MachineInstrBundle.cpp
index f4c1a8b..fa654f2 100644
--- a/llvm/lib/CodeGen/MachineInstrBundle.cpp
+++ b/llvm/lib/CodeGen/MachineInstrBundle.cpp
@@ -143,6 +143,7 @@ void llvm::finalizeBundle(MachineBasicBlock &MBB,
SmallSet<Register, 8> KilledUseSet;
SmallSet<Register, 8> UndefUseSet;
SmallVector<std::pair<Register, Register>> TiedOperands;
+ SmallVector<MachineInstr *> MemMIs;
for (auto MII = FirstMI; MII != LastMI; ++MII) {
// Debug instructions have no effects to track.
if (MII->isDebugInstr())
@@ -206,6 +207,9 @@ void llvm::finalizeBundle(MachineBasicBlock &MBB,
MIB.setMIFlag(MachineInstr::FrameSetup);
if (MII->getFlag(MachineInstr::FrameDestroy))
MIB.setMIFlag(MachineInstr::FrameDestroy);
+
+ if (MII->mayLoadOrStore())
+ MemMIs.push_back(&*MII);
}
for (Register Reg : LocalDefs) {
@@ -231,6 +235,8 @@ void llvm::finalizeBundle(MachineBasicBlock &MBB,
assert(UseIdx < ExternUses.size());
MIB->tieOperands(DefIdx, LocalDefs.size() + UseIdx);
}
+
+ MIB->cloneMergedMemRefs(MF, MemMIs);
}
/// finalizeBundle - Same functionality as the previous finalizeBundle except
diff --git a/llvm/lib/CodeGen/SafeStack.cpp b/llvm/lib/CodeGen/SafeStack.cpp
index e9ffa85..6b747f3 100644
--- a/llvm/lib/CodeGen/SafeStack.cpp
+++ b/llvm/lib/CodeGen/SafeStack.cpp
@@ -196,8 +196,6 @@ public:
bool run();
};
-constexpr Align SafeStack::StackAlignment;
-
uint64_t SafeStack::getStaticAllocaAllocationSize(const AllocaInst* AI) {
uint64_t Size = DL.getTypeAllocSize(AI->getAllocatedType());
if (AI->isArrayAllocation()) {
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index fa0c899..2f598b2 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -3526,8 +3526,7 @@ void SelectionDAGBuilder::visitCallBr(const CallBrInst &I) {
// Update successor info.
addSuccessorWithProb(CallBrMBB, Return, BranchProbability::getOne());
- for (unsigned i = 0, e = I.getNumIndirectDests(); i < e; ++i) {
- BasicBlock *Dest = I.getIndirectDest(i);
+ for (BasicBlock *Dest : I.getIndirectDests()) {
MachineBasicBlock *Target = FuncInfo.getMBB(Dest);
Target->setIsInlineAsmBrIndirectTarget();
// If we introduce a type of asm goto statement that is permitted to use an
@@ -4759,7 +4758,7 @@ void SelectionDAGBuilder::visitStoreToSwiftError(const StoreInst &I) {
SmallVector<uint64_t, 4> Offsets;
const Value *SrcV = I.getOperand(0);
ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(),
- SrcV->getType(), ValueVTs, &Offsets, 0);
+ SrcV->getType(), ValueVTs, /*MemVTs=*/nullptr, &Offsets, 0);
assert(ValueVTs.size() == 1 && Offsets[0] == 0 &&
"expect a single EVT for swifterror");
@@ -4795,7 +4794,7 @@ void SelectionDAGBuilder::visitLoadFromSwiftError(const LoadInst &I) {
SmallVector<EVT, 4> ValueVTs;
SmallVector<uint64_t, 4> Offsets;
ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(), Ty,
- ValueVTs, &Offsets, 0);
+ ValueVTs, /*MemVTs=*/nullptr, &Offsets, 0);
assert(ValueVTs.size() == 1 && Offsets[0] == 0 &&
"expect a single EVT for swifterror");
@@ -5313,18 +5312,26 @@ void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) {
DAG.setRoot(OutChain);
}
-/// visitTargetIntrinsic - Lower a call of a target intrinsic to an INTRINSIC
-/// node.
-void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
- unsigned Intrinsic) {
- // Ignore the callsite's attributes. A specific call site may be marked with
- // readnone, but the lowering code will expect the chain based on the
- // definition.
+/// Returns a {HasChain, OnlyLoad} pair: whether this intrinsic call depends on
+/// the chain, and whether it only *loads* memory.
+/// Ignore the callsite's attributes. A specific call site may be marked with
+/// readnone, but the lowering code will expect the chain based on the
+/// definition.
+std::pair<bool, bool>
+SelectionDAGBuilder::getTargetIntrinsicCallProperties(const CallBase &I) {
const Function *F = I.getCalledFunction();
bool HasChain = !F->doesNotAccessMemory();
bool OnlyLoad =
HasChain && F->onlyReadsMemory() && F->willReturn() && F->doesNotThrow();
+ return {HasChain, OnlyLoad};
+}
+
+SmallVector<SDValue, 8> SelectionDAGBuilder::getTargetIntrinsicOperands(
+ const CallBase &I, bool HasChain, bool OnlyLoad,
+ TargetLowering::IntrinsicInfo *TgtMemIntrinsicInfo) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
// Build the operand list.
SmallVector<SDValue, 8> Ops;
if (HasChain) { // If this intrinsic has side-effects, chainify it.
@@ -5336,17 +5343,10 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
}
}
- // Info is set by getTgtMemIntrinsic
- TargetLowering::IntrinsicInfo Info;
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- bool IsTgtIntrinsic = TLI.getTgtMemIntrinsic(Info, I,
- DAG.getMachineFunction(),
- Intrinsic);
-
// Add the intrinsic ID as an integer operand if it's not a target intrinsic.
- if (!IsTgtIntrinsic || Info.opc == ISD::INTRINSIC_VOID ||
- Info.opc == ISD::INTRINSIC_W_CHAIN)
- Ops.push_back(DAG.getTargetConstant(Intrinsic, getCurSDLoc(),
+ if (!TgtMemIntrinsicInfo || TgtMemIntrinsicInfo->opc == ISD::INTRINSIC_VOID ||
+ TgtMemIntrinsicInfo->opc == ISD::INTRINSIC_W_CHAIN)
+ Ops.push_back(DAG.getTargetConstant(I.getIntrinsicID(), getCurSDLoc(),
TLI.getPointerTy(DAG.getDataLayout())));
// Add all operands of the call to the operand list.
@@ -5369,13 +5369,85 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
}
}
+ if (std::optional<OperandBundleUse> Bundle =
+ I.getOperandBundle(LLVMContext::OB_convergencectrl)) {
+ Value *Token = Bundle->Inputs[0].get();
+ SDValue ConvControlToken = getValue(Token);
+ assert(Ops.back().getValueType() != MVT::Glue &&
+ "Did not expect another glue node here.");
+ ConvControlToken =
+ DAG.getNode(ISD::CONVERGENCECTRL_GLUE, {}, MVT::Glue, ConvControlToken);
+ Ops.push_back(ConvControlToken);
+ }
+
+ return Ops;
+}
+
+SDVTList SelectionDAGBuilder::getTargetIntrinsicVTList(const CallBase &I,
+ bool HasChain) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
SmallVector<EVT, 4> ValueVTs;
ComputeValueVTs(TLI, DAG.getDataLayout(), I.getType(), ValueVTs);
if (HasChain)
ValueVTs.push_back(MVT::Other);
- SDVTList VTs = DAG.getVTList(ValueVTs);
+ return DAG.getVTList(ValueVTs);
+}
+
+/// Get an INTRINSIC node for a target intrinsic which does not touch memory.
+SDValue SelectionDAGBuilder::getTargetNonMemIntrinsicNode(
+ const Type &IntrinsicVT, bool HasChain, ArrayRef<SDValue> Ops,
+ const SDVTList &VTs) {
+ if (!HasChain)
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurSDLoc(), VTs, Ops);
+ if (!IntrinsicVT.isVoidTy())
+ return DAG.getNode(ISD::INTRINSIC_W_CHAIN, getCurSDLoc(), VTs, Ops);
+ return DAG.getNode(ISD::INTRINSIC_VOID, getCurSDLoc(), VTs, Ops);
+}
+
+/// Set root, convert return type if necessary and check alignment.
+SDValue SelectionDAGBuilder::handleTargetIntrinsicRet(const CallBase &I,
+ bool HasChain,
+ bool OnlyLoad,
+ SDValue Result) {
+ if (HasChain) {
+ SDValue Chain = Result.getValue(Result.getNode()->getNumValues() - 1);
+ if (OnlyLoad)
+ PendingLoads.push_back(Chain);
+ else
+ DAG.setRoot(Chain);
+ }
+
+ if (I.getType()->isVoidTy())
+ return Result;
+
+ if (MaybeAlign Alignment = I.getRetAlign(); InsertAssertAlign && Alignment) {
+ // Insert `assertalign` node if there's an alignment.
+ Result = DAG.getAssertAlign(getCurSDLoc(), Result, Alignment.valueOrOne());
+ } else if (!isa<VectorType>(I.getType())) {
+ Result = lowerRangeToAssertZExt(DAG, I, Result);
+ }
+
+ return Result;
+}
+
+/// visitTargetIntrinsic - Lower a call of a target intrinsic to an INTRINSIC
+/// node.
+void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
+ unsigned Intrinsic) {
+ auto [HasChain, OnlyLoad] = getTargetIntrinsicCallProperties(I);
+
+ // Info is set by getTgtMemIntrinsic
+ TargetLowering::IntrinsicInfo Info;
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ bool IsTgtMemIntrinsic =
+ TLI.getTgtMemIntrinsic(Info, I, DAG.getMachineFunction(), Intrinsic);
+
+ SmallVector<SDValue, 8> Ops = getTargetIntrinsicOperands(
+ I, HasChain, OnlyLoad, IsTgtMemIntrinsic ? &Info : nullptr);
+ SDVTList VTs = getTargetIntrinsicVTList(I, HasChain);
// Propagate fast-math-flags from IR to node(s).
SDNodeFlags Flags;
@@ -5386,19 +5458,9 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
// Create the node.
SDValue Result;
- if (auto Bundle = I.getOperandBundle(LLVMContext::OB_convergencectrl)) {
- auto *Token = Bundle->Inputs[0].get();
- SDValue ConvControlToken = getValue(Token);
- assert(Ops.back().getValueType() != MVT::Glue &&
- "Did not expected another glue node here.");
- ConvControlToken =
- DAG.getNode(ISD::CONVERGENCECTRL_GLUE, {}, MVT::Glue, ConvControlToken);
- Ops.push_back(ConvControlToken);
- }
-
// In some cases, custom collection of operands from CallInst I may be needed.
TLI.CollectTargetIntrinsicOperands(I, Ops, DAG);
- if (IsTgtIntrinsic) {
+ if (IsTgtMemIntrinsic) {
// This is target intrinsic that touches memory
//
// TODO: We currently just fallback to address space 0 if getTgtMemIntrinsic
@@ -5418,34 +5480,11 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
Info.ssid, Info.order, Info.failureOrder);
Result =
DAG.getMemIntrinsicNode(Info.opc, getCurSDLoc(), VTs, Ops, MemVT, MMO);
- } else if (!HasChain) {
- Result = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurSDLoc(), VTs, Ops);
- } else if (!I.getType()->isVoidTy()) {
- Result = DAG.getNode(ISD::INTRINSIC_W_CHAIN, getCurSDLoc(), VTs, Ops);
} else {
- Result = DAG.getNode(ISD::INTRINSIC_VOID, getCurSDLoc(), VTs, Ops);
- }
-
- if (HasChain) {
- SDValue Chain = Result.getValue(Result.getNode()->getNumValues()-1);
- if (OnlyLoad)
- PendingLoads.push_back(Chain);
- else
- DAG.setRoot(Chain);
+ Result = getTargetNonMemIntrinsicNode(*I.getType(), HasChain, Ops, VTs);
}
- if (!I.getType()->isVoidTy()) {
- if (!isa<VectorType>(I.getType()))
- Result = lowerRangeToAssertZExt(DAG, I, Result);
-
- MaybeAlign Alignment = I.getRetAlign();
-
- // Insert `assertalign` node if there's an alignment.
- if (InsertAssertAlign && Alignment) {
- Result =
- DAG.getAssertAlign(getCurSDLoc(), Result, Alignment.valueOrOne());
- }
- }
+ Result = handleTargetIntrinsicRet(I, HasChain, OnlyLoad, Result);
setValue(&I, Result);
}
@@ -7772,6 +7811,17 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
return;
}
+ case Intrinsic::reloc_none: {
+ Metadata *MD = cast<MetadataAsValue>(I.getArgOperand(0))->getMetadata();
+ StringRef SymbolName = cast<MDString>(MD)->getString();
+ SDValue Ops[2] = {
+ getRoot(),
+ DAG.getTargetExternalSymbol(
+ SymbolName.data(), TLI.getProgramPointerTy(DAG.getDataLayout()))};
+ DAG.setRoot(DAG.getNode(ISD::RELOC_NONE, sdl, MVT::Other, Ops));
+ return;
+ }
+
case Intrinsic::eh_exceptionpointer:
case Intrinsic::eh_exceptioncode: {
// Get the exception pointer vreg, copy from it, and resize it to fit.
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index 47e19f7..ed63bee 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -727,6 +727,17 @@ private:
MCSymbol *&BeginLabel);
SDValue lowerEndEH(SDValue Chain, const InvokeInst *II,
const BasicBlock *EHPadBB, MCSymbol *BeginLabel);
+
+ std::pair<bool, bool> getTargetIntrinsicCallProperties(const CallBase &I);
+ SmallVector<SDValue, 8> getTargetIntrinsicOperands(
+ const CallBase &I, bool HasChain, bool OnlyLoad,
+ TargetLowering::IntrinsicInfo *TgtMemIntrinsicInfo = nullptr);
+ SDVTList getTargetIntrinsicVTList(const CallBase &I, bool HasChain);
+ SDValue getTargetNonMemIntrinsicNode(const Type &IntrinsicVT, bool HasChain,
+ ArrayRef<SDValue> Ops,
+ const SDVTList &VTs);
+ SDValue handleTargetIntrinsicRet(const CallBase &I, bool HasChain,
+ bool OnlyLoad, SDValue Result);
};
/// This struct represents the registers (physical or virtual)
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index 77377d3..d3e1628 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -472,6 +472,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::LIFETIME_END: return "lifetime.end";
case ISD::FAKE_USE:
return "fake_use";
+ case ISD::RELOC_NONE:
+ return "reloc_none";
case ISD::PSEUDO_PROBE:
return "pseudoprobe";
case ISD::GC_TRANSITION_START: return "gc_transition.start";
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 6c11c5b..8bc5d2f 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -2550,6 +2550,11 @@ void SelectionDAGISel::Select_FAKE_USE(SDNode *N) {
N->getOperand(1), N->getOperand(0));
}
+void SelectionDAGISel::Select_RELOC_NONE(SDNode *N) {
+ CurDAG->SelectNodeTo(N, TargetOpcode::RELOC_NONE, N->getValueType(0),
+ N->getOperand(1), N->getOperand(0));
+}
+
void SelectionDAGISel::Select_FREEZE(SDNode *N) {
// TODO: We don't have FREEZE pseudo-instruction in MachineInstr-level now.
// If FREEZE instruction is added later, the code below must be changed as
@@ -3325,6 +3330,9 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
case ISD::FAKE_USE:
Select_FAKE_USE(NodeToMatch);
return;
+ case ISD::RELOC_NONE:
+ Select_RELOC_NONE(NodeToMatch);
+ return;
case ISD::FREEZE:
Select_FREEZE(NodeToMatch);
return;
diff --git a/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp b/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
index 414e414..b99e1c7 100644
--- a/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -1665,6 +1665,17 @@ void TwoAddressInstructionImpl::processTiedPairs(MachineInstr *MI,
// by SubRegB is compatible with RegA with no subregister. So regardless of
// whether the dest oper writes a subreg, the source oper should not.
MO.setSubReg(0);
+
+ // Update uses of RegB to uses of RegA inside the bundle.
+ if (MI->isBundle()) {
+ for (MachineOperand &MO : mi_bundle_ops(*MI)) {
+ if (MO.isReg() && MO.getReg() == RegB) {
+ assert(MO.getSubReg() == 0 && SubRegB == 0 &&
+ "tied subregister uses in bundled instructions not supported");
+ MO.setReg(RegA);
+ }
+ }
+ }
}
if (AllUsesCopied) {
diff --git a/llvm/lib/IR/DebugInfoMetadata.cpp b/llvm/lib/IR/DebugInfoMetadata.cpp
index fafc325..a98e925 100644
--- a/llvm/lib/IR/DebugInfoMetadata.cpp
+++ b/llvm/lib/IR/DebugInfoMetadata.cpp
@@ -962,16 +962,29 @@ DIType *DIDerivedType::getClassType() const {
assert(getTag() == dwarf::DW_TAG_ptr_to_member_type);
return cast_or_null<DIType>(getExtraData());
}
+
+// Helper function to extract ConstantAsMetadata from ExtraData,
+// handling extra data MDTuple unwrapping if needed.
+static ConstantAsMetadata *extractConstantMetadata(Metadata *ExtraData) {
+ Metadata *ED = ExtraData;
+ if (auto *Tuple = dyn_cast_or_null<MDTuple>(ED)) {
+ if (Tuple->getNumOperands() != 1)
+ return nullptr;
+ ED = Tuple->getOperand(0);
+ }
+ return cast_or_null<ConstantAsMetadata>(ED);
+}
+
uint32_t DIDerivedType::getVBPtrOffset() const {
assert(getTag() == dwarf::DW_TAG_inheritance);
- if (auto *CM = cast_or_null<ConstantAsMetadata>(getExtraData()))
+ if (auto *CM = extractConstantMetadata(getExtraData()))
if (auto *CI = dyn_cast_or_null<ConstantInt>(CM->getValue()))
return static_cast<uint32_t>(CI->getZExtValue());
return 0;
}
Constant *DIDerivedType::getStorageOffsetInBits() const {
assert(getTag() == dwarf::DW_TAG_member && isBitField());
- if (auto *C = cast_or_null<ConstantAsMetadata>(getExtraData()))
+ if (auto *C = extractConstantMetadata(getExtraData()))
return C->getValue();
return nullptr;
}
@@ -980,13 +993,13 @@ Constant *DIDerivedType::getConstant() const {
assert((getTag() == dwarf::DW_TAG_member ||
getTag() == dwarf::DW_TAG_variable) &&
isStaticMember());
- if (auto *C = cast_or_null<ConstantAsMetadata>(getExtraData()))
+ if (auto *C = extractConstantMetadata(getExtraData()))
return C->getValue();
return nullptr;
}
Constant *DIDerivedType::getDiscriminantValue() const {
assert(getTag() == dwarf::DW_TAG_member && !isStaticMember());
- if (auto *C = cast_or_null<ConstantAsMetadata>(getExtraData()))
+ if (auto *C = extractConstantMetadata(getExtraData()))
return C->getValue();
return nullptr;
}
diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
index 24f90bf..f1e473a 100644
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -6013,6 +6013,12 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
Check(cast<ConstantInt>(Call.getArgOperand(3))->getZExtValue() < 2,
"cache type argument to llvm.prefetch must be 0-1", Call);
break;
+ case Intrinsic::reloc_none: {
+ Check(isa<MDString>(
+ cast<MetadataAsValue>(Call.getArgOperand(0))->getMetadata()),
+ "llvm.reloc.none argument must be a metadata string", &Call);
+ break;
+ }
case Intrinsic::stackprotector:
Check(isa<AllocaInst>(Call.getArgOperand(1)->stripPointerCasts()),
"llvm.stackprotector parameter #2 must resolve to an alloca.", Call);
diff --git a/llvm/lib/ProfileData/InstrProf.cpp b/llvm/lib/ProfileData/InstrProf.cpp
index 0208735..5498787 100644
--- a/llvm/lib/ProfileData/InstrProf.cpp
+++ b/llvm/lib/ProfileData/InstrProf.cpp
@@ -1690,7 +1690,7 @@ Expected<Header> Header::readFromBuffer(const unsigned char *Buffer) {
IndexedInstrProf::ProfVersion::CurrentVersion)
return make_error<InstrProfError>(instrprof_error::unsupported_version);
- static_assert(IndexedInstrProf::ProfVersion::CurrentVersion == Version12,
+ static_assert(IndexedInstrProf::ProfVersion::CurrentVersion == Version13,
"Please update the reader as needed when a new field is added "
"or when indexed profile version gets bumped.");
@@ -1723,10 +1723,11 @@ size_t Header::size() const {
// of the header, and byte offset of existing fields shouldn't change when
// indexed profile version gets incremented.
static_assert(
- IndexedInstrProf::ProfVersion::CurrentVersion == Version12,
+ IndexedInstrProf::ProfVersion::CurrentVersion == Version13,
"Please update the size computation below if a new field has "
"been added to the header; for a version bump without new "
"fields, add a case statement to fall through to the latest version.");
+ case 13ull:
case 12ull:
return 72;
case 11ull:
diff --git a/llvm/lib/ProfileData/InstrProfWriter.cpp b/llvm/lib/ProfileData/InstrProfWriter.cpp
index a347351..0f15ca8 100644
--- a/llvm/lib/ProfileData/InstrProfWriter.cpp
+++ b/llvm/lib/ProfileData/InstrProfWriter.cpp
@@ -542,7 +542,7 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) {
// The WritePrevVersion handling will either need to be removed or updated
// if the version is advanced beyond 12.
static_assert(IndexedInstrProf::ProfVersion::CurrentVersion ==
- IndexedInstrProf::ProfVersion::Version12);
+ IndexedInstrProf::ProfVersion::Version13);
if (static_cast<bool>(ProfileKind & InstrProfKind::IRInstrumentation))
Header.Version |= VARIANT_MASK_IR_PROF;
if (static_cast<bool>(ProfileKind & InstrProfKind::ContextSensitive))
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index d08f9b9..40e6400 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -50,6 +50,7 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SDPatternMatch.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetCallingConv.h"
@@ -104,7 +105,6 @@
#include <vector>
using namespace llvm;
-using namespace llvm::PatternMatch;
#define DEBUG_TYPE "aarch64-lower"
@@ -1174,6 +1174,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setTargetDAGCombine(ISD::SHL);
setTargetDAGCombine(ISD::VECTOR_DEINTERLEAVE);
+ setTargetDAGCombine(ISD::CTPOP);
// In case of strict alignment, avoid an excessive number of byte wide stores.
MaxStoresPerMemsetOptSize = 8;
@@ -11330,9 +11331,10 @@ SDValue AArch64TargetLowering::LowerMinMax(SDValue Op,
break;
}
+ // Note: This lowering only overrides NEON for v1i64 and v2i64, where we
+ // prefer using SVE if available.
if (VT.isScalableVector() ||
- useSVEForFixedLengthVectorVT(
- VT, /*OverrideNEON=*/Subtarget->useSVEForFixedLengthVectors())) {
+ useSVEForFixedLengthVectorVT(VT, /*OverrideNEON=*/true)) {
switch (Opcode) {
default:
llvm_unreachable("Wrong instruction");
@@ -17554,6 +17556,7 @@ bool AArch64TargetLowering::optimizeExtendOrTruncateConversion(
// udot instruction.
if (SrcWidth * 4 <= DstWidth) {
if (all_of(I->users(), [&](auto *U) {
+ using namespace llvm::PatternMatch;
auto *SingleUser = cast<Instruction>(&*U);
if (match(SingleUser, m_c_Mul(m_Specific(I), m_SExt(m_Value()))))
return true;
@@ -17825,6 +17828,7 @@ bool AArch64TargetLowering::lowerInterleavedLoad(
// into shift / and masks. For the moment we do this just for uitofp (not
// zext) to avoid issues with widening instructions.
if (Shuffles.size() == 4 && all_of(Shuffles, [](ShuffleVectorInst *SI) {
+ using namespace llvm::PatternMatch;
return SI->hasOneUse() && match(SI->user_back(), m_UIToFP(m_Value())) &&
SI->getType()->getScalarSizeInBits() * 4 ==
SI->user_back()->getType()->getScalarSizeInBits();
@@ -27841,6 +27845,35 @@ static SDValue performRNDRCombine(SDNode *N, SelectionDAG &DAG) {
{A, DAG.getZExtOrTrunc(B, DL, MVT::i1), A.getValue(2)}, DL);
}
+// DAG combine for ISD::CTPOP:
+//   ctpop(zext(bitcast(vector_mask))) -> neg(signed_reduce_add(vector_mask))
+// Returns an empty SDValue when the pattern or the type constraints do not
+// match, or when the combine runs after legalization has started.
+static SDValue performCTPOPCombine(SDNode *N,
+                                   TargetLowering::DAGCombinerInfo &DCI,
+                                   SelectionDAG &DAG) {
+  using namespace llvm::SDPatternMatch;
+
+  if (!DCI.isBeforeLegalize())
+    return SDValue();
+
+  // Look through the zext and the bitcast to find the value being counted.
+  SDValue VecMask;
+  const bool IsMaskPopCount =
+      sd_match(N->getOperand(0), m_ZExt(m_BitCast(m_Value(VecMask))));
+  if (!IsMaskPopCount)
+    return SDValue();
+
+  EVT ResultVT = N->getValueType(0);
+  EVT VecMaskVT = VecMask.getValueType();
+
+  // The result must be a scalar and the source a fixed-length vector of i1.
+  const bool IsFixedI1Mask = VecMaskVT.isFixedLengthVector() &&
+                             VecMaskVT.getVectorElementType() == MVT::i1;
+  if (ResultVT.isVector() || !IsFixedI1Mask)
+    return SDValue();
+
+  // Vector type with one element of the scalar result type per mask element.
+  EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(), ResultVT,
+                                    VecMaskVT.getVectorElementCount());
+
+  SDLoc DL(N);
+  // Sign extend to best fit ZeroOrNegativeOneBooleanContent. The reduction
+  // then yields the negated population count, which is negated back below.
+  SDValue SExtMask = DAG.getNode(ISD::SIGN_EXTEND, DL, WideMaskVT, VecMask);
+  SDValue NegCount = DAG.getNode(ISD::VECREDUCE_ADD, DL, ResultVT, SExtMask);
+  return DAG.getNegative(NegCount, DL, ResultVT);
+}
+
SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
@@ -28186,6 +28219,8 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
return performScalarToVectorCombine(N, DCI, DAG);
case ISD::SHL:
return performSHLCombine(N, DCI, DAG);
+ case ISD::CTPOP:
+ return performCTPOPCombine(N, DCI, DAG);
}
return SDValue();
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 1b559a6..f5081a9 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -1248,7 +1248,8 @@ void AMDGPUTargetLowering::analyzeFormalArgumentsCompute(
SmallVector<EVT, 16> ValueVTs;
SmallVector<uint64_t, 16> Offsets;
- ComputeValueVTs(*this, DL, BaseArgTy, ValueVTs, &Offsets, ArgOffset);
+ ComputeValueVTs(*this, DL, BaseArgTy, ValueVTs, /*MemVTs=*/nullptr,
+ &Offsets, ArgOffset);
for (unsigned Value = 0, NumValues = ValueVTs.size();
Value != NumValues; ++Value) {
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 9460145..6ce18ea 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -3917,6 +3917,9 @@ bool SIInstrInfo::areMemAccessesTriviallyDisjoint(const MachineInstr &MIa,
if (isLDSDMA(MIa) || isLDSDMA(MIb))
return false;
+ if (MIa.isBundle() || MIb.isBundle())
+ return false;
+
// TODO: Should we check the address space from the MachineMemOperand? That
// would allow us to distinguish objects we know don't alias based on the
// underlying address space, even if it was lowered to a different one,
diff --git a/llvm/lib/Target/BPF/BPFAbstractMemberAccess.cpp b/llvm/lib/Target/BPF/BPFAbstractMemberAccess.cpp
index 8c7bc2f..81303fa 100644
--- a/llvm/lib/Target/BPF/BPFAbstractMemberAccess.cpp
+++ b/llvm/lib/Target/BPF/BPFAbstractMemberAccess.cpp
@@ -97,7 +97,6 @@
#define DEBUG_TYPE "bpf-abstract-member-access"
namespace llvm {
-constexpr StringRef BPFCoreSharedInfo::AmaAttr;
uint32_t BPFCoreSharedInfo::SeqNum;
Instruction *BPFCoreSharedInfo::insertPassThrough(Module *M, BasicBlock *BB,
diff --git a/llvm/lib/Target/BPF/BPFISelLowering.cpp b/llvm/lib/Target/BPF/BPFISelLowering.cpp
index 6e5520c..3c61216 100644
--- a/llvm/lib/Target/BPF/BPFISelLowering.cpp
+++ b/llvm/lib/Target/BPF/BPFISelLowering.cpp
@@ -803,26 +803,6 @@ SDValue BPFTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
return getAddr(N, DAG);
}
-const char *BPFTargetLowering::getTargetNodeName(unsigned Opcode) const {
- switch ((BPFISD::NodeType)Opcode) {
- case BPFISD::FIRST_NUMBER:
- break;
- case BPFISD::RET_GLUE:
- return "BPFISD::RET_GLUE";
- case BPFISD::CALL:
- return "BPFISD::CALL";
- case BPFISD::SELECT_CC:
- return "BPFISD::SELECT_CC";
- case BPFISD::BR_CC:
- return "BPFISD::BR_CC";
- case BPFISD::Wrapper:
- return "BPFISD::Wrapper";
- case BPFISD::MEMCPY:
- return "BPFISD::MEMCPY";
- }
- return nullptr;
-}
-
static SDValue getTargetNode(ConstantPoolSDNode *N, const SDLoc &DL, EVT Ty,
SelectionDAG &DAG, unsigned Flags) {
return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
diff --git a/llvm/lib/Target/BPF/BPFISelLowering.h b/llvm/lib/Target/BPF/BPFISelLowering.h
index 5243d49..3d6e7c7 100644
--- a/llvm/lib/Target/BPF/BPFISelLowering.h
+++ b/llvm/lib/Target/BPF/BPFISelLowering.h
@@ -20,17 +20,6 @@
namespace llvm {
class BPFSubtarget;
-namespace BPFISD {
-enum NodeType : unsigned {
- FIRST_NUMBER = ISD::BUILTIN_OP_END,
- RET_GLUE,
- CALL,
- SELECT_CC,
- BR_CC,
- Wrapper,
- MEMCPY
-};
-}
class BPFTargetLowering : public TargetLowering {
public:
@@ -39,9 +28,6 @@ public:
// Provide custom lowering hooks for some operations.
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
- // This method returns the name of a target specific DAG node.
- const char *getTargetNodeName(unsigned Opcode) const override;
-
// This method decides whether folding a constant offset
// with the given GlobalAddress is legal.
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
diff --git a/llvm/lib/Target/BPF/BPFInstrInfo.td b/llvm/lib/Target/BPF/BPFInstrInfo.td
index 51c32b2..bdacf9c 100644
--- a/llvm/lib/Target/BPF/BPFInstrInfo.td
+++ b/llvm/lib/Target/BPF/BPFInstrInfo.td
@@ -41,14 +41,12 @@ def BPFcallseq_start: SDNode<"ISD::CALLSEQ_START", SDT_BPFCallSeqStart,
[SDNPHasChain, SDNPOutGlue]>;
def BPFcallseq_end : SDNode<"ISD::CALLSEQ_END", SDT_BPFCallSeqEnd,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
-def BPFbrcc : SDNode<"BPFISD::BR_CC", SDT_BPFBrCC,
- [SDNPHasChain, SDNPOutGlue, SDNPInGlue]>;
+def BPFbrcc : SDNode<"BPFISD::BR_CC", SDT_BPFBrCC, [SDNPHasChain]>;
def BPFselectcc : SDNode<"BPFISD::SELECT_CC", SDT_BPFSelectCC>;
def BPFWrapper : SDNode<"BPFISD::Wrapper", SDT_BPFWrapper>;
def BPFmemcpy : SDNode<"BPFISD::MEMCPY", SDT_BPFMEMCPY,
- [SDNPHasChain, SDNPInGlue, SDNPOutGlue,
- SDNPMayStore, SDNPMayLoad]>;
+ [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>;
def BPFIsLittleEndian : Predicate<"Subtarget->isLittleEndian()">;
def BPFIsBigEndian : Predicate<"!Subtarget->isLittleEndian()">;
def BPFHasALU32 : Predicate<"Subtarget->getHasAlu32()">;
diff --git a/llvm/lib/Target/BPF/BPFPreserveDIType.cpp b/llvm/lib/Target/BPF/BPFPreserveDIType.cpp
index d3b0c02..6a11ea6 100644
--- a/llvm/lib/Target/BPF/BPFPreserveDIType.cpp
+++ b/llvm/lib/Target/BPF/BPFPreserveDIType.cpp
@@ -27,10 +27,6 @@
#define DEBUG_TYPE "bpf-preserve-di-type"
-namespace llvm {
-constexpr StringRef BPFCoreSharedInfo::TypeIdAttr;
-} // namespace llvm
-
using namespace llvm;
namespace {
diff --git a/llvm/lib/Target/BPF/BPFSelectionDAGInfo.cpp b/llvm/lib/Target/BPF/BPFSelectionDAGInfo.cpp
index 3e29e6c..0e6d35d 100644
--- a/llvm/lib/Target/BPF/BPFSelectionDAGInfo.cpp
+++ b/llvm/lib/Target/BPF/BPFSelectionDAGInfo.cpp
@@ -10,12 +10,20 @@
//
//===----------------------------------------------------------------------===//
+#include "BPFSelectionDAGInfo.h"
#include "BPFTargetMachine.h"
#include "llvm/CodeGen/SelectionDAG.h"
+
+#define GET_SDNODE_DESC
+#include "BPFGenSDNodeInfo.inc"
+
using namespace llvm;
#define DEBUG_TYPE "bpf-selectiondag-info"
+BPFSelectionDAGInfo::BPFSelectionDAGInfo()
+ : SelectionDAGGenTargetInfo(BPFGenSDNodeInfo) {}
+
SDValue BPFSelectionDAGInfo::EmitTargetCodeForMemcpy(
SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
SDValue Size, Align Alignment, bool isVolatile, bool AlwaysInline,
@@ -31,11 +39,7 @@ SDValue BPFSelectionDAGInfo::EmitTargetCodeForMemcpy(
if (StoresNumEstimate > getCommonMaxStoresPerMemFunc())
return SDValue();
- SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
-
- Dst = DAG.getNode(BPFISD::MEMCPY, dl, VTs, Chain, Dst, Src,
- DAG.getConstant(CopyLen, dl, MVT::i64),
- DAG.getConstant(Alignment.value(), dl, MVT::i64));
-
- return Dst.getValue(0);
+ return DAG.getNode(BPFISD::MEMCPY, dl, MVT::Other, Chain, Dst, Src,
+ DAG.getConstant(CopyLen, dl, MVT::i64),
+ DAG.getConstant(Alignment.value(), dl, MVT::i64));
}
diff --git a/llvm/lib/Target/BPF/BPFSelectionDAGInfo.h b/llvm/lib/Target/BPF/BPFSelectionDAGInfo.h
index 79f05e5..7345d2d 100644
--- a/llvm/lib/Target/BPF/BPFSelectionDAGInfo.h
+++ b/llvm/lib/Target/BPF/BPFSelectionDAGInfo.h
@@ -15,10 +15,15 @@
#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
+#define GET_SDNODE_ENUM
+#include "BPFGenSDNodeInfo.inc"
+
namespace llvm {
-class BPFSelectionDAGInfo : public SelectionDAGTargetInfo {
+class BPFSelectionDAGInfo : public SelectionDAGGenTargetInfo {
public:
+ BPFSelectionDAGInfo();
+
SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, const SDLoc &dl,
SDValue Chain, SDValue Dst, SDValue Src,
SDValue Size, Align Alignment,
@@ -27,9 +32,8 @@ public:
MachinePointerInfo SrcPtrInfo) const override;
unsigned getCommonMaxStoresPerMemFunc() const { return 128; }
-
};
-}
+} // namespace llvm
#endif
diff --git a/llvm/lib/Target/BPF/CMakeLists.txt b/llvm/lib/Target/BPF/CMakeLists.txt
index 3678f13..fa539a0 100644
--- a/llvm/lib/Target/BPF/CMakeLists.txt
+++ b/llvm/lib/Target/BPF/CMakeLists.txt
@@ -10,6 +10,7 @@ tablegen(LLVM BPFGenDisassemblerTables.inc -gen-disassembler)
tablegen(LLVM BPFGenInstrInfo.inc -gen-instr-info)
tablegen(LLVM BPFGenMCCodeEmitter.inc -gen-emitter)
tablegen(LLVM BPFGenRegisterInfo.inc -gen-register-info)
+tablegen(LLVM BPFGenSDNodeInfo.inc -gen-sd-node-info)
tablegen(LLVM BPFGenSubtargetInfo.inc -gen-subtarget)
tablegen(LLVM BPFGenGlobalISel.inc -gen-global-isel)
tablegen(LLVM BPFGenRegisterBank.inc -gen-register-bank)
diff --git a/llvm/lib/Target/DirectX/DXILDataScalarization.cpp b/llvm/lib/Target/DirectX/DXILDataScalarization.cpp
index d507d71..9f1616f 100644
--- a/llvm/lib/Target/DirectX/DXILDataScalarization.cpp
+++ b/llvm/lib/Target/DirectX/DXILDataScalarization.cpp
@@ -304,40 +304,76 @@ bool DataScalarizerVisitor::visitGetElementPtrInst(GetElementPtrInst &GEPI) {
GEPOperator *GOp = cast<GEPOperator>(&GEPI);
Value *PtrOperand = GOp->getPointerOperand();
Type *NewGEPType = GOp->getSourceElementType();
- bool NeedsTransform = false;
// Unwrap GEP ConstantExprs to find the base operand and element type
- while (auto *CE = dyn_cast<ConstantExpr>(PtrOperand)) {
- if (auto *GEPCE = dyn_cast<GEPOperator>(CE)) {
- GOp = GEPCE;
- PtrOperand = GEPCE->getPointerOperand();
- NewGEPType = GEPCE->getSourceElementType();
- } else
- break;
+ while (auto *GEPCE = dyn_cast_or_null<GEPOperator>(
+ dyn_cast<ConstantExpr>(PtrOperand))) {
+ GOp = GEPCE;
+ PtrOperand = GEPCE->getPointerOperand();
+ NewGEPType = GEPCE->getSourceElementType();
}
+ Type *const OrigGEPType = NewGEPType;
+ Value *const OrigOperand = PtrOperand;
+
if (GlobalVariable *NewGlobal = lookupReplacementGlobal(PtrOperand)) {
NewGEPType = NewGlobal->getValueType();
PtrOperand = NewGlobal;
- NeedsTransform = true;
} else if (AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrOperand)) {
Type *AllocatedType = Alloca->getAllocatedType();
if (isa<ArrayType>(AllocatedType) &&
- AllocatedType != GOp->getResultElementType()) {
+ AllocatedType != GOp->getResultElementType())
NewGEPType = AllocatedType;
- NeedsTransform = true;
+ } else
+ return false; // Only GEPs into an alloca or global variable are considered
+
+ // Defer changing i8 GEP types until dxil-flatten-arrays
+ if (OrigGEPType->isIntegerTy(8))
+ NewGEPType = OrigGEPType;
+
+ // If the original type is a "sub-type" of the new type, then ensure the gep
+ // correctly zero-indexes the extra dimensions to keep the offset calculation
+ // correct.
+ // Eg:
+ // i32, [4 x i32] and [8 x [4 x i32]] are sub-types of [8 x [4 x i32]], etc.
+ //
+ // So then:
+ // gep [4 x i32] %idx
+ // -> gep [8 x [4 x i32]], i32 0, i32 %idx
+ // gep i32 %idx
+ // -> gep [8 x [4 x i32]], i32 0, i32 0, i32 %idx
+ uint32_t MissingDims = 0;
+ Type *SubType = NewGEPType;
+
+ // The new type will be in its array version; so match accordingly.
+ Type *const GEPArrType = equivalentArrayTypeFromVector(OrigGEPType);
+
+ while (SubType != GEPArrType) {
+ MissingDims++;
+
+ ArrayType *ArrType = dyn_cast<ArrayType>(SubType);
+ if (!ArrType) {
+ assert(SubType == GEPArrType &&
+ "GEP uses an DXIL invalid sub-type of alloca/global variable");
+ break;
}
+
+ SubType = ArrType->getElementType();
}
+ bool NeedsTransform = OrigOperand != PtrOperand ||
+ OrigGEPType != NewGEPType || MissingDims != 0;
+
if (!NeedsTransform)
return false;
- // Keep scalar GEPs scalar; dxil-flatten-arrays will do flattening later
- if (!isa<ArrayType>(GOp->getSourceElementType()))
- NewGEPType = GOp->getSourceElementType();
-
IRBuilder<> Builder(&GEPI);
- SmallVector<Value *, MaxVecSize> Indices(GOp->indices());
+ SmallVector<Value *, MaxVecSize> Indices;
+
+ for (uint32_t I = 0; I < MissingDims; I++)
+ Indices.push_back(Builder.getInt32(0));
+ llvm::append_range(Indices, GOp->indices());
+
Value *NewGEP = Builder.CreateGEP(NewGEPType, PtrOperand, Indices,
GOp->getName(), GOp->getNoWrapFlags());
diff --git a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
index ebb7c26..e0d2dbd 100644
--- a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
+++ b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
@@ -197,6 +197,7 @@ static Value *expand16BitIsNormal(CallInst *Orig) {
static bool isIntrinsicExpansion(Function &F) {
switch (F.getIntrinsicID()) {
+ case Intrinsic::assume:
case Intrinsic::abs:
case Intrinsic::atan2:
case Intrinsic::exp:
@@ -988,6 +989,9 @@ static bool expandIntrinsic(Function &F, CallInst *Orig) {
case Intrinsic::abs:
Result = expandAbs(Orig);
break;
+ case Intrinsic::assume:
+ Orig->eraseFromParent();
+ return true;
case Intrinsic::atan2:
Result = expandAtan2Intrinsic(Orig);
break;
diff --git a/llvm/lib/Target/DirectX/DXILOpLowering.cpp b/llvm/lib/Target/DirectX/DXILOpLowering.cpp
index 8720460..e46a393 100644
--- a/llvm/lib/Target/DirectX/DXILOpLowering.cpp
+++ b/llvm/lib/Target/DirectX/DXILOpLowering.cpp
@@ -904,8 +904,6 @@ public:
case Intrinsic::dx_resource_casthandle:
// NOTE: llvm.dbg.value is supported as is in DXIL.
case Intrinsic::dbg_value:
- // NOTE: llvm.assume is supported as is in DXIL.
- case Intrinsic::assume:
case Intrinsic::not_intrinsic:
if (F.use_empty())
F.eraseFromParent();
diff --git a/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp b/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp
index 47726d6..55bafde 100644
--- a/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp
@@ -4753,6 +4753,19 @@ bool HexagonInstrInfo::getBundleNoShuf(const MachineInstr &MIB) const {
return (Operand.isImm() && (Operand.getImm() & memShufDisabledMask) != 0);
}
+// Return true if MI's opcode is one of the V6_vmpy_qf* multiply opcodes
+// listed below; every other opcode yields false.
+bool HexagonInstrInfo::isQFPMul(const MachineInstr *MI) const {
+  // Each opcode appears exactly once; a switch avoids re-reading the opcode
+  // and makes accidental duplicates a compile error.
+  switch (MI->getOpcode()) {
+  case Hexagon::V6_vmpy_qf16_hf:
+  case Hexagon::V6_vmpy_qf16_mix_hf:
+  case Hexagon::V6_vmpy_qf16:
+  case Hexagon::V6_vmpy_qf32_hf:
+  case Hexagon::V6_vmpy_qf32_mix_hf:
+  case Hexagon::V6_vmpy_qf32_sf:
+  case Hexagon::V6_vmpy_qf32_qf16:
+  case Hexagon::V6_vmpy_qf32:
+    return true;
+  default:
+    return false;
+  }
+}
+
// Addressing mode relations.
short HexagonInstrInfo::changeAddrMode_abs_io(short Opc) const {
return Opc >= 0 ? Hexagon::changeAddrMode_abs_io(Opc) : Opc;
diff --git a/llvm/lib/Target/Hexagon/HexagonInstrInfo.h b/llvm/lib/Target/Hexagon/HexagonInstrInfo.h
index c17e527..48adf82 100644
--- a/llvm/lib/Target/Hexagon/HexagonInstrInfo.h
+++ b/llvm/lib/Target/Hexagon/HexagonInstrInfo.h
@@ -532,6 +532,7 @@ public:
}
MCInst getNop() const override;
+ /// Return true if \p MI is one of the V6_vmpy_qf* multiply instructions.
+ bool isQFPMul(const MachineInstr *MI) const;
};
/// \brief Create RegSubRegPair from a register MachineOperand
diff --git a/llvm/lib/Target/Hexagon/HexagonQFPOptimizer.cpp b/llvm/lib/Target/Hexagon/HexagonQFPOptimizer.cpp
index f29a739..8801f69 100644
--- a/llvm/lib/Target/Hexagon/HexagonQFPOptimizer.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonQFPOptimizer.cpp
@@ -58,7 +58,7 @@
// are PHI inst.
//
//===----------------------------------------------------------------------===//
-#include <unordered_set>
+
#define HEXAGON_QFP_OPTIMIZER "QFP optimizer pass"
#include "Hexagon.h"
@@ -86,6 +86,9 @@ using namespace llvm;
cl::opt<bool>
DisableQFOptimizer("disable-qfp-opt", cl::init(false),
cl::desc("Disable optimization of Qfloat operations."));
+cl::opt<bool> DisableQFOptForMul(
+ "disable-qfp-opt-mul", cl::init(true),
+ cl::desc("Disable optimization of Qfloat operations for multiply."));
namespace {
const std::map<unsigned short, unsigned short> QFPInstMap{
@@ -101,11 +104,21 @@ const std::map<unsigned short, unsigned short> QFPInstMap{
{Hexagon::V6_vmpy_qf16_mix_hf, Hexagon::V6_vmpy_qf16},
{Hexagon::V6_vmpy_qf32_hf, Hexagon::V6_vmpy_qf32_mix_hf},
{Hexagon::V6_vmpy_qf32_mix_hf, Hexagon::V6_vmpy_qf32_qf16},
- {Hexagon::V6_vmpy_qf32_sf, Hexagon::V6_vmpy_qf32}};
+ {Hexagon::V6_vmpy_qf32_sf, Hexagon::V6_vmpy_qf32},
+ {Hexagon::V6_vilog2_sf, Hexagon::V6_vilog2_qf32},
+ {Hexagon::V6_vilog2_hf, Hexagon::V6_vilog2_qf16},
+ {Hexagon::V6_vabs_qf32_sf, Hexagon::V6_vabs_qf32_qf32},
+ {Hexagon::V6_vabs_qf16_hf, Hexagon::V6_vabs_qf16_qf16},
+ {Hexagon::V6_vneg_qf32_sf, Hexagon::V6_vneg_qf32_qf32},
+ {Hexagon::V6_vneg_qf16_hf, Hexagon::V6_vneg_qf16_qf16}};
} // namespace
-namespace {
+namespace llvm {
+FunctionPass *createHexagonQFPOptimizer();
+void initializeHexagonQFPOptimizerPass(PassRegistry &);
+} // namespace llvm
+namespace {
struct HexagonQFPOptimizer : public MachineFunctionPass {
public:
static char ID;
@@ -116,6 +129,10 @@ public:
bool optimizeQfp(MachineInstr *MI, MachineBasicBlock *MBB);
+ bool optimizeQfpTwoOp(MachineInstr *MI, MachineBasicBlock *MBB);
+
+ bool optimizeQfpOneOp(MachineInstr *MI, MachineBasicBlock *MBB);
+
StringRef getPassName() const override { return HEXAGON_QFP_OPTIMIZER; }
void getAnalysisUsage(AnalysisUsage &AU) const override {
@@ -142,19 +159,69 @@ FunctionPass *llvm::createHexagonQFPOptimizer() {
bool HexagonQFPOptimizer::optimizeQfp(MachineInstr *MI,
MachineBasicBlock *MBB) {
- // Early exit:
- // - if instruction is invalid or has too few operands (QFP ops need 2 sources
- // + 1 dest),
- // - or does not have a transformation mapping.
- if (MI->getNumOperands() < 3)
+ if (MI->getNumOperands() == 2)
+ return optimizeQfpOneOp(MI, MBB);
+ else if (MI->getNumOperands() == 3)
+ return optimizeQfpTwoOp(MI, MBB);
+ else
return false;
+}
+
+// Handle a QFP instruction with one source operand. If that source is defined
+// by a qf -> sf/hf conversion (V6_vconv_sf_qf32 / V6_vconv_hf_qf16), insert
+// before MI an instruction with the replacement opcode from QFPInstMap that
+// writes MI's result register and reads the conversion's input directly.
+//
+// Returns true when the replacement was inserted (MI itself is left in place)
+// and false when MI does not match.
+bool HexagonQFPOptimizer::optimizeQfpOneOp(MachineInstr *MI,
+                                           MachineBasicBlock *MBB) {
+  unsigned Op0F = 0;
+  auto It = QFPInstMap.find(MI->getOpcode());
+  if (It == QFPInstMap.end())
+    return false;
+
+  unsigned short InstTy = It->second;
+
+  // Both the result and the source must be registers before asking for them.
+  MachineOperand &Res = MI->getOperand(0);
+  MachineOperand &Src = MI->getOperand(1);
+  if (!Res.isReg() || !Src.isReg())
+    return false;
+
+  // Get the reaching def of MI's source; getVRegDef returns null if none is
+  // found, so bail out instead of dereferencing it.
+  MachineInstr *DefMI = MRI->getVRegDef(Src.getReg());
+  if (!DefMI)
+    return false;
+
+  LLVM_DEBUG(dbgs() << "\n[Reaching Defs of operands]: "; DefMI->dump());
+  MachineInstr *ReachDefDef = nullptr;
+
+  // Get the reaching def of the reaching def to check for W reg def
+  if (DefMI->getNumOperands() > 1 && DefMI->getOperand(1).isReg() &&
+      DefMI->getOperand(1).getReg().isVirtual())
+    ReachDefDef = MRI->getVRegDef(DefMI->getOperand(1).getReg());
+  unsigned ReachDefOp = DefMI->getOpcode();
+  MachineInstrBuilder MIB;
+
+  // Nothing to do unless the reaching def is a conversion
+  if (ReachDefOp != Hexagon::V6_vconv_sf_qf32 &&
+      ReachDefOp != Hexagon::V6_vconv_hf_qf16)
+    return false;
+
+  // Return if the reaching def of reaching def is W type
+  if (ReachDefDef && MRI->getRegClass(ReachDefDef->getOperand(0).getReg()) ==
+                         &Hexagon::HvxWRRegClass)
+    return false;
+
+  // Analyze the use operand of the conversion to get its KILL status. The
+  // kill flag moves to the new instruction, so clear it on the conversion.
+  MachineOperand &SrcOp = DefMI->getOperand(1);
+  Op0F = getKillRegState(SrcOp.isKill());
+  SrcOp.setIsKill(false);
+  MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), HII->get(InstTy), Res.getReg())
+            .addReg(SrcOp.getReg(), Op0F, SrcOp.getSubReg());
+  LLVM_DEBUG(dbgs() << "\n[Inserting]: "; MIB.getInstr()->dump());
+  return true;
+}
+
+bool HexagonQFPOptimizer::optimizeQfpTwoOp(MachineInstr *MI,
+ MachineBasicBlock *MBB) {
unsigned Op0F = 0;
unsigned Op1F = 0;
+ auto It = QFPInstMap.find(MI->getOpcode());
+ if (It == QFPInstMap.end())
+ return false;
+ unsigned short InstTy = It->second;
// Get the reaching defs of MI, DefMI1 and DefMI2
MachineInstr *DefMI1 = nullptr;
MachineInstr *DefMI2 = nullptr;
@@ -167,6 +234,9 @@ bool HexagonQFPOptimizer::optimizeQfp(MachineInstr *MI,
return false;
MachineOperand &Res = MI->getOperand(0);
+ if (!Res.isReg())
+ return false;
+
MachineInstr *Inst1 = nullptr;
MachineInstr *Inst2 = nullptr;
LLVM_DEBUG(dbgs() << "\n[Reaching Defs of operands]: "; DefMI1->dump();
@@ -185,7 +255,8 @@ bool HexagonQFPOptimizer::optimizeQfp(MachineInstr *MI,
unsigned Def2OP = DefMI2->getOpcode();
MachineInstrBuilder MIB;
- // Case 1: Both reaching defs of MI are qf to sf/hf conversions
+
+ // Check if both reaching defs of MI are qf to sf/hf conversions
if ((Def1OP == Hexagon::V6_vconv_sf_qf32 &&
Def2OP == Hexagon::V6_vconv_sf_qf32) ||
(Def1OP == Hexagon::V6_vconv_hf_qf16 &&
@@ -226,7 +297,7 @@ bool HexagonQFPOptimizer::optimizeQfp(MachineInstr *MI,
LLVM_DEBUG(dbgs() << "\n[Inserting]: "; MIB.getInstr()->dump());
return true;
- // Case 2: Left operand is conversion to sf/hf
+ // Check if left operand's reaching def is a conversion to sf/hf
} else if (((Def1OP == Hexagon::V6_vconv_sf_qf32 &&
Def2OP != Hexagon::V6_vconv_sf_qf32) ||
(Def1OP == Hexagon::V6_vconv_hf_qf16 &&
@@ -250,7 +321,7 @@ bool HexagonQFPOptimizer::optimizeQfp(MachineInstr *MI,
LLVM_DEBUG(dbgs() << "\n[Inserting]: "; MIB.getInstr()->dump());
return true;
- // Case 2: Left operand is conversion to sf/hf
+ // Check if right operand's reaching def is a conversion to sf/hf
} else if (((Def1OP != Hexagon::V6_vconv_sf_qf32 &&
Def2OP == Hexagon::V6_vconv_sf_qf32) ||
(Def1OP != Hexagon::V6_vconv_hf_qf16 &&
@@ -258,13 +329,6 @@ bool HexagonQFPOptimizer::optimizeQfp(MachineInstr *MI,
!DefMI1->isPHI() &&
(MI->getOpcode() != Hexagon::V6_vmpy_qf32_sf)) {
// The second operand of original instruction is converted.
- // In "mix" instructions, "qf" operand is always the first operand.
-
- // Caveat: vsub is not commutative w.r.t operands.
- if (InstTy == Hexagon::V6_vsub_qf16_mix ||
- InstTy == Hexagon::V6_vsub_qf32_mix)
- return false;
-
if (Inst2 && MRI->getRegClass(Inst2->getOperand(0).getReg()) ==
&Hexagon::HvxWRRegClass)
return false;
@@ -275,10 +339,26 @@ bool HexagonQFPOptimizer::optimizeQfp(MachineInstr *MI,
Op1F = getKillRegState(Src2.isKill());
Src2.setIsKill(false);
Op0F = getKillRegState(Src1.isKill());
- MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), HII->get(InstTy), Res.getReg())
- .addReg(Src2.getReg(), Op1F,
- Src2.getSubReg()) // Notice the operands are flipped.
- .addReg(Src1.getReg(), Op0F, Src1.getSubReg());
+ if (InstTy == Hexagon::V6_vsub_qf16_mix ||
+ InstTy == Hexagon::V6_vsub_qf32_mix) {
+ if (!HST->useHVXV81Ops())
+ // vsub_(hf|sf)_mix insts are only available on hvx81+
+ return false;
+ // vsub is not commutative w.r.t. operands -> treat it as a special case
+ // to choose the correct mix instruction.
+ if (Def2OP == Hexagon::V6_vconv_sf_qf32)
+ InstTy = Hexagon::V6_vsub_sf_mix;
+ else if (Def2OP == Hexagon::V6_vconv_hf_qf16)
+ InstTy = Hexagon::V6_vsub_hf_mix;
+ MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), HII->get(InstTy), Res.getReg())
+ .addReg(Src1.getReg(), Op0F, Src1.getSubReg())
+ .addReg(Src2.getReg(), Op1F, Src2.getSubReg());
+ } else {
+ MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), HII->get(InstTy), Res.getReg())
+ .addReg(Src2.getReg(), Op1F,
+ Src2.getSubReg()) // Notice the operands are flipped.
+ .addReg(Src1.getReg(), Op0F, Src1.getSubReg());
+ }
LLVM_DEBUG(dbgs() << "\n[Inserting]: "; MIB.getInstr()->dump());
return true;
}
@@ -309,15 +389,18 @@ bool HexagonQFPOptimizer::runOnMachineFunction(MachineFunction &MF) {
while (MII != MBBI->instr_end()) {
MachineInstr *MI = &*MII;
++MII; // As MI might be removed.
-
- if (QFPInstMap.count(MI->getOpcode()) &&
- MI->getOpcode() != Hexagon::V6_vconv_sf_qf32 &&
- MI->getOpcode() != Hexagon::V6_vconv_hf_qf16) {
- LLVM_DEBUG(dbgs() << "\n###Analyzing for removal: "; MI->dump());
- if (optimizeQfp(MI, MBB)) {
- MI->eraseFromParent();
- LLVM_DEBUG(dbgs() << "\t....Removing....");
- Changed = true;
+ if (QFPInstMap.count(MI->getOpcode())) {
+ auto OpC = MI->getOpcode();
+ if (DisableQFOptForMul && HII->isQFPMul(MI))
+ continue;
+ if (OpC != Hexagon::V6_vconv_sf_qf32 &&
+ OpC != Hexagon::V6_vconv_hf_qf16) {
+ LLVM_DEBUG(dbgs() << "\n###Analyzing for removal: "; MI->dump());
+ if (optimizeQfp(MI, MBB)) {
+ MI->eraseFromParent();
+ LLVM_DEBUG(dbgs() << "\t....Removing....");
+ Changed = true;
+ }
}
}
}
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
index 2f1a7ad..a3deb36 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -305,7 +305,8 @@ static void ComputePTXValueVTs(const TargetLowering &TLI, const DataLayout &DL,
uint64_t StartingOffset = 0) {
SmallVector<EVT, 16> TempVTs;
SmallVector<uint64_t, 16> TempOffsets;
- ComputeValueVTs(TLI, DL, Ty, TempVTs, &TempOffsets, StartingOffset);
+ ComputeValueVTs(TLI, DL, Ty, TempVTs, /*MemVTs=*/nullptr, &TempOffsets,
+ StartingOffset);
for (const auto [VT, Off] : zip(TempVTs, TempOffsets)) {
MVT RegisterVT = TLI.getRegisterTypeForCallingConv(Ctx, CallConv, VT);
diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
index 780e124..122738c 100644
--- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -2750,6 +2750,10 @@ void PPCAIXAsmPrinter::emitGlobalVariable(const GlobalVariable *GV) {
if (isSpecialLLVMGlobalArrayToSkip(GV) || isSpecialLLVMGlobalArrayForStaticInit(GV))
return;
+ // Ignore non-emitted data.
+ if (GV->getSection() == "llvm.metadata")
+ return;
+
// If the Global Variable has the toc-data attribute, it needs to be emitted
// when we emit the .toc section.
if (GV->hasAttribute("toc-data")) {
diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
index b37b740..f881c4c 100644
--- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
@@ -789,6 +789,8 @@ void RISCVFrameLowering::allocateStack(MachineBasicBlock &MBB,
// Unroll the probe loop depending on the number of iterations.
if (Offset < ProbeSize * 5) {
+ uint64_t CFAAdjust = RealStackSize - Offset;
+
uint64_t CurrentOffset = 0;
while (CurrentOffset + ProbeSize <= Offset) {
RI->adjustReg(MBB, MBBI, DL, SPReg, SPReg,
@@ -802,7 +804,7 @@ void RISCVFrameLowering::allocateStack(MachineBasicBlock &MBB,
CurrentOffset += ProbeSize;
if (EmitCFI)
- CFIBuilder.buildDefCFAOffset(CurrentOffset);
+ CFIBuilder.buildDefCFAOffset(CurrentOffset + CFAAdjust);
}
uint64_t Residual = Offset - CurrentOffset;
@@ -810,7 +812,7 @@ void RISCVFrameLowering::allocateStack(MachineBasicBlock &MBB,
RI->adjustReg(MBB, MBBI, DL, SPReg, SPReg,
StackOffset::getFixed(-Residual), Flag, getStackAlign());
if (EmitCFI)
- CFIBuilder.buildDefCFAOffset(Offset);
+ CFIBuilder.buildDefCFAOffset(RealStackSize);
if (DynAllocation) {
// s[d|w] zero, 0(sp)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 995ae75..3b69eda 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -17867,6 +17867,7 @@ static SDValue combineOp_VLToVWOp_VL(SDNode *N,
SmallVector<SDNode *> Worklist;
SmallPtrSet<SDNode *, 8> Inserted;
+ SmallPtrSet<SDNode *, 8> ExtensionsToRemove;
Worklist.push_back(N);
Inserted.insert(N);
SmallVector<CombineResult> CombinesToApply;
@@ -17876,22 +17877,25 @@ static SDValue combineOp_VLToVWOp_VL(SDNode *N,
NodeExtensionHelper LHS(Root, 0, DAG, Subtarget);
NodeExtensionHelper RHS(Root, 1, DAG, Subtarget);
- auto AppendUsersIfNeeded = [&Worklist, &Subtarget,
- &Inserted](const NodeExtensionHelper &Op) {
- if (Op.needToPromoteOtherUsers()) {
- for (SDUse &Use : Op.OrigOperand->uses()) {
- SDNode *TheUser = Use.getUser();
- if (!NodeExtensionHelper::isSupportedRoot(TheUser, Subtarget))
- return false;
- // We only support the first 2 operands of FMA.
- if (Use.getOperandNo() >= 2)
- return false;
- if (Inserted.insert(TheUser).second)
- Worklist.push_back(TheUser);
- }
- }
- return true;
- };
+ auto AppendUsersIfNeeded =
+ [&Worklist, &Subtarget, &Inserted,
+ &ExtensionsToRemove](const NodeExtensionHelper &Op) {
+ if (Op.needToPromoteOtherUsers()) {
+ // Remember that we're supposed to remove this extension.
+ ExtensionsToRemove.insert(Op.OrigOperand.getNode());
+ for (SDUse &Use : Op.OrigOperand->uses()) {
+ SDNode *TheUser = Use.getUser();
+ if (!NodeExtensionHelper::isSupportedRoot(TheUser, Subtarget))
+ return false;
+ // We only support the first 2 operands of FMA.
+ if (Use.getOperandNo() >= 2)
+ return false;
+ if (Inserted.insert(TheUser).second)
+ Worklist.push_back(TheUser);
+ }
+ }
+ return true;
+ };
// Control the compile time by limiting the number of node we look at in
// total.
@@ -17912,6 +17916,15 @@ static SDValue combineOp_VLToVWOp_VL(SDNode *N,
std::optional<CombineResult> Res =
FoldingStrategy(Root, LHS, RHS, DAG, Subtarget);
if (Res) {
+ // If this strategy wouldn't remove an extension we're supposed to
+ // remove, reject it.
+ if (!Res->LHSExt.has_value() &&
+ ExtensionsToRemove.contains(LHS.OrigOperand.getNode()))
+ continue;
+ if (!Res->RHSExt.has_value() &&
+ ExtensionsToRemove.contains(RHS.OrigOperand.getNode()))
+ continue;
+
Matched = true;
CombinesToApply.push_back(*Res);
// All the inputs that are extended need to be folded, otherwise
diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
index 636e31c..bf9de0a 100644
--- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
@@ -1583,7 +1583,10 @@ void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) {
if (!TII->isAddImmediate(*DeadMI, Reg))
continue;
LIS->RemoveMachineInstrFromMaps(*DeadMI);
+ Register AddReg = DeadMI->getOperand(1).getReg();
DeadMI->eraseFromParent();
+ if (AddReg.isVirtual())
+ LIS->shrinkToUses(&LIS->getInterval(AddReg));
}
}
}
@@ -1869,11 +1872,15 @@ void RISCVInsertVSETVLI::coalesceVSETVLIs(MachineBasicBlock &MBB) const {
// Loop over the dead AVL values, and delete them now. This has
// to be outside the above loop to avoid invalidating iterators.
for (auto *MI : ToDelete) {
+ assert(MI->getOpcode() == RISCV::ADDI);
+ Register AddReg = MI->getOperand(1).getReg();
if (LIS) {
LIS->removeInterval(MI->getOperand(0).getReg());
LIS->RemoveMachineInstrFromMaps(*MI);
}
MI->eraseFromParent();
+ if (LIS && AddReg.isVirtual())
+ LIS->shrinkToUses(&LIS->getInterval(AddReg));
}
}
diff --git a/llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td b/llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td
index 24ebbc3..41071b2 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td
@@ -654,8 +654,17 @@ foreach mx = SchedMxList in {
foreach sew = SchedSEWSet<mx>.val in {
defvar IsWorstCase = SMX60IsWorstCaseMXSEW<mx, sew, SchedMxList>.c;
- defm "" : LMULSEWWriteResMXSEW<"WriteVIRedV_From", [SMX60_VIEU], mx, sew, IsWorstCase>;
- defm "" : LMULSEWWriteResMXSEW<"WriteVIRedMinMaxV_From", [SMX60_VIEU], mx, sew, IsWorstCase>;
+ defvar VIRedLat = GetLMULValue<[5, 5, 5, 7, 11, 19, 35], mx>.c;
+ defvar VIRedOcc = GetLMULValue<[1, 1, 2, 2, 4, 10, 35], mx>.c;
+ let Latency = VIRedLat, ReleaseAtCycles = [VIRedOcc] in {
+ defm "" : LMULSEWWriteResMXSEW<"WriteVIRedMinMaxV_From", [SMX60_VIEU], mx, sew, IsWorstCase>;
+
+ // Pattern for vredsum: 5/5/5/7/11/19/35
+ // Pattern for vredand, vredor, vredxor: 4/4/4/6/10/18/34
+ // They are grouped together, so we use the worst-case vredsum latency.
+ // TODO: split vredand, vredor, vredxor into separate scheduling classes.
+ defm "" : LMULSEWWriteResMXSEW<"WriteVIRedV_From", [SMX60_VIEU], mx, sew, IsWorstCase>;
+ }
}
}
@@ -663,7 +672,27 @@ foreach mx = SchedMxListWRed in {
foreach sew = SchedSEWSet<mx, 0, 1>.val in {
defvar IsWorstCase = SMX60IsWorstCaseMXSEW<mx, sew, SchedMxListWRed>.c;
- defm "" : LMULSEWWriteResMXSEW<"WriteVIWRedV_From", [SMX60_VIEU], mx, sew, IsWorstCase>;
+ defvar VIRedLat = GetLMULValue<[5, 5, 5, 7, 11, 19, 35], mx>.c;
+ defvar VIRedOcc = GetLMULValue<[1, 1, 2, 2, 4, 10, 35], mx>.c;
+ let Latency = VIRedLat, ReleaseAtCycles = [VIRedOcc] in {
+ defm "" : LMULSEWWriteResMXSEW<"WriteVIWRedV_From", [SMX60_VIEU], mx, sew, IsWorstCase>;
+ }
+ }
+}
+
+foreach mx = SchedMxListF in {
+ foreach sew = SchedSEWSet<mx, 1>.val in {
+ defvar IsWorstCase = SMX60IsWorstCaseMXSEW<mx, sew, SchedMxListF, 1>.c;
+
+ // Latency for vfredmax.vs, vfredmin.vs: 12/12/15/21/33/57
+ // Latency for vfredusum.vs is slightly lower for e16/e32
+ // We use the worst-case latency.
+ defvar VFRedLat = GetLMULValue<[12, 12, 12, 15, 21, 33, 57], mx>.c;
+ defvar VFRedOcc = GetLMULValue<[8, 8, 8, 8, 14, 20, 57], mx>.c;
+ let Latency = VFRedLat, ReleaseAtCycles = [VFRedOcc] in {
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFRedV_From", [SMX60_VFP], mx, sew, IsWorstCase>;
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFRedMinMaxV_From", [SMX60_VFP], mx, sew, IsWorstCase>;
+ }
}
}
@@ -671,9 +700,20 @@ foreach mx = SchedMxListF in {
foreach sew = SchedSEWSet<mx, 1>.val in {
defvar IsWorstCase = SMX60IsWorstCaseMXSEW<mx, sew, SchedMxListF, 1>.c;
- defm "" : LMULSEWWriteResMXSEW<"WriteVFRedV_From", [SMX60_VFP], mx, sew, IsWorstCase>;
- defm "" : LMULSEWWriteResMXSEW<"WriteVFRedOV_From", [SMX60_VFP], mx, sew, IsWorstCase>;
- defm "" : LMULSEWWriteResMXSEW<"WriteVFRedMinMaxV_From", [SMX60_VFP], mx, sew, IsWorstCase>;
+ // Compute latency based on SEW
+ defvar VFRedOV_FromLat = !cond(
+ !eq(sew, 16) : ConstValueUntilLMULThenDouble<"MF4", 12, mx>.c,
+ !eq(sew, 32) : ConstValueUntilLMULThenDouble<"MF2", 12, mx>.c,
+ !eq(sew, 64) : ConstValueUntilLMULThenDouble<"M1", 12, mx>.c
+ );
+ defvar VFRedOV_FromOcc = !cond(
+ !eq(sew, 16) : GetLMULValue<[8, 8, 20, 24, 48, 96, 384], mx>.c,
+ !eq(sew, 32) : GetLMULValue<[8, 8, 8, 12, 24, 48, 192], mx>.c,
+ !eq(sew, 64) : GetLMULValue<[6, 6, 6, 6, 12, 24, 96], mx>.c
+ );
+ let Latency = VFRedOV_FromLat, ReleaseAtCycles = [VFRedOV_FromOcc] in {
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFRedOV_From", [SMX60_VFP], mx, sew, IsWorstCase>;
+ }
}
}
@@ -681,8 +721,18 @@ foreach mx = SchedMxListFWRed in {
foreach sew = SchedSEWSet<mx, 1, 1>.val in {
defvar IsWorstCase = SMX60IsWorstCaseMXSEW<mx, sew, SchedMxListFWRed, 1>.c;
- defm "" : LMULSEWWriteResMXSEW<"WriteVFWRedV_From", [SMX60_VFP], mx, sew, IsWorstCase>;
- defm "" : LMULSEWWriteResMXSEW<"WriteVFWRedOV_From", [SMX60_VFP], mx, sew, IsWorstCase>;
+ defvar VFRedOVLat = !cond(
+ !eq(sew, 16) : ConstValueUntilLMULThenDouble<"MF4", 16, mx>.c,
+ !eq(sew, 32) : ConstValueUntilLMULThenDouble<"MF2", 16, mx>.c,
+ );
+ defvar VFRedOVOcc = !cond(
+ !eq(sew, 16) : GetLMULValue<[11, 11, 27, 32, 64, 128, 512], mx>.c,
+ !eq(sew, 32) : GetLMULValue<[11, 11, 11, 16, 32, 64, 256], mx>.c,
+ );
+ let Latency = VFRedOVLat, ReleaseAtCycles = [VFRedOVOcc] in {
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFWRedV_From", [SMX60_VFP], mx, sew, IsWorstCase>;
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFWRedOV_From", [SMX60_VFP], mx, sew, IsWorstCase>;
+ }
}
}
diff --git a/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp b/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp
index 56a38bb..b2cbdb2 100644
--- a/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp
@@ -2390,6 +2390,15 @@ static bool generateBindlessImageINTELInst(const SPIRV::IncomingCall *Call,
return buildBindlessImageINTELInst(Call, Opcode, MIRBuilder, GR);
}
+static bool generateBlockingPipesInst(const SPIRV::IncomingCall *Call,
+ MachineIRBuilder &MIRBuilder,
+ SPIRVGlobalRegistry *GR) {
+ const SPIRV::DemangledBuiltin *Builtin = Call->Builtin;
+ unsigned Opcode =
+ SPIRV::lookupNativeBuiltin(Builtin->Name, Builtin->Set)->Opcode;
+ return buildOpFromWrapper(MIRBuilder, Opcode, Call, Register(0));
+}
+
static bool
generateTernaryBitwiseFunctionINTELInst(const SPIRV::IncomingCall *Call,
MachineIRBuilder &MIRBuilder,
@@ -3050,6 +3059,8 @@ std::optional<bool> lowerBuiltin(const StringRef DemangledCall,
return generatePipeInst(Call.get(), MIRBuilder, GR);
case SPIRV::PredicatedLoadStore:
return generatePredicatedLoadStoreInst(Call.get(), MIRBuilder, GR);
+ case SPIRV::BlockingPipes:
+ return generateBlockingPipesInst(Call.get(), MIRBuilder, GR);
}
return false;
}
diff --git a/llvm/lib/Target/SPIRV/SPIRVBuiltins.td b/llvm/lib/Target/SPIRV/SPIRVBuiltins.td
index c259cce..492a98e 100644
--- a/llvm/lib/Target/SPIRV/SPIRVBuiltins.td
+++ b/llvm/lib/Target/SPIRV/SPIRVBuiltins.td
@@ -71,6 +71,7 @@ def TernaryBitwiseINTEL : BuiltinGroup;
def Block2DLoadStore : BuiltinGroup;
def Pipe : BuiltinGroup;
def PredicatedLoadStore : BuiltinGroup;
+def BlockingPipes : BuiltinGroup;
//===----------------------------------------------------------------------===//
// Class defining a demangled builtin record. The information in the record
@@ -1174,6 +1175,10 @@ defm : DemangledNativeBuiltin<"clock_read_sub_group", OpenCL_std, KernelClock, 0
defm : DemangledNativeBuiltin<"clock_read_hilo_device", OpenCL_std, KernelClock, 0, 0, OpReadClockKHR>;
defm : DemangledNativeBuiltin<"clock_read_hilo_work_group", OpenCL_std, KernelClock, 0, 0, OpReadClockKHR>;
defm : DemangledNativeBuiltin<"clock_read_hilo_sub_group", OpenCL_std, KernelClock, 0, 0, OpReadClockKHR>;
+
+//SPV_ALTERA_blocking_pipes
+defm : DemangledNativeBuiltin<"__spirv_WritePipeBlockingINTEL", OpenCL_std, BlockingPipes, 0, 0, OpWritePipeBlockingALTERA>;
+defm : DemangledNativeBuiltin<"__spirv_ReadPipeBlockingINTEL", OpenCL_std, BlockingPipes, 0, 0, OpReadPipeBlockingALTERA>;
defm : DemangledNativeBuiltin<"__spirv_ReadClockKHR", OpenCL_std, KernelClock, 1, 1, OpReadClockKHR>;
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp b/llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp
index 43b2869..f681b0d 100644
--- a/llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp
@@ -159,7 +159,9 @@ static const std::map<std::string, SPIRV::Extension::Extension, std::less<>>
{"SPV_KHR_maximal_reconvergence",
SPIRV::Extension::Extension::SPV_KHR_maximal_reconvergence},
{"SPV_INTEL_kernel_attributes",
- SPIRV::Extension::Extension::SPV_INTEL_kernel_attributes}};
+ SPIRV::Extension::Extension::SPV_INTEL_kernel_attributes},
+ {"SPV_ALTERA_blocking_pipes",
+ SPIRV::Extension::Extension::SPV_ALTERA_blocking_pipes}};
bool SPIRVExtensionsParser::parse(cl::Option &O, StringRef ArgName,
StringRef ArgValue,
diff --git a/llvm/lib/Target/SPIRV/SPIRVInstrInfo.td b/llvm/lib/Target/SPIRV/SPIRVInstrInfo.td
index a61351e..03bd61b 100644
--- a/llvm/lib/Target/SPIRV/SPIRVInstrInfo.td
+++ b/llvm/lib/Target/SPIRV/SPIRVInstrInfo.td
@@ -993,3 +993,9 @@ def OpPredicatedLoadINTEL: Op<6528, (outs ID:$res), (ins TYPE:$resType, ID:$ptr,
"$res = OpPredicatedLoadINTEL $resType $ptr $predicate $default_value">;
def OpPredicatedStoreINTEL: Op<6529, (outs), (ins ID:$ptr, ID:$object, ID:$predicate, variable_ops),
"OpPredicatedStoreINTEL $ptr $object $predicate">;
+
+//SPV_ALTERA_blocking_pipes
+def OpReadPipeBlockingALTERA :Op<5946, (outs), (ins ID:$pipe, ID:$pointer, ID:$packetSize, ID:$packetAlignment),
+ "OpReadPipeBlockingALTERA $pipe $pointer $packetSize $packetAlignment">;
+def OpWritePipeBlockingALTERA :Op<5947, (outs), (ins ID:$pipe, ID:$pointer, ID:$packetSize, ID:$packetAlignment),
+ "OpWritePipeBlockingALTERA $pipe $pointer $packetSize $packetAlignment">; // Opcode 5947; the read variant is 5946.
diff --git a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp
index e5ac76c4..af76016 100644
--- a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp
@@ -1885,6 +1885,13 @@ void addInstrRequirements(const MachineInstr &MI,
Reqs.addCapability(
SPIRV::Capability::CooperativeMatrixCheckedInstructionsINTEL);
break;
+ case SPIRV::OpReadPipeBlockingALTERA:
+ case SPIRV::OpWritePipeBlockingALTERA:
+ if (ST.canUseExtension(SPIRV::Extension::SPV_ALTERA_blocking_pipes)) {
+ Reqs.addExtension(SPIRV::Extension::SPV_ALTERA_blocking_pipes);
+ Reqs.addCapability(SPIRV::Capability::BlockingPipesALTERA);
+ }
+ break;
case SPIRV::OpCooperativeMatrixGetElementCoordINTEL:
if (!ST.canUseExtension(SPIRV::Extension::SPV_INTEL_joint_matrix))
report_fatal_error("OpCooperativeMatrixGetElementCoordINTEL requires the "
diff --git a/llvm/lib/Target/SPIRV/SPIRVPrepareFunctions.cpp b/llvm/lib/Target/SPIRV/SPIRVPrepareFunctions.cpp
index 4e4e6fb..be88f33 100644
--- a/llvm/lib/Target/SPIRV/SPIRVPrepareFunctions.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVPrepareFunctions.cpp
@@ -56,6 +56,13 @@ public:
}
};
+static cl::list<std::string> SPVAllowUnknownIntrinsics(
+ "spv-allow-unknown-intrinsics", cl::CommaSeparated,
+ cl::desc("Emit unknown intrinsics as calls to external functions. A "
+ "comma-separated input list of intrinsic prefixes must be "
+ "provided, and only intrinsics carrying a listed prefix get "
+ "emitted as described."),
+ cl::value_desc("intrinsic_prefix_0,intrinsic_prefix_1"), cl::ValueOptional);
} // namespace
char SPIRVPrepareFunctions::ID = 0;
@@ -445,6 +452,15 @@ bool SPIRVPrepareFunctions::substituteIntrinsicCalls(Function *F) {
EraseFromParent);
Changed = true;
break;
+ default:
+ if (TM.getTargetTriple().getVendor() == Triple::AMD ||
+ any_of(SPVAllowUnknownIntrinsics, [II](auto &&Prefix) {
+ if (Prefix.empty())
+ return false;
+ return II->getCalledFunction()->getName().starts_with(Prefix);
+ }))
+ Changed |= lowerIntrinsicToFunction(II);
+ break;
}
}
}
diff --git a/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td b/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td
index 1b4b29b..65a8885 100644
--- a/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td
+++ b/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td
@@ -309,7 +309,7 @@ defm SPV_KHR_shader_clock : ExtensionOperand<54, [EnvVulkan, EnvOpenCL]>;
defm SPV_INTEL_unstructured_loop_controls : ExtensionOperand<55, [EnvOpenCL]>;
defm SPV_EXT_demote_to_helper_invocation : ExtensionOperand<56, [EnvVulkan]>;
defm SPV_INTEL_fpga_reg : ExtensionOperand<57, [EnvOpenCL]>;
-defm SPV_INTEL_blocking_pipes : ExtensionOperand<58, [EnvOpenCL]>;
+defm SPV_ALTERA_blocking_pipes : ExtensionOperand<58, [EnvOpenCL]>;
defm SPV_GOOGLE_user_type : ExtensionOperand<59, [EnvVulkan]>;
defm SPV_KHR_physical_storage_buffer : ExtensionOperand<60, [EnvVulkan]>;
defm SPV_INTEL_kernel_attributes : ExtensionOperand<61, [EnvOpenCL]>;
@@ -611,6 +611,7 @@ defm TensorFloat32RoundingINTEL : CapabilityOperand<6425, 0, 0, [SPV_INTEL_tenso
defm BFloat16TypeKHR : CapabilityOperand<5116, 0, 0, [SPV_KHR_bfloat16], []>;
defm BFloat16DotProductKHR : CapabilityOperand<5117, 0, 0, [SPV_KHR_bfloat16], [BFloat16TypeKHR]>;
defm BFloat16CooperativeMatrixKHR : CapabilityOperand<5118, 0, 0, [SPV_KHR_bfloat16], [BFloat16TypeKHR, CooperativeMatrixKHR]>;
+defm BlockingPipesALTERA : CapabilityOperand<5945, 0, 0, [SPV_ALTERA_blocking_pipes], []>;
//===----------------------------------------------------------------------===//
// Multiclass used to define SourceLanguage enum values and at the same time
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 4d44227b3..d103953 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -53354,6 +53354,7 @@ static SDValue combineMaskedStore(SDNode *N, SelectionDAG &DAG,
// i32 sub value.
static SDValue narrowBitOpRMW(StoreSDNode *St, const SDLoc &DL,
SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
using namespace SDPatternMatch;
SDValue StoredVal = St->getValue();
@@ -53442,7 +53443,8 @@ static SDValue narrowBitOpRMW(StoreSDNode *St, const SDLoc &DL,
}
SDValue NewStore =
- DAG.getStore(St->getChain(), DL, Res, NewPtr, St->getPointerInfo(),
+ DAG.getStore(St->getChain(), DL, Res, NewPtr,
+ MachinePointerInfo(St->getPointerInfo().getAddrSpace()),
Align(), St->getMemOperand()->getFlags());
// If there are other uses of StoredVal, replace with a new load of the
@@ -53450,6 +53452,8 @@ static SDValue narrowBitOpRMW(StoreSDNode *St, const SDLoc &DL,
if (!StoredVal.hasOneUse()) {
SDValue NewLoad =
DAG.getLoad(VT, DL, NewStore, Ld->getBasePtr(), Ld->getMemOperand());
+ for (SDNode *User : StoredVal->users())
+ DCI.AddToWorklist(User);
DAG.ReplaceAllUsesWith(StoredVal, NewLoad);
}
return NewStore;
@@ -53681,7 +53685,7 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
}
}
- if (SDValue R = narrowBitOpRMW(St, dl, DAG, Subtarget))
+ if (SDValue R = narrowBitOpRMW(St, dl, DAG, DCI, Subtarget))
return R;
// Convert store(cmov(load(p), x, CC), p) to cstore(x, p, CC)
@@ -54639,7 +54643,8 @@ static SDValue combineTruncate(SDNode *N, SelectionDAG &DAG,
SDValue NewPtr = DAG.getMemBasePlusOffset(
Ld->getBasePtr(), PtrByteOfs, DL, SDNodeFlags::NoUnsignedWrap);
SDValue NewLoad =
- DAG.getLoad(VT, DL, Ld->getChain(), NewPtr, Ld->getPointerInfo(),
+ DAG.getLoad(VT, DL, Ld->getChain(), NewPtr,
+ MachinePointerInfo(Ld->getPointerInfo().getAddrSpace()),
Align(), Ld->getMemOperand()->getFlags());
DAG.makeEquivalentMemoryOrdering(Ld, NewLoad);
return NewLoad;
diff --git a/llvm/lib/TargetParser/TargetParser.cpp b/llvm/lib/TargetParser/TargetParser.cpp
index 975a271..96bef0e 100644
--- a/llvm/lib/TargetParser/TargetParser.cpp
+++ b/llvm/lib/TargetParser/TargetParser.cpp
@@ -174,8 +174,8 @@ constexpr GPUInfo AMDGCNGPUs[] = {
{{"gfx1153"}, {"gfx1153"}, GK_GFX1153, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP},
{{"gfx1200"}, {"gfx1200"}, GK_GFX1200, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP},
{{"gfx1201"}, {"gfx1201"}, GK_GFX1201, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP},
- {{"gfx1250"}, {"gfx1250"}, GK_GFX1250, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32},
- {{"gfx1251"}, {"gfx1251"}, GK_GFX1251, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32},
+ {{"gfx1250"}, {"gfx1250"}, GK_GFX1250, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_XNACK_ALWAYS},
+ {{"gfx1251"}, {"gfx1251"}, GK_GFX1251, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_XNACK_ALWAYS},
{{"gfx9-generic"}, {"gfx9-generic"}, GK_GFX9_GENERIC, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK},
{{"gfx10-1-generic"}, {"gfx10-1-generic"}, GK_GFX10_1_GENERIC, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_XNACK|FEATURE_WGP},
diff --git a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
index af53fa0..02f06be 100644
--- a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
+++ b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
@@ -734,7 +734,7 @@ void FuncPGOInstrumentation<Edge, BBInfo>::computeCFGHash() {
FunctionHash = (((uint64_t)JCH.getCRC()) << 28) + JC.getCRC();
// Reserve bit 60-63 for other information purpose.
- FunctionHash &= 0x0FFFFFFFFFFFFFFF;
+ FunctionHash &= NamedInstrProfRecord::FUNC_HASH_MASK;
if (IsCS)
NamedInstrProfRecord::setCSFlagInHash(FunctionHash);
LLVM_DEBUG(dbgs() << "Function Hash Computation for " << F.getName() << ":\n"
diff --git a/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp b/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp
index 42b1fdf..8aa8aa2 100644
--- a/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp
+++ b/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp
@@ -39,36 +39,36 @@ using namespace llvm;
STATISTIC(NumBroken, "Number of blocks inserted");
namespace {
- struct BreakCriticalEdges : public FunctionPass {
- static char ID; // Pass identification, replacement for typeid
- BreakCriticalEdges() : FunctionPass(ID) {
- initializeBreakCriticalEdgesPass(*PassRegistry::getPassRegistry());
- }
+struct BreakCriticalEdges : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ BreakCriticalEdges() : FunctionPass(ID) {
+ initializeBreakCriticalEdgesPass(*PassRegistry::getPassRegistry());
+ }
- bool runOnFunction(Function &F) override {
- auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
- auto *DT = DTWP ? &DTWP->getDomTree() : nullptr;
+ bool runOnFunction(Function &F) override {
+ auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
+ auto *DT = DTWP ? &DTWP->getDomTree() : nullptr;
- auto *PDTWP = getAnalysisIfAvailable<PostDominatorTreeWrapperPass>();
- auto *PDT = PDTWP ? &PDTWP->getPostDomTree() : nullptr;
+ auto *PDTWP = getAnalysisIfAvailable<PostDominatorTreeWrapperPass>();
+ auto *PDT = PDTWP ? &PDTWP->getPostDomTree() : nullptr;
- auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>();
- auto *LI = LIWP ? &LIWP->getLoopInfo() : nullptr;
- unsigned N =
- SplitAllCriticalEdges(F, CriticalEdgeSplittingOptions(DT, LI, nullptr, PDT));
- NumBroken += N;
- return N > 0;
- }
+ auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>();
+ auto *LI = LIWP ? &LIWP->getLoopInfo() : nullptr;
+ unsigned N = SplitAllCriticalEdges(
+ F, CriticalEdgeSplittingOptions(DT, LI, nullptr, PDT));
+ NumBroken += N;
+ return N > 0;
+ }
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addPreserved<DominatorTreeWrapperPass>();
- AU.addPreserved<LoopInfoWrapperPass>();
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addPreserved<DominatorTreeWrapperPass>();
+ AU.addPreserved<LoopInfoWrapperPass>();
- // No loop canonicalization guarantees are broken by this pass.
- AU.addPreservedID(LoopSimplifyID);
- }
- };
-}
+ // No loop canonicalization guarantees are broken by this pass.
+ AU.addPreservedID(LoopSimplifyID);
+ }
+};
+} // namespace
char BreakCriticalEdges::ID = 0;
INITIALIZE_PASS(BreakCriticalEdges, "break-crit-edges",
@@ -76,6 +76,7 @@ INITIALIZE_PASS(BreakCriticalEdges, "break-crit-edges",
// Publicly exposed interface to pass...
char &llvm::BreakCriticalEdgesID = BreakCriticalEdges::ID;
+
FunctionPass *llvm::createBreakCriticalEdgesPass() {
return new BreakCriticalEdges();
}
diff --git a/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp b/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp
index 7343c79..9f6d89e 100644
--- a/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp
+++ b/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp
@@ -40,22 +40,22 @@ using namespace llvm;
namespace {
- struct QuotRemPair {
- Value *Quotient;
- Value *Remainder;
-
- QuotRemPair(Value *InQuotient, Value *InRemainder)
- : Quotient(InQuotient), Remainder(InRemainder) {}
- };
-
- /// A quotient and remainder, plus a BB from which they logically "originate".
- /// If you use Quotient or Remainder in a Phi node, you should use BB as its
- /// corresponding predecessor.
- struct QuotRemWithBB {
- BasicBlock *BB = nullptr;
- Value *Quotient = nullptr;
- Value *Remainder = nullptr;
- };
+struct QuotRemPair {
+ Value *Quotient;
+ Value *Remainder;
+
+ QuotRemPair(Value *InQuotient, Value *InRemainder)
+ : Quotient(InQuotient), Remainder(InRemainder) {}
+};
+
+/// A quotient and remainder, plus a BB from which they logically "originate".
+/// If you use Quotient or Remainder in a Phi node, you should use BB as its
+/// corresponding predecessor.
+struct QuotRemWithBB {
+ BasicBlock *BB = nullptr;
+ Value *Quotient = nullptr;
+ Value *Remainder = nullptr;
+};
using DivCacheTy = DenseMap<DivRemMapKey, QuotRemPair>;
using BypassWidthsTy = DenseMap<unsigned, unsigned>;
diff --git a/llvm/lib/Transforms/Utils/LoopSimplify.cpp b/llvm/lib/Transforms/Utils/LoopSimplify.cpp
index 61ffb49..8da6a980 100644
--- a/llvm/lib/Transforms/Utils/LoopSimplify.cpp
+++ b/llvm/lib/Transforms/Utils/LoopSimplify.cpp
@@ -378,7 +378,7 @@ static BasicBlock *insertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader,
if (P != Preheader) BackedgeBlocks.push_back(P);
}
- // Create and insert the new backedge block...
+ // Create and insert the new backedge block.
BasicBlock *BEBlock = BasicBlock::Create(Header->getContext(),
Header->getName() + ".backedge", F);
BranchInst *BETerminator = BranchInst::Create(Header, BEBlock);
@@ -737,39 +737,39 @@ bool llvm::simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI,
}
namespace {
- struct LoopSimplify : public FunctionPass {
- static char ID; // Pass identification, replacement for typeid
- LoopSimplify() : FunctionPass(ID) {
- initializeLoopSimplifyPass(*PassRegistry::getPassRegistry());
- }
+struct LoopSimplify : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ LoopSimplify() : FunctionPass(ID) {
+ initializeLoopSimplifyPass(*PassRegistry::getPassRegistry());
+ }
- bool runOnFunction(Function &F) override;
+ bool runOnFunction(Function &F) override;
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<AssumptionCacheTracker>();
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<AssumptionCacheTracker>();
- // We need loop information to identify the loops...
- AU.addRequired<DominatorTreeWrapperPass>();
- AU.addPreserved<DominatorTreeWrapperPass>();
+ // We need loop information to identify the loops.
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addPreserved<DominatorTreeWrapperPass>();
- AU.addRequired<LoopInfoWrapperPass>();
- AU.addPreserved<LoopInfoWrapperPass>();
+ AU.addRequired<LoopInfoWrapperPass>();
+ AU.addPreserved<LoopInfoWrapperPass>();
- AU.addPreserved<BasicAAWrapperPass>();
- AU.addPreserved<AAResultsWrapperPass>();
- AU.addPreserved<GlobalsAAWrapperPass>();
- AU.addPreserved<ScalarEvolutionWrapperPass>();
- AU.addPreserved<SCEVAAWrapperPass>();
- AU.addPreservedID(LCSSAID);
- AU.addPreservedID(BreakCriticalEdgesID); // No critical edges added.
- AU.addPreserved<BranchProbabilityInfoWrapperPass>();
- AU.addPreserved<MemorySSAWrapperPass>();
- }
+ AU.addPreserved<BasicAAWrapperPass>();
+ AU.addPreserved<AAResultsWrapperPass>();
+ AU.addPreserved<GlobalsAAWrapperPass>();
+ AU.addPreserved<ScalarEvolutionWrapperPass>();
+ AU.addPreserved<SCEVAAWrapperPass>();
+ AU.addPreservedID(LCSSAID);
+ AU.addPreservedID(BreakCriticalEdgesID); // No critical edges added.
+ AU.addPreserved<BranchProbabilityInfoWrapperPass>();
+ AU.addPreserved<MemorySSAWrapperPass>();
+ }
- /// verifyAnalysis() - Verify LoopSimplifyForm's guarantees.
- void verifyAnalysis() const override;
- };
-}
+ /// verifyAnalysis() - Verify LoopSimplifyForm's guarantees.
+ void verifyAnalysis() const override;
+};
+} // namespace
char LoopSimplify::ID = 0;
INITIALIZE_PASS_BEGIN(LoopSimplify, "loop-simplify",
@@ -780,12 +780,12 @@ INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_END(LoopSimplify, "loop-simplify", "Canonicalize natural loops",
false, false)
-// Publicly exposed interface to pass...
+// Publicly exposed interface to pass.
char &llvm::LoopSimplifyID = LoopSimplify::ID;
Pass *llvm::createLoopSimplifyPass() { return new LoopSimplify(); }
/// runOnFunction - Run down all loops in the CFG (recursively, but we could do
-/// it in any convenient order) inserting preheaders...
+/// it in any convenient order) inserting preheaders.
///
bool LoopSimplify::runOnFunction(Function &F) {
bool Changed = false;
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 906fa2f..b7224a3 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -7933,6 +7933,26 @@ void VPRecipeBuilder::collectScaledReductions(VFRange &Range) {
(!Chain.ExtendB || ExtendIsOnlyUsedByPartialReductions(Chain.ExtendB)))
ScaledReductionMap.try_emplace(Chain.Reduction, Pair.second);
}
+
+ // Check that all partial reductions in a chain are only used by other
+ // partial reductions with the same scale factor. Otherwise we end up creating
+ // users of scaled reductions where the types of the other operands don't
+ // match.
+ for (const auto &[Chain, Scale] : PartialReductionChains) {
+ auto AllUsersPartialRdx = [ScaleVal = Scale, this](const User *U) {
+ auto *UI = cast<Instruction>(U);
+ if (isa<PHINode>(UI) && UI->getParent() == OrigLoop->getHeader()) {
+ return all_of(UI->users(), [ScaleVal, this](const User *U) {
+ auto *UI = cast<Instruction>(U);
+ return ScaledReductionMap.lookup_or(UI, 0) == ScaleVal;
+ });
+ }
+ return ScaledReductionMap.lookup_or(UI, 0) == ScaleVal ||
+ !OrigLoop->contains(UI->getParent());
+ };
+ if (!all_of(Chain.Reduction->users(), AllUsersPartialRdx))
+ ScaledReductionMap.erase(Chain.Reduction);
+ }
}
bool VPRecipeBuilder::getScaledReductions(
@@ -8116,11 +8136,8 @@ VPRecipeBase *VPRecipeBuilder::tryToCreateWidenRecipe(VPSingleDefRecipe *R,
if (isa<LoadInst>(Instr) || isa<StoreInst>(Instr))
return tryToWidenMemory(Instr, Operands, Range);
- if (std::optional<unsigned> ScaleFactor = getScalingForReduction(Instr)) {
- if (auto PartialRed =
- tryToCreatePartialReduction(Instr, Operands, ScaleFactor.value()))
- return PartialRed;
- }
+ if (std::optional<unsigned> ScaleFactor = getScalingForReduction(Instr))
+ return tryToCreatePartialReduction(Instr, Operands, ScaleFactor.value());
if (!shouldWiden(Instr, Range))
return nullptr;
@@ -8154,9 +8171,9 @@ VPRecipeBuilder::tryToCreatePartialReduction(Instruction *Reduction,
isa<VPPartialReductionRecipe>(BinOpRecipe))
std::swap(BinOp, Accumulator);
- if (ScaleFactor !=
- vputils::getVFScaleFactor(Accumulator->getDefiningRecipe()))
- return nullptr;
+ assert(ScaleFactor ==
+ vputils::getVFScaleFactor(Accumulator->getDefiningRecipe()) &&
+ "all accumulators in chain must have same scale factor");
unsigned ReductionOpcode = Reduction->getOpcode();
if (ReductionOpcode == Instruction::Sub) {
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index bf3f52c..df835a0 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -20996,6 +20996,15 @@ BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL, BoUpSLP *SLP,
return false;
}))
return std::nullopt;
+ if (S.areInstructionsWithCopyableElements() && EI && EI.UserTE->hasState() &&
+ EI.UserTE->hasCopyableElements() &&
+ EI.UserTE->getMainOp()->getParent() == S.getMainOp()->getParent() &&
+ all_of(VL, [&](Value *V) {
+ if (S.isCopyableElement(V))
+ return true;
+ return isUsedOutsideBlock(V);
+ }))
+ return std::nullopt;
bool HasCopyables = S.areInstructionsWithCopyableElements();
if (((!HasCopyables && doesNotNeedToSchedule(VL)) ||
all_of(VL, [&](Value *V) { return S.isNonSchedulable(V); }))) {
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 8670822..3062e1c 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -1163,10 +1163,10 @@ public:
bool opcodeMayReadOrWriteFromMemory() const;
/// Returns true if the recipe only uses the first lane of operand \p Op.
- bool onlyFirstLaneUsed(const VPValue *Op) const override;
+ bool usesFirstLaneOnly(const VPValue *Op) const override;
/// Returns true if the recipe only uses the first part of operand \p Op.
- bool onlyFirstPartUsed(const VPValue *Op) const override;
+ bool usesFirstPartOnly(const VPValue *Op) const override;
/// Returns true if this VPInstruction produces a scalar value from a vector,
/// e.g. by performing a reduction or extracting a lane.
@@ -1393,13 +1393,13 @@ public:
return true;
}
- bool onlyFirstPartUsed(const VPValue *Op) const override {
+ bool usesFirstPartOnly(const VPValue *Op) const override {
assert(is_contained(operands(), Op) &&
"Op must be an operand of the recipe");
return true;
}
- bool onlyFirstLaneUsed(const VPValue *Op) const override {
+ bool usesFirstLaneOnly(const VPValue *Op) const override {
assert(is_contained(operands(), Op) &&
"Op must be an operand of the recipe");
return true;
@@ -1628,7 +1628,7 @@ public:
VPSlotTracker &SlotTracker) const override;
#endif
- bool onlyFirstLaneUsed(const VPValue *Op) const override;
+ bool usesFirstLaneOnly(const VPValue *Op) const override;
};
/// A recipe for widening Call instructions using library calls.
@@ -1767,7 +1767,7 @@ struct LLVM_ABI_FOR_TEST VPWidenSelectRecipe : public VPRecipeWithIRFlags,
}
/// Returns true if the recipe only uses the first lane of operand \p Op.
- bool onlyFirstLaneUsed(const VPValue *Op) const override {
+ bool usesFirstLaneOnly(const VPValue *Op) const override {
assert(is_contained(operands(), Op) &&
"Op must be an operand of the recipe");
return Op == getCond() && isInvariantCond();
@@ -1833,7 +1833,7 @@ public:
#endif
/// Returns true if the recipe only uses the first lane of operand \p Op.
- bool onlyFirstLaneUsed(const VPValue *Op) const override {
+ bool usesFirstLaneOnly(const VPValue *Op) const override {
assert(is_contained(operands(), Op) &&
"Op must be an operand of the recipe");
if (Op == getOperand(0))
@@ -1870,7 +1870,7 @@ public:
void execute(VPTransformState &State) override;
- bool onlyFirstLaneUsed(const VPValue *Op) const override {
+ bool usesFirstLaneOnly(const VPValue *Op) const override {
assert(is_contained(operands(), Op) &&
"Op must be an operand of the recipe");
return true;
@@ -1884,7 +1884,7 @@ public:
}
/// Returns true if the recipe only uses the first part of operand \p Op.
- bool onlyFirstPartUsed(const VPValue *Op) const override {
+ bool usesFirstPartOnly(const VPValue *Op) const override {
assert(is_contained(operands(), Op) &&
"Op must be an operand of the recipe");
assert(getNumOperands() <= 2 && "must have at most two operands");
@@ -1922,14 +1922,14 @@ public:
Type *getSourceElementType() const { return SourceElementTy; }
- bool onlyFirstLaneUsed(const VPValue *Op) const override {
+ bool usesFirstLaneOnly(const VPValue *Op) const override {
assert(is_contained(operands(), Op) &&
"Op must be an operand of the recipe");
return true;
}
/// Returns true if the recipe only uses the first part of operand \p Op.
- bool onlyFirstPartUsed(const VPValue *Op) const override {
+ bool usesFirstPartOnly(const VPValue *Op) const override {
assert(is_contained(operands(), Op) &&
"Op must be an operand of the recipe");
assert(getNumOperands() <= 2 && "must have at most two operands");
@@ -2110,7 +2110,7 @@ public:
}
/// Returns true if the recipe only uses the first lane of operand \p Op.
- bool onlyFirstLaneUsed(const VPValue *Op) const override {
+ bool usesFirstLaneOnly(const VPValue *Op) const override {
assert(is_contained(operands(), Op) &&
"Op must be an operand of the recipe");
// The recipe creates its own wide start value, so it only requests the
@@ -2325,7 +2325,7 @@ struct VPFirstOrderRecurrencePHIRecipe : public VPHeaderPHIRecipe {
#endif
/// Returns true if the recipe only uses the first lane of operand \p Op.
- bool onlyFirstLaneUsed(const VPValue *Op) const override {
+ bool usesFirstLaneOnly(const VPValue *Op) const override {
assert(is_contained(operands(), Op) &&
"Op must be an operand of the recipe");
return Op == getStartValue();
@@ -2399,7 +2399,7 @@ public:
bool isInLoop() const { return IsInLoop; }
/// Returns true if the recipe only uses the first lane of operand \p Op.
- bool onlyFirstLaneUsed(const VPValue *Op) const override {
+ bool usesFirstLaneOnly(const VPValue *Op) const override {
assert(is_contained(operands(), Op) &&
"Op must be an operand of the recipe");
return isOrdered() || isInLoop();
@@ -2468,13 +2468,13 @@ public:
#endif
/// Returns true if the recipe only uses the first lane of operand \p Op.
- bool onlyFirstLaneUsed(const VPValue *Op) const override {
+ bool usesFirstLaneOnly(const VPValue *Op) const override {
assert(is_contained(operands(), Op) &&
"Op must be an operand of the recipe");
// Recursing through Blend recipes only, must terminate at header phi's the
// latest.
return all_of(users(),
- [this](VPUser *U) { return U->onlyFirstLaneUsed(this); });
+ [this](VPUser *U) { return U->usesFirstLaneOnly(this); });
}
};
@@ -2562,7 +2562,7 @@ public:
VPCostContext &Ctx) const override;
/// Returns true if the recipe only uses the first lane of operand \p Op.
- bool onlyFirstLaneUsed(const VPValue *Op) const override = 0;
+ bool usesFirstLaneOnly(const VPValue *Op) const override = 0;
/// Returns the number of stored operands of this interleave group. Returns 0
/// for load interleave groups.
@@ -2608,7 +2608,7 @@ public:
VPSlotTracker &SlotTracker) const override;
#endif
- bool onlyFirstLaneUsed(const VPValue *Op) const override {
+ bool usesFirstLaneOnly(const VPValue *Op) const override {
assert(is_contained(operands(), Op) &&
"Op must be an operand of the recipe");
return Op == getAddr() && !llvm::is_contained(getStoredValues(), Op);
@@ -2656,7 +2656,7 @@ public:
#endif
/// The recipe only uses the first lane of the address, and EVL operand.
- bool onlyFirstLaneUsed(const VPValue *Op) const override {
+ bool usesFirstLaneOnly(const VPValue *Op) const override {
assert(is_contained(operands(), Op) &&
"Op must be an operand of the recipe");
return (Op == getAddr() && !llvm::is_contained(getStoredValues(), Op)) ||
@@ -2862,7 +2862,7 @@ public:
VPValue *getEVL() const { return getOperand(2); }
/// Returns true if the recipe only uses the first lane of operand \p Op.
- bool onlyFirstLaneUsed(const VPValue *Op) const override {
+ bool usesFirstLaneOnly(const VPValue *Op) const override {
assert(is_contained(operands(), Op) &&
"Op must be an operand of the recipe");
return Op == getEVL();
@@ -2924,7 +2924,7 @@ public:
bool isPredicated() const { return IsPredicated; }
/// Returns true if the recipe only uses the first lane of operand \p Op.
- bool onlyFirstLaneUsed(const VPValue *Op) const override {
+ bool usesFirstLaneOnly(const VPValue *Op) const override {
assert(is_contained(operands(), Op) &&
"Op must be an operand of the recipe");
return isSingleScalar();
@@ -3212,9 +3212,8 @@ protected:
Alignment(getLoadStoreAlignment(&I)), Consecutive(Consecutive),
Reverse(Reverse) {
assert((Consecutive || !Reverse) && "Reverse implies consecutive");
- assert(isa<VPVectorEndPointerRecipe>(getAddr()) ||
- !Reverse &&
- "Reversed acccess without VPVectorEndPointerRecipe address?");
+ assert((isa<VPVectorEndPointerRecipe>(getAddr()) || !Reverse) &&
+ "Reversed acccess without VPVectorEndPointerRecipe address?");
}
public:
@@ -3300,7 +3299,7 @@ struct LLVM_ABI_FOR_TEST VPWidenLoadRecipe final : public VPWidenMemoryRecipe,
#endif
/// Returns true if the recipe only uses the first lane of operand \p Op.
- bool onlyFirstLaneUsed(const VPValue *Op) const override {
+ bool usesFirstLaneOnly(const VPValue *Op) const override {
assert(is_contained(operands(), Op) &&
"Op must be an operand of the recipe");
// Widened, consecutive loads operations only demand the first lane of
@@ -3341,7 +3340,7 @@ struct VPWidenLoadEVLRecipe final : public VPWidenMemoryRecipe, public VPValue {
#endif
/// Returns true if the recipe only uses the first lane of operand \p Op.
- bool onlyFirstLaneUsed(const VPValue *Op) const override {
+ bool usesFirstLaneOnly(const VPValue *Op) const override {
assert(is_contained(operands(), Op) &&
"Op must be an operand of the recipe");
// Widened loads only demand the first lane of EVL and consecutive loads
@@ -3382,7 +3381,7 @@ struct LLVM_ABI_FOR_TEST VPWidenStoreRecipe final : public VPWidenMemoryRecipe {
#endif
/// Returns true if the recipe only uses the first lane of operand \p Op.
- bool onlyFirstLaneUsed(const VPValue *Op) const override {
+ bool usesFirstLaneOnly(const VPValue *Op) const override {
assert(is_contained(operands(), Op) &&
"Op must be an operand of the recipe");
// Widened, consecutive stores only demand the first lane of their address,
@@ -3425,7 +3424,7 @@ struct VPWidenStoreEVLRecipe final : public VPWidenMemoryRecipe {
#endif
/// Returns true if the recipe only uses the first lane of operand \p Op.
- bool onlyFirstLaneUsed(const VPValue *Op) const override {
+ bool usesFirstLaneOnly(const VPValue *Op) const override {
assert(is_contained(operands(), Op) &&
"Op must be an operand of the recipe");
if (Op == getEVL()) {
@@ -3509,14 +3508,14 @@ public:
}
/// Returns true if the recipe only uses the first lane of operand \p Op.
- bool onlyFirstLaneUsed(const VPValue *Op) const override {
+ bool usesFirstLaneOnly(const VPValue *Op) const override {
assert(is_contained(operands(), Op) &&
"Op must be an operand of the recipe");
return true;
}
/// Returns true if the recipe only uses the first part of operand \p Op.
- bool onlyFirstPartUsed(const VPValue *Op) const override {
+ bool usesFirstPartOnly(const VPValue *Op) const override {
assert(is_contained(operands(), Op) &&
"Op must be an operand of the recipe");
return true;
@@ -3591,7 +3590,7 @@ public:
}
/// Returns true if the recipe only uses the first lane of operand \p Op.
- bool onlyFirstLaneUsed(const VPValue *Op) const override {
+ bool usesFirstLaneOnly(const VPValue *Op) const override {
assert(is_contained(operands(), Op) &&
"Op must be an operand of the recipe");
return true;
@@ -3701,7 +3700,7 @@ public:
VPValue *getStepValue() const { return getOperand(2); }
/// Returns true if the recipe only uses the first lane of operand \p Op.
- bool onlyFirstLaneUsed(const VPValue *Op) const override {
+ bool usesFirstLaneOnly(const VPValue *Op) const override {
assert(is_contained(operands(), Op) &&
"Op must be an operand of the recipe");
return true;
@@ -3766,7 +3765,7 @@ public:
VPValue *getStepValue() const { return getOperand(1); }
/// Returns true if the recipe only uses the first lane of operand \p Op.
- bool onlyFirstLaneUsed(const VPValue *Op) const override {
+ bool usesFirstLaneOnly(const VPValue *Op) const override {
assert(is_contained(operands(), Op) &&
"Op must be an operand of the recipe");
return true;
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index f792d0a..80cd112 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -1276,7 +1276,7 @@ bool VPInstruction::opcodeMayReadOrWriteFromMemory() const {
}
}
-bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const {
+bool VPInstruction::usesFirstLaneOnly(const VPValue *Op) const {
assert(is_contained(operands(), Op) && "Op must be an operand of the recipe");
if (Instruction::isBinaryOp(getOpcode()) || Instruction::isCast(getOpcode()))
return vputils::onlyFirstLaneUsed(this);
@@ -1325,7 +1325,7 @@ bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const {
llvm_unreachable("switch should return");
}
-bool VPInstruction::onlyFirstPartUsed(const VPValue *Op) const {
+bool VPInstruction::usesFirstPartOnly(const VPValue *Op) const {
assert(is_contained(operands(), Op) && "Op must be an operand of the recipe");
if (Instruction::isBinaryOp(getOpcode()))
return vputils::onlyFirstPartUsed(this);
@@ -1692,7 +1692,7 @@ void VPWidenCallRecipe::execute(VPTransformState &State) {
if (!VFTy->getParamType(I.index())->isVectorTy())
Arg = State.get(I.value(), VPLane(0));
else
- Arg = State.get(I.value(), onlyFirstLaneUsed(I.value()));
+ Arg = State.get(I.value(), usesFirstLaneOnly(I.value()));
Args.push_back(Arg);
}
@@ -1761,7 +1761,7 @@ void VPWidenIntrinsicRecipe::execute(VPTransformState &State) {
State.TTI))
Arg = State.get(I.value(), VPLane(0));
else
- Arg = State.get(I.value(), onlyFirstLaneUsed(I.value()));
+ Arg = State.get(I.value(), usesFirstLaneOnly(I.value()));
if (isVectorIntrinsicWithOverloadTypeAtArg(VectorIntrinsicID, I.index(),
State.TTI))
TysForDecl.push_back(Arg->getType());
@@ -1843,7 +1843,7 @@ StringRef VPWidenIntrinsicRecipe::getIntrinsicName() const {
return Intrinsic::getBaseName(VectorIntrinsicID);
}
-bool VPWidenIntrinsicRecipe::onlyFirstLaneUsed(const VPValue *Op) const {
+bool VPWidenIntrinsicRecipe::usesFirstLaneOnly(const VPValue *Op) const {
assert(is_contained(operands(), Op) && "Op must be an operand of the recipe");
return all_of(enumerate(operands()), [this, &Op](const auto &X) {
auto [Idx, V] = X;
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 82bf79e..48bd697 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -204,7 +204,7 @@ static bool sinkScalarOperands(VPlan &Plan) {
return cast<VPRecipeBase>(U)->getParent() != SinkTo;
});
if (any_of(UsersOutsideSinkTo, [SinkCandidate](VPUser *U) {
- return !U->onlyFirstLaneUsed(SinkCandidate);
+ return !U->usesFirstLaneOnly(SinkCandidate);
}))
continue;
bool NeedsDuplicating = !UsersOutsideSinkTo.empty();
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
index d6a0028..d4b8b72b 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
@@ -582,7 +582,7 @@ void VPlanTransforms::replicateByVF(VPlan &Plan, ElementCount VF) {
/// Users that only demand the first lane can use the definition for lane
/// 0.
DefR->replaceUsesWithIf(LaneDefs[0], [DefR](VPUser &U, unsigned) {
- return U.onlyFirstLaneUsed(DefR);
+ return U.usesFirstLaneOnly(DefR);
});
// Update each build vector user that currently has DefR as its only
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
index c6380d3..e22c5df 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
@@ -18,12 +18,12 @@ using namespace llvm::VPlanPatternMatch;
bool vputils::onlyFirstLaneUsed(const VPValue *Def) {
return all_of(Def->users(),
- [Def](const VPUser *U) { return U->onlyFirstLaneUsed(Def); });
+ [Def](const VPUser *U) { return U->usesFirstLaneOnly(Def); });
}
bool vputils::onlyFirstPartUsed(const VPValue *Def) {
return all_of(Def->users(),
- [Def](const VPUser *U) { return U->onlyFirstPartUsed(Def); });
+ [Def](const VPUser *U) { return U->usesFirstPartOnly(Def); });
}
bool vputils::onlyScalarValuesUsed(const VPValue *Def) {
diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h
index 83e3fca..5da7463 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanValue.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h
@@ -274,12 +274,12 @@ public:
virtual bool usesScalars(const VPValue *Op) const {
assert(is_contained(operands(), Op) &&
"Op must be an operand of the recipe");
- return onlyFirstLaneUsed(Op);
+ return usesFirstLaneOnly(Op);
}
/// Returns true if the VPUser only uses the first lane of operand \p Op.
/// Conservatively returns false.
- virtual bool onlyFirstLaneUsed(const VPValue *Op) const {
+ virtual bool usesFirstLaneOnly(const VPValue *Op) const {
assert(is_contained(operands(), Op) &&
"Op must be an operand of the recipe");
return false;
@@ -287,7 +287,7 @@ public:
/// Returns true if the VPUser only uses the first part of operand \p Op.
/// Conservatively returns false.
- virtual bool onlyFirstPartUsed(const VPValue *Op) const {
+ virtual bool usesFirstPartOnly(const VPValue *Op) const {
assert(is_contained(operands(), Op) &&
"Op must be an operand of the recipe");
return false;