Diffstat (limited to 'llvm/lib')
-rw-r--r-- llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp | 62
-rw-r--r-- llvm/lib/CodeGen/InterleavedAccessPass.cpp | 13
-rw-r--r-- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 2
-rw-r--r-- llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 151
-rw-r--r-- llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h | 11
-rw-r--r-- llvm/lib/ExecutionEngine/Orc/TargetProcess/LibraryResolver.cpp | 33
-rw-r--r-- llvm/lib/ExecutionEngine/Orc/TargetProcess/LibraryScanner.cpp | 2
-rw-r--r-- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 172
-rw-r--r-- llvm/lib/Target/AArch64/AArch64ISelLowering.h | 7
-rw-r--r-- llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp | 38
-rw-r--r-- llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h | 2
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPU.td | 12
-rw-r--r-- llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp | 14
-rw-r--r-- llvm/lib/Target/AMDGPU/FLATInstructions.td | 140
-rw-r--r-- llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp | 8
-rw-r--r-- llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp | 149
-rw-r--r-- llvm/lib/Target/LoongArch/LoongArchInstrInfo.h | 6
-rw-r--r-- llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp | 9
-rw-r--r-- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 81
-rw-r--r-- llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp | 7
-rw-r--r-- llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp | 11
-rw-r--r-- llvm/lib/Target/SPIRV/SPIRVBuiltins.td | 5
-rw-r--r-- llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp | 4
-rw-r--r-- llvm/lib/Target/SPIRV/SPIRVInstrInfo.td | 6
-rw-r--r-- llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp | 7
-rw-r--r-- llvm/lib/Target/SPIRV/SPIRVPrepareFunctions.cpp | 16
-rw-r--r-- llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td | 3
-rw-r--r-- llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp | 64
-rw-r--r-- llvm/lib/Target/X86/X86ISelLowering.cpp | 6
-rw-r--r-- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 7
-rw-r--r-- llvm/lib/Transforms/Vectorize/VPlan.h | 90
-rw-r--r-- llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp | 10
-rw-r--r-- llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 13
-rw-r--r-- llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp | 2
-rw-r--r-- llvm/lib/Transforms/Vectorize/VPlanUtils.cpp | 4
-rw-r--r-- llvm/lib/Transforms/Vectorize/VPlanValue.h | 6
36 files changed, 652 insertions, 521 deletions
diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
index 4fd2204..be1b51f 100644
--- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -2821,20 +2821,34 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) {
if (translateKnownIntrinsic(CI, ID, MIRBuilder))
return true;
+ TargetLowering::IntrinsicInfo Info;
+ bool IsTgtMemIntrinsic = TLI->getTgtMemIntrinsic(Info, CI, *MF, ID);
+
+ return translateIntrinsic(CI, ID, MIRBuilder,
+ IsTgtMemIntrinsic ? &Info : nullptr);
+}
+
+/// Translate a call to an intrinsic.
+/// Depending on whether TLI->getTgtMemIntrinsic() is true, TgtMemIntrinsicInfo
+/// is a pointer to the correspondingly populated IntrinsicInfo object.
+/// Otherwise, this pointer is null.
+bool IRTranslator::translateIntrinsic(
+ const CallBase &CB, Intrinsic::ID ID, MachineIRBuilder &MIRBuilder,
+ const TargetLowering::IntrinsicInfo *TgtMemIntrinsicInfo) {
ArrayRef<Register> ResultRegs;
- if (!CI.getType()->isVoidTy())
- ResultRegs = getOrCreateVRegs(CI);
+ if (!CB.getType()->isVoidTy())
+ ResultRegs = getOrCreateVRegs(CB);
// Ignore the callsite attributes. Backend code is most likely not expecting
// an intrinsic to sometimes have side effects and sometimes not.
MachineInstrBuilder MIB = MIRBuilder.buildIntrinsic(ID, ResultRegs);
- if (isa<FPMathOperator>(CI))
- MIB->copyIRFlags(CI);
+ if (isa<FPMathOperator>(CB))
+ MIB->copyIRFlags(CB);
- for (const auto &Arg : enumerate(CI.args())) {
+ for (const auto &Arg : enumerate(CB.args())) {
// If this is required to be an immediate, don't materialize it in a
// register.
- if (CI.paramHasAttr(Arg.index(), Attribute::ImmArg)) {
+ if (CB.paramHasAttr(Arg.index(), Attribute::ImmArg)) {
if (ConstantInt *CI = dyn_cast<ConstantInt>(Arg.value())) {
// imm arguments are more convenient than cimm (and realistically
// probably sufficient), so use them.
@@ -2863,29 +2877,33 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) {
}
// Add a MachineMemOperand if it is a target mem intrinsic.
- TargetLowering::IntrinsicInfo Info;
- // TODO: Add a GlobalISel version of getTgtMemIntrinsic.
- if (TLI->getTgtMemIntrinsic(Info, CI, *MF, ID)) {
- Align Alignment = Info.align.value_or(
- DL->getABITypeAlign(Info.memVT.getTypeForEVT(F->getContext())));
- LLT MemTy = Info.memVT.isSimple()
- ? getLLTForMVT(Info.memVT.getSimpleVT())
- : LLT::scalar(Info.memVT.getStoreSizeInBits());
+ if (TgtMemIntrinsicInfo) {
+ const Function *F = CB.getCalledFunction();
+
+ Align Alignment = TgtMemIntrinsicInfo->align.value_or(DL->getABITypeAlign(
+ TgtMemIntrinsicInfo->memVT.getTypeForEVT(F->getContext())));
+ LLT MemTy =
+ TgtMemIntrinsicInfo->memVT.isSimple()
+ ? getLLTForMVT(TgtMemIntrinsicInfo->memVT.getSimpleVT())
+ : LLT::scalar(TgtMemIntrinsicInfo->memVT.getStoreSizeInBits());
// TODO: We currently just fallback to address space 0 if getTgtMemIntrinsic
// didn't yield anything useful.
MachinePointerInfo MPI;
- if (Info.ptrVal)
- MPI = MachinePointerInfo(Info.ptrVal, Info.offset);
- else if (Info.fallbackAddressSpace)
- MPI = MachinePointerInfo(*Info.fallbackAddressSpace);
+ if (TgtMemIntrinsicInfo->ptrVal) {
+ MPI = MachinePointerInfo(TgtMemIntrinsicInfo->ptrVal,
+ TgtMemIntrinsicInfo->offset);
+ } else if (TgtMemIntrinsicInfo->fallbackAddressSpace) {
+ MPI = MachinePointerInfo(*TgtMemIntrinsicInfo->fallbackAddressSpace);
+ }
MIB.addMemOperand(MF->getMachineMemOperand(
- MPI, Info.flags, MemTy, Alignment, CI.getAAMetadata(),
- /*Ranges=*/nullptr, Info.ssid, Info.order, Info.failureOrder));
+ MPI, TgtMemIntrinsicInfo->flags, MemTy, Alignment, CB.getAAMetadata(),
+ /*Ranges=*/nullptr, TgtMemIntrinsicInfo->ssid,
+ TgtMemIntrinsicInfo->order, TgtMemIntrinsicInfo->failureOrder));
}
- if (CI.isConvergent()) {
- if (auto Bundle = CI.getOperandBundle(LLVMContext::OB_convergencectrl)) {
+ if (CB.isConvergent()) {
+ if (auto Bundle = CB.getOperandBundle(LLVMContext::OB_convergencectrl)) {
auto *Token = Bundle->Inputs[0].get();
Register TokenReg = getOrCreateVReg(*Token);
MIB.addUse(TokenReg, RegState::Implicit);
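
Note: translateCall now performs the getTgtMemIntrinsic query itself and hands the populated IntrinsicInfo (or nullptr) to the new translateIntrinsic helper, presumably so that callers which already hold an IntrinsicInfo can share the same translation path. The old TODO about a GlobalISel-specific getTgtMemIntrinsic goes away along with the inline query.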
diff --git a/llvm/lib/CodeGen/InterleavedAccessPass.cpp b/llvm/lib/CodeGen/InterleavedAccessPass.cpp
index 45eca28..5c27a20 100644
--- a/llvm/lib/CodeGen/InterleavedAccessPass.cpp
+++ b/llvm/lib/CodeGen/InterleavedAccessPass.cpp
@@ -239,8 +239,7 @@ static bool isDeInterleaveMask(ArrayRef<int> Mask, unsigned &Factor,
/// I.e. <0, LaneLen, ... , LaneLen*(Factor - 1), 1, LaneLen + 1, ...>
/// E.g. For a Factor of 2 (LaneLen=4): <0, 4, 1, 5, 2, 6, 3, 7>
static bool isReInterleaveMask(ShuffleVectorInst *SVI, unsigned &Factor,
- unsigned MaxFactor,
- bool InterleaveWithShuffles) {
+ unsigned MaxFactor) {
unsigned NumElts = SVI->getShuffleMask().size();
if (NumElts < 4)
return false;
@@ -251,13 +250,6 @@ static bool isReInterleaveMask(ShuffleVectorInst *SVI, unsigned &Factor,
return true;
}
- if (InterleaveWithShuffles) {
- for (unsigned i = 1; MaxFactor * i <= 16; i *= 2) {
- Factor = i * MaxFactor;
- if (SVI->isInterleave(Factor))
- return true;
- }
- }
return false;
}
@@ -536,8 +528,7 @@ bool InterleavedAccessImpl::lowerInterleavedStore(
cast<FixedVectorType>(SVI->getType())->getNumElements();
// Check if the shufflevector is RE-interleave shuffle.
unsigned Factor;
- if (!isReInterleaveMask(SVI, Factor, MaxFactor,
- TLI->isProfitableToInterleaveWithGatherScatter()))
+ if (!isReInterleaveMask(SVI, Factor, MaxFactor))
return false;
assert(NumStoredElements % Factor == 0 &&
"number of stored element should be a multiple of Factor");
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 46c4bb8..816b7ba 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -4046,6 +4046,8 @@ static SDValue foldSubCtlzNot(SDNode *N, SelectionDAG &DAG) {
m_ConstInt(AndMask)))) {
// Type Legalisation Pattern:
// (sub (ctlz (and (xor Op XorMask) AndMask)) BitWidthDiff)
+ if (BitWidthDiff.getZExtValue() >= BitWidth)
+ return SDValue();
unsigned AndMaskWidth = BitWidth - BitWidthDiff.getZExtValue();
if (!(AndMask.isMask(AndMaskWidth) && XorMask.countr_one() >= AndMaskWidth))
return SDValue();
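
The new early-out guards the unsigned subtraction below it: with a BitWidthDiff at or above BitWidth, the old code computed BitWidth - BitWidthDiff modulo 2^32. A standalone sketch of the failure mode (plain C++, not LLVM code; the values are hypothetical):

    #include <cstdint>
    #include <cstdio>

    int main() {
      unsigned BitWidth = 32;     // scalar type width
      uint64_t BitWidthDiff = 40; // constant taken from the IR; may exceed BitWidth
      if (BitWidthDiff >= BitWidth) { // the added guard
        puts("fold rejected");
        return 0;
      }
      // Without the guard this wraps: 32 - 40 == 4294967288 as unsigned.
      unsigned AndMaskWidth = BitWidth - (unsigned)BitWidthDiff;
      printf("AndMaskWidth = %u\n", AndMaskWidth);
    }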
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index fa0c899..9961c98 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -3526,8 +3526,7 @@ void SelectionDAGBuilder::visitCallBr(const CallBrInst &I) {
// Update successor info.
addSuccessorWithProb(CallBrMBB, Return, BranchProbability::getOne());
- for (unsigned i = 0, e = I.getNumIndirectDests(); i < e; ++i) {
- BasicBlock *Dest = I.getIndirectDest(i);
+ for (BasicBlock *Dest : I.getIndirectDests()) {
MachineBasicBlock *Target = FuncInfo.getMBB(Dest);
Target->setIsInlineAsmBrIndirectTarget();
// If we introduce a type of asm goto statement that is permitted to use an
@@ -5313,18 +5312,26 @@ void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) {
DAG.setRoot(OutChain);
}
-/// visitTargetIntrinsic - Lower a call of a target intrinsic to an INTRINSIC
-/// node.
-void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
- unsigned Intrinsic) {
- // Ignore the callsite's attributes. A specific call site may be marked with
- // readnone, but the lowering code will expect the chain based on the
- // definition.
+/// Check if this intrinsic call depends on the chain (1st return value)
+/// and if it only *loads* memory.
+/// Ignore the callsite's attributes. A specific call site may be marked with
+/// readnone, but the lowering code will expect the chain based on the
+/// definition.
+std::pair<bool, bool>
+SelectionDAGBuilder::getTargetIntrinsicCallProperties(const CallBase &I) {
const Function *F = I.getCalledFunction();
bool HasChain = !F->doesNotAccessMemory();
bool OnlyLoad =
HasChain && F->onlyReadsMemory() && F->willReturn() && F->doesNotThrow();
+ return {HasChain, OnlyLoad};
+}
+
+SmallVector<SDValue, 8> SelectionDAGBuilder::getTargetIntrinsicOperands(
+ const CallBase &I, bool HasChain, bool OnlyLoad,
+ TargetLowering::IntrinsicInfo *TgtMemIntrinsicInfo) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
// Build the operand list.
SmallVector<SDValue, 8> Ops;
if (HasChain) { // If this intrinsic has side-effects, chainify it.
@@ -5336,17 +5343,10 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
}
}
- // Info is set by getTgtMemIntrinsic
- TargetLowering::IntrinsicInfo Info;
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- bool IsTgtIntrinsic = TLI.getTgtMemIntrinsic(Info, I,
- DAG.getMachineFunction(),
- Intrinsic);
-
// Add the intrinsic ID as an integer operand if it's not a target intrinsic.
- if (!IsTgtIntrinsic || Info.opc == ISD::INTRINSIC_VOID ||
- Info.opc == ISD::INTRINSIC_W_CHAIN)
- Ops.push_back(DAG.getTargetConstant(Intrinsic, getCurSDLoc(),
+ if (!TgtMemIntrinsicInfo || TgtMemIntrinsicInfo->opc == ISD::INTRINSIC_VOID ||
+ TgtMemIntrinsicInfo->opc == ISD::INTRINSIC_W_CHAIN)
+ Ops.push_back(DAG.getTargetConstant(I.getIntrinsicID(), getCurSDLoc(),
TLI.getPointerTy(DAG.getDataLayout())));
// Add all operands of the call to the operand list.
@@ -5369,13 +5369,85 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
}
}
+ if (std::optional<OperandBundleUse> Bundle =
+ I.getOperandBundle(LLVMContext::OB_convergencectrl)) {
+ Value *Token = Bundle->Inputs[0].get();
+ SDValue ConvControlToken = getValue(Token);
+ assert(Ops.back().getValueType() != MVT::Glue &&
+ "Did not expect another glue node here.");
+ ConvControlToken =
+ DAG.getNode(ISD::CONVERGENCECTRL_GLUE, {}, MVT::Glue, ConvControlToken);
+ Ops.push_back(ConvControlToken);
+ }
+
+ return Ops;
+}
+
+SDVTList SelectionDAGBuilder::getTargetIntrinsicVTList(const CallBase &I,
+ bool HasChain) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
SmallVector<EVT, 4> ValueVTs;
ComputeValueVTs(TLI, DAG.getDataLayout(), I.getType(), ValueVTs);
if (HasChain)
ValueVTs.push_back(MVT::Other);
- SDVTList VTs = DAG.getVTList(ValueVTs);
+ return DAG.getVTList(ValueVTs);
+}
+
+/// Get an INTRINSIC node for a target intrinsic which does not touch memory.
+SDValue SelectionDAGBuilder::getTargetNonMemIntrinsicNode(
+ const Type &IntrinsicVT, bool HasChain, ArrayRef<SDValue> Ops,
+ const SDVTList &VTs) {
+ if (!HasChain)
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurSDLoc(), VTs, Ops);
+ if (!IntrinsicVT.isVoidTy())
+ return DAG.getNode(ISD::INTRINSIC_W_CHAIN, getCurSDLoc(), VTs, Ops);
+ return DAG.getNode(ISD::INTRINSIC_VOID, getCurSDLoc(), VTs, Ops);
+}
+
+/// Set root, convert return type if necessary and check alignment.
+SDValue SelectionDAGBuilder::handleTargetIntrinsicRet(const CallBase &I,
+ bool HasChain,
+ bool OnlyLoad,
+ SDValue Result) {
+ if (HasChain) {
+ SDValue Chain = Result.getValue(Result.getNode()->getNumValues() - 1);
+ if (OnlyLoad)
+ PendingLoads.push_back(Chain);
+ else
+ DAG.setRoot(Chain);
+ }
+
+ if (I.getType()->isVoidTy())
+ return Result;
+
+ if (MaybeAlign Alignment = I.getRetAlign(); InsertAssertAlign && Alignment) {
+ // Insert `assertalign` node if there's an alignment.
+ Result = DAG.getAssertAlign(getCurSDLoc(), Result, Alignment.valueOrOne());
+ } else if (!isa<VectorType>(I.getType())) {
+ Result = lowerRangeToAssertZExt(DAG, I, Result);
+ }
+
+ return Result;
+}
+
+/// visitTargetIntrinsic - Lower a call of a target intrinsic to an INTRINSIC
+/// node.
+void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
+ unsigned Intrinsic) {
+ auto [HasChain, OnlyLoad] = getTargetIntrinsicCallProperties(I);
+
+ // Info is set by getTgtMemIntrinsic
+ TargetLowering::IntrinsicInfo Info;
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ bool IsTgtMemIntrinsic =
+ TLI.getTgtMemIntrinsic(Info, I, DAG.getMachineFunction(), Intrinsic);
+
+ SmallVector<SDValue, 8> Ops = getTargetIntrinsicOperands(
+ I, HasChain, OnlyLoad, IsTgtMemIntrinsic ? &Info : nullptr);
+ SDVTList VTs = getTargetIntrinsicVTList(I, HasChain);
// Propagate fast-math-flags from IR to node(s).
SDNodeFlags Flags;
@@ -5386,19 +5458,9 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
// Create the node.
SDValue Result;
- if (auto Bundle = I.getOperandBundle(LLVMContext::OB_convergencectrl)) {
- auto *Token = Bundle->Inputs[0].get();
- SDValue ConvControlToken = getValue(Token);
- assert(Ops.back().getValueType() != MVT::Glue &&
- "Did not expected another glue node here.");
- ConvControlToken =
- DAG.getNode(ISD::CONVERGENCECTRL_GLUE, {}, MVT::Glue, ConvControlToken);
- Ops.push_back(ConvControlToken);
- }
-
// In some cases, custom collection of operands from CallInst I may be needed.
TLI.CollectTargetIntrinsicOperands(I, Ops, DAG);
- if (IsTgtIntrinsic) {
+ if (IsTgtMemIntrinsic) {
// This is target intrinsic that touches memory
//
// TODO: We currently just fallback to address space 0 if getTgtMemIntrinsic
@@ -5418,34 +5480,11 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
Info.ssid, Info.order, Info.failureOrder);
Result =
DAG.getMemIntrinsicNode(Info.opc, getCurSDLoc(), VTs, Ops, MemVT, MMO);
- } else if (!HasChain) {
- Result = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurSDLoc(), VTs, Ops);
- } else if (!I.getType()->isVoidTy()) {
- Result = DAG.getNode(ISD::INTRINSIC_W_CHAIN, getCurSDLoc(), VTs, Ops);
} else {
- Result = DAG.getNode(ISD::INTRINSIC_VOID, getCurSDLoc(), VTs, Ops);
+ Result = getTargetNonMemIntrinsicNode(*I.getType(), HasChain, Ops, VTs);
}
- if (HasChain) {
- SDValue Chain = Result.getValue(Result.getNode()->getNumValues()-1);
- if (OnlyLoad)
- PendingLoads.push_back(Chain);
- else
- DAG.setRoot(Chain);
- }
-
- if (!I.getType()->isVoidTy()) {
- if (!isa<VectorType>(I.getType()))
- Result = lowerRangeToAssertZExt(DAG, I, Result);
-
- MaybeAlign Alignment = I.getRetAlign();
-
- // Insert `assertalign` node if there's an alignment.
- if (InsertAssertAlign && Alignment) {
- Result =
- DAG.getAssertAlign(getCurSDLoc(), Result, Alignment.valueOrOne());
- }
- }
+ Result = handleTargetIntrinsicRet(I, HasChain, OnlyLoad, Result);
setValue(&I, Result);
}
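
With this split, visitTargetIntrinsic reads as a pipeline: getTargetIntrinsicCallProperties derives HasChain/OnlyLoad from the callee's attributes, getTargetIntrinsicOperands builds the operand list (and now also appends the convergence-control glue, which previously happened inline), getTargetIntrinsicVTList computes the result types, and handleTargetIntrinsicRet updates the root and applies the assertalign/assert-zext handling. One observable reordering: assertalign and the range-based assert-zext are now mutually exclusive (if/else-if), where the old code could apply both to the same result.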
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index 47e19f7..ed63bee 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -727,6 +727,17 @@ private:
MCSymbol *&BeginLabel);
SDValue lowerEndEH(SDValue Chain, const InvokeInst *II,
const BasicBlock *EHPadBB, MCSymbol *BeginLabel);
+
+ std::pair<bool, bool> getTargetIntrinsicCallProperties(const CallBase &I);
+ SmallVector<SDValue, 8> getTargetIntrinsicOperands(
+ const CallBase &I, bool HasChain, bool OnlyLoad,
+ TargetLowering::IntrinsicInfo *TgtMemIntrinsicInfo = nullptr);
+ SDVTList getTargetIntrinsicVTList(const CallBase &I, bool HasChain);
+ SDValue getTargetNonMemIntrinsicNode(const Type &IntrinsicVT, bool HasChain,
+ ArrayRef<SDValue> Ops,
+ const SDVTList &VTs);
+ SDValue handleTargetIntrinsicRet(const CallBase &I, bool HasChain,
+ bool OnlyLoad, SDValue Result);
};
/// This struct represents the registers (physical or virtual)
diff --git a/llvm/lib/ExecutionEngine/Orc/TargetProcess/LibraryResolver.cpp b/llvm/lib/ExecutionEngine/Orc/TargetProcess/LibraryResolver.cpp
index 35da82a..7e1d528 100644
--- a/llvm/lib/ExecutionEngine/Orc/TargetProcess/LibraryResolver.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/TargetProcess/LibraryResolver.cpp
@@ -184,9 +184,9 @@ class SymbolSearchContext {
public:
SymbolSearchContext(SymbolQuery &Q) : Q(Q) {}
- bool hasSearched(LibraryInfo *Lib) const { return Searched.count(Lib); }
+ bool hasSearched(const LibraryInfo *Lib) const { return Searched.count(Lib); }
- void markSearched(LibraryInfo *Lib) { Searched.insert(Lib); }
+ void markSearched(const LibraryInfo *Lib) { Searched.insert(Lib); }
inline bool allResolved() const { return Q.allResolved(); }
@@ -194,7 +194,7 @@ public:
private:
SymbolQuery &Q;
- DenseSet<LibraryInfo *> Searched;
+ DenseSet<const LibraryInfo *> Searched;
};
void LibraryResolver::resolveSymbolsInLibrary(
@@ -226,19 +226,18 @@ void LibraryResolver::resolveSymbolsInLibrary(
return EnumerateResult::Continue;
},
Opts);
+ };
+ if (!Lib.hasFilter()) {
+ LLVM_DEBUG(dbgs() << "Building filter for library: " << Lib.getFullPath()
+ << "\n";);
+ enumerateSymbolsIfNeeded();
if (DiscoveredSymbols.empty()) {
LLVM_DEBUG(dbgs() << " No symbols and remove library : "
<< Lib.getFullPath() << "\n";);
LibMgr.removeLibrary(Lib.getFullPath());
return;
}
- };
-
- if (!Lib.hasFilter()) {
- LLVM_DEBUG(dbgs() << "Building filter for library: " << Lib.getFullPath()
- << "\n";);
- enumerateSymbolsIfNeeded();
SmallVector<StringRef> SymbolVec;
SymbolVec.reserve(DiscoveredSymbols.size());
for (const auto &KV : DiscoveredSymbols)
@@ -288,11 +287,15 @@ void LibraryResolver::searchSymbolsInLibraries(
SymbolSearchContext Ctx(Q);
while (!Ctx.allResolved()) {
+ std::vector<std::shared_ptr<LibraryInfo>> Libs;
+ LibMgr.getLibraries(S, K, Libs, [&](const LibraryInfo &Lib) {
+ return !Ctx.hasSearched(&Lib);
+ });
- for (auto &Lib : LibMgr.getView(S, K)) {
- if (Ctx.hasSearched(Lib.get()))
- continue;
+ if (Libs.empty() && !scanLibrariesIfNeeded(K, scanBatchSize))
+ break; // no more new libs to scan
+ for (auto &Lib : Libs) {
// can use Async here?
resolveSymbolsInLibrary(*Lib, Ctx.query(), Config.Options);
Ctx.markSearched(Lib.get());
@@ -300,12 +303,6 @@ void LibraryResolver::searchSymbolsInLibraries(
if (Ctx.allResolved())
return;
}
-
- if (Ctx.allResolved())
- return;
-
- if (!scanLibrariesIfNeeded(K, scanBatchSize))
- break; // no more new libs to scan
}
};
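
The search loop is also restructured: candidate libraries now come from LibMgr.getLibraries() with a not-yet-searched predicate, and a scan is kicked off as soon as that filtered list comes back empty, rather than filtering inside the loop and only scanning at the end of each pass.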
diff --git a/llvm/lib/ExecutionEngine/Orc/TargetProcess/LibraryScanner.cpp b/llvm/lib/ExecutionEngine/Orc/TargetProcess/LibraryScanner.cpp
index d93f686..32f6dbe 100644
--- a/llvm/lib/ExecutionEngine/Orc/TargetProcess/LibraryScanner.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/TargetProcess/LibraryScanner.cpp
@@ -50,7 +50,7 @@ void handleError(Error Err, StringRef context = "") {
}
bool ObjectFileLoader::isArchitectureCompatible(const object::ObjectFile &Obj) {
- Triple HostTriple(sys::getDefaultTargetTriple());
+ Triple HostTriple(sys::getProcessTriple());
Triple ObjTriple = Obj.makeTriple();
LLVM_DEBUG({
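
Using sys::getProcessTriple() instead of sys::getDefaultTargetTriple() is the right comparison here: the default triple reflects how LLVM was configured (and can name a cross-compilation target), while the process triple describes the binary actually running, which is what a library must be compatible with before it can be loaded into this process.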
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 2987468..40e6400 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -50,6 +50,7 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SDPatternMatch.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetCallingConv.h"
@@ -96,7 +97,6 @@
#include <cctype>
#include <cstdint>
#include <cstdlib>
-#include <deque>
#include <iterator>
#include <limits>
#include <optional>
@@ -105,7 +105,6 @@
#include <vector>
using namespace llvm;
-using namespace llvm::PatternMatch;
#define DEBUG_TYPE "aarch64-lower"
@@ -1175,6 +1174,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setTargetDAGCombine(ISD::SHL);
setTargetDAGCombine(ISD::VECTOR_DEINTERLEAVE);
+ setTargetDAGCombine(ISD::CTPOP);
// In case of strict alignment, avoid an excessive number of byte wide stores.
MaxStoresPerMemsetOptSize = 8;
@@ -11331,9 +11331,10 @@ SDValue AArch64TargetLowering::LowerMinMax(SDValue Op,
break;
}
+ // Note: This lowering only overrides NEON for v1i64 and v2i64, where we
+ // prefer using SVE if available.
if (VT.isScalableVector() ||
- useSVEForFixedLengthVectorVT(
- VT, /*OverrideNEON=*/Subtarget->useSVEForFixedLengthVectors())) {
+ useSVEForFixedLengthVectorVT(VT, /*OverrideNEON=*/true)) {
switch (Opcode) {
default:
llvm_unreachable("Wrong instruction");
@@ -17555,6 +17556,7 @@ bool AArch64TargetLowering::optimizeExtendOrTruncateConversion(
// udot instruction.
if (SrcWidth * 4 <= DstWidth) {
if (all_of(I->users(), [&](auto *U) {
+ using namespace llvm::PatternMatch;
auto *SingleUser = cast<Instruction>(&*U);
if (match(SingleUser, m_c_Mul(m_Specific(I), m_SExt(m_Value()))))
return true;
@@ -17826,6 +17828,7 @@ bool AArch64TargetLowering::lowerInterleavedLoad(
// into shift / and masks. For the moment we do this just for uitofp (not
// zext) to avoid issues with widening instructions.
if (Shuffles.size() == 4 && all_of(Shuffles, [](ShuffleVectorInst *SI) {
+ using namespace llvm::PatternMatch;
return SI->hasOneUse() && match(SI->user_back(), m_UIToFP(m_Value())) &&
SI->getType()->getScalarSizeInBits() * 4 ==
SI->user_back()->getType()->getScalarSizeInBits();
@@ -17990,17 +17993,11 @@ bool AArch64TargetLowering::lowerInterleavedStore(Instruction *Store,
unsigned Factor,
const APInt &GapMask) const {
+ assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() &&
+ "Invalid interleave factor");
auto *SI = dyn_cast<StoreInst>(Store);
if (!SI)
return false;
-
- if (isProfitableToInterleaveWithGatherScatter() &&
- Factor > getMaxSupportedInterleaveFactor())
- return lowerInterleavedStoreWithShuffle(SI, SVI, Factor);
-
- assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() &&
- "Invalid interleave factor");
-
assert(!LaneMask && GapMask.popcount() == Factor &&
"Unexpected mask on store");
@@ -18146,126 +18143,6 @@ bool AArch64TargetLowering::lowerInterleavedStore(Instruction *Store,
return true;
}
-/// If the interleaved vector elements are greater than supported MaxFactor,
-/// interleaving the data with additional shuffles can be used to
-/// achieve the same.
-///
-/// Consider the following data with 8 interleaves which are shuffled to store
-/// stN instructions. Data needs to be stored in this order:
-/// [v0, v1, v2, v3, v4, v5, v6, v7]
-///
-/// v0 v4 v2 v6 v1 v5 v3 v7
-/// | | | | | | | |
-/// \ / \ / \ / \ /
-/// [zip v0,v4] [zip v2,v6] [zip v1,v5] [zip v3,v7] ==> stN = 4
-/// | | | |
-/// \ / \ /
-/// \ / \ /
-/// \ / \ /
-/// [zip [v0,v2,v4,v6]] [zip [v1,v3,v5,v7]] ==> stN = 2
-///
-/// For stN = 4, upper half of interleaved data V0, V1, V2, V3 is stored
-/// with one st4 instruction. Lower half, i.e, V4, V5, V6, V7 is stored with
-/// another st4.
-///
-/// For stN = 2, upper half of interleaved data V0, V1 is stored
-/// with one st2 instruction. Second set V2, V3 is stored with another st2.
-/// Total of 4 st2's are required here.
-bool AArch64TargetLowering::lowerInterleavedStoreWithShuffle(
- StoreInst *SI, ShuffleVectorInst *SVI, unsigned Factor) const {
- unsigned MaxSupportedFactor = getMaxSupportedInterleaveFactor();
-
- auto *VecTy = cast<FixedVectorType>(SVI->getType());
- assert(VecTy->getNumElements() % Factor == 0 && "Invalid interleaved store");
-
- unsigned LaneLen = VecTy->getNumElements() / Factor;
- Type *EltTy = VecTy->getElementType();
- auto *SubVecTy = FixedVectorType::get(EltTy, Factor);
-
- const DataLayout &DL = SI->getModule()->getDataLayout();
- bool UseScalable;
-
- // Skip if we do not have NEON and skip illegal vector types. We can
- // "legalize" wide vector types into multiple interleaved accesses as long as
- // the vector types are divisible by 128.
- if (!Subtarget->hasNEON() ||
- !isLegalInterleavedAccessType(SubVecTy, DL, UseScalable))
- return false;
-
- if (UseScalable)
- return false;
-
- std::deque<Value *> Shuffles;
- Shuffles.push_back(SVI);
- unsigned ConcatLevel = Factor;
- // Getting all the interleaved operands.
- while (ConcatLevel > 1) {
- unsigned InterleavedOperands = Shuffles.size();
- for (unsigned i = 0; i < InterleavedOperands; i++) {
- ShuffleVectorInst *SFL = dyn_cast<ShuffleVectorInst>(Shuffles.front());
- if (!SFL)
- return false;
- Shuffles.pop_front();
-
- Value *Op0 = SFL->getOperand(0);
- Value *Op1 = SFL->getOperand(1);
-
- Shuffles.push_back(dyn_cast<Value>(Op0));
- Shuffles.push_back(dyn_cast<Value>(Op1));
- }
- ConcatLevel >>= 1;
- }
-
- IRBuilder<> Builder(SI);
- auto Mask = createInterleaveMask(LaneLen, 2);
- SmallVector<int, 16> UpperHalfMask(LaneLen), LowerHalfMask(LaneLen);
- for (unsigned i = 0; i < LaneLen; i++) {
- LowerHalfMask[i] = Mask[i];
- UpperHalfMask[i] = Mask[i + LaneLen];
- }
-
- unsigned InterleaveFactor = Factor >> 1;
- while (InterleaveFactor >= MaxSupportedFactor) {
- std::deque<Value *> ShufflesIntermediate;
- ShufflesIntermediate.resize(Factor);
- for (unsigned j = 0; j < Factor; j += (InterleaveFactor * 2)) {
- for (unsigned i = 0; i < InterleaveFactor; i++) {
- auto *Shuffle = Builder.CreateShuffleVector(
- Shuffles[i + j], Shuffles[i + j + InterleaveFactor], LowerHalfMask);
- ShufflesIntermediate[i + j] = Shuffle;
- Shuffle = Builder.CreateShuffleVector(
- Shuffles[i + j], Shuffles[i + j + InterleaveFactor], UpperHalfMask);
- ShufflesIntermediate[i + j + InterleaveFactor] = Shuffle;
- }
- }
- Shuffles = ShufflesIntermediate;
- InterleaveFactor >>= 1;
- }
-
- Type *PtrTy = SI->getPointerOperandType();
- auto *STVTy = FixedVectorType::get(SubVecTy->getElementType(), LaneLen);
-
- Value *BaseAddr = SI->getPointerOperand();
- Function *StNFunc = getStructuredStoreFunction(
- SI->getModule(), MaxSupportedFactor, UseScalable, STVTy, PtrTy);
- for (unsigned i = 0; i < (Factor / MaxSupportedFactor); i++) {
- SmallVector<Value *, 5> Ops;
- for (unsigned j = 0; j < MaxSupportedFactor; j++)
- Ops.push_back(Shuffles[i * MaxSupportedFactor + j]);
-
- if (i > 0) {
- // We will compute the pointer operand of each store from the original
- // base address using GEPs. Cast the base address to a pointer to the
- // scalar element type.
- BaseAddr = Builder.CreateConstGEP1_32(
- SubVecTy->getElementType(), BaseAddr, LaneLen * MaxSupportedFactor);
- }
- Ops.push_back(Builder.CreateBitCast(BaseAddr, PtrTy));
- Builder.CreateCall(StNFunc, Ops);
- }
- return true;
-}
-
bool AArch64TargetLowering::lowerDeinterleaveIntrinsicToLoad(
Instruction *Load, Value *Mask, IntrinsicInst *DI) const {
const unsigned Factor = getDeinterleaveIntrinsicFactor(DI->getIntrinsicID());
@@ -27968,6 +27845,35 @@ static SDValue performRNDRCombine(SDNode *N, SelectionDAG &DAG) {
{A, DAG.getZExtOrTrunc(B, DL, MVT::i1), A.getValue(2)}, DL);
}
+static SDValue performCTPOPCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ SelectionDAG &DAG) {
+ using namespace llvm::SDPatternMatch;
+ if (!DCI.isBeforeLegalize())
+ return SDValue();
+
+ // ctpop(zext(bitcast(vector_mask))) -> neg(signed_reduce_add(vector_mask))
+ SDValue Mask;
+ if (!sd_match(N->getOperand(0), m_ZExt(m_BitCast(m_Value(Mask)))))
+ return SDValue();
+
+ EVT VT = N->getValueType(0);
+ EVT MaskVT = Mask.getValueType();
+
+ if (VT.isVector() || !MaskVT.isFixedLengthVector() ||
+ MaskVT.getVectorElementType() != MVT::i1)
+ return SDValue();
+
+ EVT ReduceInVT =
+ EVT::getVectorVT(*DAG.getContext(), VT, MaskVT.getVectorElementCount());
+
+ SDLoc DL(N);
+ // Sign extend to best fit ZeroOrNegativeOneBooleanContent.
+ SDValue ExtMask = DAG.getNode(ISD::SIGN_EXTEND, DL, ReduceInVT, Mask);
+ SDValue NegPopCount = DAG.getNode(ISD::VECREDUCE_ADD, DL, VT, ExtMask);
+ return DAG.getNegative(NegPopCount, DL, VT);
+}
+
SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
@@ -28313,6 +28219,8 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
return performScalarToVectorCombine(N, DCI, DAG);
case ISD::SHL:
return performSHLCombine(N, DCI, DAG);
+ case ISD::CTPOP:
+ return performCTPOPCombine(N, DCI, DAG);
}
return SDValue();
}
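
The arithmetic behind performCTPOPCombine: each i1 lane sign-extends to 0 or -1, so a signed add-reduction of the extended mask yields minus the population count, and negating recovers ctpop. A standalone illustration (plain C++, not LLVM code):

    #include <cstdio>

    int main() {
      bool Mask[8] = {true, false, true, true, false, false, true, false};
      int Sum = 0;
      for (bool Lane : Mask)
        Sum += Lane ? -1 : 0; // sign-extended i1 lane
      printf("popcount = %d\n", -Sum); // prints 4
      return 0;
    }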
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index bfd8474..70bfae7 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -229,10 +229,6 @@ public:
bool hasPairedLoad(EVT LoadedType, Align &RequiredAlignment) const override;
- bool isProfitableToInterleaveWithGatherScatter() const override {
- return true;
- }
-
unsigned getMaxSupportedInterleaveFactor() const override { return 4; }
bool lowerInterleavedLoad(Instruction *Load, Value *Mask,
@@ -243,9 +239,6 @@ public:
ShuffleVectorInst *SVI, unsigned Factor,
const APInt &GapMask) const override;
- bool lowerInterleavedStoreWithShuffle(StoreInst *SI, ShuffleVectorInst *SVI,
- unsigned Factor) const;
-
bool lowerDeinterleaveIntrinsicToLoad(Instruction *Load, Value *Mask,
IntrinsicInst *DI) const override;
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 8729ed3..197aae6 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -4922,36 +4922,11 @@ InstructionCost AArch64TTIImpl::getInterleavedMemoryOpCost(
if (!VecTy->isScalableTy() && (UseMaskForCond || UseMaskForGaps))
return InstructionCost::getInvalid();
- unsigned NumLoadStores = 1;
- InstructionCost ShuffleCost = 0;
- bool isInterleaveWithShuffle = false;
- unsigned MaxSupportedFactor = TLI->getMaxSupportedInterleaveFactor();
-
- auto *SubVecTy =
- VectorType::get(VecVTy->getElementType(),
- VecVTy->getElementCount().divideCoefficientBy(Factor));
-
- if (TLI->isProfitableToInterleaveWithGatherScatter() &&
- Opcode == Instruction::Store && (0 == Factor % MaxSupportedFactor) &&
- Factor > MaxSupportedFactor) {
- isInterleaveWithShuffle = true;
- SmallVector<int, 16> Mask;
- // preparing interleave Mask.
- for (unsigned i = 0; i < VecVTy->getElementCount().getKnownMinValue() / 2;
- i++) {
- for (unsigned j = 0; j < 2; j++)
- Mask.push_back(j * Factor + i);
- }
-
- NumLoadStores = Factor / MaxSupportedFactor;
- ShuffleCost =
- (Factor * getShuffleCost(TargetTransformInfo::SK_Splice, VecVTy, VecVTy,
- Mask, CostKind, 0, SubVecTy));
- }
-
- if (!UseMaskForGaps &&
- (Factor <= MaxSupportedFactor || isInterleaveWithShuffle)) {
+ if (!UseMaskForGaps && Factor <= TLI->getMaxSupportedInterleaveFactor()) {
unsigned MinElts = VecVTy->getElementCount().getKnownMinValue();
+ auto *SubVecTy =
+ VectorType::get(VecVTy->getElementType(),
+ VecVTy->getElementCount().divideCoefficientBy(Factor));
// ldN/stN only support legal vector types of size 64 or 128 in bits.
// Accesses having vector types that are a multiple of 128 bits can be
@@ -4959,10 +4934,7 @@ InstructionCost AArch64TTIImpl::getInterleavedMemoryOpCost(
bool UseScalable;
if (MinElts % Factor == 0 &&
TLI->isLegalInterleavedAccessType(SubVecTy, DL, UseScalable))
- return (Factor *
- TLI->getNumInterleavedAccesses(SubVecTy, DL, UseScalable) *
- NumLoadStores) +
- ShuffleCost;
+ return Factor * TLI->getNumInterleavedAccesses(SubVecTy, DL, UseScalable);
}
return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
index e3b0a1b..e62fdb6 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -312,7 +312,7 @@ public:
}
bool isLegalMaskedLoadStore(Type *DataType, Align Alignment) const {
- if (!ST->hasSVE())
+ if (!ST->isSVEorStreamingSVEAvailable())
return false;
// For fixed vectors, avoid scalarization if using SVE for them.
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index 4fe194c..54d94b1 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -2366,18 +2366,6 @@ def isGFX8GFX9NotGFX90A :
" Subtarget->getGeneration() == AMDGPUSubtarget::GFX9)">,
AssemblerPredicate<(all_of FeatureGFX8Insts, FeatureGCN3Encoding, (not FeatureGFX90AInsts))>;
-// Pre-90A GFX9s allow the NV bit in FLAT instructions.
-def isNVAllowedInFlat :
- Predicate<"!Subtarget->hasGFX90AInsts() &&"
- " Subtarget->getGeneration() == AMDGPUSubtarget::GFX9)">,
- AssemblerPredicate<(all_of FeatureGFX9Insts, (not FeatureGFX90AInsts), (not FeatureGFX10Insts))>;
-
-// GFX8 or GFX90A+ do not allow the NV bit in FLAT instructions.
-def isNVNotAllowedInFlat :
- Predicate<"(Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS) ||"
- " ((Subtarget->getGeneration() == AMDGPUSubtarget::GFX9) && Subtarget->hasGFX90AInsts())">,
- AssemblerPredicate <(any_of FeatureVolcanicIslands, FeatureGFX90AInsts)>;
-
def isGFX90AOnly :
Predicate<"Subtarget->hasGFX90AInsts() && !Subtarget->hasGFX940Insts()">,
AssemblerPredicate<(all_of FeatureGFX90AInsts, (not FeatureGFX940Insts))>;
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 2808c44..09338c5 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -1602,11 +1602,6 @@ public:
bool hasKernargPreload() const { return AMDGPU::hasKernargPreload(getSTI()); }
- bool isFlatInstAndNVAllowed(const MCInst &Inst) const {
- uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
- return (TSFlags & SIInstrFlags::FLAT) && isGFX9() && !isGFX90A();
- }
-
AMDGPUTargetStreamer &getTargetStreamer() {
MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
return static_cast<AMDGPUTargetStreamer &>(TS);
@@ -5375,7 +5370,7 @@ bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scale_offset")]);
Error(S, "scale_offset is not supported on this GPU");
}
- if ((CPol & CPol::NV) && !isFlatInstAndNVAllowed(Inst)) {
+ if (CPol & CPol::NV) {
SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
StringRef CStr(S.getPointer());
S = SMLoc::getFromPointer(&CStr.data()[CStr.find("nv")]);
@@ -7150,13 +7145,6 @@ ParseStatus AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
unsigned Enabled = 0, Seen = 0;
for (;;) {
SMLoc S = getLoc();
-
- if (isGFX9() && trySkipId("nv")) {
- Enabled |= CPol::NV;
- Seen |= CPol::NV;
- continue;
- }
-
bool Disabling;
unsigned CPol = getCPolKind(getId(), Mnemo, Disabling);
if (!CPol)
diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td
index 6ef2241..8ea64d1 100644
--- a/llvm/lib/Target/AMDGPU/FLATInstructions.td
+++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td
@@ -125,7 +125,7 @@ class FLAT_Real <bits<7> op, FLAT_Pseudo ps, string opName = ps.Mnemonic> :
bits<7> saddr;
bits<10> vdst;
- bits<6> cpol;
+ bits<5> cpol;
// Only valid on gfx9
bits<1> lds = ps.lds; // LDS DMA for global and scratch
@@ -2693,52 +2693,29 @@ class FLAT_Real_vi <bits<7> op, FLAT_Pseudo ps, bit has_sccb = ps.has_sccb> :
!subst("$sccb", !if(has_sccb, "$sccb",""), ps.AsmOperands);
}
-class FLAT_Real_vi_ex_gfx9 <bits<7> op, FLAT_Pseudo ps, bit has_sccb = ps.has_sccb> :
- FLAT_Real_vi <op, ps, has_sccb> {
- let AssemblerPredicate = isNVNotAllowedInFlat;
-}
-
-class FLAT_Real_gfx9 <bits<7> op, FLAT_Pseudo ps, bit has_sccb = ps.has_sccb> :
- FLAT_Real_vi <op, ps, has_sccb> {
- let AssemblerPredicate = isNVAllowedInFlat;
- let Subtarget = SIEncodingFamily.GFX9;
- let DecoderNamespace = "GFX9";
- let Inst{55} = cpol{CPolBit.NV}; // nv - GFX9 (pre-90A) uses bit 55 as the non-volatile bit.
-}
-
-multiclass FLAT_Real_mc_vi <bits<7> op, FLAT_Pseudo ps, bit has_sccb = ps.has_sccb> {
- def _vi: FLAT_Real_vi_ex_gfx9<op, ps, has_sccb>;
- def _gfx9: FLAT_Real_gfx9<op, ps, has_sccb>;
-}
-
multiclass FLAT_Real_AllAddr_vi<bits<7> op,
bit has_sccb = !cast<FLAT_Pseudo>(NAME).has_sccb> {
- defm "" : FLAT_Real_mc_vi<op, !cast<FLAT_Pseudo>(NAME), has_sccb>;
- defm _SADDR : FLAT_Real_mc_vi<op, !cast<FLAT_Pseudo>(NAME#"_SADDR"), has_sccb>;
-}
-
-multiclass FLAT_Real_AllAddr_vi_ex_gfx9<bits<7> op,
- bit has_sccb = !cast<FLAT_Pseudo>(NAME).has_sccb> {
- def _vi : FLAT_Real_vi_ex_gfx9<op, !cast<FLAT_Pseudo>(NAME), has_sccb>;
- def _SADDR_vi : FLAT_Real_vi_ex_gfx9<op, !cast<FLAT_Pseudo>(NAME#"_SADDR"), has_sccb>;
+ def _vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(NAME), has_sccb>;
+ def _SADDR_vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(NAME#"_SADDR"), has_sccb>;
}
class FLAT_Real_gfx940 <bits<7> op, FLAT_Pseudo ps> :
FLAT_Real <op, ps>,
SIMCInstr <ps.PseudoInstr, SIEncodingFamily.GFX940> {
let AssemblerPredicate = isGFX940Plus;
- let DecoderNamespace = "GFX940";
+ let DecoderNamespace = "GFX9";
let Inst{13} = ps.sve;
let Inst{25} = !if(ps.has_sccb, cpol{CPolBit.SCC}, ps.sccbValue);
}
multiclass FLAT_Real_AllAddr_SVE_vi<bits<7> op> {
- let OtherPredicates = [isGFX8GFX9NotGFX940] in {
- defm "" : FLAT_Real_mc_vi<op, !cast<FLAT_Pseudo>(NAME)>;
+ def _vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(NAME)> {
+ let AssemblerPredicate = isGFX8GFX9NotGFX940;
+ let OtherPredicates = [isGFX8GFX9NotGFX940];
+ }
+ def _SADDR_vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(NAME#"_SADDR")> {
+ let DecoderNamespace = "GFX9";
}
-
- defm _SADDR_vi : FLAT_Real_mc_vi<op, !cast<FLAT_Pseudo>(NAME#"_SADDR")>;
-
let AssemblerPredicate = isGFX940Plus in {
def _VE_gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME)>;
def _SVS_gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME#"_SVS")>;
@@ -2751,11 +2728,11 @@ multiclass FLAT_Real_AllAddr_LDS<bits<7> op, bits<7> pre_gfx940_op,
bit has_sccb = !cast<FLAT_Pseudo>(NAME).has_sccb> {
let OtherPredicates = [isGFX8GFX9NotGFX940] in {
- let AsmString = pre_gfx940_name # !cast<FLAT_Pseudo>(NAME).AsmOperands # " lds" in {
- defm "" : FLAT_Real_mc_vi<pre_gfx940_op, !cast<FLAT_Pseudo>(NAME), has_sccb>;
+ def _vi : FLAT_Real_vi<pre_gfx940_op, !cast<FLAT_Pseudo>(NAME), has_sccb> {
+ let AsmString = pre_gfx940_name # !cast<FLAT_Pseudo>(NAME).AsmOperands # " lds";
}
- let AsmString = pre_gfx940_name # !cast<FLAT_Pseudo>(NAME#"_SADDR").AsmOperands # " lds" in {
- defm _SADDR : FLAT_Real_mc_vi<pre_gfx940_op, !cast<FLAT_Pseudo>(NAME#"_SADDR"), has_sccb>;
+ def _SADDR_vi : FLAT_Real_vi<pre_gfx940_op, !cast<FLAT_Pseudo>(NAME#"_SADDR"), has_sccb> {
+ let AsmString = pre_gfx940_name # !cast<FLAT_Pseudo>(NAME#"_SADDR").AsmOperands # " lds";
}
}
@@ -2771,66 +2748,47 @@ multiclass FLAT_Real_AllAddr_SVE_LDS<bits<7> op, bits<7> pre_gfx940_op> {
def _ST_gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME#"_ST")>;
}
-defm FLAT_LOAD_UBYTE_vi : FLAT_Real_mc_vi <0x10, FLAT_LOAD_UBYTE>;
-defm FLAT_LOAD_SBYTE_vi : FLAT_Real_mc_vi <0x11, FLAT_LOAD_SBYTE>;
-defm FLAT_LOAD_USHORT_vi : FLAT_Real_mc_vi <0x12, FLAT_LOAD_USHORT>;
-defm FLAT_LOAD_SSHORT_vi : FLAT_Real_mc_vi <0x13, FLAT_LOAD_SSHORT>;
-defm FLAT_LOAD_DWORD_vi : FLAT_Real_mc_vi <0x14, FLAT_LOAD_DWORD>;
-defm FLAT_LOAD_DWORDX2_vi : FLAT_Real_mc_vi <0x15, FLAT_LOAD_DWORDX2>;
-defm FLAT_LOAD_DWORDX4_vi : FLAT_Real_mc_vi <0x17, FLAT_LOAD_DWORDX4>;
-defm FLAT_LOAD_DWORDX3_vi : FLAT_Real_mc_vi <0x16, FLAT_LOAD_DWORDX3>;
-
-defm FLAT_STORE_BYTE_vi : FLAT_Real_mc_vi <0x18, FLAT_STORE_BYTE>;
-defm FLAT_STORE_BYTE_D16_HI_vi : FLAT_Real_mc_vi <0x19, FLAT_STORE_BYTE_D16_HI>;
-defm FLAT_STORE_SHORT_vi : FLAT_Real_mc_vi <0x1a, FLAT_STORE_SHORT>;
-defm FLAT_STORE_SHORT_D16_HI_vi : FLAT_Real_mc_vi <0x1b, FLAT_STORE_SHORT_D16_HI>;
-defm FLAT_STORE_DWORD_vi : FLAT_Real_mc_vi <0x1c, FLAT_STORE_DWORD>;
-defm FLAT_STORE_DWORDX2_vi : FLAT_Real_mc_vi <0x1d, FLAT_STORE_DWORDX2>;
-defm FLAT_STORE_DWORDX4_vi : FLAT_Real_mc_vi <0x1f, FLAT_STORE_DWORDX4>;
-defm FLAT_STORE_DWORDX3_vi : FLAT_Real_mc_vi <0x1e, FLAT_STORE_DWORDX3>;
-
-defm FLAT_LOAD_UBYTE_D16_vi : FLAT_Real_mc_vi <0x20, FLAT_LOAD_UBYTE_D16>;
-defm FLAT_LOAD_UBYTE_D16_HI_vi : FLAT_Real_mc_vi <0x21, FLAT_LOAD_UBYTE_D16_HI>;
-defm FLAT_LOAD_SBYTE_D16_vi : FLAT_Real_mc_vi <0x22, FLAT_LOAD_SBYTE_D16>;
-defm FLAT_LOAD_SBYTE_D16_HI_vi : FLAT_Real_mc_vi <0x23, FLAT_LOAD_SBYTE_D16_HI>;
-defm FLAT_LOAD_SHORT_D16_vi : FLAT_Real_mc_vi <0x24, FLAT_LOAD_SHORT_D16>;
-defm FLAT_LOAD_SHORT_D16_HI_vi : FLAT_Real_mc_vi <0x25, FLAT_LOAD_SHORT_D16_HI>;
+def FLAT_LOAD_UBYTE_vi : FLAT_Real_vi <0x10, FLAT_LOAD_UBYTE>;
+def FLAT_LOAD_SBYTE_vi : FLAT_Real_vi <0x11, FLAT_LOAD_SBYTE>;
+def FLAT_LOAD_USHORT_vi : FLAT_Real_vi <0x12, FLAT_LOAD_USHORT>;
+def FLAT_LOAD_SSHORT_vi : FLAT_Real_vi <0x13, FLAT_LOAD_SSHORT>;
+def FLAT_LOAD_DWORD_vi : FLAT_Real_vi <0x14, FLAT_LOAD_DWORD>;
+def FLAT_LOAD_DWORDX2_vi : FLAT_Real_vi <0x15, FLAT_LOAD_DWORDX2>;
+def FLAT_LOAD_DWORDX4_vi : FLAT_Real_vi <0x17, FLAT_LOAD_DWORDX4>;
+def FLAT_LOAD_DWORDX3_vi : FLAT_Real_vi <0x16, FLAT_LOAD_DWORDX3>;
+
+def FLAT_STORE_BYTE_vi : FLAT_Real_vi <0x18, FLAT_STORE_BYTE>;
+def FLAT_STORE_BYTE_D16_HI_vi : FLAT_Real_vi <0x19, FLAT_STORE_BYTE_D16_HI>;
+def FLAT_STORE_SHORT_vi : FLAT_Real_vi <0x1a, FLAT_STORE_SHORT>;
+def FLAT_STORE_SHORT_D16_HI_vi : FLAT_Real_vi <0x1b, FLAT_STORE_SHORT_D16_HI>;
+def FLAT_STORE_DWORD_vi : FLAT_Real_vi <0x1c, FLAT_STORE_DWORD>;
+def FLAT_STORE_DWORDX2_vi : FLAT_Real_vi <0x1d, FLAT_STORE_DWORDX2>;
+def FLAT_STORE_DWORDX4_vi : FLAT_Real_vi <0x1f, FLAT_STORE_DWORDX4>;
+def FLAT_STORE_DWORDX3_vi : FLAT_Real_vi <0x1e, FLAT_STORE_DWORDX3>;
+
+def FLAT_LOAD_UBYTE_D16_vi : FLAT_Real_vi <0x20, FLAT_LOAD_UBYTE_D16>;
+def FLAT_LOAD_UBYTE_D16_HI_vi : FLAT_Real_vi <0x21, FLAT_LOAD_UBYTE_D16_HI>;
+def FLAT_LOAD_SBYTE_D16_vi : FLAT_Real_vi <0x22, FLAT_LOAD_SBYTE_D16>;
+def FLAT_LOAD_SBYTE_D16_HI_vi : FLAT_Real_vi <0x23, FLAT_LOAD_SBYTE_D16_HI>;
+def FLAT_LOAD_SHORT_D16_vi : FLAT_Real_vi <0x24, FLAT_LOAD_SHORT_D16>;
+def FLAT_LOAD_SHORT_D16_HI_vi : FLAT_Real_vi <0x25, FLAT_LOAD_SHORT_D16_HI>;
multiclass FLAT_Real_Atomics_vi <bits<7> op,
bit has_sccb = !cast<FLAT_Pseudo>(NAME).has_sccb> {
defvar ps = !cast<FLAT_Pseudo>(NAME);
- defm "" : FLAT_Real_mc_vi<op, !cast<FLAT_Pseudo>(ps.PseudoInstr), has_sccb>;
- defm _RTN : FLAT_Real_mc_vi<op, !cast<FLAT_Pseudo>(ps.PseudoInstr # "_RTN"), has_sccb>;
- def _RTN_agpr_vi : FLAT_Real_vi_ex_gfx9<op, !cast<FLAT_Pseudo>(ps.PseudoInstr # "_RTN_agpr"), has_sccb>;
-}
-
-multiclass FLAT_Real_Atomics_vi_ex_gfx9 <bits<7> op,
- bit has_sccb = !cast<FLAT_Pseudo>(NAME).has_sccb> {
- defvar ps = !cast<FLAT_Pseudo>(NAME);
- def _vi : FLAT_Real_vi_ex_gfx9<op, !cast<FLAT_Pseudo>(ps.PseudoInstr), has_sccb>;
- def _RTN_vi : FLAT_Real_vi_ex_gfx9<op, !cast<FLAT_Pseudo>(ps.PseudoInstr # "_RTN"), has_sccb>;
-
- def _RTN_agpr_vi : FLAT_Real_vi_ex_gfx9<op, !cast<FLAT_Pseudo>(ps.PseudoInstr # "_RTN_agpr"), has_sccb>;
+ def _vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(ps.PseudoInstr), has_sccb>;
+ def _RTN_vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(ps.PseudoInstr # "_RTN"), has_sccb>;
+ def _RTN_agpr_vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(ps.PseudoInstr # "_RTN_agpr"), has_sccb>;
}
multiclass FLAT_Global_Real_Atomics_vi<bits<7> op,
bit has_sccb = !cast<FLAT_Pseudo>(NAME).has_sccb> :
FLAT_Real_AllAddr_vi<op, has_sccb> {
- defm _RTN : FLAT_Real_mc_vi <op, !cast<FLAT_Pseudo>(NAME#"_RTN"), has_sccb>;
- defm _SADDR_RTN : FLAT_Real_mc_vi <op, !cast<FLAT_Pseudo>(NAME#"_SADDR_RTN"), has_sccb>;
-
- def _RTN_agpr_vi : FLAT_Real_vi_ex_gfx9 <op, !cast<FLAT_Pseudo>(NAME#"_RTN_agpr"), has_sccb>;
- def _SADDR_RTN_agpr_vi : FLAT_Real_vi_ex_gfx9 <op, !cast<FLAT_Pseudo>(NAME#"_SADDR_RTN_agpr"), has_sccb>;
-}
-
-multiclass FLAT_Global_Real_Atomics_vi_ex_gfx9<bits<7> op,
- bit has_sccb = !cast<FLAT_Pseudo>(NAME).has_sccb> :
- FLAT_Real_AllAddr_vi_ex_gfx9<op, has_sccb> {
- def _RTN_vi : FLAT_Real_vi_ex_gfx9 <op, !cast<FLAT_Pseudo>(NAME#"_RTN"), has_sccb>;
- def _SADDR_RTN_vi : FLAT_Real_vi_ex_gfx9 <op, !cast<FLAT_Pseudo>(NAME#"_SADDR_RTN"), has_sccb>;
+ def _RTN_vi : FLAT_Real_vi <op, !cast<FLAT_Pseudo>(NAME#"_RTN"), has_sccb>;
+ def _SADDR_RTN_vi : FLAT_Real_vi <op, !cast<FLAT_Pseudo>(NAME#"_SADDR_RTN"), has_sccb>;
- def _RTN_agpr_vi : FLAT_Real_vi_ex_gfx9 <op, !cast<FLAT_Pseudo>(NAME#"_RTN_agpr"), has_sccb>;
- def _SADDR_RTN_agpr_vi : FLAT_Real_vi_ex_gfx9 <op, !cast<FLAT_Pseudo>(NAME#"_SADDR_RTN_agpr"), has_sccb>;
+ def _RTN_agpr_vi : FLAT_Real_vi <op, !cast<FLAT_Pseudo>(NAME#"_RTN_agpr"), has_sccb>;
+ def _SADDR_RTN_agpr_vi : FLAT_Real_vi <op, !cast<FLAT_Pseudo>(NAME#"_SADDR_RTN_agpr"), has_sccb>;
}
defm FLAT_ATOMIC_SWAP : FLAT_Real_Atomics_vi <0x40>;
@@ -2992,10 +2950,10 @@ let AssemblerPredicate = isGFX940Plus in {
defm GLOBAL_ATOMIC_ADD_F64 : FLAT_Global_Real_Atomics_gfx940<0x4f>;
defm GLOBAL_ATOMIC_MIN_F64 : FLAT_Global_Real_Atomics_gfx940<0x50>;
defm GLOBAL_ATOMIC_MAX_F64 : FLAT_Global_Real_Atomics_gfx940<0x51>;
- defm FLAT_ATOMIC_ADD_F32 : FLAT_Real_Atomics_vi_ex_gfx9<0x4d>;
- defm FLAT_ATOMIC_PK_ADD_F16 : FLAT_Real_Atomics_vi_ex_gfx9<0x4e>;
- defm FLAT_ATOMIC_PK_ADD_BF16 : FLAT_Real_Atomics_vi_ex_gfx9<0x52>;
- defm GLOBAL_ATOMIC_PK_ADD_BF16 : FLAT_Global_Real_Atomics_vi_ex_gfx9<0x52>;
+ defm FLAT_ATOMIC_ADD_F32 : FLAT_Real_Atomics_vi<0x4d>;
+ defm FLAT_ATOMIC_PK_ADD_F16 : FLAT_Real_Atomics_vi<0x4e>;
+ defm FLAT_ATOMIC_PK_ADD_BF16 : FLAT_Real_Atomics_vi<0x52>;
+ defm GLOBAL_ATOMIC_PK_ADD_BF16 : FLAT_Global_Real_Atomics_vi<0x52>;
} // End AssemblerPredicate = isGFX940Plus
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
index 3e6f35d..703ec0a 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
@@ -186,12 +186,8 @@ void AMDGPUInstPrinter::printCPol(const MCInst *MI, unsigned OpNo,
O << " dlc";
if ((Imm & CPol::SCC) && AMDGPU::isGFX90A(STI))
O << (AMDGPU::isGFX940(STI) ? " sc1" : " scc");
- if (Imm & ~CPol::ALL_pregfx12) {
- if ((Imm & CPol::NV) && AMDGPU::isGFX9(STI) && !AMDGPU::isGFX90A(STI))
- O << " nv";
- else
- O << " /* unexpected cache policy bit */";
- }
+ if (Imm & ~CPol::ALL_pregfx12)
+ O << " /* unexpected cache policy bit */";
}
void AMDGPUInstPrinter::printTH(const MCInst *MI, int64_t TH, int64_t Scope,
diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp
index c89212d..90a4723 100644
--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp
@@ -756,6 +756,155 @@ LoongArchInstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const {
return ArrayRef(TargetFlags);
}
+bool LoongArchInstrInfo::canFoldIntoAddrMode(const MachineInstr &MemI,
+ Register Reg,
+ const MachineInstr &AddrI,
+ ExtAddrMode &AM) const {
+ enum MemIOffsetType {
+ Imm14Shift2,
+ Imm12,
+ Imm11Shift1,
+ Imm10Shift2,
+ Imm9Shift3,
+ Imm8,
+ Imm8Shift1,
+ Imm8Shift2,
+ Imm8Shift3
+ };
+
+ MemIOffsetType OT;
+ switch (MemI.getOpcode()) {
+ default:
+ return false;
+ case LoongArch::LDPTR_W:
+ case LoongArch::LDPTR_D:
+ case LoongArch::STPTR_W:
+ case LoongArch::STPTR_D:
+ OT = Imm14Shift2;
+ break;
+ case LoongArch::LD_B:
+ case LoongArch::LD_H:
+ case LoongArch::LD_W:
+ case LoongArch::LD_D:
+ case LoongArch::LD_BU:
+ case LoongArch::LD_HU:
+ case LoongArch::LD_WU:
+ case LoongArch::ST_B:
+ case LoongArch::ST_H:
+ case LoongArch::ST_W:
+ case LoongArch::ST_D:
+ case LoongArch::FLD_S:
+ case LoongArch::FLD_D:
+ case LoongArch::FST_S:
+ case LoongArch::FST_D:
+ case LoongArch::VLD:
+ case LoongArch::VST:
+ case LoongArch::XVLD:
+ case LoongArch::XVST:
+ case LoongArch::VLDREPL_B:
+ case LoongArch::XVLDREPL_B:
+ OT = Imm12;
+ break;
+ case LoongArch::VLDREPL_H:
+ case LoongArch::XVLDREPL_H:
+ OT = Imm11Shift1;
+ break;
+ case LoongArch::VLDREPL_W:
+ case LoongArch::XVLDREPL_W:
+ OT = Imm10Shift2;
+ break;
+ case LoongArch::VLDREPL_D:
+ case LoongArch::XVLDREPL_D:
+ OT = Imm9Shift3;
+ break;
+ case LoongArch::VSTELM_B:
+ case LoongArch::XVSTELM_B:
+ OT = Imm8;
+ break;
+ case LoongArch::VSTELM_H:
+ case LoongArch::XVSTELM_H:
+ OT = Imm8Shift1;
+ break;
+ case LoongArch::VSTELM_W:
+ case LoongArch::XVSTELM_W:
+ OT = Imm8Shift2;
+ break;
+ case LoongArch::VSTELM_D:
+ case LoongArch::XVSTELM_D:
+ OT = Imm8Shift3;
+ break;
+ }
+
+ if (MemI.getOperand(0).getReg() == Reg)
+ return false;
+
+ if ((AddrI.getOpcode() != LoongArch::ADDI_W &&
+ AddrI.getOpcode() != LoongArch::ADDI_D) ||
+ !AddrI.getOperand(1).isReg() || !AddrI.getOperand(2).isImm())
+ return false;
+
+ int64_t OldOffset = MemI.getOperand(2).getImm();
+ int64_t Disp = AddrI.getOperand(2).getImm();
+ int64_t NewOffset = OldOffset + Disp;
+ if (!STI.is64Bit())
+ NewOffset = SignExtend64<32>(NewOffset);
+
+ if (!(OT == Imm14Shift2 && isShiftedInt<14, 2>(NewOffset) && STI.hasUAL()) &&
+ !(OT == Imm12 && isInt<12>(NewOffset)) &&
+ !(OT == Imm11Shift1 && isShiftedInt<11, 1>(NewOffset)) &&
+ !(OT == Imm10Shift2 && isShiftedInt<10, 2>(NewOffset)) &&
+ !(OT == Imm9Shift3 && isShiftedInt<9, 3>(NewOffset)) &&
+ !(OT == Imm8 && isInt<8>(NewOffset)) &&
+ !(OT == Imm8Shift1 && isShiftedInt<8, 1>(NewOffset)) &&
+ !(OT == Imm8Shift2 && isShiftedInt<8, 2>(NewOffset)) &&
+ !(OT == Imm8Shift3 && isShiftedInt<8, 3>(NewOffset)))
+ return false;
+
+ AM.BaseReg = AddrI.getOperand(1).getReg();
+ AM.ScaledReg = 0;
+ AM.Scale = 0;
+ AM.Displacement = NewOffset;
+ AM.Form = ExtAddrMode::Formula::Basic;
+ return true;
+}
+
+MachineInstr *
+LoongArchInstrInfo::emitLdStWithAddr(MachineInstr &MemI,
+ const ExtAddrMode &AM) const {
+ const DebugLoc &DL = MemI.getDebugLoc();
+ MachineBasicBlock &MBB = *MemI.getParent();
+
+ assert(AM.ScaledReg == 0 && AM.Scale == 0 &&
+ "Addressing mode not supported for folding");
+
+ unsigned MemIOp = MemI.getOpcode();
+ switch (MemIOp) {
+ default:
+ return BuildMI(MBB, MemI, DL, get(MemIOp))
+ .addReg(MemI.getOperand(0).getReg(),
+ MemI.mayLoad() ? RegState::Define : 0)
+ .addReg(AM.BaseReg)
+ .addImm(AM.Displacement)
+ .setMemRefs(MemI.memoperands())
+ .setMIFlags(MemI.getFlags());
+ case LoongArch::VSTELM_B:
+ case LoongArch::VSTELM_H:
+ case LoongArch::VSTELM_W:
+ case LoongArch::VSTELM_D:
+ case LoongArch::XVSTELM_B:
+ case LoongArch::XVSTELM_H:
+ case LoongArch::XVSTELM_W:
+ case LoongArch::XVSTELM_D:
+ return BuildMI(MBB, MemI, DL, get(MemIOp))
+ .addReg(MemI.getOperand(0).getReg(), 0)
+ .addReg(AM.BaseReg)
+ .addImm(AM.Displacement)
+ .addImm(MemI.getOperand(3).getImm())
+ .setMemRefs(MemI.memoperands())
+ .setMIFlags(MemI.getFlags());
+ }
+}
+
// Returns true if this is the sext.w pattern, addi.w rd, rs, 0.
bool LoongArch::isSEXT_W(const MachineInstr &MI) {
return MI.getOpcode() == LoongArch::ADDI_W && MI.getOperand(1).isReg() &&
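
The legality test mirrors each instruction's immediate encoding; for example, LDPTR_W takes a signed 14-bit offset scaled by 4 (Imm14Shift2). A standalone sketch of the check, where isShiftedIntNS is a stand-in for llvm::isShiftedInt and the operands are made up for illustration:

    #include <cstdint>
    #include <cstdio>

    // Stand-in for llvm::isShiftedInt<N, S>: a signed N-bit value with S low zero bits.
    template <unsigned N, unsigned S> bool isShiftedIntNS(int64_t X) {
      return (X & ((1 << S) - 1)) == 0 &&
             X >= -(int64_t(1) << (N + S - 1)) && X < (int64_t(1) << (N + S - 1));
    }

    int main() {
      // addi.d $a1, $a0, 100 ; ldptr.w $a2, $a1, 8   can become
      // ldptr.w $a2, $a0, 108   since 108 is a multiple of 4 and 108/4 fits 14 signed bits.
      int64_t NewOffset = 100 + 8;
      printf("foldable: %s\n", isShiftedIntNS<14, 2>(NewOffset) ? "yes" : "no");
      return 0;
    }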
diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h
index f25958a..f69a558 100644
--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h
+++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h
@@ -93,6 +93,12 @@ public:
ArrayRef<std::pair<unsigned, const char *>>
getSerializableBitmaskMachineOperandTargetFlags() const override;
+ bool canFoldIntoAddrMode(const MachineInstr &MemI, Register Reg,
+ const MachineInstr &AddrI,
+ ExtAddrMode &AM) const override;
+ MachineInstr *emitLdStWithAddr(MachineInstr &MemI,
+ const ExtAddrMode &AM) const override;
+
protected:
const LoongArchSubtarget &STI;
};
diff --git a/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp b/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp
index 9de4c9d..92a9388 100644
--- a/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp
@@ -62,6 +62,11 @@ static cl::opt<bool>
cl::desc("Enable the merge base offset pass"),
cl::init(true), cl::Hidden);
+static cl::opt<bool>
+ EnableSinkFold("loongarch-enable-sink-fold",
+ cl::desc("Enable sinking and folding of instruction copies"),
+ cl::init(true), cl::Hidden);
+
static Reloc::Model getEffectiveRelocModel(std::optional<Reloc::Model> RM) {
return RM.value_or(Reloc::Static);
}
@@ -146,7 +151,9 @@ namespace {
class LoongArchPassConfig : public TargetPassConfig {
public:
LoongArchPassConfig(LoongArchTargetMachine &TM, PassManagerBase &PM)
- : TargetPassConfig(TM, PM) {}
+ : TargetPassConfig(TM, PM) {
+ setEnableSinkAndFold(EnableSinkFold);
+ }
LoongArchTargetMachine &getLoongArchTargetMachine() const {
return getTM<LoongArchTargetMachine>();
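
Since the new cl::opt defaults to true, sink-and-fold is on by default for LoongArch; it can be toggled for debugging with the hidden flag, e.g. llc -mtriple=loongarch64 -loongarch-enable-sink-fold=false.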
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index c3f100e..995ae75 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -16496,32 +16496,42 @@ static SDValue expandMulToAddOrSubOfShl(SDNode *N, SelectionDAG &DAG,
}
static SDValue getShlAddShlAdd(SDNode *N, SelectionDAG &DAG, unsigned ShX,
- unsigned ShY) {
+ unsigned ShY, bool AddX) {
SDLoc DL(N);
EVT VT = N->getValueType(0);
SDValue X = N->getOperand(0);
SDValue Mul359 = DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
DAG.getTargetConstant(ShY, DL, VT), X);
return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Mul359,
- DAG.getTargetConstant(ShX, DL, VT), Mul359);
+ DAG.getTargetConstant(ShX, DL, VT), AddX ? X : Mul359);
}
static SDValue expandMulToShlAddShlAdd(SDNode *N, SelectionDAG &DAG,
uint64_t MulAmt) {
+ // 3/5/9 * 3/5/9 -> (shXadd (shYadd X, X), (shYadd X, X))
switch (MulAmt) {
case 5 * 3:
- return getShlAddShlAdd(N, DAG, 2, 1);
+ return getShlAddShlAdd(N, DAG, 2, 1, /*AddX=*/false);
case 9 * 3:
- return getShlAddShlAdd(N, DAG, 3, 1);
+ return getShlAddShlAdd(N, DAG, 3, 1, /*AddX=*/false);
case 5 * 5:
- return getShlAddShlAdd(N, DAG, 2, 2);
+ return getShlAddShlAdd(N, DAG, 2, 2, /*AddX=*/false);
case 9 * 5:
- return getShlAddShlAdd(N, DAG, 3, 2);
+ return getShlAddShlAdd(N, DAG, 3, 2, /*AddX=*/false);
case 9 * 9:
- return getShlAddShlAdd(N, DAG, 3, 3);
+ return getShlAddShlAdd(N, DAG, 3, 3, /*AddX=*/false);
default:
- return SDValue();
+ break;
}
+
+ // 2/4/8 * 3/5/9 + 1 -> (shXadd (shYadd X, X), X)
+ int ShX;
+ if (int ShY = isShifted359(MulAmt - 1, ShX)) {
+ assert(ShX != 0 && "MulAmt=4,6,10 handled before");
+ if (ShX <= 3)
+ return getShlAddShlAdd(N, DAG, ShX, ShY, /*AddX=*/true);
+ }
+ return SDValue();
}
// Try to expand a scalar multiply to a faster sequence.
@@ -16581,41 +16591,30 @@ static SDValue expandMul(SDNode *N, SelectionDAG &DAG,
DAG.getConstant(Shift, DL, VT));
}
- // 3/5/9 * 3/5/9 -> shXadd (shYadd X, X), (shYadd X, X)
- if (SDValue V = expandMulToShlAddShlAdd(N, DAG, MulAmt))
- return V;
+ // 3/5/9 * 3/5/9 * 2^N - In particular, this covers multiples
+ // of 25 which happen to be quite common.
+ // (2/4/8 * 3/5/9 + 1) * 2^N
+ Shift = llvm::countr_zero(MulAmt);
+ if (SDValue V = expandMulToShlAddShlAdd(N, DAG, MulAmt >> Shift)) {
+ if (Shift == 0)
+ return V;
+ SDLoc DL(N);
+ return DAG.getNode(ISD::SHL, DL, VT, V, DAG.getConstant(Shift, DL, VT));
+ }
// If this is a power 2 + 2/4/8, we can use a shift followed by a single
// shXadd. First check if this a sum of two power of 2s because that's
// easy. Then count how many zeros are up to the first bit.
- if (isPowerOf2_64(MulAmt & (MulAmt - 1))) {
- unsigned ScaleShift = llvm::countr_zero(MulAmt);
- if (ScaleShift >= 1 && ScaleShift < 4) {
- unsigned ShiftAmt = Log2_64((MulAmt & (MulAmt - 1)));
- SDLoc DL(N);
- SDValue Shift1 =
- DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(ShiftAmt, DL, VT));
- return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
- DAG.getTargetConstant(ScaleShift, DL, VT), Shift1);
- }
+ if (Shift >= 1 && Shift <= 3 && isPowerOf2_64(MulAmt & (MulAmt - 1))) {
+ unsigned ShiftAmt = llvm::countr_zero((MulAmt & (MulAmt - 1)));
+ SDLoc DL(N);
+ SDValue Shift1 =
+ DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(ShiftAmt, DL, VT));
+ return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
+ DAG.getTargetConstant(Shift, DL, VT), Shift1);
}
- // 2^(1,2,3) * 3,5,9 + 1 -> (shXadd (shYadd x, x), x)
- // This is the two instruction form, there are also three instruction
- // variants we could implement. e.g.
- // (2^(1,2,3) * 3,5,9 + 1) << C2
- // 2^(C1>3) * 3,5,9 +/- 1
- if (int ShXAmount = isShifted359(MulAmt - 1, Shift)) {
- assert(Shift != 0 && "MulAmt=4,6,10 handled before");
- if (Shift <= 3) {
- SDLoc DL(N);
- SDValue Mul359 =
- DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
- DAG.getTargetConstant(ShXAmount, DL, VT), X);
- return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Mul359,
- DAG.getTargetConstant(Shift, DL, VT), X);
- }
- }
+ // TODO: 2^(C1>3) * 3,5,9 +/- 1
// 2^n + 2/4/8 + 1 -> (add (shl X, C1), (shXadd X, X))
if (MulAmt > 2 && isPowerOf2_64((MulAmt - 1) & (MulAmt - 2))) {
@@ -16647,14 +16646,6 @@ static SDValue expandMul(SDNode *N, SelectionDAG &DAG,
return DAG.getNode(ISD::SUB, DL, VT, Shift1, Mul359);
}
}
-
- // 3/5/9 * 3/5/9 * 2^N - In particular, this covers multiples
- // of 25 which happen to be quite common.
- Shift = llvm::countr_zero(MulAmt);
- if (SDValue V = expandMulToShlAddShlAdd(N, DAG, MulAmt >> Shift)) {
- SDLoc DL(N);
- return DAG.getNode(ISD::SHL, DL, VT, V, DAG.getConstant(Shift, DL, VT));
- }
}
if (SDValue V = expandMulToAddOrSubOfShl(N, DAG, MulAmt))
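Likewise, a minimal trace of the shift-plus-shXadd pattern retained above (MulAmt = 34 = 2 + 32 picked for illustration):

    #include <bit>
    #include <cassert>
    #include <cstdint>

    int main() {
      uint64_t X = 7;
      uint64_t MulAmt = 34;                       // 0b100010 = 2 + 32
      unsigned Shift = std::countr_zero(MulAmt);  // 1, within [1, 3]
      uint64_t Rest = MulAmt & (MulAmt - 1);      // 32, a power of two
      unsigned ShiftAmt = std::countr_zero(Rest); // 5
      // shXadd(X, Shift, X << ShiftAmt) == (X << 1) + (X << 5) == 34 * X.
      assert(((X << Shift) + (X << ShiftAmt)) == MulAmt * X);
      return 0;
    }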
diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
index 636e31c..bf9de0a 100644
--- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
@@ -1583,7 +1583,10 @@ void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) {
if (!TII->isAddImmediate(*DeadMI, Reg))
continue;
LIS->RemoveMachineInstrFromMaps(*DeadMI);
+ Register AddReg = DeadMI->getOperand(1).getReg();
DeadMI->eraseFromParent();
+ if (AddReg.isVirtual())
+ LIS->shrinkToUses(&LIS->getInterval(AddReg));
}
}
}
@@ -1869,11 +1872,15 @@ void RISCVInsertVSETVLI::coalesceVSETVLIs(MachineBasicBlock &MBB) const {
// Loop over the dead AVL values, and delete them now. This has
// to be outside the above loop to avoid invalidating iterators.
for (auto *MI : ToDelete) {
+      assert(MI->getOpcode() == RISCV::ADDI && "Dead AVL value must be an ADDI");
+ Register AddReg = MI->getOperand(1).getReg();
if (LIS) {
LIS->removeInterval(MI->getOperand(0).getReg());
LIS->RemoveMachineInstrFromMaps(*MI);
}
MI->eraseFromParent();
+ if (LIS && AddReg.isVirtual())
+ LIS->shrinkToUses(&LIS->getInterval(AddReg));
}
}
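Both hunks in this file apply the same cleanup; a condensed sketch using only the LiveIntervals calls that appear in the patch (the helper name is invented):

    // Erase a dead AVL-producing ADDI and shrink the live interval of its
    // source register, which may now end earlier.
    static void eraseDeadAVL(MachineInstr &DeadMI, LiveIntervals &LIS) {
      Register SrcReg = DeadMI.getOperand(1).getReg(); // read before erasing
      LIS.RemoveMachineInstrFromMaps(DeadMI);
      DeadMI.eraseFromParent();
      if (SrcReg.isVirtual()) // physical registers have no interval to shrink
        LIS.shrinkToUses(&LIS.getInterval(SrcReg));
    }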
diff --git a/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp b/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp
index 56a38bb..b2cbdb2 100644
--- a/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp
@@ -2390,6 +2390,15 @@ static bool generateBindlessImageINTELInst(const SPIRV::IncomingCall *Call,
return buildBindlessImageINTELInst(Call, Opcode, MIRBuilder, GR);
}
+static bool generateBlockingPipesInst(const SPIRV::IncomingCall *Call,
+ MachineIRBuilder &MIRBuilder,
+ SPIRVGlobalRegistry *GR) {
+ const SPIRV::DemangledBuiltin *Builtin = Call->Builtin;
+ unsigned Opcode =
+ SPIRV::lookupNativeBuiltin(Builtin->Name, Builtin->Set)->Opcode;
+ return buildOpFromWrapper(MIRBuilder, Opcode, Call, Register(0));
+}
+
static bool
generateTernaryBitwiseFunctionINTELInst(const SPIRV::IncomingCall *Call,
MachineIRBuilder &MIRBuilder,
@@ -3050,6 +3059,8 @@ std::optional<bool> lowerBuiltin(const StringRef DemangledCall,
return generatePipeInst(Call.get(), MIRBuilder, GR);
case SPIRV::PredicatedLoadStore:
return generatePredicatedLoadStoreInst(Call.get(), MIRBuilder, GR);
+ case SPIRV::BlockingPipes:
+ return generateBlockingPipesInst(Call.get(), MIRBuilder, GR);
}
return false;
}
diff --git a/llvm/lib/Target/SPIRV/SPIRVBuiltins.td b/llvm/lib/Target/SPIRV/SPIRVBuiltins.td
index c259cce..492a98e 100644
--- a/llvm/lib/Target/SPIRV/SPIRVBuiltins.td
+++ b/llvm/lib/Target/SPIRV/SPIRVBuiltins.td
@@ -71,6 +71,7 @@ def TernaryBitwiseINTEL : BuiltinGroup;
def Block2DLoadStore : BuiltinGroup;
def Pipe : BuiltinGroup;
def PredicatedLoadStore : BuiltinGroup;
+def BlockingPipes : BuiltinGroup;
//===----------------------------------------------------------------------===//
// Class defining a demangled builtin record. The information in the record
@@ -1174,6 +1175,10 @@ defm : DemangledNativeBuiltin<"clock_read_sub_group", OpenCL_std, KernelClock, 0
defm : DemangledNativeBuiltin<"clock_read_hilo_device", OpenCL_std, KernelClock, 0, 0, OpReadClockKHR>;
defm : DemangledNativeBuiltin<"clock_read_hilo_work_group", OpenCL_std, KernelClock, 0, 0, OpReadClockKHR>;
defm : DemangledNativeBuiltin<"clock_read_hilo_sub_group", OpenCL_std, KernelClock, 0, 0, OpReadClockKHR>;
+
+// SPV_ALTERA_blocking_pipes
+defm : DemangledNativeBuiltin<"__spirv_WritePipeBlockingINTEL", OpenCL_std, BlockingPipes, 0, 0, OpWritePipeBlockingALTERA>;
+defm : DemangledNativeBuiltin<"__spirv_ReadPipeBlockingINTEL", OpenCL_std, BlockingPipes, 0, 0, OpReadPipeBlockingALTERA>;
defm : DemangledNativeBuiltin<"__spirv_ReadClockKHR", OpenCL_std, KernelClock, 1, 1, OpReadClockKHR>;
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp b/llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp
index 43b2869..f681b0d 100644
--- a/llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp
@@ -159,7 +159,9 @@ static const std::map<std::string, SPIRV::Extension::Extension, std::less<>>
{"SPV_KHR_maximal_reconvergence",
SPIRV::Extension::Extension::SPV_KHR_maximal_reconvergence},
{"SPV_INTEL_kernel_attributes",
- SPIRV::Extension::Extension::SPV_INTEL_kernel_attributes}};
+ SPIRV::Extension::Extension::SPV_INTEL_kernel_attributes},
+ {"SPV_ALTERA_blocking_pipes",
+ SPIRV::Extension::Extension::SPV_ALTERA_blocking_pipes}};
bool SPIRVExtensionsParser::parse(cl::Option &O, StringRef ArgName,
StringRef ArgValue,
diff --git a/llvm/lib/Target/SPIRV/SPIRVInstrInfo.td b/llvm/lib/Target/SPIRV/SPIRVInstrInfo.td
index a61351e..03bd61b 100644
--- a/llvm/lib/Target/SPIRV/SPIRVInstrInfo.td
+++ b/llvm/lib/Target/SPIRV/SPIRVInstrInfo.td
@@ -993,3 +993,9 @@ def OpPredicatedLoadINTEL: Op<6528, (outs ID:$res), (ins TYPE:$resType, ID:$ptr,
"$res = OpPredicatedLoadINTEL $resType $ptr $predicate $default_value">;
def OpPredicatedStoreINTEL: Op<6529, (outs), (ins ID:$ptr, ID:$object, ID:$predicate, variable_ops),
"OpPredicatedStoreINTEL $ptr $object $predicate">;
+
+// SPV_ALTERA_blocking_pipes
+def OpReadPipeBlockingALTERA : Op<5946, (outs), (ins ID:$pipe, ID:$pointer, ID:$packetSize, ID:$packetAlignment),
+                  "OpReadPipeBlockingALTERA $pipe $pointer $packetSize $packetAlignment">;
+def OpWritePipeBlockingALTERA : Op<5947, (outs), (ins ID:$pipe, ID:$pointer, ID:$packetSize, ID:$packetAlignment),
+                  "OpWritePipeBlockingALTERA $pipe $pointer $packetSize $packetAlignment">;
diff --git a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp
index e5ac76c4..af76016 100644
--- a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp
@@ -1885,6 +1885,13 @@ void addInstrRequirements(const MachineInstr &MI,
Reqs.addCapability(
SPIRV::Capability::CooperativeMatrixCheckedInstructionsINTEL);
break;
+ case SPIRV::OpReadPipeBlockingALTERA:
+ case SPIRV::OpWritePipeBlockingALTERA:
+ if (ST.canUseExtension(SPIRV::Extension::SPV_ALTERA_blocking_pipes)) {
+ Reqs.addExtension(SPIRV::Extension::SPV_ALTERA_blocking_pipes);
+ Reqs.addCapability(SPIRV::Capability::BlockingPipesALTERA);
+ }
+ break;
case SPIRV::OpCooperativeMatrixGetElementCoordINTEL:
if (!ST.canUseExtension(SPIRV::Extension::SPV_INTEL_joint_matrix))
report_fatal_error("OpCooperativeMatrixGetElementCoordINTEL requires the "
diff --git a/llvm/lib/Target/SPIRV/SPIRVPrepareFunctions.cpp b/llvm/lib/Target/SPIRV/SPIRVPrepareFunctions.cpp
index 4e4e6fb..be88f33 100644
--- a/llvm/lib/Target/SPIRV/SPIRVPrepareFunctions.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVPrepareFunctions.cpp
@@ -56,6 +56,13 @@ public:
}
};
+static cl::list<std::string> SPVAllowUnknownIntrinsics(
+    "spv-allow-unknown-intrinsics", cl::CommaSeparated,
+    cl::desc("Emit unknown intrinsics as calls to external functions. Takes a "
+             "comma-separated list of intrinsic name prefixes; only "
+             "intrinsics matching a listed prefix are emitted this way."),
+    cl::value_desc("intrinsic_prefix_0,intrinsic_prefix_1"), cl::ValueOptional);
} // namespace
char SPIRVPrepareFunctions::ID = 0;
@@ -445,6 +452,15 @@ bool SPIRVPrepareFunctions::substituteIntrinsicCalls(Function *F) {
EraseFromParent);
Changed = true;
break;
+ default:
+ if (TM.getTargetTriple().getVendor() == Triple::AMD ||
+ any_of(SPVAllowUnknownIntrinsics, [II](auto &&Prefix) {
+ if (Prefix.empty())
+ return false;
+ return II->getCalledFunction()->getName().starts_with(Prefix);
+ }))
+ Changed |= lowerIntrinsicToFunction(II);
+ break;
}
}
}
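For reference, a hypothetical invocation of the new escape hatch (the intrinsic prefix is invented; on AMD triples the lowering applies unconditionally):

    llc -mtriple=spirv64-unknown-unknown \
        -spv-allow-unknown-intrinsics=llvm.frobnicate. input.ll -o -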
diff --git a/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td b/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td
index 1b4b29b..65a8885 100644
--- a/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td
+++ b/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td
@@ -309,7 +309,7 @@ defm SPV_KHR_shader_clock : ExtensionOperand<54, [EnvVulkan, EnvOpenCL]>;
defm SPV_INTEL_unstructured_loop_controls : ExtensionOperand<55, [EnvOpenCL]>;
defm SPV_EXT_demote_to_helper_invocation : ExtensionOperand<56, [EnvVulkan]>;
defm SPV_INTEL_fpga_reg : ExtensionOperand<57, [EnvOpenCL]>;
-defm SPV_INTEL_blocking_pipes : ExtensionOperand<58, [EnvOpenCL]>;
+defm SPV_ALTERA_blocking_pipes : ExtensionOperand<58, [EnvOpenCL]>;
defm SPV_GOOGLE_user_type : ExtensionOperand<59, [EnvVulkan]>;
defm SPV_KHR_physical_storage_buffer : ExtensionOperand<60, [EnvVulkan]>;
defm SPV_INTEL_kernel_attributes : ExtensionOperand<61, [EnvOpenCL]>;
@@ -611,6 +611,7 @@ defm TensorFloat32RoundingINTEL : CapabilityOperand<6425, 0, 0, [SPV_INTEL_tenso
defm BFloat16TypeKHR : CapabilityOperand<5116, 0, 0, [SPV_KHR_bfloat16], []>;
defm BFloat16DotProductKHR : CapabilityOperand<5117, 0, 0, [SPV_KHR_bfloat16], [BFloat16TypeKHR]>;
defm BFloat16CooperativeMatrixKHR : CapabilityOperand<5118, 0, 0, [SPV_KHR_bfloat16], [BFloat16TypeKHR, CooperativeMatrixKHR]>;
+defm BlockingPipesALTERA : CapabilityOperand<5945, 0, 0, [SPV_ALTERA_blocking_pipes], []>;
//===----------------------------------------------------------------------===//
// Multiclass used to define SourceLanguage enum values and at the same time
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index af32298..fc6c290 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -216,7 +216,8 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
// Combine fp_to_{s,u}int_sat or fp_round of concat_vectors or vice versa
// into conversion ops
setTargetDAGCombine({ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT,
- ISD::FP_ROUND, ISD::CONCAT_VECTORS});
+ ISD::FP_TO_SINT, ISD::FP_TO_UINT, ISD::FP_ROUND,
+ ISD::CONCAT_VECTORS});
setTargetDAGCombine(ISD::TRUNCATE);
@@ -3580,6 +3581,64 @@ static SDValue performMulCombine(SDNode *N,
}
}
+// Widen In by concatenating it with poison until it has at least
+// RequiredNumElems elements.
+static SDValue DoubleVectorWidth(SDValue In, unsigned RequiredNumElems,
+                                 SelectionDAG &DAG) {
+ SDLoc DL(In);
+ LLVMContext &Ctx = *DAG.getContext();
+ EVT InVT = In.getValueType();
+ unsigned NumElems = InVT.getVectorNumElements() * 2;
+ EVT OutVT = EVT::getVectorVT(Ctx, InVT.getVectorElementType(), NumElems);
+ SDValue Concat =
+ DAG.getNode(ISD::CONCAT_VECTORS, DL, OutVT, In, DAG.getPOISON(InVT));
+ if (NumElems < RequiredNumElems) {
+ return DoubleVectorWidth(Concat, RequiredNumElems, DAG);
+ }
+ return Concat;
+}
+
+// Combine a vector fp_to_sint/fp_to_uint of f32 into a conversion at i32,
+// a mask of the low result bits, and a narrow to the i8/i16 result type.
+static SDValue performConvertFPCombine(SDNode *N, SelectionDAG &DAG) {
+ EVT OutVT = N->getValueType(0);
+ if (!OutVT.isVector())
+ return SDValue();
+
+ EVT OutElTy = OutVT.getVectorElementType();
+ if (OutElTy != MVT::i8 && OutElTy != MVT::i16)
+ return SDValue();
+
+ unsigned NumElems = OutVT.getVectorNumElements();
+ if (!isPowerOf2_32(NumElems))
+ return SDValue();
+
+  EVT FPVT = N->getOperand(0).getValueType();
+ if (FPVT.getVectorElementType() != MVT::f32)
+ return SDValue();
+
+ SDLoc DL(N);
+
+ // First, convert to i32.
+ LLVMContext &Ctx = *DAG.getContext();
+ EVT IntVT = EVT::getVectorVT(Ctx, MVT::i32, NumElems);
+ SDValue ToInt = DAG.getNode(N->getOpcode(), DL, IntVT, N->getOperand(0));
+ APInt Mask = APInt::getLowBitsSet(IntVT.getScalarSizeInBits(),
+ OutVT.getScalarSizeInBits());
+  // Mask off the upper bits, keeping only the output element width.
+ SDValue Masked =
+ DAG.getNode(ISD::AND, DL, IntVT, ToInt, DAG.getConstant(Mask, DL, IntVT));
+
+ if (OutVT.getSizeInBits() < 128) {
+ // Create a wide enough vector that we can use narrow.
+ EVT NarrowedVT = OutElTy == MVT::i8 ? MVT::v16i8 : MVT::v8i16;
+ unsigned NumRequiredElems = NarrowedVT.getVectorNumElements();
+ SDValue WideVector = DoubleVectorWidth(Masked, NumRequiredElems, DAG);
+ SDValue Trunc = truncateVectorWithNARROW(NarrowedVT, WideVector, DL, DAG);
+ return DAG.getBitcast(
+ OutVT, extractSubVector(Trunc, 0, DAG, DL, OutVT.getSizeInBits()));
+  }
+  return truncateVectorWithNARROW(OutVT, Masked, DL, DAG);
+}
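A hedged worked trace of the combine above (types chosen for illustration): fptoui <2 x float> to <2 x i8> is first converted at i32 to give a <2 x i32>, then ANDed with 255 to clear the upper bits; because 2 x 8 bits is narrower than 128, the masked value is concatenated with poison (v2i32 -> v4i32 -> v8i32 -> v16i32) until it matches v16i8's element count, truncated with truncateVectorWithNARROW, and finally the low <2 x i8> subvector is extracted and bitcast to the result type.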
+
SDValue
WebAssemblyTargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
@@ -3606,6 +3665,9 @@ WebAssemblyTargetLowering::PerformDAGCombine(SDNode *N,
case ISD::FP_ROUND:
case ISD::CONCAT_VECTORS:
return performVectorTruncZeroCombine(N, DCI);
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT:
+ return performConvertFPCombine(N, DCI.DAG);
case ISD::TRUNCATE:
return performTruncateCombine(N, DCI);
case ISD::INTRINSIC_WO_CHAIN:
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 4d44227b3..168e041 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -53442,7 +53442,8 @@ static SDValue narrowBitOpRMW(StoreSDNode *St, const SDLoc &DL,
}
SDValue NewStore =
- DAG.getStore(St->getChain(), DL, Res, NewPtr, St->getPointerInfo(),
+ DAG.getStore(St->getChain(), DL, Res, NewPtr,
+ MachinePointerInfo(St->getPointerInfo().getAddrSpace()),
Align(), St->getMemOperand()->getFlags());
// If there are other uses of StoredVal, replace with a new load of the
@@ -54639,7 +54640,8 @@ static SDValue combineTruncate(SDNode *N, SelectionDAG &DAG,
SDValue NewPtr = DAG.getMemBasePlusOffset(
Ld->getBasePtr(), PtrByteOfs, DL, SDNodeFlags::NoUnsignedWrap);
SDValue NewLoad =
- DAG.getLoad(VT, DL, Ld->getChain(), NewPtr, Ld->getPointerInfo(),
+ DAG.getLoad(VT, DL, Ld->getChain(), NewPtr,
+ MachinePointerInfo(Ld->getPointerInfo().getAddrSpace()),
Align(), Ld->getMemOperand()->getFlags());
DAG.makeEquivalentMemoryOrdering(Ld, NewLoad);
return NewLoad;
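Both X86 hunks follow the same reasoning: the rewritten access uses a pointer offset from the original one, so reusing the original MachinePointerInfo would describe the wrong location; rebuilding it from just the address space is the conservative fix.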
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index e5c3f17..906fa2f 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -7550,13 +7550,12 @@ VPRecipeBuilder::tryToWidenMemory(Instruction *I, ArrayRef<VPValue *> Operands,
}
if (LoadInst *Load = dyn_cast<LoadInst>(I))
return new VPWidenLoadRecipe(*Load, Ptr, Mask, Consecutive, Reverse,
- Load->getAlign(), VPIRMetadata(*Load, LVer),
- I->getDebugLoc());
+ VPIRMetadata(*Load, LVer), I->getDebugLoc());
StoreInst *Store = cast<StoreInst>(I);
return new VPWidenStoreRecipe(*Store, Ptr, Operands[0], Mask, Consecutive,
- Reverse, Store->getAlign(),
- VPIRMetadata(*Store, LVer), I->getDebugLoc());
+ Reverse, VPIRMetadata(*Store, LVer),
+ I->getDebugLoc());
}
/// Creates a VPWidenIntOrFpInductionRecpipe for \p Phi. If needed, it will also
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 22ea083..3062e1c 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -1163,10 +1163,10 @@ public:
bool opcodeMayReadOrWriteFromMemory() const;
/// Returns true if the recipe only uses the first lane of operand \p Op.
- bool onlyFirstLaneUsed(const VPValue *Op) const override;
+ bool usesFirstLaneOnly(const VPValue *Op) const override;
/// Returns true if the recipe only uses the first part of operand \p Op.
- bool onlyFirstPartUsed(const VPValue *Op) const override;
+ bool usesFirstPartOnly(const VPValue *Op) const override;
/// Returns true if this VPInstruction produces a scalar value from a vector,
/// e.g. by performing a reduction or extracting a lane.
@@ -1393,13 +1393,13 @@ public:
return true;
}
- bool onlyFirstPartUsed(const VPValue *Op) const override {
+ bool usesFirstPartOnly(const VPValue *Op) const override {
assert(is_contained(operands(), Op) &&
"Op must be an operand of the recipe");
return true;
}
- bool onlyFirstLaneUsed(const VPValue *Op) const override {
+ bool usesFirstLaneOnly(const VPValue *Op) const override {
assert(is_contained(operands(), Op) &&
"Op must be an operand of the recipe");
return true;
@@ -1628,7 +1628,7 @@ public:
VPSlotTracker &SlotTracker) const override;
#endif
- bool onlyFirstLaneUsed(const VPValue *Op) const override;
+ bool usesFirstLaneOnly(const VPValue *Op) const override;
};
/// A recipe for widening Call instructions using library calls.
@@ -1767,7 +1767,7 @@ struct LLVM_ABI_FOR_TEST VPWidenSelectRecipe : public VPRecipeWithIRFlags,
}
/// Returns true if the recipe only uses the first lane of operand \p Op.
- bool onlyFirstLaneUsed(const VPValue *Op) const override {
+ bool usesFirstLaneOnly(const VPValue *Op) const override {
assert(is_contained(operands(), Op) &&
"Op must be an operand of the recipe");
return Op == getCond() && isInvariantCond();
@@ -1833,7 +1833,7 @@ public:
#endif
/// Returns true if the recipe only uses the first lane of operand \p Op.
- bool onlyFirstLaneUsed(const VPValue *Op) const override {
+ bool usesFirstLaneOnly(const VPValue *Op) const override {
assert(is_contained(operands(), Op) &&
"Op must be an operand of the recipe");
if (Op == getOperand(0))
@@ -1870,7 +1870,7 @@ public:
void execute(VPTransformState &State) override;
- bool onlyFirstLaneUsed(const VPValue *Op) const override {
+ bool usesFirstLaneOnly(const VPValue *Op) const override {
assert(is_contained(operands(), Op) &&
"Op must be an operand of the recipe");
return true;
@@ -1884,7 +1884,7 @@ public:
}
/// Returns true if the recipe only uses the first part of operand \p Op.
- bool onlyFirstPartUsed(const VPValue *Op) const override {
+ bool usesFirstPartOnly(const VPValue *Op) const override {
assert(is_contained(operands(), Op) &&
"Op must be an operand of the recipe");
assert(getNumOperands() <= 2 && "must have at most two operands");
@@ -1922,14 +1922,14 @@ public:
Type *getSourceElementType() const { return SourceElementTy; }
- bool onlyFirstLaneUsed(const VPValue *Op) const override {
+ bool usesFirstLaneOnly(const VPValue *Op) const override {
assert(is_contained(operands(), Op) &&
"Op must be an operand of the recipe");
return true;
}
/// Returns true if the recipe only uses the first part of operand \p Op.
- bool onlyFirstPartUsed(const VPValue *Op) const override {
+ bool usesFirstPartOnly(const VPValue *Op) const override {
assert(is_contained(operands(), Op) &&
"Op must be an operand of the recipe");
assert(getNumOperands() <= 2 && "must have at most two operands");
@@ -2110,7 +2110,7 @@ public:
}
/// Returns true if the recipe only uses the first lane of operand \p Op.
- bool onlyFirstLaneUsed(const VPValue *Op) const override {
+ bool usesFirstLaneOnly(const VPValue *Op) const override {
assert(is_contained(operands(), Op) &&
"Op must be an operand of the recipe");
// The recipe creates its own wide start value, so it only requests the
@@ -2325,7 +2325,7 @@ struct VPFirstOrderRecurrencePHIRecipe : public VPHeaderPHIRecipe {
#endif
/// Returns true if the recipe only uses the first lane of operand \p Op.
- bool onlyFirstLaneUsed(const VPValue *Op) const override {
+ bool usesFirstLaneOnly(const VPValue *Op) const override {
assert(is_contained(operands(), Op) &&
"Op must be an operand of the recipe");
return Op == getStartValue();
@@ -2399,7 +2399,7 @@ public:
bool isInLoop() const { return IsInLoop; }
/// Returns true if the recipe only uses the first lane of operand \p Op.
- bool onlyFirstLaneUsed(const VPValue *Op) const override {
+ bool usesFirstLaneOnly(const VPValue *Op) const override {
assert(is_contained(operands(), Op) &&
"Op must be an operand of the recipe");
return isOrdered() || isInLoop();
@@ -2468,13 +2468,13 @@ public:
#endif
/// Returns true if the recipe only uses the first lane of operand \p Op.
- bool onlyFirstLaneUsed(const VPValue *Op) const override {
+ bool usesFirstLaneOnly(const VPValue *Op) const override {
assert(is_contained(operands(), Op) &&
"Op must be an operand of the recipe");
     // Recursing through Blend recipes only; must terminate at header phis at
     // the latest.
return all_of(users(),
- [this](VPUser *U) { return U->onlyFirstLaneUsed(this); });
+ [this](VPUser *U) { return U->usesFirstLaneOnly(this); });
}
};
@@ -2562,7 +2562,7 @@ public:
VPCostContext &Ctx) const override;
/// Returns true if the recipe only uses the first lane of operand \p Op.
- bool onlyFirstLaneUsed(const VPValue *Op) const override = 0;
+ bool usesFirstLaneOnly(const VPValue *Op) const override = 0;
/// Returns the number of stored operands of this interleave group. Returns 0
/// for load interleave groups.
@@ -2608,7 +2608,7 @@ public:
VPSlotTracker &SlotTracker) const override;
#endif
- bool onlyFirstLaneUsed(const VPValue *Op) const override {
+ bool usesFirstLaneOnly(const VPValue *Op) const override {
assert(is_contained(operands(), Op) &&
"Op must be an operand of the recipe");
return Op == getAddr() && !llvm::is_contained(getStoredValues(), Op);
@@ -2656,7 +2656,7 @@ public:
#endif
/// The recipe only uses the first lane of the address, and EVL operand.
- bool onlyFirstLaneUsed(const VPValue *Op) const override {
+ bool usesFirstLaneOnly(const VPValue *Op) const override {
assert(is_contained(operands(), Op) &&
"Op must be an operand of the recipe");
return (Op == getAddr() && !llvm::is_contained(getStoredValues(), Op)) ||
@@ -2862,7 +2862,7 @@ public:
VPValue *getEVL() const { return getOperand(2); }
/// Returns true if the recipe only uses the first lane of operand \p Op.
- bool onlyFirstLaneUsed(const VPValue *Op) const override {
+ bool usesFirstLaneOnly(const VPValue *Op) const override {
assert(is_contained(operands(), Op) &&
"Op must be an operand of the recipe");
return Op == getEVL();
@@ -2924,7 +2924,7 @@ public:
bool isPredicated() const { return IsPredicated; }
/// Returns true if the recipe only uses the first lane of operand \p Op.
- bool onlyFirstLaneUsed(const VPValue *Op) const override {
+ bool usesFirstLaneOnly(const VPValue *Op) const override {
assert(is_contained(operands(), Op) &&
"Op must be an operand of the recipe");
return isSingleScalar();
@@ -3206,14 +3206,14 @@ protected:
VPWidenMemoryRecipe(const char unsigned SC, Instruction &I,
std::initializer_list<VPValue *> Operands,
- bool Consecutive, bool Reverse, Align Alignment,
+ bool Consecutive, bool Reverse,
const VPIRMetadata &Metadata, DebugLoc DL)
: VPRecipeBase(SC, Operands, DL), VPIRMetadata(Metadata), Ingredient(I),
- Alignment(Alignment), Consecutive(Consecutive), Reverse(Reverse) {
+ Alignment(getLoadStoreAlignment(&I)), Consecutive(Consecutive),
+ Reverse(Reverse) {
assert((Consecutive || !Reverse) && "Reverse implies consecutive");
- assert(isa<VPVectorEndPointerRecipe>(getAddr()) ||
- !Reverse &&
- "Reversed acccess without VPVectorEndPointerRecipe address?");
+    assert((isa<VPVectorEndPointerRecipe>(getAddr()) || !Reverse) &&
+           "Reversed access without VPVectorEndPointerRecipe address?");
}
public:
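The alignment parameter disappears from these constructors because it is now re-derived from the IR ingredient; a sketch of what llvm::getLoadStoreAlignment does in the load/store case (condensed, not the verbatim implementation):

    static Align getLoadStoreAlignmentSketch(const Instruction *I) {
      if (auto *LI = dyn_cast<LoadInst>(I))
        return LI->getAlign();
      if (auto *SI = dyn_cast<StoreInst>(I))
        return SI->getAlign();
      llvm_unreachable("Expected a load or store instruction");
    }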
@@ -3273,18 +3273,18 @@ public:
struct LLVM_ABI_FOR_TEST VPWidenLoadRecipe final : public VPWidenMemoryRecipe,
public VPValue {
VPWidenLoadRecipe(LoadInst &Load, VPValue *Addr, VPValue *Mask,
- bool Consecutive, bool Reverse, Align Alignment,
+ bool Consecutive, bool Reverse,
const VPIRMetadata &Metadata, DebugLoc DL)
: VPWidenMemoryRecipe(VPDef::VPWidenLoadSC, Load, {Addr}, Consecutive,
- Reverse, Alignment, Metadata, DL),
+ Reverse, Metadata, DL),
VPValue(this, &Load) {
setMask(Mask);
}
VPWidenLoadRecipe *clone() override {
return new VPWidenLoadRecipe(cast<LoadInst>(Ingredient), getAddr(),
- getMask(), Consecutive, Reverse, getAlign(),
- *this, getDebugLoc());
+ getMask(), Consecutive, Reverse, *this,
+ getDebugLoc());
}
VP_CLASSOF_IMPL(VPDef::VPWidenLoadSC);
@@ -3299,7 +3299,7 @@ struct LLVM_ABI_FOR_TEST VPWidenLoadRecipe final : public VPWidenMemoryRecipe,
#endif
/// Returns true if the recipe only uses the first lane of operand \p Op.
- bool onlyFirstLaneUsed(const VPValue *Op) const override {
+ bool usesFirstLaneOnly(const VPValue *Op) const override {
assert(is_contained(operands(), Op) &&
"Op must be an operand of the recipe");
     // Widened, consecutive load operations only demand the first lane of
@@ -3315,8 +3315,8 @@ struct VPWidenLoadEVLRecipe final : public VPWidenMemoryRecipe, public VPValue {
VPWidenLoadEVLRecipe(VPWidenLoadRecipe &L, VPValue *Addr, VPValue &EVL,
VPValue *Mask)
: VPWidenMemoryRecipe(VPDef::VPWidenLoadEVLSC, L.getIngredient(),
- {Addr, &EVL}, L.isConsecutive(), L.isReverse(),
- L.getAlign(), L, L.getDebugLoc()),
+ {Addr, &EVL}, L.isConsecutive(), L.isReverse(), L,
+ L.getDebugLoc()),
VPValue(this, &getIngredient()) {
setMask(Mask);
}
@@ -3340,7 +3340,7 @@ struct VPWidenLoadEVLRecipe final : public VPWidenMemoryRecipe, public VPValue {
#endif
/// Returns true if the recipe only uses the first lane of operand \p Op.
- bool onlyFirstLaneUsed(const VPValue *Op) const override {
+ bool usesFirstLaneOnly(const VPValue *Op) const override {
assert(is_contained(operands(), Op) &&
"Op must be an operand of the recipe");
// Widened loads only demand the first lane of EVL and consecutive loads
@@ -3354,16 +3354,16 @@ struct VPWidenLoadEVLRecipe final : public VPWidenMemoryRecipe, public VPValue {
struct LLVM_ABI_FOR_TEST VPWidenStoreRecipe final : public VPWidenMemoryRecipe {
VPWidenStoreRecipe(StoreInst &Store, VPValue *Addr, VPValue *StoredVal,
VPValue *Mask, bool Consecutive, bool Reverse,
- Align Alignment, const VPIRMetadata &Metadata, DebugLoc DL)
+ const VPIRMetadata &Metadata, DebugLoc DL)
: VPWidenMemoryRecipe(VPDef::VPWidenStoreSC, Store, {Addr, StoredVal},
- Consecutive, Reverse, Alignment, Metadata, DL) {
+ Consecutive, Reverse, Metadata, DL) {
setMask(Mask);
}
VPWidenStoreRecipe *clone() override {
return new VPWidenStoreRecipe(cast<StoreInst>(Ingredient), getAddr(),
getStoredValue(), getMask(), Consecutive,
- Reverse, getAlign(), *this, getDebugLoc());
+ Reverse, *this, getDebugLoc());
}
VP_CLASSOF_IMPL(VPDef::VPWidenStoreSC);
@@ -3381,7 +3381,7 @@ struct LLVM_ABI_FOR_TEST VPWidenStoreRecipe final : public VPWidenMemoryRecipe {
#endif
/// Returns true if the recipe only uses the first lane of operand \p Op.
- bool onlyFirstLaneUsed(const VPValue *Op) const override {
+ bool usesFirstLaneOnly(const VPValue *Op) const override {
assert(is_contained(operands(), Op) &&
"Op must be an operand of the recipe");
// Widened, consecutive stores only demand the first lane of their address,
@@ -3398,7 +3398,7 @@ struct VPWidenStoreEVLRecipe final : public VPWidenMemoryRecipe {
VPValue *Mask)
: VPWidenMemoryRecipe(VPDef::VPWidenStoreEVLSC, S.getIngredient(),
{Addr, S.getStoredValue(), &EVL}, S.isConsecutive(),
- S.isReverse(), S.getAlign(), S, S.getDebugLoc()) {
+ S.isReverse(), S, S.getDebugLoc()) {
setMask(Mask);
}
@@ -3424,7 +3424,7 @@ struct VPWidenStoreEVLRecipe final : public VPWidenMemoryRecipe {
#endif
/// Returns true if the recipe only uses the first lane of operand \p Op.
- bool onlyFirstLaneUsed(const VPValue *Op) const override {
+ bool usesFirstLaneOnly(const VPValue *Op) const override {
assert(is_contained(operands(), Op) &&
"Op must be an operand of the recipe");
if (Op == getEVL()) {
@@ -3508,14 +3508,14 @@ public:
}
/// Returns true if the recipe only uses the first lane of operand \p Op.
- bool onlyFirstLaneUsed(const VPValue *Op) const override {
+ bool usesFirstLaneOnly(const VPValue *Op) const override {
assert(is_contained(operands(), Op) &&
"Op must be an operand of the recipe");
return true;
}
/// Returns true if the recipe only uses the first part of operand \p Op.
- bool onlyFirstPartUsed(const VPValue *Op) const override {
+ bool usesFirstPartOnly(const VPValue *Op) const override {
assert(is_contained(operands(), Op) &&
"Op must be an operand of the recipe");
return true;
@@ -3590,7 +3590,7 @@ public:
}
/// Returns true if the recipe only uses the first lane of operand \p Op.
- bool onlyFirstLaneUsed(const VPValue *Op) const override {
+ bool usesFirstLaneOnly(const VPValue *Op) const override {
assert(is_contained(operands(), Op) &&
"Op must be an operand of the recipe");
return true;
@@ -3700,7 +3700,7 @@ public:
VPValue *getStepValue() const { return getOperand(2); }
/// Returns true if the recipe only uses the first lane of operand \p Op.
- bool onlyFirstLaneUsed(const VPValue *Op) const override {
+ bool usesFirstLaneOnly(const VPValue *Op) const override {
assert(is_contained(operands(), Op) &&
"Op must be an operand of the recipe");
return true;
@@ -3765,7 +3765,7 @@ public:
VPValue *getStepValue() const { return getOperand(1); }
/// Returns true if the recipe only uses the first lane of operand \p Op.
- bool onlyFirstLaneUsed(const VPValue *Op) const override {
+ bool usesFirstLaneOnly(const VPValue *Op) const override {
assert(is_contained(operands(), Op) &&
"Op must be an operand of the recipe");
return true;
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index f792d0a..80cd112 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -1276,7 +1276,7 @@ bool VPInstruction::opcodeMayReadOrWriteFromMemory() const {
}
}
-bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const {
+bool VPInstruction::usesFirstLaneOnly(const VPValue *Op) const {
assert(is_contained(operands(), Op) && "Op must be an operand of the recipe");
if (Instruction::isBinaryOp(getOpcode()) || Instruction::isCast(getOpcode()))
return vputils::onlyFirstLaneUsed(this);
@@ -1325,7 +1325,7 @@ bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const {
llvm_unreachable("switch should return");
}
-bool VPInstruction::onlyFirstPartUsed(const VPValue *Op) const {
+bool VPInstruction::usesFirstPartOnly(const VPValue *Op) const {
assert(is_contained(operands(), Op) && "Op must be an operand of the recipe");
if (Instruction::isBinaryOp(getOpcode()))
return vputils::onlyFirstPartUsed(this);
@@ -1692,7 +1692,7 @@ void VPWidenCallRecipe::execute(VPTransformState &State) {
if (!VFTy->getParamType(I.index())->isVectorTy())
Arg = State.get(I.value(), VPLane(0));
else
- Arg = State.get(I.value(), onlyFirstLaneUsed(I.value()));
+ Arg = State.get(I.value(), usesFirstLaneOnly(I.value()));
Args.push_back(Arg);
}
@@ -1761,7 +1761,7 @@ void VPWidenIntrinsicRecipe::execute(VPTransformState &State) {
State.TTI))
Arg = State.get(I.value(), VPLane(0));
else
- Arg = State.get(I.value(), onlyFirstLaneUsed(I.value()));
+ Arg = State.get(I.value(), usesFirstLaneOnly(I.value()));
if (isVectorIntrinsicWithOverloadTypeAtArg(VectorIntrinsicID, I.index(),
State.TTI))
TysForDecl.push_back(Arg->getType());
@@ -1843,7 +1843,7 @@ StringRef VPWidenIntrinsicRecipe::getIntrinsicName() const {
return Intrinsic::getBaseName(VectorIntrinsicID);
}
-bool VPWidenIntrinsicRecipe::onlyFirstLaneUsed(const VPValue *Op) const {
+bool VPWidenIntrinsicRecipe::usesFirstLaneOnly(const VPValue *Op) const {
assert(is_contained(operands(), Op) && "Op must be an operand of the recipe");
return all_of(enumerate(operands()), [this, &Op](const auto &X) {
auto [Idx, V] = X;
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 8ad772f..48bd697 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -91,14 +91,13 @@ bool VPlanTransforms::tryToConvertVPInstructionsToVPRecipes(
if (LoadInst *Load = dyn_cast<LoadInst>(Inst)) {
NewRecipe = new VPWidenLoadRecipe(
*Load, Ingredient.getOperand(0), nullptr /*Mask*/,
- false /*Consecutive*/, false /*Reverse*/, Load->getAlign(),
- VPIRMetadata(*Load), Ingredient.getDebugLoc());
+ false /*Consecutive*/, false /*Reverse*/, VPIRMetadata(*Load),
+ Ingredient.getDebugLoc());
} else if (StoreInst *Store = dyn_cast<StoreInst>(Inst)) {
NewRecipe = new VPWidenStoreRecipe(
*Store, Ingredient.getOperand(1), Ingredient.getOperand(0),
nullptr /*Mask*/, false /*Consecutive*/, false /*Reverse*/,
- Store->getAlign(), VPIRMetadata(*Store),
- Ingredient.getDebugLoc());
+ VPIRMetadata(*Store), Ingredient.getDebugLoc());
} else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Inst)) {
NewRecipe = new VPWidenGEPRecipe(GEP, Ingredient.operands());
} else if (CallInst *CI = dyn_cast<CallInst>(Inst)) {
@@ -205,7 +204,7 @@ static bool sinkScalarOperands(VPlan &Plan) {
return cast<VPRecipeBase>(U)->getParent() != SinkTo;
});
if (any_of(UsersOutsideSinkTo, [SinkCandidate](VPUser *U) {
- return !U->onlyFirstLaneUsed(SinkCandidate);
+ return !U->usesFirstLaneOnly(SinkCandidate);
}))
continue;
bool NeedsDuplicating = !UsersOutsideSinkTo.empty();
@@ -4208,7 +4207,7 @@ narrowInterleaveGroupOp(VPValue *V, SmallPtrSetImpl<VPValue *> &NarrowedOps) {
auto *LI = cast<LoadInst>(LoadGroup->getInterleaveGroup()->getInsertPos());
auto *L = new VPWidenLoadRecipe(
*LI, LoadGroup->getAddr(), LoadGroup->getMask(), /*Consecutive=*/true,
- /*Reverse=*/false, LI->getAlign(), {}, LoadGroup->getDebugLoc());
+ /*Reverse=*/false, {}, LoadGroup->getDebugLoc());
L->insertBefore(LoadGroup);
NarrowedOps.insert(L);
return L;
@@ -4345,7 +4344,7 @@ void VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, ElementCount VF,
cast<StoreInst>(StoreGroup->getInterleaveGroup()->getInsertPos());
auto *S = new VPWidenStoreRecipe(
*SI, StoreGroup->getAddr(), Res, nullptr, /*Consecutive=*/true,
- /*Reverse=*/false, SI->getAlign(), {}, StoreGroup->getDebugLoc());
+ /*Reverse=*/false, {}, StoreGroup->getDebugLoc());
S->insertBefore(StoreGroup);
StoreGroup->eraseFromParent();
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
index d6a0028..d4b8b72b 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
@@ -582,7 +582,7 @@ void VPlanTransforms::replicateByVF(VPlan &Plan, ElementCount VF) {
/// Users that only demand the first lane can use the definition for lane
/// 0.
DefR->replaceUsesWithIf(LaneDefs[0], [DefR](VPUser &U, unsigned) {
- return U.onlyFirstLaneUsed(DefR);
+ return U.usesFirstLaneOnly(DefR);
});
// Update each build vector user that currently has DefR as its only
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
index c6380d3..e22c5df 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
@@ -18,12 +18,12 @@ using namespace llvm::VPlanPatternMatch;
bool vputils::onlyFirstLaneUsed(const VPValue *Def) {
return all_of(Def->users(),
- [Def](const VPUser *U) { return U->onlyFirstLaneUsed(Def); });
+ [Def](const VPUser *U) { return U->usesFirstLaneOnly(Def); });
}
bool vputils::onlyFirstPartUsed(const VPValue *Def) {
return all_of(Def->users(),
- [Def](const VPUser *U) { return U->onlyFirstPartUsed(Def); });
+ [Def](const VPUser *U) { return U->usesFirstPartOnly(Def); });
}
bool vputils::onlyScalarValuesUsed(const VPValue *Def) {
diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h
index 83e3fca..5da7463 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanValue.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h
@@ -274,12 +274,12 @@ public:
virtual bool usesScalars(const VPValue *Op) const {
assert(is_contained(operands(), Op) &&
"Op must be an operand of the recipe");
- return onlyFirstLaneUsed(Op);
+ return usesFirstLaneOnly(Op);
}
/// Returns true if the VPUser only uses the first lane of operand \p Op.
/// Conservatively returns false.
- virtual bool onlyFirstLaneUsed(const VPValue *Op) const {
+ virtual bool usesFirstLaneOnly(const VPValue *Op) const {
assert(is_contained(operands(), Op) &&
"Op must be an operand of the recipe");
return false;
@@ -287,7 +287,7 @@ public:
/// Returns true if the VPUser only uses the first part of operand \p Op.
/// Conservatively returns false.
- virtual bool onlyFirstPartUsed(const VPValue *Op) const {
+ virtual bool usesFirstPartOnly(const VPValue *Op) const {
assert(is_contained(operands(), Op) &&
"Op must be an operand of the recipe");
return false;