Diffstat (limited to 'llvm/lib')
-rwxr-xr-x  llvm/lib/Analysis/ConstantFolding.cpp | 4
-rw-r--r--  llvm/lib/Analysis/InstructionSimplify.cpp | 11
-rw-r--r--  llvm/lib/Analysis/MLInlineAdvisor.cpp | 58
-rw-r--r--  llvm/lib/Analysis/MemoryLocation.cpp | 4
-rw-r--r--  llvm/lib/Analysis/ScalarEvolution.cpp | 84
-rw-r--r--  llvm/lib/Analysis/ValueTracking.cpp | 5
-rw-r--r--  llvm/lib/CodeGen/ExpandFp.cpp | 61
-rw-r--r--  llvm/lib/CodeGen/InterleavedAccessPass.cpp | 6
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 2
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 72
-rw-r--r--  llvm/lib/DWARFLinker/Classic/DWARFLinker.cpp | 6
-rw-r--r--  llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp | 16
-rw-r--r--  llvm/lib/IR/AutoUpgrade.cpp | 70
-rw-r--r--  llvm/lib/IR/IRBuilder.cpp | 30
-rw-r--r--  llvm/lib/IR/Intrinsics.cpp | 13
-rw-r--r--  llvm/lib/IR/Verifier.cpp | 27
-rw-r--r--  llvm/lib/MC/MCParser/MasmParser.cpp | 2
-rw-r--r--  llvm/lib/Support/APFloat.cpp | 9
-rw-r--r--  llvm/lib/Support/Unix/Signals.inc | 3
-rw-r--r--  llvm/lib/Target/AArch64/AArch64ExpandImm.cpp | 2
-rw-r--r--  llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 144
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUAsanInstrumentation.cpp | 7
-rw-r--r--  llvm/lib/Target/AMDGPU/GCNSubtarget.cpp | 1
-rw-r--r--  llvm/lib/Target/AMDGPU/GCNSubtarget.h | 1
-rw-r--r--  llvm/lib/Target/AMDGPU/SIFrameLowering.cpp | 12
-rw-r--r--  llvm/lib/Target/AMDGPU/SIInstructions.td | 24
-rw-r--r--  llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp | 87
-rw-r--r--  llvm/lib/Target/ARM/MVEGatherScatterLowering.cpp | 20
-rw-r--r--  llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp | 17
-rw-r--r--  llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td | 8
-rw-r--r--  llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td | 8
-rw-r--r--  llvm/lib/Target/RISCV/MCA/RISCVCustomBehaviour.cpp | 2
-rw-r--r--  llvm/lib/Target/RISCV/RISCVGatherScatterLowering.cpp | 20
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfoZvfbf.td | 124
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp | 6
-rw-r--r--  llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp | 11
-rw-r--r--  llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp | 14
-rw-r--r--  llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp | 14
-rw-r--r--  llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h | 14
-rw-r--r--  llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp | 21
-rw-r--r--  llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp | 6
-rw-r--r--  llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.h | 4
-rw-r--r--  llvm/lib/Target/SPIRV/SPIRVUtils.cpp | 15
-rw-r--r--  llvm/lib/Target/SPIRV/SPIRVUtils.h | 3
-rw-r--r--  llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td | 8
-rw-r--r--  llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp | 48
-rw-r--r--  llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp | 9
-rw-r--r--  llvm/lib/Target/X86/X86.td | 1
-rw-r--r--  llvm/lib/Target/X86/X86ISelLowering.cpp | 2
-rw-r--r--  llvm/lib/TargetParser/ARMTargetParser.cpp | 8
-rw-r--r--  llvm/lib/TargetParser/X86TargetParser.cpp | 3
-rw-r--r--  llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp | 23
-rw-r--r--  llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp | 27
-rw-r--r--  llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp | 4
-rw-r--r--  llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp | 7
-rw-r--r--  llvm/lib/Transforms/Instrumentation/MemProfInstrumentation.cpp | 2
-rw-r--r--  llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp | 24
-rw-r--r--  llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp | 2
-rw-r--r--  llvm/lib/Transforms/Scalar/EarlyCSE.cpp | 6
-rw-r--r--  llvm/lib/Transforms/Scalar/GVN.cpp | 8
-rw-r--r--  llvm/lib/Transforms/Scalar/GVNSink.cpp | 1
-rw-r--r--  llvm/lib/Transforms/Scalar/InferAlignment.cpp | 17
-rw-r--r--  llvm/lib/Transforms/Scalar/LoopInterchange.cpp | 49
-rw-r--r--  llvm/lib/Transforms/Scalar/ScalarizeMaskedMemIntrin.cpp | 43
-rw-r--r--  llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp | 1
-rw-r--r--  llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h | 2
-rw-r--r--  llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 3
-rw-r--r--  llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 9
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlan.h | 17
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp | 1
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h | 6
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp | 35
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 71
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlanTransforms.h | 7
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp | 20
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlanUtils.cpp | 10
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlanUtils.h | 6
-rw-r--r--  llvm/lib/WindowsDriver/MSVCPaths.cpp | 75
78 files changed, 886 insertions, 737 deletions
diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp
index a5ba197..e9e2e7d 100755
--- a/llvm/lib/Analysis/ConstantFolding.cpp
+++ b/llvm/lib/Analysis/ConstantFolding.cpp
@@ -4056,8 +4056,8 @@ static Constant *ConstantFoldFixedVectorCall(
switch (IntrinsicID) {
case Intrinsic::masked_load: {
auto *SrcPtr = Operands[0];
- auto *Mask = Operands[2];
- auto *Passthru = Operands[3];
+ auto *Mask = Operands[1];
+ auto *Passthru = Operands[2];
Constant *VecData = ConstantFoldLoadFromConstPtr(SrcPtr, FVTy, DL);
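
The index shift in this hunk tracks a change to the masked intrinsic signatures: the explicit alignment immediate is removed, so the mask and passthru operands move down one slot each. A minimal sketch of the two layouts (the enum names here are illustrative, not from the patch):

    // before: llvm.masked.load(ptr, i32 align, mask, passthru)
    // after:  llvm.masked.load(ptr, mask, passthru)
    // The alignment now travels as a parameter attribute on the pointer.
    enum OldMaskedLoadOperand { OldPtr, OldAlign, OldMask, OldPassthru };
    enum NewMaskedLoadOperand { NewPtr, NewMask, NewPassthru };
    static_assert(NewMask == 1 && NewPassthru == 2,
                  "mask and passthru shift down by one operand");
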
diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp
index e08ef60..dc813f6 100644
--- a/llvm/lib/Analysis/InstructionSimplify.cpp
+++ b/llvm/lib/Analysis/InstructionSimplify.cpp
@@ -5440,9 +5440,10 @@ static Value *simplifyCastInst(unsigned CastOpc, Value *Op, Type *Ty,
// ptrtoint (ptradd (Ptr, X - ptrtoint(Ptr))) -> X
Value *Ptr, *X;
- if (CastOpc == Instruction::PtrToInt &&
- match(Op, m_PtrAdd(m_Value(Ptr),
- m_Sub(m_Value(X), m_PtrToInt(m_Deferred(Ptr))))) &&
+ if ((CastOpc == Instruction::PtrToInt || CastOpc == Instruction::PtrToAddr) &&
+ match(Op,
+ m_PtrAdd(m_Value(Ptr),
+ m_Sub(m_Value(X), m_PtrToIntOrAddr(m_Deferred(Ptr))))) &&
X->getType() == Ty && Ty == Q.DL.getIndexType(Ptr->getType()))
return X;
@@ -6987,8 +6988,8 @@ static Value *simplifyIntrinsic(CallBase *Call, Value *Callee,
switch (IID) {
case Intrinsic::masked_load:
case Intrinsic::masked_gather: {
- Value *MaskArg = Args[2];
- Value *PassthruArg = Args[3];
+ Value *MaskArg = Args[1];
+ Value *PassthruArg = Args[2];
// If the mask is all zeros or undef, the "passthru" argument is the result.
if (maskIsAllZeroOrUndef(MaskArg))
return PassthruArg;
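
For reference, a standalone sketch of the pattern the first hunk extends to ptrtoaddr, written with llvm/IR/PatternMatch.h; this is a simplified copy of the logic, not the in-tree function, and m_PtrToIntOrAddr is the matcher the patch itself uses:

    #include "llvm/IR/PatternMatch.h"
    using namespace llvm;
    using namespace llvm::PatternMatch;

    // ptrtoint/ptrtoaddr (ptradd Ptr, (X - ptrtoint/ptrtoaddr Ptr)) --> X
    static Value *foldCastOfPtrAdd(Value *Op) {
      Value *Ptr, *X;
      if (match(Op, m_PtrAdd(m_Value(Ptr),
                             m_Sub(m_Value(X),
                                   m_PtrToIntOrAddr(m_Deferred(Ptr))))))
        return X; // the caller must still check X against the index type
      return nullptr;
    }
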
diff --git a/llvm/lib/Analysis/MLInlineAdvisor.cpp b/llvm/lib/Analysis/MLInlineAdvisor.cpp
index 1d1a5560..9a5ae2a 100644
--- a/llvm/lib/Analysis/MLInlineAdvisor.cpp
+++ b/llvm/lib/Analysis/MLInlineAdvisor.cpp
@@ -324,32 +324,44 @@ void MLInlineAdvisor::onSuccessfulInlining(const MLInlineAdvice &Advice,
FAM.invalidate(*Caller, PA);
}
Advice.updateCachedCallerFPI(FAM);
- int64_t IRSizeAfter =
- getIRSize(*Caller) + (CalleeWasDeleted ? 0 : Advice.CalleeIRSize);
- CurrentIRSize += IRSizeAfter - (Advice.CallerIRSize + Advice.CalleeIRSize);
+ if (Caller == Callee) {
+ assert(!CalleeWasDeleted);
+ // We double-counted CallerAndCalleeEdges, since the caller and the
+ // callee are the same function.
+ assert(Advice.CallerAndCalleeEdges % 2 == 0);
+ CurrentIRSize += getIRSize(*Caller) - Advice.CallerIRSize;
+ EdgeCount += getCachedFPI(*Caller).DirectCallsToDefinedFunctions -
+ Advice.CallerAndCalleeEdges / 2;
+ // The NodeCount would stay the same.
+ } else {
+ int64_t IRSizeAfter =
+ getIRSize(*Caller) + (CalleeWasDeleted ? 0 : Advice.CalleeIRSize);
+ CurrentIRSize += IRSizeAfter - (Advice.CallerIRSize + Advice.CalleeIRSize);
+
+ // We can delta-update module-wide features. We know the inlining only
+ // changed the caller, and maybe the callee (by deleting the latter). Nodes
+ // are simple to update. For edges, we 'forget' the edges that the caller
+ // and callee used to have before inlining, and add back what they currently
+ // have together.
+ int64_t NewCallerAndCalleeEdges =
+ getCachedFPI(*Caller).DirectCallsToDefinedFunctions;
+
+ // A dead function's node is not actually removed from the call graph until
+ // the end of the call graph walk, but the node no longer belongs to any
+ // valid SCC.
+ if (CalleeWasDeleted) {
+ --NodeCount;
+ NodesInLastSCC.erase(CG.lookup(*Callee));
+ DeadFunctions.insert(Callee);
+ } else {
+ NewCallerAndCalleeEdges +=
+ getCachedFPI(*Callee).DirectCallsToDefinedFunctions;
+ }
+ EdgeCount += (NewCallerAndCalleeEdges - Advice.CallerAndCalleeEdges);
+ }
if (CurrentIRSize > SizeIncreaseThreshold * InitialIRSize)
ForceStop = true;
- // We can delta-update module-wide features. We know the inlining only changed
- // the caller, and maybe the callee (by deleting the latter).
- // Nodes are simple to update.
- // For edges, we 'forget' the edges that the caller and callee used to have
- // before inlining, and add back what they currently have together.
- int64_t NewCallerAndCalleeEdges =
- getCachedFPI(*Caller).DirectCallsToDefinedFunctions;
-
- // A dead function's node is not actually removed from the call graph until
- // the end of the call graph walk, but the node no longer belongs to any valid
- // SCC.
- if (CalleeWasDeleted) {
- --NodeCount;
- NodesInLastSCC.erase(CG.lookup(*Callee));
- DeadFunctions.insert(Callee);
- } else {
- NewCallerAndCalleeEdges +=
- getCachedFPI(*Callee).DirectCallsToDefinedFunctions;
- }
- EdgeCount += (NewCallerAndCalleeEdges - Advice.CallerAndCalleeEdges);
assert(CurrentIRSize >= 0 && EdgeCount >= 0 && NodeCount >= 0);
}
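
A hedged worked example of the self-inlining bookkeeping, with invented numbers: suppose f contains two direct calls to itself. CallerAndCalleeEdges summed the caller's and callee's edge counts, but both are f, so the two edges were counted twice and halving recovers the true count:

    #include <cassert>

    int main() {
      // f() contains two direct self-calls; caller == callee == f.
      int CallerAndCalleeEdges = 2 + 2;   // double-counted: 4
      // Inlining one call site consumes it and clones f's body, which
      // brings two more self-calls: 2 - 1 + 2 = 3.
      int DirectCallsAfterInlining = 3;
      int EdgeDelta = DirectCallsAfterInlining - CallerAndCalleeEdges / 2;
      assert(EdgeDelta == 1);             // net: one additional self-call edge
    }
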
diff --git a/llvm/lib/Analysis/MemoryLocation.cpp b/llvm/lib/Analysis/MemoryLocation.cpp
index dcc5117..1c5f08e 100644
--- a/llvm/lib/Analysis/MemoryLocation.cpp
+++ b/llvm/lib/Analysis/MemoryLocation.cpp
@@ -245,7 +245,7 @@ MemoryLocation MemoryLocation::getForArgument(const CallBase *Call,
assert(ArgIdx == 0 && "Invalid argument index");
auto *Ty = cast<VectorType>(II->getType());
- if (auto KnownType = getKnownTypeFromMaskedOp(II->getOperand(2), Ty))
+ if (auto KnownType = getKnownTypeFromMaskedOp(II->getOperand(1), Ty))
return MemoryLocation(Arg, DL.getTypeStoreSize(*KnownType), AATags);
return MemoryLocation(
@@ -255,7 +255,7 @@ MemoryLocation MemoryLocation::getForArgument(const CallBase *Call,
assert(ArgIdx == 1 && "Invalid argument index");
auto *Ty = cast<VectorType>(II->getArgOperand(0)->getType());
- if (auto KnownType = getKnownTypeFromMaskedOp(II->getOperand(3), Ty))
+ if (auto KnownType = getKnownTypeFromMaskedOp(II->getOperand(2), Ty))
return MemoryLocation(Arg, DL.getTypeStoreSize(*KnownType), AATags);
return MemoryLocation(
diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index 425420f..6f7dd79 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -15473,6 +15473,38 @@ void ScalarEvolution::LoopGuards::collectFromPHI(
}
}
+// Return a new SCEV that modifies \p Expr to the closest number that is
+// divisible by \p Divisor and less than or equal to \p Expr. For now, only
+// handle constant \p Expr.
+static const SCEV *getPreviousSCEVDivisibleByDivisor(const SCEV *Expr,
+ const APInt &DivisorVal,
+ ScalarEvolution &SE) {
+ const APInt *ExprVal;
+ if (!match(Expr, m_scev_APInt(ExprVal)) || ExprVal->isNegative() ||
+ DivisorVal.isNonPositive())
+ return Expr;
+ APInt Rem = ExprVal->urem(DivisorVal);
+ // return the SCEV: Expr - Expr % Divisor
+ return SE.getConstant(*ExprVal - Rem);
+}
+
+// Return a new SCEV that modifies \p Expr to the closest number that is
+// divisible by \p Divisor and greater than or equal to \p Expr. For now,
+// only handle constant \p Expr.
+static const SCEV *getNextSCEVDivisibleByDivisor(const SCEV *Expr,
+ const APInt &DivisorVal,
+ ScalarEvolution &SE) {
+ const APInt *ExprVal;
+ if (!match(Expr, m_scev_APInt(ExprVal)) || ExprVal->isNegative() ||
+ DivisorVal.isNonPositive())
+ return Expr;
+ APInt Rem = ExprVal->urem(DivisorVal);
+ if (Rem.isZero())
+ return Expr;
+ // return the SCEV: Expr + Divisor - Expr % Divisor
+ return SE.getConstant(*ExprVal + DivisorVal - Rem);
+}
+
void ScalarEvolution::LoopGuards::collectFromBlock(
ScalarEvolution &SE, ScalarEvolution::LoopGuards &Guards,
const BasicBlock *Block, const BasicBlock *Pred,
@@ -15540,36 +15572,6 @@ void ScalarEvolution::LoopGuards::collectFromBlock(
match(LHS, m_scev_APInt(C)) && C->isNonNegative();
};
- // Return a new SCEV that modifies \p Expr to the closest number divides by
- // \p Divisor and greater or equal than Expr. For now, only handle constant
- // Expr.
- auto GetNextSCEVDividesByDivisor = [&](const SCEV *Expr,
- const APInt &DivisorVal) {
- const APInt *ExprVal;
- if (!match(Expr, m_scev_APInt(ExprVal)) || ExprVal->isNegative() ||
- DivisorVal.isNonPositive())
- return Expr;
- APInt Rem = ExprVal->urem(DivisorVal);
- if (Rem.isZero())
- return Expr;
- // return the SCEV: Expr + Divisor - Expr % Divisor
- return SE.getConstant(*ExprVal + DivisorVal - Rem);
- };
-
- // Return a new SCEV that modifies \p Expr to the closest number divides by
- // \p Divisor and less or equal than Expr. For now, only handle constant
- // Expr.
- auto GetPreviousSCEVDividesByDivisor = [&](const SCEV *Expr,
- const APInt &DivisorVal) {
- const APInt *ExprVal;
- if (!match(Expr, m_scev_APInt(ExprVal)) || ExprVal->isNegative() ||
- DivisorVal.isNonPositive())
- return Expr;
- APInt Rem = ExprVal->urem(DivisorVal);
- // return the SCEV: Expr - Expr % Divisor
- return SE.getConstant(*ExprVal - Rem);
- };
-
  // Apply divisibility by \p Divisor on MinMaxExpr with constant values,
// recursively. This is done by aligning up/down the constant value to the
// Divisor.
@@ -15591,8 +15593,9 @@ void ScalarEvolution::LoopGuards::collectFromBlock(
assert(SE.isKnownNonNegative(MinMaxLHS) &&
"Expected non-negative operand!");
auto *DivisibleExpr =
- IsMin ? GetPreviousSCEVDividesByDivisor(MinMaxLHS, DivisorVal)
- : GetNextSCEVDividesByDivisor(MinMaxLHS, DivisorVal);
+ IsMin
+ ? getPreviousSCEVDivisibleByDivisor(MinMaxLHS, DivisorVal, SE)
+ : getNextSCEVDivisibleByDivisor(MinMaxLHS, DivisorVal, SE);
SmallVector<const SCEV *> Ops = {
ApplyDivisibiltyOnMinMaxExpr(MinMaxRHS, Divisor), DivisibleExpr};
return SE.getMinMaxExpr(SCTy, Ops);
@@ -15669,21 +15672,21 @@ void ScalarEvolution::LoopGuards::collectFromBlock(
[[fallthrough]];
case CmpInst::ICMP_SLT: {
RHS = SE.getMinusSCEV(RHS, One);
- RHS = GetPreviousSCEVDividesByDivisor(RHS, DividesBy);
+ RHS = getPreviousSCEVDivisibleByDivisor(RHS, DividesBy, SE);
break;
}
case CmpInst::ICMP_UGT:
case CmpInst::ICMP_SGT:
RHS = SE.getAddExpr(RHS, One);
- RHS = GetNextSCEVDividesByDivisor(RHS, DividesBy);
+ RHS = getNextSCEVDivisibleByDivisor(RHS, DividesBy, SE);
break;
case CmpInst::ICMP_ULE:
case CmpInst::ICMP_SLE:
- RHS = GetPreviousSCEVDividesByDivisor(RHS, DividesBy);
+ RHS = getPreviousSCEVDivisibleByDivisor(RHS, DividesBy, SE);
break;
case CmpInst::ICMP_UGE:
case CmpInst::ICMP_SGE:
- RHS = GetNextSCEVDividesByDivisor(RHS, DividesBy);
+ RHS = getNextSCEVDivisibleByDivisor(RHS, DividesBy, SE);
break;
default:
break;
@@ -15737,7 +15740,7 @@ void ScalarEvolution::LoopGuards::collectFromBlock(
case CmpInst::ICMP_NE:
if (match(RHS, m_scev_Zero())) {
const SCEV *OneAlignedUp =
- GetNextSCEVDividesByDivisor(One, DividesBy);
+ getNextSCEVDivisibleByDivisor(One, DividesBy, SE);
To = SE.getUMaxExpr(FromRewritten, OneAlignedUp);
} else {
// LHS != RHS can be rewritten as (LHS - RHS) = UMax(1, LHS - RHS),
@@ -15963,8 +15966,11 @@ const SCEV *ScalarEvolution::LoopGuards::rewrite(const SCEV *Expr) const {
if (MatchBinarySub(S, LHS, RHS)) {
if (LHS > RHS)
std::swap(LHS, RHS);
- if (NotEqual.contains({LHS, RHS}))
- return SE.getUMaxExpr(S, SE.getOne(S->getType()));
+ if (NotEqual.contains({LHS, RHS})) {
+ const SCEV *OneAlignedUp = getNextSCEVDivisibleByDivisor(
+ SE.getOne(S->getType()), SE.getConstantMultiple(S), SE);
+ return SE.getUMaxExpr(OneAlignedUp, S);
+ }
}
return nullptr;
};
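
The two helpers hoisted to file scope above round a constant down or up to a multiple of the divisor. A plain-integer sketch, assuming Divisor > 0 (the real functions operate on constant SCEVs via APInt and bail out on negative or non-positive inputs):

    #include <cassert>

    unsigned previousDivisible(unsigned Expr, unsigned Divisor) {
      return Expr - Expr % Divisor;               // round down to a multiple
    }
    unsigned nextDivisible(unsigned Expr, unsigned Divisor) {
      unsigned Rem = Expr % Divisor;
      return Rem ? Expr + Divisor - Rem : Expr;   // round up to a multiple
    }
    int main() {
      assert(previousDivisible(17, 5) == 15);
      assert(nextDivisible(17, 5) == 20);
      assert(nextDivisible(20, 5) == 20);         // already divisible
    }
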
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index 9655c88..0a72076 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -7695,6 +7695,11 @@ static bool isGuaranteedNotToBeUndefOrPoison(
}
if (IsWellDefined)
return true;
+ } else if (auto *Splat = isa<ShuffleVectorInst>(Opr) ? getSplatValue(Opr)
+ : nullptr) {
+ // For splats we only need to check the value being splatted.
+ if (OpCheck(Splat))
+ return true;
} else if (all_of(Opr->operands(), OpCheck))
return true;
}
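
A minimal sketch of the new special case, assuming the usual llvm::getSplatValue helper from VectorUtils.h: for a splat shuffle every lane is the splatted scalar, so checking that one value suffices even though the shuffle's vector operands may contain poison lanes. The function name here is hypothetical:

    #include "llvm/ADT/STLExtras.h"
    #include "llvm/Analysis/VectorUtils.h" // llvm::getSplatValue
    #include "llvm/IR/Instructions.h"
    using namespace llvm;

    static bool splatIsWellDefined(Value *Opr,
                                   function_ref<bool(Value *)> OpCheck) {
      if (isa<ShuffleVectorInst>(Opr))
        if (Value *Splat = getSplatValue(Opr))
          return OpCheck(Splat); // only the splatted value matters
      return false;
    }
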
diff --git a/llvm/lib/CodeGen/ExpandFp.cpp b/llvm/lib/CodeGen/ExpandFp.cpp
index 04c7008..2b5ced3 100644
--- a/llvm/lib/CodeGen/ExpandFp.cpp
+++ b/llvm/lib/CodeGen/ExpandFp.cpp
@@ -993,7 +993,6 @@ static void addToWorklist(Instruction &I,
static bool runImpl(Function &F, const TargetLowering &TLI,
AssumptionCache *AC) {
SmallVector<Instruction *, 4> Worklist;
- bool Modified = false;
unsigned MaxLegalFpConvertBitWidth =
TLI.getMaxLargeFPConvertBitWidthSupported();
@@ -1003,50 +1002,49 @@ static bool runImpl(Function &F, const TargetLowering &TLI,
if (MaxLegalFpConvertBitWidth >= llvm::IntegerType::MAX_INT_BITS)
return false;
- for (auto It = inst_begin(&F), End = inst_end(F); It != End;) {
- Instruction &I = *It++;
+ auto ShouldHandleInst = [&](Instruction &I) {
Type *Ty = I.getType();
// TODO: This pass doesn't handle scalable vectors.
if (Ty->isScalableTy())
- continue;
+ return false;
switch (I.getOpcode()) {
case Instruction::FRem:
- if (!targetSupportsFrem(TLI, Ty) &&
- FRemExpander::canExpandType(Ty->getScalarType())) {
- addToWorklist(I, Worklist);
- Modified = true;
- }
- break;
+ return !targetSupportsFrem(TLI, Ty) &&
+ FRemExpander::canExpandType(Ty->getScalarType());
+
case Instruction::FPToUI:
case Instruction::FPToSI: {
auto *IntTy = cast<IntegerType>(Ty->getScalarType());
- if (IntTy->getIntegerBitWidth() <= MaxLegalFpConvertBitWidth)
- continue;
-
- addToWorklist(I, Worklist);
- Modified = true;
- break;
+ return IntTy->getIntegerBitWidth() > MaxLegalFpConvertBitWidth;
}
+
case Instruction::UIToFP:
case Instruction::SIToFP: {
auto *IntTy =
cast<IntegerType>(I.getOperand(0)->getType()->getScalarType());
- if (IntTy->getIntegerBitWidth() <= MaxLegalFpConvertBitWidth)
- continue;
-
- addToWorklist(I, Worklist);
- Modified = true;
- break;
+ return IntTy->getIntegerBitWidth() > MaxLegalFpConvertBitWidth;
}
- default:
- break;
}
+
+ return false;
+ };
+
+ bool Modified = false;
+ for (auto It = inst_begin(&F), End = inst_end(F); It != End;) {
+ Instruction &I = *It++;
+ if (!ShouldHandleInst(I))
+ continue;
+
+ addToWorklist(I, Worklist);
+ Modified = true;
}
while (!Worklist.empty()) {
Instruction *I = Worklist.pop_back_val();
- if (I->getOpcode() == Instruction::FRem) {
+
+ switch (I->getOpcode()) {
+ case Instruction::FRem: {
auto SQ = [&]() -> std::optional<SimplifyQuery> {
if (AC) {
auto Res = std::make_optional<SimplifyQuery>(
@@ -1058,11 +1056,18 @@ static bool runImpl(Function &F, const TargetLowering &TLI,
}();
expandFRem(cast<BinaryOperator>(*I), SQ);
- } else if (I->getOpcode() == Instruction::FPToUI ||
- I->getOpcode() == Instruction::FPToSI) {
+ break;
+ }
+
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
expandFPToI(I);
- } else {
+ break;
+
+ case Instruction::UIToFP:
+ case Instruction::SIToFP:
expandIToFP(I);
+ break;
}
}
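
The restructuring above separates the "should we expand this instruction?" predicate from the expansion itself, keeping mutation out of the instruction walk. A generic collect-then-process sketch of the same idiom (illustrative, not from the patch):

    #include <vector>

    template <typename T, typename Pred, typename Action>
    bool processMatching(std::vector<T> &Items, Pred ShouldHandle, Action Act) {
      std::vector<T *> Worklist;
      for (T &I : Items)            // phase 1: decide, without mutating
        if (ShouldHandle(I))
          Worklist.push_back(&I);
      for (T *I : Worklist)         // phase 2: mutate from a stable list
        Act(*I);
      return !Worklist.empty();     // the "Modified" flag, as in runImpl
    }
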
diff --git a/llvm/lib/CodeGen/InterleavedAccessPass.cpp b/llvm/lib/CodeGen/InterleavedAccessPass.cpp
index a6a9b50..5c27a20 100644
--- a/llvm/lib/CodeGen/InterleavedAccessPass.cpp
+++ b/llvm/lib/CodeGen/InterleavedAccessPass.cpp
@@ -258,13 +258,11 @@ static Value *getMaskOperand(IntrinsicInst *II) {
default:
llvm_unreachable("Unexpected intrinsic");
case Intrinsic::vp_load:
- return II->getOperand(1);
case Intrinsic::masked_load:
- return II->getOperand(2);
+ return II->getOperand(1);
case Intrinsic::vp_store:
- return II->getOperand(2);
case Intrinsic::masked_store:
- return II->getOperand(3);
+ return II->getOperand(2);
}
}
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index c97300d..6bf9008 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -26876,6 +26876,8 @@ static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN,
// TODO: handle more extension/truncation cases as cases arise.
if (EltSizeInBits != ExtSrcSizeInBits)
return SDValue();
+ if (VT.getSizeInBits() != N00.getValueSizeInBits())
+ return SDValue();
// We can remove *extend_vector_inreg only if the truncation happens at
// the same scale as the extension.
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index cb0038c..20a0efd 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -4837,29 +4837,10 @@ void SelectionDAGBuilder::visitMaskedStore(const CallInst &I,
bool IsCompressing) {
SDLoc sdl = getCurSDLoc();
- auto getMaskedStoreOps = [&](Value *&Ptr, Value *&Mask, Value *&Src0,
- Align &Alignment) {
- // llvm.masked.store.*(Src0, Ptr, alignment, Mask)
- Src0 = I.getArgOperand(0);
- Ptr = I.getArgOperand(1);
- Alignment = cast<ConstantInt>(I.getArgOperand(2))->getAlignValue();
- Mask = I.getArgOperand(3);
- };
- auto getCompressingStoreOps = [&](Value *&Ptr, Value *&Mask, Value *&Src0,
- Align &Alignment) {
- // llvm.masked.compressstore.*(Src0, Ptr, Mask)
- Src0 = I.getArgOperand(0);
- Ptr = I.getArgOperand(1);
- Mask = I.getArgOperand(2);
- Alignment = I.getParamAlign(1).valueOrOne();
- };
-
- Value *PtrOperand, *MaskOperand, *Src0Operand;
- Align Alignment;
- if (IsCompressing)
- getCompressingStoreOps(PtrOperand, MaskOperand, Src0Operand, Alignment);
- else
- getMaskedStoreOps(PtrOperand, MaskOperand, Src0Operand, Alignment);
+ Value *Src0Operand = I.getArgOperand(0);
+ Value *PtrOperand = I.getArgOperand(1);
+ Value *MaskOperand = I.getArgOperand(2);
+ Align Alignment = I.getParamAlign(1).valueOrOne();
SDValue Ptr = getValue(PtrOperand);
SDValue Src0 = getValue(Src0Operand);
@@ -4964,14 +4945,12 @@ static bool getUniformBase(const Value *Ptr, SDValue &Base, SDValue &Index,
void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) {
SDLoc sdl = getCurSDLoc();
- // llvm.masked.scatter.*(Src0, Ptrs, alignment, Mask)
+ // llvm.masked.scatter.*(Src0, Ptrs, Mask)
const Value *Ptr = I.getArgOperand(1);
SDValue Src0 = getValue(I.getArgOperand(0));
- SDValue Mask = getValue(I.getArgOperand(3));
+ SDValue Mask = getValue(I.getArgOperand(2));
EVT VT = Src0.getValueType();
- Align Alignment = cast<ConstantInt>(I.getArgOperand(2))
- ->getMaybeAlignValue()
- .value_or(DAG.getEVTAlign(VT.getScalarType()));
+ Align Alignment = I.getParamAlign(1).valueOrOne();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDValue Base;
@@ -5008,29 +4987,10 @@ void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) {
void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I, bool IsExpanding) {
SDLoc sdl = getCurSDLoc();
- auto getMaskedLoadOps = [&](Value *&Ptr, Value *&Mask, Value *&Src0,
- Align &Alignment) {
- // @llvm.masked.load.*(Ptr, alignment, Mask, Src0)
- Ptr = I.getArgOperand(0);
- Alignment = cast<ConstantInt>(I.getArgOperand(1))->getAlignValue();
- Mask = I.getArgOperand(2);
- Src0 = I.getArgOperand(3);
- };
- auto getExpandingLoadOps = [&](Value *&Ptr, Value *&Mask, Value *&Src0,
- Align &Alignment) {
- // @llvm.masked.expandload.*(Ptr, Mask, Src0)
- Ptr = I.getArgOperand(0);
- Alignment = I.getParamAlign(0).valueOrOne();
- Mask = I.getArgOperand(1);
- Src0 = I.getArgOperand(2);
- };
-
- Value *PtrOperand, *MaskOperand, *Src0Operand;
- Align Alignment;
- if (IsExpanding)
- getExpandingLoadOps(PtrOperand, MaskOperand, Src0Operand, Alignment);
- else
- getMaskedLoadOps(PtrOperand, MaskOperand, Src0Operand, Alignment);
+ Value *PtrOperand = I.getArgOperand(0);
+ Value *MaskOperand = I.getArgOperand(1);
+ Value *Src0Operand = I.getArgOperand(2);
+ Align Alignment = I.getParamAlign(0).valueOrOne();
SDValue Ptr = getValue(PtrOperand);
SDValue Src0 = getValue(Src0Operand);
@@ -5077,16 +5037,14 @@ void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I, bool IsExpanding) {
void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) {
SDLoc sdl = getCurSDLoc();
- // @llvm.masked.gather.*(Ptrs, alignment, Mask, Src0)
+ // @llvm.masked.gather.*(Ptrs, Mask, Src0)
const Value *Ptr = I.getArgOperand(0);
- SDValue Src0 = getValue(I.getArgOperand(3));
- SDValue Mask = getValue(I.getArgOperand(2));
+ SDValue Src0 = getValue(I.getArgOperand(2));
+ SDValue Mask = getValue(I.getArgOperand(1));
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
- Align Alignment = cast<ConstantInt>(I.getArgOperand(1))
- ->getMaybeAlignValue()
- .value_or(DAG.getEVTAlign(VT.getScalarType()));
+ Align Alignment = I.getParamAlign(0).valueOrOne();
const MDNode *Ranges = getRangeMetadata(I);
diff --git a/llvm/lib/DWARFLinker/Classic/DWARFLinker.cpp b/llvm/lib/DWARFLinker/Classic/DWARFLinker.cpp
index 8052773..8637b55 100644
--- a/llvm/lib/DWARFLinker/Classic/DWARFLinker.cpp
+++ b/llvm/lib/DWARFLinker/Classic/DWARFLinker.cpp
@@ -2427,11 +2427,13 @@ void DWARFLinker::DIECloner::generateLineTableForUnit(CompileUnit &Unit) {
uint64_t OrigStmtSeq = StmtSeq.get();
// 1. Get the original row index from the stmt list offset.
auto OrigRowIter = SeqOffToOrigRow.find(OrigStmtSeq);
+ const uint64_t InvalidOffset =
+ Unit.getOrigUnit().getFormParams().getDwarfMaxOffset();
// Check whether we have an output sequence for the StmtSeq offset.
// Some sequences are discarded by the DWARFLinker if they are invalid
// (empty).
if (OrigRowIter == SeqOffToOrigRow.end()) {
- StmtSeq.set(UINT64_MAX);
+ StmtSeq.set(InvalidOffset);
continue;
}
size_t OrigRowIndex = OrigRowIter->second;
@@ -2441,7 +2443,7 @@ void DWARFLinker::DIECloner::generateLineTableForUnit(CompileUnit &Unit) {
if (NewRowIter == OrigRowToNewRow.end()) {
// If the original row index is not found in the map, update the
// stmt_sequence attribute to the 'invalid offset' magic value.
- StmtSeq.set(UINT64_MAX);
+ StmtSeq.set(InvalidOffset);
continue;
}
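
The sentinel is format-dependent because section offsets are 4 bytes in DWARF32 and 8 bytes in DWARF64, so "all ones" differs between the two. A hedged standalone sketch of what getDwarfMaxOffset amounts to:

    #include <cassert>
    #include <cstdint>

    uint64_t dwarfMaxOffset(bool IsDWARF64) {
      return IsDWARF64 ? UINT64_MAX : UINT32_MAX;
    }
    int main() {
      assert(dwarfMaxOffset(false) == 0xFFFFFFFFull); // DWARF32 sentinel
      assert(dwarfMaxOffset(true) == ~0ull);          // DWARF64 sentinel
    }
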
diff --git a/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp b/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp
index fa39603..a326a01 100644
--- a/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp
+++ b/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp
@@ -320,12 +320,16 @@ static void convertFunctionLineTable(OutputAggregator &Out, CUInfo &CUI,
// Attempt to retrieve DW_AT_LLVM_stmt_sequence if present.
std::optional<uint64_t> StmtSeqOffset;
if (auto StmtSeqAttr = Die.find(llvm::dwarf::DW_AT_LLVM_stmt_sequence)) {
- // The `DW_AT_LLVM_stmt_sequence` attribute might be set to `UINT64_MAX`
- // when it refers to an empty line sequence. In such cases, the DWARF linker
- // will exclude the empty sequence from the final output and assign
- // `UINT64_MAX` to the `DW_AT_LLVM_stmt_sequence` attribute.
- uint64_t StmtSeqVal = dwarf::toSectionOffset(StmtSeqAttr, UINT64_MAX);
- if (StmtSeqVal != UINT64_MAX)
+ // The `DW_AT_LLVM_stmt_sequence` attribute might be set to an invalid
+ // sentinel value when it refers to an empty line sequence. In such cases,
+ // the DWARF linker will exclude the empty sequence from the final output
+ // and assign the sentinel value to the `DW_AT_LLVM_stmt_sequence`
+ // attribute. The sentinel value is UINT32_MAX for DWARF32 and UINT64_MAX
+ // for DWARF64.
+ const uint64_t InvalidOffset =
+ Die.getDwarfUnit()->getFormParams().getDwarfMaxOffset();
+ uint64_t StmtSeqVal = dwarf::toSectionOffset(StmtSeqAttr, InvalidOffset);
+ if (StmtSeqVal != InvalidOffset)
StmtSeqOffset = StmtSeqVal;
}
diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp
index d8374b6..10f915d 100644
--- a/llvm/lib/IR/AutoUpgrade.cpp
+++ b/llvm/lib/IR/AutoUpgrade.cpp
@@ -1426,6 +1426,28 @@ static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn,
Intrinsic::memset, ParamTypes);
return true;
}
+
+ unsigned MaskedID =
+ StringSwitch<unsigned>(Name)
+ .StartsWith("masked.load", Intrinsic::masked_load)
+ .StartsWith("masked.gather", Intrinsic::masked_gather)
+ .StartsWith("masked.store", Intrinsic::masked_store)
+ .StartsWith("masked.scatter", Intrinsic::masked_scatter)
+ .Default(0);
+ if (MaskedID && F->arg_size() == 4) {
+ rename(F);
+ if (MaskedID == Intrinsic::masked_load ||
+ MaskedID == Intrinsic::masked_gather) {
+ NewFn = Intrinsic::getOrInsertDeclaration(
+ F->getParent(), MaskedID,
+ {F->getReturnType(), F->getArg(0)->getType()});
+ return true;
+ }
+ NewFn = Intrinsic::getOrInsertDeclaration(
+ F->getParent(), MaskedID,
+ {F->getArg(0)->getType(), F->getArg(1)->getType()});
+ return true;
+ }
break;
}
case 'n': {
@@ -5231,6 +5253,54 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
break;
}
+ case Intrinsic::masked_load:
+ case Intrinsic::masked_gather:
+ case Intrinsic::masked_store:
+ case Intrinsic::masked_scatter: {
+ if (CI->arg_size() != 4) {
+ DefaultCase();
+ return;
+ }
+
+ const DataLayout &DL = CI->getDataLayout();
+ switch (NewFn->getIntrinsicID()) {
+ case Intrinsic::masked_load:
+ NewCall = Builder.CreateMaskedLoad(
+ CI->getType(), CI->getArgOperand(0),
+ cast<ConstantInt>(CI->getArgOperand(1))->getAlignValue(),
+ CI->getArgOperand(2), CI->getArgOperand(3));
+ break;
+ case Intrinsic::masked_gather:
+ NewCall = Builder.CreateMaskedGather(
+ CI->getType(), CI->getArgOperand(0),
+ DL.getValueOrABITypeAlignment(
+ cast<ConstantInt>(CI->getArgOperand(1))->getMaybeAlignValue(),
+ CI->getType()->getScalarType()),
+ CI->getArgOperand(2), CI->getArgOperand(3));
+ break;
+ case Intrinsic::masked_store:
+ NewCall = Builder.CreateMaskedStore(
+ CI->getArgOperand(0), CI->getArgOperand(1),
+ cast<ConstantInt>(CI->getArgOperand(2))->getAlignValue(),
+ CI->getArgOperand(3));
+ break;
+ case Intrinsic::masked_scatter:
+ NewCall = Builder.CreateMaskedScatter(
+ CI->getArgOperand(0), CI->getArgOperand(1),
+ DL.getValueOrABITypeAlignment(
+ cast<ConstantInt>(CI->getArgOperand(2))->getMaybeAlignValue(),
+ CI->getArgOperand(0)->getType()->getScalarType()),
+ CI->getArgOperand(3));
+ break;
+ default:
+ llvm_unreachable("Unexpected intrinsic ID");
+ }
+ // Previous metadata is still valid.
+ NewCall->copyMetadata(*CI);
+ NewCall->setTailCallKind(cast<CallInst>(CI)->getTailCallKind());
+ break;
+ }
+
case Intrinsic::lifetime_start:
case Intrinsic::lifetime_end: {
if (CI->arg_size() != 2) {
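
For concreteness, a hedged before/after of what this upgrade rewrites for a <4 x i32> masked load with alignment 16, shown as IR in comments; the value names are illustrative:

    // old (4 arguments, alignment as an explicit i32 immediate):
    //   %v = call <4 x i32> @llvm.masked.load.v4i32.p0(
    //            ptr %p, i32 16, <4 x i1> %m, <4 x i32> %t)
    //
    // new (3 arguments, alignment as a parameter attribute):
    //   %v = call <4 x i32> @llvm.masked.load.v4i32.p0(
    //            ptr align 16 %p, <4 x i1> %m, <4 x i32> %t)
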
diff --git a/llvm/lib/IR/IRBuilder.cpp b/llvm/lib/IR/IRBuilder.cpp
index 15c0198..88dbd17 100644
--- a/llvm/lib/IR/IRBuilder.cpp
+++ b/llvm/lib/IR/IRBuilder.cpp
@@ -495,9 +495,11 @@ CallInst *IRBuilderBase::CreateMaskedLoad(Type *Ty, Value *Ptr, Align Alignment,
if (!PassThru)
PassThru = PoisonValue::get(Ty);
Type *OverloadedTypes[] = { Ty, PtrTy };
- Value *Ops[] = {Ptr, getInt32(Alignment.value()), Mask, PassThru};
- return CreateMaskedIntrinsic(Intrinsic::masked_load, Ops,
- OverloadedTypes, Name);
+ Value *Ops[] = {Ptr, Mask, PassThru};
+ CallInst *CI =
+ CreateMaskedIntrinsic(Intrinsic::masked_load, Ops, OverloadedTypes, Name);
+ CI->addParamAttr(0, Attribute::getWithAlignment(CI->getContext(), Alignment));
+ return CI;
}
/// Create a call to a Masked Store intrinsic.
@@ -513,8 +515,11 @@ CallInst *IRBuilderBase::CreateMaskedStore(Value *Val, Value *Ptr,
assert(DataTy->isVectorTy() && "Val should be a vector");
assert(Mask && "Mask should not be all-ones (null)");
Type *OverloadedTypes[] = { DataTy, PtrTy };
- Value *Ops[] = {Val, Ptr, getInt32(Alignment.value()), Mask};
- return CreateMaskedIntrinsic(Intrinsic::masked_store, Ops, OverloadedTypes);
+ Value *Ops[] = {Val, Ptr, Mask};
+ CallInst *CI =
+ CreateMaskedIntrinsic(Intrinsic::masked_store, Ops, OverloadedTypes);
+ CI->addParamAttr(1, Attribute::getWithAlignment(CI->getContext(), Alignment));
+ return CI;
}
/// Create a call to a Masked intrinsic, with given intrinsic Id,
@@ -552,12 +557,14 @@ CallInst *IRBuilderBase::CreateMaskedGather(Type *Ty, Value *Ptrs,
PassThru = PoisonValue::get(Ty);
Type *OverloadedTypes[] = {Ty, PtrsTy};
- Value *Ops[] = {Ptrs, getInt32(Alignment.value()), Mask, PassThru};
+ Value *Ops[] = {Ptrs, Mask, PassThru};
// We specify only one type when we create this intrinsic. Types of other
// arguments are derived from this type.
- return CreateMaskedIntrinsic(Intrinsic::masked_gather, Ops, OverloadedTypes,
- Name);
+ CallInst *CI = CreateMaskedIntrinsic(Intrinsic::masked_gather, Ops,
+ OverloadedTypes, Name);
+ CI->addParamAttr(0, Attribute::getWithAlignment(CI->getContext(), Alignment));
+ return CI;
}
/// Create a call to a Masked Scatter intrinsic.
@@ -577,11 +584,14 @@ CallInst *IRBuilderBase::CreateMaskedScatter(Value *Data, Value *Ptrs,
Mask = getAllOnesMask(NumElts);
Type *OverloadedTypes[] = {DataTy, PtrsTy};
- Value *Ops[] = {Data, Ptrs, getInt32(Alignment.value()), Mask};
+ Value *Ops[] = {Data, Ptrs, Mask};
// We specify only one type when we create this intrinsic. Types of other
// arguments are derived from this type.
- return CreateMaskedIntrinsic(Intrinsic::masked_scatter, Ops, OverloadedTypes);
+ CallInst *CI =
+ CreateMaskedIntrinsic(Intrinsic::masked_scatter, Ops, OverloadedTypes);
+ CI->addParamAttr(1, Attribute::getWithAlignment(CI->getContext(), Alignment));
+ return CI;
}
/// Create a call to Masked Expand Load intrinsic
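
A minimal usage sketch of the updated builder entry points (surrounding setup assumed; not part of the patch): the alignment now lands on the pointer parameter via addParamAttr rather than as an extra operand, so callers of CreateMaskedLoad are unchanged while the emitted IR differs:

    #include "llvm/IR/IRBuilder.h"
    using namespace llvm;

    CallInst *emitMaskedLoad(BasicBlock *BB, Value *Ptr, Value *Mask) {
      IRBuilder<> B(BB);
      auto *VecTy = FixedVectorType::get(B.getInt32Ty(), 4);
      // Emits roughly: call <4 x i32> @llvm.masked.load.v4i32.p0(
      //                    ptr align 16 %p, <4 x i1> %m, <4 x i32> poison)
      return B.CreateMaskedLoad(VecTy, Ptr, Align(16), Mask);
    }
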
diff --git a/llvm/lib/IR/Intrinsics.cpp b/llvm/lib/IR/Intrinsics.cpp
index 6797a10..526800e 100644
--- a/llvm/lib/IR/Intrinsics.cpp
+++ b/llvm/lib/IR/Intrinsics.cpp
@@ -725,6 +725,19 @@ Function *Intrinsic::getOrInsertDeclaration(Module *M, ID id,
// There can never be multiple globals with the same name of different types,
// because intrinsics must be a specific type.
auto *FT = getType(M->getContext(), id, Tys);
+ Function *F = cast<Function>(
+ M->getOrInsertFunction(
+ Tys.empty() ? getName(id) : getName(id, Tys, M, FT), FT)
+ .getCallee());
+ if (F->getFunctionType() == FT)
+ return F;
+
+ // It's possible that a declaration for this intrinsic already exists with an
+ // incorrect signature, if the signature has changed, but this particular
+ // declaration has not been auto-upgraded yet. In that case, rename the
+ // invalid declaration and insert a new one with the correct signature. The
+ // invalid declaration will get upgraded later.
+ F->setName(F->getName() + ".invalid");
return cast<Function>(
M->getOrInsertFunction(
Tys.empty() ? getName(id) : getName(id, Tys, M, FT), FT)
diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
index 3572852..03da154 100644
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -6211,13 +6211,10 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
Check(Call.getType()->isVectorTy(), "masked_load: must return a vector",
Call);
- ConstantInt *Alignment = cast<ConstantInt>(Call.getArgOperand(1));
- Value *Mask = Call.getArgOperand(2);
- Value *PassThru = Call.getArgOperand(3);
+ Value *Mask = Call.getArgOperand(1);
+ Value *PassThru = Call.getArgOperand(2);
Check(Mask->getType()->isVectorTy(), "masked_load: mask must be vector",
Call);
- Check(Alignment->getValue().isPowerOf2(),
- "masked_load: alignment must be a power of 2", Call);
Check(PassThru->getType() == Call.getType(),
"masked_load: pass through and return type must match", Call);
Check(cast<VectorType>(Mask->getType())->getElementCount() ==
@@ -6227,33 +6224,15 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
}
case Intrinsic::masked_store: {
Value *Val = Call.getArgOperand(0);
- ConstantInt *Alignment = cast<ConstantInt>(Call.getArgOperand(2));
- Value *Mask = Call.getArgOperand(3);
+ Value *Mask = Call.getArgOperand(2);
Check(Mask->getType()->isVectorTy(), "masked_store: mask must be vector",
Call);
- Check(Alignment->getValue().isPowerOf2(),
- "masked_store: alignment must be a power of 2", Call);
Check(cast<VectorType>(Mask->getType())->getElementCount() ==
cast<VectorType>(Val->getType())->getElementCount(),
"masked_store: vector mask must be same length as value", Call);
break;
}
- case Intrinsic::masked_gather: {
- const APInt &Alignment =
- cast<ConstantInt>(Call.getArgOperand(1))->getValue();
- Check(Alignment.isZero() || Alignment.isPowerOf2(),
- "masked_gather: alignment must be 0 or a power of 2", Call);
- break;
- }
- case Intrinsic::masked_scatter: {
- const APInt &Alignment =
- cast<ConstantInt>(Call.getArgOperand(2))->getValue();
- Check(Alignment.isZero() || Alignment.isPowerOf2(),
- "masked_scatter: alignment must be 0 or a power of 2", Call);
- break;
- }
-
case Intrinsic::experimental_guard: {
Check(isa<CallInst>(Call), "experimental_guard cannot be invoked", Call);
Check(Call.countOperandBundlesOfType(LLVMContext::OB_deopt) == 1,
diff --git a/llvm/lib/MC/MCParser/MasmParser.cpp b/llvm/lib/MC/MCParser/MasmParser.cpp
index 7f0ea78..d4901d9 100644
--- a/llvm/lib/MC/MCParser/MasmParser.cpp
+++ b/llvm/lib/MC/MCParser/MasmParser.cpp
@@ -2903,7 +2903,7 @@ bool MasmParser::parseIdentifier(StringRef &Res,
if (Position == StartOfStatement &&
StringSwitch<bool>(Res)
.CaseLower("echo", true)
- .CasesLower("ifdef", "ifndef", "elseifdef", "elseifndef", true)
+ .CasesLower({"ifdef", "ifndef", "elseifdef", "elseifndef"}, true)
.Default(false)) {
ExpandNextToken = DoNotExpandMacros;
}
diff --git a/llvm/lib/Support/APFloat.cpp b/llvm/lib/Support/APFloat.cpp
index b4de79a..4787604 100644
--- a/llvm/lib/Support/APFloat.cpp
+++ b/llvm/lib/Support/APFloat.cpp
@@ -2600,8 +2600,7 @@ APFloat::opStatus IEEEFloat::convert(const fltSemantics &toSemantics,
int exponentChange = omsb - fromSemantics.precision;
if (exponent + exponentChange < toSemantics.minExponent)
exponentChange = toSemantics.minExponent - exponent;
- if (exponentChange < shift)
- exponentChange = shift;
+ exponentChange = std::max(exponentChange, shift);
if (exponentChange < 0) {
shift -= exponentChange;
exponent += exponentChange;
@@ -3043,8 +3042,7 @@ IEEEFloat::roundSignificandWithExponent(const integerPart *decSigParts,
if (decSig.exponent < semantics->minExponent) {
excessPrecision += (semantics->minExponent - decSig.exponent);
truncatedBits = excessPrecision;
- if (excessPrecision > calcSemantics.precision)
- excessPrecision = calcSemantics.precision;
+ excessPrecision = std::min(excessPrecision, calcSemantics.precision);
}
/* Extra half-ulp lost in reciprocal of exponent. */
powHUerr = (powStatus == opOK && calcLostFraction == lfExactlyZero) ? 0:2;
@@ -3441,8 +3439,7 @@ char *IEEEFloat::convertNormalToHexString(char *dst, unsigned int hexDigits,
/* Convert as much of "part" to hexdigits as we can. */
unsigned int curDigits = integerPartWidth / 4;
- if (curDigits > outputDigits)
- curDigits = outputDigits;
+ curDigits = std::min(curDigits, outputDigits);
dst += partAsHex (dst, part, curDigits, hexDigitChars);
outputDigits -= curDigits;
}
diff --git a/llvm/lib/Support/Unix/Signals.inc b/llvm/lib/Support/Unix/Signals.inc
index 573ad82..78d6540 100644
--- a/llvm/lib/Support/Unix/Signals.inc
+++ b/llvm/lib/Support/Unix/Signals.inc
@@ -868,8 +868,7 @@ void llvm::sys::PrintStackTrace(raw_ostream &OS, int Depth) {
nwidth = strlen(name) - 1;
}
- if (nwidth > width)
- width = nwidth;
+ width = std::max(nwidth, width);
}
for (int i = 0; i < depth; ++i) {
diff --git a/llvm/lib/Target/AArch64/AArch64ExpandImm.cpp b/llvm/lib/Target/AArch64/AArch64ExpandImm.cpp
index 9801627..e9660ac1 100644
--- a/llvm/lib/Target/AArch64/AArch64ExpandImm.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ExpandImm.cpp
@@ -585,7 +585,7 @@ void AArch64_IMM::expandMOVImm(uint64_t Imm, unsigned BitSize,
uint64_t ShiftedMask = (0xFFFFULL << Shift);
uint64_t ZeroChunk = UImm & ~ShiftedMask;
uint64_t OneChunk = UImm | ShiftedMask;
- uint64_t RotatedImm = (UImm << 32) | (UImm >> 32);
+ uint64_t RotatedImm = llvm::rotl(UImm, 32);
uint64_t ReplicateChunk = ZeroChunk | (RotatedImm & ShiftedMask);
if (AArch64_AM::processLogicalImmediate(ZeroChunk, BitSize, Encoding) ||
AArch64_AM::processLogicalImmediate(OneChunk, BitSize, Encoding) ||
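
The replaced expression and llvm::rotl are bit-for-bit equivalent for a 32-bit rotation of a 64-bit value; a quick compile-time check using the analogous C++20 std::rotl:

    #include <bit>
    #include <cstdint>

    constexpr uint64_t Imm = 0x1111222233334444ULL;
    static_assert(std::rotl(Imm, 32) == ((Imm << 32) | (Imm >> 32)),
                  "rotl(x, 32) == (x << 32) | (x >> 32) for 64-bit x");
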
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 12c600f..d5117da 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -700,7 +700,7 @@ static unsigned removeCopies(const MachineRegisterInfo &MRI, unsigned VReg) {
// csel instruction. If so, return the folded opcode, and the replacement
// register.
static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
- unsigned *NewVReg = nullptr) {
+ unsigned *NewReg = nullptr) {
VReg = removeCopies(MRI, VReg);
if (!Register::isVirtualRegister(VReg))
return 0;
@@ -708,8 +708,37 @@ static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
bool Is64Bit = AArch64::GPR64allRegClass.hasSubClassEq(MRI.getRegClass(VReg));
const MachineInstr *DefMI = MRI.getVRegDef(VReg);
unsigned Opc = 0;
- unsigned SrcOpNum = 0;
+ unsigned SrcReg = 0;
switch (DefMI->getOpcode()) {
+ case AArch64::SUBREG_TO_REG:
+ // Check for the following way to define an 64-bit immediate:
+ // %0:gpr32 = MOVi32imm 1
+ // %1:gpr64 = SUBREG_TO_REG 0, %0:gpr32, %subreg.sub_32
+ if (!DefMI->getOperand(1).isImm() || DefMI->getOperand(1).getImm() != 0)
+ return 0;
+ if (!DefMI->getOperand(2).isReg())
+ return 0;
+ if (!DefMI->getOperand(3).isImm() ||
+ DefMI->getOperand(3).getImm() != AArch64::sub_32)
+ return 0;
+ DefMI = MRI.getVRegDef(DefMI->getOperand(2).getReg());
+ if (DefMI->getOpcode() != AArch64::MOVi32imm)
+ return 0;
+ if (!DefMI->getOperand(1).isImm() || DefMI->getOperand(1).getImm() != 1)
+ return 0;
+ assert(Is64Bit);
+ SrcReg = AArch64::XZR;
+ Opc = AArch64::CSINCXr;
+ break;
+
+ case AArch64::MOVi32imm:
+ case AArch64::MOVi64imm:
+ if (!DefMI->getOperand(1).isImm() || DefMI->getOperand(1).getImm() != 1)
+ return 0;
+ SrcReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
+ Opc = Is64Bit ? AArch64::CSINCXr : AArch64::CSINCWr;
+ break;
+
case AArch64::ADDSXri:
case AArch64::ADDSWri:
// if NZCV is used, do not fold.
@@ -724,7 +753,7 @@ static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
if (!DefMI->getOperand(2).isImm() || DefMI->getOperand(2).getImm() != 1 ||
DefMI->getOperand(3).getImm() != 0)
return 0;
- SrcOpNum = 1;
+ SrcReg = DefMI->getOperand(1).getReg();
Opc = Is64Bit ? AArch64::CSINCXr : AArch64::CSINCWr;
break;
@@ -734,7 +763,7 @@ static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
return 0;
- SrcOpNum = 2;
+ SrcReg = DefMI->getOperand(2).getReg();
Opc = Is64Bit ? AArch64::CSINVXr : AArch64::CSINVWr;
break;
}
@@ -753,17 +782,17 @@ static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
return 0;
- SrcOpNum = 2;
+ SrcReg = DefMI->getOperand(2).getReg();
Opc = Is64Bit ? AArch64::CSNEGXr : AArch64::CSNEGWr;
break;
}
default:
return 0;
}
- assert(Opc && SrcOpNum && "Missing parameters");
+ assert(Opc && SrcReg && "Missing parameters");
- if (NewVReg)
- *NewVReg = DefMI->getOperand(SrcOpNum).getReg();
+ if (NewReg)
+ *NewReg = SrcReg;
return Opc;
}
@@ -964,28 +993,34 @@ void AArch64InstrInfo::insertSelect(MachineBasicBlock &MBB,
// Try folding simple instructions into the csel.
if (TryFold) {
- unsigned NewVReg = 0;
- unsigned FoldedOpc = canFoldIntoCSel(MRI, TrueReg, &NewVReg);
+ unsigned NewReg = 0;
+ unsigned FoldedOpc = canFoldIntoCSel(MRI, TrueReg, &NewReg);
if (FoldedOpc) {
      // The folded opcodes csinc, csinv and csneg apply the operation to
// FalseReg, so we need to invert the condition.
CC = AArch64CC::getInvertedCondCode(CC);
TrueReg = FalseReg;
} else
- FoldedOpc = canFoldIntoCSel(MRI, FalseReg, &NewVReg);
+ FoldedOpc = canFoldIntoCSel(MRI, FalseReg, &NewReg);
// Fold the operation. Leave any dead instructions for DCE to clean up.
if (FoldedOpc) {
- FalseReg = NewVReg;
+ FalseReg = NewReg;
Opc = FoldedOpc;
- // The extends the live range of NewVReg.
- MRI.clearKillFlags(NewVReg);
+ // Extend the live range of NewReg.
+ MRI.clearKillFlags(NewReg);
}
}
// Pull all virtual register into the appropriate class.
MRI.constrainRegClass(TrueReg, RC);
- MRI.constrainRegClass(FalseReg, RC);
+ // FalseReg might be WZR or XZR if the folded operand is a literal 1.
+ assert(
+ (FalseReg.isVirtual() || FalseReg == AArch64::WZR ||
+ FalseReg == AArch64::XZR) &&
+ "FalseReg was folded into a non-virtual register other than WZR or XZR");
+ if (FalseReg.isVirtual())
+ MRI.constrainRegClass(FalseReg, RC);
// Insert the csel.
BuildMI(MBB, I, DL, get(Opc), DstReg)
@@ -5063,7 +5098,7 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
bool RenamableDest,
bool RenamableSrc) const {
if (AArch64::GPR32spRegClass.contains(DestReg) &&
- (AArch64::GPR32spRegClass.contains(SrcReg) || SrcReg == AArch64::WZR)) {
+ AArch64::GPR32spRegClass.contains(SrcReg)) {
if (DestReg == AArch64::WSP || SrcReg == AArch64::WSP) {
// If either operand is WSP, expand to ADD #0.
if (Subtarget.hasZeroCycleRegMoveGPR64() &&
@@ -5088,21 +5123,14 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
.addImm(0)
.addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
}
- } else if (SrcReg == AArch64::WZR && Subtarget.hasZeroCycleZeroingGPR32()) {
- BuildMI(MBB, I, DL, get(AArch64::MOVZWi), DestReg)
- .addImm(0)
- .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
} else if (Subtarget.hasZeroCycleRegMoveGPR64() &&
!Subtarget.hasZeroCycleRegMoveGPR32()) {
// Cyclone recognizes "ORR Xd, XZR, Xm" as a zero-cycle register move.
MCRegister DestRegX = RI.getMatchingSuperReg(DestReg, AArch64::sub_32,
&AArch64::GPR64spRegClass);
assert(DestRegX.isValid() && "Destination super-reg not valid");
- MCRegister SrcRegX =
- SrcReg == AArch64::WZR
- ? AArch64::XZR
- : RI.getMatchingSuperReg(SrcReg, AArch64::sub_32,
- &AArch64::GPR64spRegClass);
+ MCRegister SrcRegX = RI.getMatchingSuperReg(SrcReg, AArch64::sub_32,
+ &AArch64::GPR64spRegClass);
assert(SrcRegX.isValid() && "Source super-reg not valid");
// This instruction is reading and writing X registers. This may upset
// the register scavenger and machine verifier, so we need to indicate
@@ -5121,6 +5149,51 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
return;
}
+ // GPR32 zeroing
+ if (AArch64::GPR32spRegClass.contains(DestReg) && SrcReg == AArch64::WZR) {
+ if (Subtarget.hasZeroCycleZeroingGPR32()) {
+ BuildMI(MBB, I, DL, get(AArch64::MOVZWi), DestReg)
+ .addImm(0)
+ .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
+ } else {
+ BuildMI(MBB, I, DL, get(AArch64::ORRWrr), DestReg)
+ .addReg(AArch64::WZR)
+ .addReg(AArch64::WZR);
+ }
+ return;
+ }
+
+ if (AArch64::GPR64spRegClass.contains(DestReg) &&
+ AArch64::GPR64spRegClass.contains(SrcReg)) {
+ if (DestReg == AArch64::SP || SrcReg == AArch64::SP) {
+ // If either operand is SP, expand to ADD #0.
+ BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc))
+ .addImm(0)
+ .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
+ } else {
+ // Otherwise, expand to ORR XZR.
+ BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestReg)
+ .addReg(AArch64::XZR)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ }
+ return;
+ }
+
+ // GPR64 zeroing
+ if (AArch64::GPR64spRegClass.contains(DestReg) && SrcReg == AArch64::XZR) {
+ if (Subtarget.hasZeroCycleZeroingGPR64()) {
+ BuildMI(MBB, I, DL, get(AArch64::MOVZXi), DestReg)
+ .addImm(0)
+ .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
+ } else {
+ BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestReg)
+ .addReg(AArch64::XZR)
+ .addReg(AArch64::XZR);
+ }
+ return;
+ }
+
// Copy a Predicate register by ORRing with itself.
if (AArch64::PPRRegClass.contains(DestReg) &&
AArch64::PPRRegClass.contains(SrcReg)) {
@@ -5205,27 +5278,6 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
return;
}
- if (AArch64::GPR64spRegClass.contains(DestReg) &&
- (AArch64::GPR64spRegClass.contains(SrcReg) || SrcReg == AArch64::XZR)) {
- if (DestReg == AArch64::SP || SrcReg == AArch64::SP) {
- // If either operand is SP, expand to ADD #0.
- BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc))
- .addImm(0)
- .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
- } else if (SrcReg == AArch64::XZR && Subtarget.hasZeroCycleZeroingGPR64()) {
- BuildMI(MBB, I, DL, get(AArch64::MOVZXi), DestReg)
- .addImm(0)
- .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
- } else {
- // Otherwise, expand to ORR XZR.
- BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestReg)
- .addReg(AArch64::XZR)
- .addReg(SrcReg, getKillRegState(KillSrc));
- }
- return;
- }
-
// Copy a DDDD register quad by copying the individual sub-registers.
if (AArch64::DDDDRegClass.contains(DestReg) &&
AArch64::DDDDRegClass.contains(SrcReg)) {
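
The new cases above let a materialized constant 1 fold into a conditional select: CSINC yields rn when the condition holds and rm + 1 otherwise, so with rm = XZR/WZR (always zero) the false side is exactly 1. A small semantic sketch:

    #include <cassert>
    #include <cstdint>

    // CSINC semantics: dst = cond ? rn : (rm + 1).
    uint64_t csinc(bool cond, uint64_t rn, uint64_t rm) {
      return cond ? rn : rm + 1;
    }
    int main() {
      const uint64_t XZR = 0;
      // select cond, %x, 1  ==>  CSINCXr %x, XZR, cond
      assert(csinc(true, 42, XZR) == 42);
      assert(csinc(false, 42, XZR) == 1);
    }
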
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsanInstrumentation.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsanInstrumentation.cpp
index 19e2a6a..93732a7 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsanInstrumentation.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsanInstrumentation.cpp
@@ -244,11 +244,8 @@ void getInterestingMemoryOperands(
// Masked store has an initial operand for the value.
unsigned OpOffset = IsWrite ? 1 : 0;
Type *Ty = IsWrite ? CI->getArgOperand(0)->getType() : CI->getType();
- MaybeAlign Alignment = Align(1);
- // Otherwise no alignment guarantees. We probably got Undef.
- if (auto *Op = dyn_cast<ConstantInt>(CI->getOperand(1 + OpOffset)))
- Alignment = Op->getMaybeAlignValue();
- Value *Mask = CI->getOperand(2 + OpOffset);
+ MaybeAlign Alignment = CI->getParamAlign(OpOffset);
+ Value *Mask = CI->getOperand(1 + OpOffset);
Interesting.emplace_back(I, OpOffset, IsWrite, Ty, Alignment, Mask);
break;
}
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.cpp b/llvm/lib/Target/AMDGPU/GCNSubtarget.cpp
index f291e37..c8bbcbb 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.cpp
@@ -169,7 +169,6 @@ GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
: // clang-format off
AMDGPUGenSubtargetInfo(TT, GPU, /*TuneCPU*/ GPU, FS),
AMDGPUSubtarget(TT),
- TargetTriple(TT),
TargetID(*this),
InstrItins(getInstrItineraryForCPU(GPU)),
InstrInfo(initializeSubtargetDependencies(TT, GPU, FS)),
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index c2e6078..a466780 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -60,7 +60,6 @@ private:
protected:
// Basic subtarget description.
- Triple TargetTriple;
AMDGPU::IsaInfo::AMDGPUTargetID TargetID;
unsigned Gen = INVALID;
InstrItineraryData InstrItins;
diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
index 0189e7b..5c39f7a 100644
--- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
@@ -1034,16 +1034,13 @@ void SIFrameLowering::emitCSRSpillStores(
StoreWWMRegisters(WWMCalleeSavedRegs);
if (FuncInfo->isWholeWaveFunction()) {
- // SI_WHOLE_WAVE_FUNC_SETUP has outlived its purpose, so we can remove
- // it now. If we have already saved some WWM CSR registers, then the EXEC is
- // already -1 and we don't need to do anything else. Otherwise, set EXEC to
- // -1 here.
+ // If we have already saved some WWM CSR registers, then the EXEC is already
+ // -1 and we don't need to do anything else. Otherwise, set EXEC to -1 here.
if (!ScratchExecCopy)
buildScratchExecCopy(LiveUnits, MF, MBB, MBBI, DL, /*IsProlog*/ true,
/*EnableInactiveLanes*/ true);
else if (WWMCalleeSavedRegs.empty())
EnableAllLanes();
- TII->getWholeWaveFunctionSetup(MF)->eraseFromParent();
} else if (ScratchExecCopy) {
// FIXME: Split block and make terminator.
BuildMI(MBB, MBBI, DL, TII->get(LMC.MovOpc), LMC.ExecReg)
@@ -1340,6 +1337,11 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
"Needed to save BP but didn't save it anywhere");
assert((HasBP || !BPSaved) && "Saved BP but didn't need it");
+
+ if (FuncInfo->isWholeWaveFunction()) {
+ // SI_WHOLE_WAVE_FUNC_SETUP has outlived its purpose.
+ TII->getWholeWaveFunctionSetup(MF)->eraseFromParent();
+ }
}
void SIFrameLowering::emitEpilogue(MachineFunction &MF,
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 27e5ee9c..74d4153 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -3481,30 +3481,6 @@ def : GCNPat<
>;
} // End True16Predicate
-let True16Predicate = UseRealTrue16Insts in {
-def : GCNPat<
- (fcanonicalize (f16 (VOP3Mods f16:$src, i32:$src_mods))),
- (V_MUL_F16_t16_e64 0, (i16 CONST.FP16_ONE), $src_mods, $src, 0/*Clamp*/, /*omod*/0, /*opsel*/0)
->;
-
-def : GCNPat<
- (fcanonicalize (f16 (fneg (VOP3Mods f16:$src, i32:$src_mods)))),
- (V_MUL_F16_t16_e64 0, (i16 CONST.FP16_NEG_ONE), $src_mods, $src, 0/*Clamp*/, /*omod*/0, /*opsel*/0)
->;
-} // End True16Predicate
-
-let True16Predicate = UseFakeTrue16Insts in {
-def : GCNPat<
- (fcanonicalize (f16 (VOP3Mods f16:$src, i32:$src_mods))),
- (V_MUL_F16_fake16_e64 0, (i32 CONST.FP16_ONE), $src_mods, $src)
->;
-
-def : GCNPat<
- (fcanonicalize (f16 (fneg (VOP3Mods f16:$src, i32:$src_mods)))),
- (V_MUL_F16_fake16_e64 0, (i32 CONST.FP16_NEG_ONE), $src_mods, $src)
->;
-} // End True16Predicate
-
def : GCNPat<
(fcanonicalize (v2f16 (VOP3PMods v2f16:$src, i32:$src_mods))),
(V_PK_MUL_F16 0, (i32 CONST.FP16_ONE), $src_mods, $src, DSTCLAMP.NONE)
diff --git a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
index 484861d..362ef14 100644
--- a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
@@ -360,11 +360,13 @@ public:
/// between memory instructions to enforce the order they become visible as
/// observed by other memory instructions executing in memory scope \p Scope.
/// \p IsCrossAddrSpaceOrdering indicates if the memory ordering is between
- /// address spaces. Returns true iff any instructions inserted.
+ /// address spaces. If \p AtomicsOnly is true, only insert waits for counters
+ /// that are used by atomic instructions.
+ /// Returns true iff any instructions inserted.
virtual bool insertWait(MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
SIAtomicAddrSpace AddrSpace, SIMemOp Op,
bool IsCrossAddrSpaceOrdering, Position Pos,
- AtomicOrdering Order) const = 0;
+ AtomicOrdering Order, bool AtomicsOnly) const = 0;
/// Inserts any necessary instructions at position \p Pos relative to
/// instruction \p MI to ensure any subsequent memory instructions of this
@@ -437,7 +439,7 @@ public:
bool insertWait(MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
SIAtomicAddrSpace AddrSpace, SIMemOp Op,
bool IsCrossAddrSpaceOrdering, Position Pos,
- AtomicOrdering Order) const override;
+ AtomicOrdering Order, bool AtomicsOnly) const override;
bool insertAcquire(MachineBasicBlock::iterator &MI,
SIAtomicScope Scope,
@@ -484,7 +486,7 @@ public:
bool insertWait(MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
SIAtomicAddrSpace AddrSpace, SIMemOp Op,
bool IsCrossAddrSpaceOrdering, Position Pos,
- AtomicOrdering Order) const override;
+ AtomicOrdering Order, bool AtomicsOnly) const override;
bool insertAcquire(MachineBasicBlock::iterator &MI,
SIAtomicScope Scope,
@@ -572,7 +574,7 @@ public:
bool insertWait(MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
SIAtomicAddrSpace AddrSpace, SIMemOp Op,
bool IsCrossAddrSpaceOrdering, Position Pos,
- AtomicOrdering Order) const override;
+ AtomicOrdering Order, bool AtomicsOnly) const override;
bool insertAcquire(MachineBasicBlock::iterator &MI,
SIAtomicScope Scope,
@@ -629,7 +631,7 @@ public:
bool insertWait(MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
SIAtomicAddrSpace AddrSpace, SIMemOp Op,
bool IsCrossAddrSpaceOrdering, Position Pos,
- AtomicOrdering Order) const override;
+ AtomicOrdering Order, bool AtomicsOnly) const override;
bool insertAcquire(MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
SIAtomicAddrSpace AddrSpace, Position Pos) const override;
@@ -1120,7 +1122,8 @@ bool SIGfx6CacheControl::enableVolatileAndOrNonTemporal(
// observable outside the program, so no need to cause a waitcnt for LDS
// address space operations.
Changed |= insertWait(MI, SIAtomicScope::SYSTEM, AddrSpace, Op, false,
- Position::AFTER, AtomicOrdering::Unordered);
+ Position::AFTER, AtomicOrdering::Unordered,
+ /*AtomicsOnly=*/false);
return Changed;
}
@@ -1140,7 +1143,8 @@ bool SIGfx6CacheControl::insertWait(MachineBasicBlock::iterator &MI,
SIAtomicScope Scope,
SIAtomicAddrSpace AddrSpace, SIMemOp Op,
bool IsCrossAddrSpaceOrdering, Position Pos,
- AtomicOrdering Order) const {
+ AtomicOrdering Order,
+ bool AtomicsOnly) const {
bool Changed = false;
MachineBasicBlock &MBB = *MI->getParent();
@@ -1294,7 +1298,8 @@ bool SIGfx6CacheControl::insertRelease(MachineBasicBlock::iterator &MI,
bool IsCrossAddrSpaceOrdering,
Position Pos) const {
return insertWait(MI, Scope, AddrSpace, SIMemOp::LOAD | SIMemOp::STORE,
- IsCrossAddrSpaceOrdering, Pos, AtomicOrdering::Release);
+ IsCrossAddrSpaceOrdering, Pos, AtomicOrdering::Release,
+ /*AtomicsOnly=*/false);
}
bool SIGfx7CacheControl::insertAcquire(MachineBasicBlock::iterator &MI,
@@ -1447,7 +1452,8 @@ bool SIGfx90ACacheControl::enableVolatileAndOrNonTemporal(
// observable outside the program, so no need to cause a waitcnt for LDS
// address space operations.
Changed |= insertWait(MI, SIAtomicScope::SYSTEM, AddrSpace, Op, false,
- Position::AFTER, AtomicOrdering::Unordered);
+ Position::AFTER, AtomicOrdering::Unordered,
+ /*AtomicsOnly=*/false);
return Changed;
}
@@ -1467,8 +1473,8 @@ bool SIGfx90ACacheControl::insertWait(MachineBasicBlock::iterator &MI,
SIAtomicScope Scope,
SIAtomicAddrSpace AddrSpace, SIMemOp Op,
bool IsCrossAddrSpaceOrdering,
- Position Pos,
- AtomicOrdering Order) const {
+ Position Pos, AtomicOrdering Order,
+ bool AtomicsOnly) const {
if (ST.isTgSplitEnabled()) {
// In threadgroup split mode the waves of a work-group can be executing on
// different CUs. Therefore need to wait for global or GDS memory operations
@@ -1488,7 +1494,8 @@ bool SIGfx90ACacheControl::insertWait(MachineBasicBlock::iterator &MI,
AddrSpace &= ~SIAtomicAddrSpace::LDS;
}
return SIGfx7CacheControl::insertWait(MI, Scope, AddrSpace, Op,
- IsCrossAddrSpaceOrdering, Pos, Order);
+ IsCrossAddrSpaceOrdering, Pos, Order,
+ AtomicsOnly);
}
bool SIGfx90ACacheControl::insertAcquire(MachineBasicBlock::iterator &MI,
@@ -1747,7 +1754,8 @@ bool SIGfx940CacheControl::enableVolatileAndOrNonTemporal(
// observable outside the program, so no need to cause a waitcnt for LDS
// address space operations.
Changed |= insertWait(MI, SIAtomicScope::SYSTEM, AddrSpace, Op, false,
- Position::AFTER, AtomicOrdering::Unordered);
+ Position::AFTER, AtomicOrdering::Unordered,
+ /*AtomicsOnly=*/false);
return Changed;
}
@@ -1904,7 +1912,8 @@ bool SIGfx940CacheControl::insertRelease(MachineBasicBlock::iterator &MI,
// Insert the S_WAITCNT required by any "BUFFER_WBL2", as well as any other
// S_WAITCNT that is needed.
Changed |= insertWait(MI, Scope, AddrSpace, SIMemOp::LOAD | SIMemOp::STORE,
- IsCrossAddrSpaceOrdering, Pos, AtomicOrdering::Release);
+ IsCrossAddrSpaceOrdering, Pos, AtomicOrdering::Release,
+ /*AtomicsOnly=*/false);
return Changed;
}
@@ -1984,7 +1993,8 @@ bool SIGfx10CacheControl::enableVolatileAndOrNonTemporal(
// observable outside the program, so no need to cause a waitcnt for LDS
// address space operations.
Changed |= insertWait(MI, SIAtomicScope::SYSTEM, AddrSpace, Op, false,
- Position::AFTER, AtomicOrdering::Unordered);
+ Position::AFTER, AtomicOrdering::Unordered,
+ /*AtomicsOnly=*/false);
return Changed;
}
@@ -2007,7 +2017,8 @@ bool SIGfx10CacheControl::insertWait(MachineBasicBlock::iterator &MI,
SIAtomicScope Scope,
SIAtomicAddrSpace AddrSpace, SIMemOp Op,
bool IsCrossAddrSpaceOrdering,
- Position Pos, AtomicOrdering Order) const {
+ Position Pos, AtomicOrdering Order,
+ bool AtomicsOnly) const {
bool Changed = false;
MachineBasicBlock &MBB = *MI->getParent();
@@ -2281,7 +2292,8 @@ bool SIGfx11CacheControl::enableVolatileAndOrNonTemporal(
// observable outside the program, so no need to cause a waitcnt for LDS
// address space operations.
Changed |= insertWait(MI, SIAtomicScope::SYSTEM, AddrSpace, Op, false,
- Position::AFTER, AtomicOrdering::Unordered);
+ Position::AFTER, AtomicOrdering::Unordered,
+ /*AtomicsOnly=*/false);
return Changed;
}
@@ -2354,7 +2366,8 @@ bool SIGfx12CacheControl::insertWait(MachineBasicBlock::iterator &MI,
SIAtomicScope Scope,
SIAtomicAddrSpace AddrSpace, SIMemOp Op,
bool IsCrossAddrSpaceOrdering,
- Position Pos, AtomicOrdering Order) const {
+ Position Pos, AtomicOrdering Order,
+ bool AtomicsOnly) const {
bool Changed = false;
MachineBasicBlock &MBB = *MI->getParent();
@@ -2444,7 +2457,7 @@ bool SIGfx12CacheControl::insertWait(MachineBasicBlock::iterator &MI,
//
// This also applies to fences. Fences cannot pair with an instruction
// tracked with bvh/samplecnt as we don't have any atomics that do that.
- if (Order != AtomicOrdering::Acquire && ST.hasImageInsts()) {
+ if (!AtomicsOnly && ST.hasImageInsts()) {
BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAIT_BVHCNT_soft)).addImm(0);
BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAIT_SAMPLECNT_soft)).addImm(0);
}
@@ -2587,7 +2600,8 @@ bool SIGfx12CacheControl::insertRelease(MachineBasicBlock::iterator &MI,
// complete, whether we inserted a WB or not. If we inserted a WB (storecnt),
// we of course need to wait for that as well.
Changed |= insertWait(MI, Scope, AddrSpace, SIMemOp::LOAD | SIMemOp::STORE,
- IsCrossAddrSpaceOrdering, Pos, AtomicOrdering::Release);
+ IsCrossAddrSpaceOrdering, Pos, AtomicOrdering::Release,
+ /*AtomicsOnly=*/false);
return Changed;
}
@@ -2624,7 +2638,8 @@ bool SIGfx12CacheControl::enableVolatileAndOrNonTemporal(
// observable outside the program, so no need to cause a waitcnt for LDS
// address space operations.
Changed |= insertWait(MI, SIAtomicScope::SYSTEM, AddrSpace, Op, false,
- Position::AFTER, AtomicOrdering::Unordered);
+ Position::AFTER, AtomicOrdering::Unordered,
+ /*AtomicsOnly=*/false);
}
return Changed;
@@ -2748,13 +2763,15 @@ bool SIMemoryLegalizer::expandLoad(const SIMemOpInfo &MOI,
Changed |= CC->insertWait(MI, MOI.getScope(), MOI.getOrderingAddrSpace(),
SIMemOp::LOAD | SIMemOp::STORE,
MOI.getIsCrossAddressSpaceOrdering(),
- Position::BEFORE, Order);
+ Position::BEFORE, Order, /*AtomicsOnly=*/false);
if (Order == AtomicOrdering::Acquire ||
Order == AtomicOrdering::SequentiallyConsistent) {
- Changed |= CC->insertWait(
- MI, MOI.getScope(), MOI.getInstrAddrSpace(), SIMemOp::LOAD,
- MOI.getIsCrossAddressSpaceOrdering(), Position::AFTER, Order);
+ // The wait below only needs to wait on the prior atomic.
+ Changed |=
+ CC->insertWait(MI, MOI.getScope(), MOI.getInstrAddrSpace(),
+ SIMemOp::LOAD, MOI.getIsCrossAddressSpaceOrdering(),
+ Position::AFTER, Order, /*AtomicsOnly=*/true);
Changed |= CC->insertAcquire(MI, MOI.getScope(),
MOI.getOrderingAddrSpace(),
Position::AFTER);
@@ -2830,9 +2847,11 @@ bool SIMemoryLegalizer::expandAtomicFence(const SIMemOpInfo &MOI,
if (MOI.isAtomic()) {
const AtomicOrdering Order = MOI.getOrdering();
if (Order == AtomicOrdering::Acquire) {
- Changed |= CC->insertWait(
- MI, MOI.getScope(), OrderingAddrSpace, SIMemOp::LOAD | SIMemOp::STORE,
- MOI.getIsCrossAddressSpaceOrdering(), Position::BEFORE, Order);
+ // Acquire fences only need to wait on the previous atomic they pair with.
+ Changed |= CC->insertWait(MI, MOI.getScope(), OrderingAddrSpace,
+ SIMemOp::LOAD | SIMemOp::STORE,
+ MOI.getIsCrossAddressSpaceOrdering(),
+ Position::BEFORE, Order, /*AtomicsOnly=*/true);
}
if (Order == AtomicOrdering::Release ||
@@ -2897,10 +2916,12 @@ bool SIMemoryLegalizer::expandAtomicCmpxchgOrRmw(const SIMemOpInfo &MOI,
Order == AtomicOrdering::SequentiallyConsistent ||
MOI.getFailureOrdering() == AtomicOrdering::Acquire ||
MOI.getFailureOrdering() == AtomicOrdering::SequentiallyConsistent) {
- Changed |= CC->insertWait(
- MI, MOI.getScope(), MOI.getInstrAddrSpace(),
- isAtomicRet(*MI) ? SIMemOp::LOAD : SIMemOp::STORE,
- MOI.getIsCrossAddressSpaceOrdering(), Position::AFTER, Order);
+ // Only wait on the previous atomic.
+ Changed |=
+ CC->insertWait(MI, MOI.getScope(), MOI.getInstrAddrSpace(),
+ isAtomicRet(*MI) ? SIMemOp::LOAD : SIMemOp::STORE,
+ MOI.getIsCrossAddressSpaceOrdering(), Position::AFTER,
+ Order, /*AtomicsOnly=*/true);
Changed |= CC->insertAcquire(MI, MOI.getScope(),
MOI.getOrderingAddrSpace(),
Position::AFTER);
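
For readers following the new AtomicsOnly flag through the hunks above, here is a minimal standalone sketch, not the pass itself, of the decision it encodes on a gfx12-style target: bvh/samplecnt only track image and sample operations, which are never atomic, so a wait that only pairs with a prior atomic can skip draining them. Counter names follow the gfx12 mnemonics in the hunk; the premise is deliberately simplified.

```cpp
// Toy model of the AtomicsOnly decision. Assumes the simplified premise that
// loadcnt/storecnt always need draining while bvhcnt/samplecnt never track
// atomic operations.
#include <cstdio>
#include <string>
#include <vector>

static std::vector<std::string> countersToWait(bool AtomicsOnly,
                                               bool HasImageInsts) {
  std::vector<std::string> Waits = {"loadcnt", "storecnt"};
  // An acquire pairing with a prior atomic never needs to drain counters that
  // only image/sample (non-atomic) operations can increment.
  if (!AtomicsOnly && HasImageInsts) {
    Waits.push_back("bvhcnt");
    Waits.push_back("samplecnt");
  }
  return Waits;
}

int main() {
  for (const std::string &C : countersToWait(/*AtomicsOnly=*/true,
                                             /*HasImageInsts=*/true))
    std::printf("s_wait_%s 0\n", C.c_str()); // only loadcnt/storecnt printed
  return 0;
}
```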
diff --git a/llvm/lib/Target/ARM/MVEGatherScatterLowering.cpp b/llvm/lib/Target/ARM/MVEGatherScatterLowering.cpp
index ce59ae0..2cd5f02 100644
--- a/llvm/lib/Target/ARM/MVEGatherScatterLowering.cpp
+++ b/llvm/lib/Target/ARM/MVEGatherScatterLowering.cpp
@@ -407,9 +407,9 @@ Instruction *MVEGatherScatterLowering::lowerGather(IntrinsicInst *I) {
// Potentially optimising the addressing modes as we do so.
auto *Ty = cast<FixedVectorType>(I->getType());
Value *Ptr = I->getArgOperand(0);
- Align Alignment = cast<ConstantInt>(I->getArgOperand(1))->getAlignValue();
- Value *Mask = I->getArgOperand(2);
- Value *PassThru = I->getArgOperand(3);
+ Align Alignment = I->getParamAlign(0).valueOrOne();
+ Value *Mask = I->getArgOperand(1);
+ Value *PassThru = I->getArgOperand(2);
if (!isLegalTypeAndAlignment(Ty->getNumElements(), Ty->getScalarSizeInBits(),
Alignment))
@@ -458,7 +458,7 @@ Instruction *MVEGatherScatterLowering::tryCreateMaskedGatherBase(
if (Ty->getNumElements() != 4 || Ty->getScalarSizeInBits() != 32)
// Can't build an intrinsic for this
return nullptr;
- Value *Mask = I->getArgOperand(2);
+ Value *Mask = I->getArgOperand(1);
if (match(Mask, m_One()))
return Builder.CreateIntrinsic(Intrinsic::arm_mve_vldr_gather_base,
{Ty, Ptr->getType()},
@@ -479,7 +479,7 @@ Instruction *MVEGatherScatterLowering::tryCreateMaskedGatherBaseWB(
if (Ty->getNumElements() != 4 || Ty->getScalarSizeInBits() != 32)
// Can't build an intrinsic for this
return nullptr;
- Value *Mask = I->getArgOperand(2);
+ Value *Mask = I->getArgOperand(1);
if (match(Mask, m_One()))
return Builder.CreateIntrinsic(Intrinsic::arm_mve_vldr_gather_base_wb,
{Ty, Ptr->getType()},
@@ -552,7 +552,7 @@ Instruction *MVEGatherScatterLowering::tryCreateMaskedGatherOffset(
return nullptr;
Root = Extend;
- Value *Mask = I->getArgOperand(2);
+ Value *Mask = I->getArgOperand(1);
Instruction *Load = nullptr;
if (!match(Mask, m_One()))
Load = Builder.CreateIntrinsic(
@@ -584,7 +584,7 @@ Instruction *MVEGatherScatterLowering::lowerScatter(IntrinsicInst *I) {
// Potentially optimising the addressing modes as we do so.
Value *Input = I->getArgOperand(0);
Value *Ptr = I->getArgOperand(1);
- Align Alignment = cast<ConstantInt>(I->getArgOperand(2))->getAlignValue();
+ Align Alignment = I->getParamAlign(1).valueOrOne();
auto *Ty = cast<FixedVectorType>(Input->getType());
if (!isLegalTypeAndAlignment(Ty->getNumElements(), Ty->getScalarSizeInBits(),
@@ -622,7 +622,7 @@ Instruction *MVEGatherScatterLowering::tryCreateMaskedScatterBase(
// Can't build an intrinsic for this
return nullptr;
}
- Value *Mask = I->getArgOperand(3);
+ Value *Mask = I->getArgOperand(2);
// int_arm_mve_vstr_scatter_base(_predicated) addr, offset, data(, mask)
LLVM_DEBUG(dbgs() << "masked scatters: storing to a vector of pointers\n");
if (match(Mask, m_One()))
@@ -646,7 +646,7 @@ Instruction *MVEGatherScatterLowering::tryCreateMaskedScatterBaseWB(
if (Ty->getNumElements() != 4 || Ty->getScalarSizeInBits() != 32)
// Can't build an intrinsic for this
return nullptr;
- Value *Mask = I->getArgOperand(3);
+ Value *Mask = I->getArgOperand(2);
if (match(Mask, m_One()))
return Builder.CreateIntrinsic(Intrinsic::arm_mve_vstr_scatter_base_wb,
{Ptr->getType(), Input->getType()},
@@ -662,7 +662,7 @@ Instruction *MVEGatherScatterLowering::tryCreateMaskedScatterOffset(
IntrinsicInst *I, Value *Ptr, IRBuilder<> &Builder) {
using namespace PatternMatch;
Value *Input = I->getArgOperand(0);
- Value *Mask = I->getArgOperand(3);
+ Value *Mask = I->getArgOperand(2);
Type *InputTy = Input->getType();
Type *MemoryTy = InputTy;
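
The MVE changes all follow from one IR-level change: llvm.masked.load/store no longer carry an explicit alignment operand, so mask and passthru shift down by one position and the alignment is read from the pointer's parameter attribute instead. A hedged sketch of the new accessor pattern, mirroring the calls used in the hunks (LLVM headers assumed available):

```cpp
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Support/Alignment.h"
#include <cassert>
using namespace llvm;

// New operand layout: masked.load(ptr, mask, passthru), with the alignment
// carried as an `align` attribute on the pointer parameter.
static void readMaskedLoadOperands(IntrinsicInst *II) {
  assert(II->getIntrinsicID() == Intrinsic::masked_load);
  Value *Ptr = II->getArgOperand(0);
  Align Alignment = II->getParamAlign(0).valueOrOne(); // was operand 1
  Value *Mask = II->getArgOperand(1);                  // was operand 2
  Value *PassThru = II->getArgOperand(2);              // was operand 3
  (void)Ptr; (void)Alignment; (void)Mask; (void)PassThru;
}
```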
diff --git a/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp b/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp
index e4c0a16..9ab5202 100644
--- a/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp
@@ -300,7 +300,6 @@ private:
const_iterator end() const { return Blocks.end(); }
};
- Align getAlignFromValue(const Value *V) const;
std::optional<AddrInfo> getAddrInfo(Instruction &In) const;
bool isHvx(const AddrInfo &AI) const;
// This function is only used for assertions at the moment.
@@ -612,12 +611,6 @@ auto AlignVectors::ByteSpan::values() const -> SmallVector<Value *, 8> {
return Values;
}
-auto AlignVectors::getAlignFromValue(const Value *V) const -> Align {
- const auto *C = dyn_cast<ConstantInt>(V);
- assert(C && "Alignment must be a compile-time constant integer");
- return C->getAlignValue();
-}
-
auto AlignVectors::getAddrInfo(Instruction &In) const
-> std::optional<AddrInfo> {
if (auto *L = isCandidate<LoadInst>(&In))
@@ -631,11 +624,11 @@ auto AlignVectors::getAddrInfo(Instruction &In) const
switch (ID) {
case Intrinsic::masked_load:
return AddrInfo(HVC, II, II->getArgOperand(0), II->getType(),
- getAlignFromValue(II->getArgOperand(1)));
+ II->getParamAlign(0).valueOrOne());
case Intrinsic::masked_store:
return AddrInfo(HVC, II, II->getArgOperand(1),
II->getArgOperand(0)->getType(),
- getAlignFromValue(II->getArgOperand(2)));
+ II->getParamAlign(1).valueOrOne());
}
}
return std::nullopt;
@@ -660,9 +653,9 @@ auto AlignVectors::getMask(Value *Val) const -> Value * {
if (auto *II = dyn_cast<IntrinsicInst>(Val)) {
switch (II->getIntrinsicID()) {
case Intrinsic::masked_load:
- return II->getArgOperand(2);
+ return II->getArgOperand(1);
case Intrinsic::masked_store:
- return II->getArgOperand(3);
+ return II->getArgOperand(2);
}
}
@@ -675,7 +668,7 @@ auto AlignVectors::getMask(Value *Val) const -> Value * {
auto AlignVectors::getPassThrough(Value *Val) const -> Value * {
if (auto *II = dyn_cast<IntrinsicInst>(Val)) {
if (II->getIntrinsicID() == Intrinsic::masked_load)
- return II->getArgOperand(3);
+ return II->getArgOperand(2);
}
return UndefValue::get(getPayload(Val)->getType());
}
diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
index 5143d53..613dea6 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
@@ -2025,10 +2025,10 @@ def : Pat<(v4i32(fp_to_uint v4f64:$vj)),
sub_128)>;
// abs
-def : Pat<(abs v32i8:$xj), (XVMAX_B v32i8:$xj, (XVNEG_B v32i8:$xj))>;
-def : Pat<(abs v16i16:$xj), (XVMAX_H v16i16:$xj, (XVNEG_H v16i16:$xj))>;
-def : Pat<(abs v8i32:$xj), (XVMAX_W v8i32:$xj, (XVNEG_W v8i32:$xj))>;
-def : Pat<(abs v4i64:$xj), (XVMAX_D v4i64:$xj, (XVNEG_D v4i64:$xj))>;
+def : Pat<(abs v32i8:$xj), (XVSIGNCOV_B v32i8:$xj, v32i8:$xj)>;
+def : Pat<(abs v16i16:$xj), (XVSIGNCOV_H v16i16:$xj, v16i16:$xj)>;
+def : Pat<(abs v8i32:$xj), (XVSIGNCOV_W v8i32:$xj, v8i32:$xj)>;
+def : Pat<(abs v4i64:$xj), (XVSIGNCOV_D v4i64:$xj, v4i64:$xj)>;
// XVABSD_{B/H/W/D}[U]
defm : PatXrXr<abds, "XVABSD">;
diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
index 8d1dc99..4619c6b 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
@@ -2155,10 +2155,10 @@ def : Pat<(f64 f64imm_vldi:$in),
(f64 (EXTRACT_SUBREG (VLDI (to_f64imm_vldi f64imm_vldi:$in)), sub_64))>;
// abs
-def : Pat<(abs v16i8:$vj), (VMAX_B v16i8:$vj, (VNEG_B v16i8:$vj))>;
-def : Pat<(abs v8i16:$vj), (VMAX_H v8i16:$vj, (VNEG_H v8i16:$vj))>;
-def : Pat<(abs v4i32:$vj), (VMAX_W v4i32:$vj, (VNEG_W v4i32:$vj))>;
-def : Pat<(abs v2i64:$vj), (VMAX_D v2i64:$vj, (VNEG_D v2i64:$vj))>;
+def : Pat<(abs v16i8:$vj), (VSIGNCOV_B v16i8:$vj, v16i8:$vj)>;
+def : Pat<(abs v8i16:$vj), (VSIGNCOV_H v8i16:$vj, v8i16:$vj)>;
+def : Pat<(abs v4i32:$vj), (VSIGNCOV_W v4i32:$vj, v4i32:$vj)>;
+def : Pat<(abs v2i64:$vj), (VSIGNCOV_D v2i64:$vj, v2i64:$vj)>;
// VABSD_{B/H/W/D}[U]
defm : PatVrVr<abds, "VABSD">;
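
Both LoongArch files make the same substitution, and it helps to spell out the [X]VSIGNCOV semantics being relied on: signcov(a, b) yields b when a > 0, -b when a < 0, and 0 when a == 0, so with both operands equal it computes |x| in a single instruction instead of the previous max(x, neg(x)) pair. A scalar toy model of one lane (element width chosen arbitrarily):

```cpp
#include <cstdint>
#include <cstdio>

// Scalar model of one [X]VSIGNCOV lane: copy b's magnitude with a's sign.
static int8_t signcov(int8_t a, int8_t b) {
  if (a > 0)
    return b;
  if (a < 0)
    return static_cast<int8_t>(-b); // wraps for INT8_MIN, matching abs
  return 0;
}

int main() {
  std::printf("%d %d %d\n", signcov(-7, -7), signcov(7, 7), signcov(0, 0));
  return 0; // prints: 7 7 0
}
```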
diff --git a/llvm/lib/Target/RISCV/MCA/RISCVCustomBehaviour.cpp b/llvm/lib/Target/RISCV/MCA/RISCVCustomBehaviour.cpp
index ab93bba..b00589a 100644
--- a/llvm/lib/Target/RISCV/MCA/RISCVCustomBehaviour.cpp
+++ b/llvm/lib/Target/RISCV/MCA/RISCVCustomBehaviour.cpp
@@ -68,7 +68,7 @@ const llvm::StringRef RISCVSEWInstrument::DESC_NAME = "RISCV-SEW";
bool RISCVSEWInstrument::isDataValid(llvm::StringRef Data) {
// Return true if Data is one of the valid SEW strings
return StringSwitch<bool>(Data)
- .Cases("E8", "E16", "E32", "E64", true)
+ .Cases({"E8", "E16", "E32", "E64"}, true)
.Default(false);
}
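
This hunk is part of a wider migration to the initializer-list overload of StringSwitch::Cases, which takes one list of keys mapping to one value instead of a variadic run of literals. A minimal usage sketch, assuming LLVM's ADT headers:

```cpp
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"

// Initializer-list form, replacing the variadic Cases("E8", ..., true)
// spelling deprecated by this migration.
static bool isValidSEW(llvm::StringRef Data) {
  return llvm::StringSwitch<bool>(Data)
      .Cases({"E8", "E16", "E32", "E64"}, true)
      .Default(false);
}
```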
diff --git a/llvm/lib/Target/RISCV/RISCVGatherScatterLowering.cpp b/llvm/lib/Target/RISCV/RISCVGatherScatterLowering.cpp
index 52dc53e..25b5af8 100644
--- a/llvm/lib/Target/RISCV/RISCVGatherScatterLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVGatherScatterLowering.cpp
@@ -495,18 +495,19 @@ RISCVGatherScatterLowering::determineBaseAndStride(Instruction *Ptr,
bool RISCVGatherScatterLowering::tryCreateStridedLoadStore(IntrinsicInst *II) {
VectorType *DataType;
Value *StoreVal = nullptr, *Ptr, *Mask, *EVL = nullptr;
- MaybeAlign MA;
+ Align Alignment;
switch (II->getIntrinsicID()) {
case Intrinsic::masked_gather:
DataType = cast<VectorType>(II->getType());
Ptr = II->getArgOperand(0);
- MA = cast<ConstantInt>(II->getArgOperand(1))->getMaybeAlignValue();
- Mask = II->getArgOperand(2);
+ Alignment = II->getParamAlign(0).valueOrOne();
+ Mask = II->getArgOperand(1);
break;
case Intrinsic::vp_gather:
DataType = cast<VectorType>(II->getType());
Ptr = II->getArgOperand(0);
- MA = II->getParamAlign(0).value_or(
+ // FIXME: Falling back to ABI alignment is incorrect.
+ Alignment = II->getParamAlign(0).value_or(
DL->getABITypeAlign(DataType->getElementType()));
Mask = II->getArgOperand(1);
EVL = II->getArgOperand(2);
@@ -515,14 +516,15 @@ bool RISCVGatherScatterLowering::tryCreateStridedLoadStore(IntrinsicInst *II) {
DataType = cast<VectorType>(II->getArgOperand(0)->getType());
StoreVal = II->getArgOperand(0);
Ptr = II->getArgOperand(1);
- MA = cast<ConstantInt>(II->getArgOperand(2))->getMaybeAlignValue();
- Mask = II->getArgOperand(3);
+ Alignment = II->getParamAlign(1).valueOrOne();
+ Mask = II->getArgOperand(2);
break;
case Intrinsic::vp_scatter:
DataType = cast<VectorType>(II->getArgOperand(0)->getType());
StoreVal = II->getArgOperand(0);
Ptr = II->getArgOperand(1);
- MA = II->getParamAlign(1).value_or(
+ // FIXME: Falling back to ABI alignment is incorrect.
+ Alignment = II->getParamAlign(1).value_or(
DL->getABITypeAlign(DataType->getElementType()));
Mask = II->getArgOperand(2);
EVL = II->getArgOperand(3);
@@ -533,7 +535,7 @@ bool RISCVGatherScatterLowering::tryCreateStridedLoadStore(IntrinsicInst *II) {
// Make sure the operation will be supported by the backend.
EVT DataTypeVT = TLI->getValueType(*DL, DataType);
- if (!MA || !TLI->isLegalStridedLoadStore(DataTypeVT, *MA))
+ if (!TLI->isLegalStridedLoadStore(DataTypeVT, Alignment))
return false;
// FIXME: Let the backend type legalize by splitting/widening?
@@ -571,7 +573,7 @@ bool RISCVGatherScatterLowering::tryCreateStridedLoadStore(IntrinsicInst *II) {
// Merge llvm.masked.gather's passthru
if (II->getIntrinsicID() == Intrinsic::masked_gather)
- Call = Builder.CreateSelect(Mask, Call, II->getArgOperand(3));
+ Call = Builder.CreateSelect(Mask, Call, II->getArgOperand(2));
} else
Call = Builder.CreateIntrinsic(
Intrinsic::experimental_vp_strided_store,
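
The strided lowering now distinguishes two defaulting policies: the masked.* intrinsics treat a missing align attribute as alignment 1 (valueOrOne), while the vp.* paths keep the ABI-alignment fallback that the new FIXMEs flag as dubious. A small sketch of the distinction; the helper name is illustrative, not part of the patch:

```cpp
#include "llvm/Support/Alignment.h"
using namespace llvm;

// Illustrative only: pick the fallback used when the pointer parameter
// carries no align attribute.
static Align pickAlignment(MaybeAlign FromAttr, Align ABIAlign, bool IsVP) {
  // masked.*: no attribute means no guarantee, so assume 1.
  // vp.*: the patch falls back to ABI alignment (flagged as a FIXME).
  return IsVP ? FromAttr.value_or(ABIAlign) : FromAttr.valueOrOne();
}
```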
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZvfbf.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZvfbf.td
index 9358486..f7d1a09 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZvfbf.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZvfbf.td
@@ -438,130 +438,6 @@ let Predicates = [HasStdExtZvfbfmin] in {
FRM_DYN,
fvti.AVL, fvti.Log2SEW, TA_MA)>;
}
-
- defm : VPatUnaryV_V_AnyMask<"int_riscv_vcompress", "PseudoVCOMPRESS", AllBF16Vectors>;
- defm : VPatBinaryV_VV_VX_VI_INT<"int_riscv_vrgather", "PseudoVRGATHER",
- AllBF16Vectors, uimm5>;
- defm : VPatBinaryV_VV_INT_EEW<"int_riscv_vrgatherei16_vv", "PseudoVRGATHEREI16",
- eew=16, vtilist=AllBF16Vectors>;
- defm : VPatTernaryV_VX_VI<"int_riscv_vslideup", "PseudoVSLIDEUP", AllBF16Vectors, uimm5>;
- defm : VPatTernaryV_VX_VI<"int_riscv_vslidedown", "PseudoVSLIDEDOWN", AllBF16Vectors, uimm5>;
-
- foreach fvti = AllBF16Vectors in {
- defm : VPatBinaryCarryInTAIL<"int_riscv_vmerge", "PseudoVMERGE", "VVM",
- fvti.Vector,
- fvti.Vector, fvti.Vector, fvti.Mask,
- fvti.Log2SEW, fvti.LMul, fvti.RegClass,
- fvti.RegClass, fvti.RegClass>;
- defm : VPatBinaryCarryInTAIL<"int_riscv_vfmerge", "PseudoVFMERGE",
- "V"#fvti.ScalarSuffix#"M",
- fvti.Vector,
- fvti.Vector, fvti.Scalar, fvti.Mask,
- fvti.Log2SEW, fvti.LMul, fvti.RegClass,
- fvti.RegClass, fvti.ScalarRegClass>;
- defvar instr = !cast<Instruction>("PseudoVMERGE_VIM_"#fvti.LMul.MX);
- def : Pat<(fvti.Vector (int_riscv_vfmerge (fvti.Vector fvti.RegClass:$passthru),
- (fvti.Vector fvti.RegClass:$rs2),
- (fvti.Scalar (fpimm0)),
- (fvti.Mask VMV0:$vm), VLOpFrag)),
- (instr fvti.RegClass:$passthru, fvti.RegClass:$rs2, 0,
- (fvti.Mask VMV0:$vm), GPR:$vl, fvti.Log2SEW)>;
-
- defvar ivti = GetIntVTypeInfo<fvti>.Vti;
- def : Pat<(fvti.Vector (vselect (fvti.Mask VMV0:$vm), fvti.RegClass:$rs1,
- fvti.RegClass:$rs2)),
- (!cast<Instruction>("PseudoVMERGE_VVM_"#fvti.LMul.MX)
- (fvti.Vector (IMPLICIT_DEF)),
- fvti.RegClass:$rs2, fvti.RegClass:$rs1, (fvti.Mask VMV0:$vm),
- fvti.AVL, fvti.Log2SEW)>;
-
- def : Pat<(fvti.Vector (vselect (fvti.Mask VMV0:$vm),
- (SplatFPOp (SelectScalarFPAsInt (XLenVT GPR:$imm))),
- fvti.RegClass:$rs2)),
- (!cast<Instruction>("PseudoVMERGE_VXM_"#fvti.LMul.MX)
- (fvti.Vector (IMPLICIT_DEF)),
- fvti.RegClass:$rs2, GPR:$imm, (fvti.Mask VMV0:$vm), fvti.AVL, fvti.Log2SEW)>;
-
- def : Pat<(fvti.Vector (vselect (fvti.Mask VMV0:$vm),
- (SplatFPOp (fvti.Scalar fpimm0)),
- fvti.RegClass:$rs2)),
- (!cast<Instruction>("PseudoVMERGE_VIM_"#fvti.LMul.MX)
- (fvti.Vector (IMPLICIT_DEF)),
- fvti.RegClass:$rs2, 0, (fvti.Mask VMV0:$vm), fvti.AVL, fvti.Log2SEW)>;
-
- def : Pat<(fvti.Vector (vselect (fvti.Mask VMV0:$vm),
- (SplatFPOp fvti.ScalarRegClass:$rs1),
- fvti.RegClass:$rs2)),
- (!cast<Instruction>("PseudoVFMERGE_V"#fvti.ScalarSuffix#"M_"#fvti.LMul.MX)
- (fvti.Vector (IMPLICIT_DEF)),
- fvti.RegClass:$rs2,
- (fvti.Scalar fvti.ScalarRegClass:$rs1),
- (fvti.Mask VMV0:$vm), fvti.AVL, fvti.Log2SEW)>;
-
- def : Pat<(fvti.Vector (riscv_vmerge_vl (fvti.Mask VMV0:$vm),
- fvti.RegClass:$rs1,
- fvti.RegClass:$rs2,
- fvti.RegClass:$passthru,
- VLOpFrag)),
- (!cast<Instruction>("PseudoVMERGE_VVM_"#fvti.LMul.MX)
- fvti.RegClass:$passthru, fvti.RegClass:$rs2, fvti.RegClass:$rs1, (fvti.Mask VMV0:$vm),
- GPR:$vl, fvti.Log2SEW)>;
-
- def : Pat<(fvti.Vector (riscv_vmerge_vl (fvti.Mask VMV0:$vm),
- (SplatFPOp (SelectScalarFPAsInt (XLenVT GPR:$imm))),
- fvti.RegClass:$rs2,
- fvti.RegClass:$passthru,
- VLOpFrag)),
- (!cast<Instruction>("PseudoVMERGE_VXM_"#fvti.LMul.MX)
- fvti.RegClass:$passthru, fvti.RegClass:$rs2, GPR:$imm, (fvti.Mask VMV0:$vm),
- GPR:$vl, fvti.Log2SEW)>;
-
-
- def : Pat<(fvti.Vector (riscv_vmerge_vl (fvti.Mask VMV0:$vm),
- (SplatFPOp (fvti.Scalar fpimm0)),
- fvti.RegClass:$rs2,
- fvti.RegClass:$passthru,
- VLOpFrag)),
- (!cast<Instruction>("PseudoVMERGE_VIM_"#fvti.LMul.MX)
- fvti.RegClass:$passthru, fvti.RegClass:$rs2, 0, (fvti.Mask VMV0:$vm),
- GPR:$vl, fvti.Log2SEW)>;
-
- def : Pat<(fvti.Vector (riscv_vmerge_vl (fvti.Mask VMV0:$vm),
- (SplatFPOp fvti.ScalarRegClass:$rs1),
- fvti.RegClass:$rs2,
- fvti.RegClass:$passthru,
- VLOpFrag)),
- (!cast<Instruction>("PseudoVFMERGE_V"#fvti.ScalarSuffix#"M_"#fvti.LMul.MX)
- fvti.RegClass:$passthru, fvti.RegClass:$rs2,
- (fvti.Scalar fvti.ScalarRegClass:$rs1),
- (fvti.Mask VMV0:$vm), GPR:$vl, fvti.Log2SEW)>;
-
- def : Pat<(fvti.Vector
- (riscv_vrgather_vv_vl fvti.RegClass:$rs2,
- (ivti.Vector fvti.RegClass:$rs1),
- fvti.RegClass:$passthru,
- (fvti.Mask VMV0:$vm),
- VLOpFrag)),
- (!cast<Instruction>("PseudoVRGATHER_VV_"# fvti.LMul.MX#"_E"# fvti.SEW#"_MASK")
- fvti.RegClass:$passthru, fvti.RegClass:$rs2, fvti.RegClass:$rs1,
- (fvti.Mask VMV0:$vm), GPR:$vl, fvti.Log2SEW, TAIL_AGNOSTIC)>;
- def : Pat<(fvti.Vector (riscv_vrgather_vx_vl fvti.RegClass:$rs2, GPR:$rs1,
- fvti.RegClass:$passthru,
- (fvti.Mask VMV0:$vm),
- VLOpFrag)),
- (!cast<Instruction>("PseudoVRGATHER_VX_"# fvti.LMul.MX#"_MASK")
- fvti.RegClass:$passthru, fvti.RegClass:$rs2, GPR:$rs1,
- (fvti.Mask VMV0:$vm), GPR:$vl, fvti.Log2SEW, TAIL_AGNOSTIC)>;
- def : Pat<(fvti.Vector
- (riscv_vrgather_vx_vl fvti.RegClass:$rs2,
- uimm5:$imm,
- fvti.RegClass:$passthru,
- (fvti.Mask VMV0:$vm),
- VLOpFrag)),
- (!cast<Instruction>("PseudoVRGATHER_VI_"# fvti.LMul.MX#"_MASK")
- fvti.RegClass:$passthru, fvti.RegClass:$rs2, uimm5:$imm,
- (fvti.Mask VMV0:$vm), GPR:$vl, fvti.Log2SEW, TAIL_AGNOSTIC)>;
- }
}
let Predicates = [HasStdExtZvfbfwma] in {
diff --git a/llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp b/llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp
index 5e10631..528bbdf 100644
--- a/llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp
@@ -169,9 +169,9 @@ static bool getMemOperands(unsigned Factor, VectorType *VTy, Type *XLenTy,
}
case Intrinsic::masked_load: {
Ptr = II->getOperand(0);
- Alignment = cast<ConstantInt>(II->getArgOperand(1))->getAlignValue();
+ Alignment = II->getParamAlign(0).valueOrOne();
- if (!isa<UndefValue>(II->getOperand(3)))
+ if (!isa<UndefValue>(II->getOperand(2)))
return false;
assert(Mask && "masked.load needs a mask!");
@@ -183,7 +183,7 @@ static bool getMemOperands(unsigned Factor, VectorType *VTy, Type *XLenTy,
}
case Intrinsic::masked_store: {
Ptr = II->getOperand(1);
- Alignment = cast<ConstantInt>(II->getArgOperand(2))->getAlignValue();
+ Alignment = II->getParamAlign(1).valueOrOne();
assert(Mask && "masked.store needs a mask!");
diff --git a/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp b/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp
index dbe8e18..d91923b 100644
--- a/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp
@@ -507,7 +507,9 @@ static Register buildLoadInst(SPIRVType *BaseType, Register PtrRegister,
static Register buildBuiltinVariableLoad(
MachineIRBuilder &MIRBuilder, SPIRVType *VariableType,
SPIRVGlobalRegistry *GR, SPIRV::BuiltIn::BuiltIn BuiltinValue, LLT LLType,
- Register Reg = Register(0), bool isConst = true, bool hasLinkageTy = true) {
+ Register Reg = Register(0), bool isConst = true,
+ const std::optional<SPIRV::LinkageType::LinkageType> &LinkageTy = {
+ SPIRV::LinkageType::Import}) {
Register NewRegister =
MIRBuilder.getMRI()->createVirtualRegister(&SPIRV::pIDRegClass);
MIRBuilder.getMRI()->setType(
@@ -521,9 +523,8 @@ static Register buildBuiltinVariableLoad(
// Set up the global OpVariable with the necessary builtin decorations.
Register Variable = GR->buildGlobalVariable(
NewRegister, PtrType, getLinkStringForBuiltIn(BuiltinValue), nullptr,
- SPIRV::StorageClass::Input, nullptr, /* isConst= */ isConst,
- /* HasLinkageTy */ hasLinkageTy, SPIRV::LinkageType::Import, MIRBuilder,
- false);
+ SPIRV::StorageClass::Input, nullptr, /* isConst= */ isConst, LinkageTy,
+ MIRBuilder, false);
// Load the value from the global variable.
Register LoadedRegister =
@@ -1851,7 +1852,7 @@ static bool generateWaveInst(const SPIRV::IncomingCall *Call,
return buildBuiltinVariableLoad(
MIRBuilder, Call->ReturnType, GR, Value, LLType, Call->ReturnRegister,
- /* isConst= */ false, /* hasLinkageTy= */ false);
+ /* isConst= */ false, /* LinkageType= */ std::nullopt);
}
// We expect a builtin
diff --git a/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp b/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp
index 1a7c02c..9e11c3a 100644
--- a/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp
@@ -479,19 +479,9 @@ bool SPIRVCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
.addImm(static_cast<uint32_t>(getExecutionModel(*ST, F)))
.addUse(FuncVReg);
addStringImm(F.getName(), MIB);
- } else if (F.getLinkage() != GlobalValue::InternalLinkage &&
- F.getLinkage() != GlobalValue::PrivateLinkage &&
- F.getVisibility() != GlobalValue::HiddenVisibility) {
- SPIRV::LinkageType::LinkageType LnkTy =
- F.isDeclaration()
- ? SPIRV::LinkageType::Import
- : (F.getLinkage() == GlobalValue::LinkOnceODRLinkage &&
- ST->canUseExtension(
- SPIRV::Extension::SPV_KHR_linkonce_odr)
- ? SPIRV::LinkageType::LinkOnceODR
- : SPIRV::LinkageType::Export);
+ } else if (const auto LnkTy = getSpirvLinkageTypeFor(*ST, F)) {
buildOpDecorate(FuncVReg, MIRBuilder, SPIRV::Decoration::LinkageAttributes,
- {static_cast<uint32_t>(LnkTy)}, F.getName());
+ {static_cast<uint32_t>(*LnkTy)}, F.getName());
}
// Handle function pointers decoration
diff --git a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp
index 6fd1c7e..6181abb 100644
--- a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp
@@ -712,9 +712,9 @@ SPIRVGlobalRegistry::buildConstantSampler(Register ResReg, unsigned AddrMode,
Register SPIRVGlobalRegistry::buildGlobalVariable(
Register ResVReg, SPIRVType *BaseType, StringRef Name,
const GlobalValue *GV, SPIRV::StorageClass::StorageClass Storage,
- const MachineInstr *Init, bool IsConst, bool HasLinkageTy,
- SPIRV::LinkageType::LinkageType LinkageType, MachineIRBuilder &MIRBuilder,
- bool IsInstSelector) {
+ const MachineInstr *Init, bool IsConst,
+ const std::optional<SPIRV::LinkageType::LinkageType> &LinkageType,
+ MachineIRBuilder &MIRBuilder, bool IsInstSelector) {
const GlobalVariable *GVar = nullptr;
if (GV) {
GVar = cast<const GlobalVariable>(GV);
@@ -792,9 +792,9 @@ Register SPIRVGlobalRegistry::buildGlobalVariable(
buildOpDecorate(Reg, MIRBuilder, SPIRV::Decoration::Alignment, {Alignment});
}
- if (HasLinkageTy)
+ if (LinkageType)
buildOpDecorate(Reg, MIRBuilder, SPIRV::Decoration::LinkageAttributes,
- {static_cast<uint32_t>(LinkageType)}, Name);
+ {static_cast<uint32_t>(*LinkageType)}, Name);
SPIRV::BuiltIn::BuiltIn BuiltInId;
if (getSpirvBuiltInIdByName(Name, BuiltInId))
@@ -821,8 +821,8 @@ Register SPIRVGlobalRegistry::getOrCreateGlobalVariableWithBinding(
MIRBuilder.getMRI()->createVirtualRegister(&SPIRV::iIDRegClass);
buildGlobalVariable(VarReg, VarType, Name, nullptr,
- getPointerStorageClass(VarType), nullptr, false, false,
- SPIRV::LinkageType::Import, MIRBuilder, false);
+ getPointerStorageClass(VarType), nullptr, false,
+ std::nullopt, MIRBuilder, false);
buildOpDecorate(VarReg, MIRBuilder, SPIRV::Decoration::DescriptorSet, {Set});
buildOpDecorate(VarReg, MIRBuilder, SPIRV::Decoration::Binding, {Binding});
diff --git a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h
index a648def..c230e62 100644
--- a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h
+++ b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h
@@ -548,14 +548,12 @@ public:
MachineIRBuilder &MIRBuilder);
Register getOrCreateUndef(MachineInstr &I, SPIRVType *SpvType,
const SPIRVInstrInfo &TII);
- Register buildGlobalVariable(Register Reg, SPIRVType *BaseType,
- StringRef Name, const GlobalValue *GV,
- SPIRV::StorageClass::StorageClass Storage,
- const MachineInstr *Init, bool IsConst,
- bool HasLinkageTy,
- SPIRV::LinkageType::LinkageType LinkageType,
- MachineIRBuilder &MIRBuilder,
- bool IsInstSelector);
+ Register buildGlobalVariable(
+ Register Reg, SPIRVType *BaseType, StringRef Name, const GlobalValue *GV,
+ SPIRV::StorageClass::StorageClass Storage, const MachineInstr *Init,
+ bool IsConst,
+ const std::optional<SPIRV::LinkageType::LinkageType> &LinkageType,
+ MachineIRBuilder &MIRBuilder, bool IsInstSelector);
Register getOrCreateGlobalVariableWithBinding(const SPIRVType *VarType,
uint32_t Set, uint32_t Binding,
StringRef Name,
diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
index a0cff4d..5591d9f 100644
--- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
@@ -4350,15 +4350,8 @@ bool SPIRVInstructionSelector::selectGlobalValue(
if (hasInitializer(GlobalVar) && !Init)
return true;
- bool HasLnkTy = !GV->hasInternalLinkage() && !GV->hasPrivateLinkage() &&
- !GV->hasHiddenVisibility();
- SPIRV::LinkageType::LinkageType LnkType =
- GV->isDeclarationForLinker()
- ? SPIRV::LinkageType::Import
- : (GV->hasLinkOnceODRLinkage() &&
- STI.canUseExtension(SPIRV::Extension::SPV_KHR_linkonce_odr)
- ? SPIRV::LinkageType::LinkOnceODR
- : SPIRV::LinkageType::Export);
+ const std::optional<SPIRV::LinkageType::LinkageType> LnkType =
+ getSpirvLinkageTypeFor(STI, *GV);
const unsigned AddrSpace = GV->getAddressSpace();
SPIRV::StorageClass::StorageClass StorageClass =
@@ -4366,7 +4359,7 @@ bool SPIRVInstructionSelector::selectGlobalValue(
SPIRVType *ResType = GR.getOrCreateSPIRVPointerType(GVType, I, StorageClass);
Register Reg = GR.buildGlobalVariable(
ResVReg, ResType, GlobalIdent, GV, StorageClass, Init,
- GlobalVar->isConstant(), HasLnkTy, LnkType, MIRBuilder, true);
+ GlobalVar->isConstant(), LnkType, MIRBuilder, true);
return Reg.isValid();
}
@@ -4517,8 +4510,8 @@ bool SPIRVInstructionSelector::loadVec3BuiltinInputID(
// builtin variable.
Register Variable = GR.buildGlobalVariable(
NewRegister, PtrType, getLinkStringForBuiltIn(BuiltInValue), nullptr,
- SPIRV::StorageClass::Input, nullptr, true, false,
- SPIRV::LinkageType::Import, MIRBuilder, false);
+ SPIRV::StorageClass::Input, nullptr, true, std::nullopt, MIRBuilder,
+ false);
// Create new register for loading value.
MachineRegisterInfo *MRI = MIRBuilder.getMRI();
@@ -4570,8 +4563,8 @@ bool SPIRVInstructionSelector::loadBuiltinInputID(
// builtin variable.
Register Variable = GR.buildGlobalVariable(
NewRegister, PtrType, getLinkStringForBuiltIn(BuiltInValue), nullptr,
- SPIRV::StorageClass::Input, nullptr, true, false,
- SPIRV::LinkageType::Import, MIRBuilder, false);
+ SPIRV::StorageClass::Input, nullptr, true, std::nullopt, MIRBuilder,
+ false);
// Load uint value from the global variable.
auto MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(SPIRV::OpLoad))
diff --git a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp
index 61a0bbe..f7cdfcb 100644
--- a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp
@@ -547,9 +547,9 @@ void SPIRVModuleAnalysis::collectFuncNames(MachineInstr &MI,
if (MI.getOpcode() == SPIRV::OpDecorate) {
// If it's got Import linkage.
auto Dec = MI.getOperand(1).getImm();
- if (Dec == static_cast<unsigned>(SPIRV::Decoration::LinkageAttributes)) {
+ if (Dec == SPIRV::Decoration::LinkageAttributes) {
auto Lnk = MI.getOperand(MI.getNumOperands() - 1).getImm();
- if (Lnk == static_cast<unsigned>(SPIRV::LinkageType::Import)) {
+ if (Lnk == SPIRV::LinkageType::Import) {
// Map imported function name to function ID register.
const Function *ImportedFunc =
F->getParent()->getFunction(getStringImm(MI, 2));
@@ -635,7 +635,7 @@ static void collectOtherInstr(MachineInstr &MI, SPIRV::ModuleAnalysisInfo &MAI,
void SPIRVModuleAnalysis::processOtherInstrs(const Module &M) {
InstrTraces IS;
for (auto F = M.begin(), E = M.end(); F != E; ++F) {
- if ((*F).isDeclaration())
+ if (F->isDeclaration())
continue;
MachineFunction *MF = MMI->getMachineFunction(*F);
assert(MF);
diff --git a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.h b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.h
index d8376cd..2d19f6de 100644
--- a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.h
+++ b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.h
@@ -169,9 +169,7 @@ struct ModuleAnalysisInfo {
MCRegister getFuncReg(const Function *F) {
assert(F && "Function is null");
- auto FuncPtrRegPair = FuncMap.find(F);
- return FuncPtrRegPair == FuncMap.end() ? MCRegister()
- : FuncPtrRegPair->second;
+ return FuncMap.lookup(F);
}
MCRegister getExtInstSetReg(unsigned SetNum) { return ExtInstSetMap[SetNum]; }
InstrList &getMSInstrs(unsigned MSType) { return MS[MSType]; }
diff --git a/llvm/lib/Target/SPIRV/SPIRVUtils.cpp b/llvm/lib/Target/SPIRV/SPIRVUtils.cpp
index 1d47c89..4e2cc88 100644
--- a/llvm/lib/Target/SPIRV/SPIRVUtils.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVUtils.cpp
@@ -1040,4 +1040,19 @@ getFirstValidInstructionInsertPoint(MachineBasicBlock &BB) {
: VarPos;
}
+std::optional<SPIRV::LinkageType::LinkageType>
+getSpirvLinkageTypeFor(const SPIRVSubtarget &ST, const GlobalValue &GV) {
+ if (GV.hasLocalLinkage() || GV.hasHiddenVisibility())
+ return std::nullopt;
+
+ if (GV.isDeclarationForLinker())
+ return SPIRV::LinkageType::Import;
+
+ if (GV.hasLinkOnceODRLinkage() &&
+ ST.canUseExtension(SPIRV::Extension::SPV_KHR_linkonce_odr))
+ return SPIRV::LinkageType::LinkOnceODR;
+
+ return SPIRV::LinkageType::Export;
+}
+
} // namespace llvm
diff --git a/llvm/lib/Target/SPIRV/SPIRVUtils.h b/llvm/lib/Target/SPIRV/SPIRVUtils.h
index 5777a24..99d9d40 100644
--- a/llvm/lib/Target/SPIRV/SPIRVUtils.h
+++ b/llvm/lib/Target/SPIRV/SPIRVUtils.h
@@ -559,5 +559,8 @@ unsigned getArrayComponentCount(const MachineRegisterInfo *MRI,
const MachineInstr *ResType);
MachineBasicBlock::iterator
getFirstValidInstructionInsertPoint(MachineBasicBlock &BB);
+
+std::optional<SPIRV::LinkageType::LinkageType>
+getSpirvLinkageTypeFor(const SPIRVSubtarget &ST, const GlobalValue &GV);
} // namespace llvm
#endif // LLVM_LIB_TARGET_SPIRV_SPIRVUTILS_H
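
With getSpirvLinkageTypeFor returning std::optional, the old (HasLinkageTy, LinkageType) pair collapses into one value whose presence is the flag. A hedged sketch of the resulting call-site shape, mirroring the call-lowering hunk above rather than a complete function:

```cpp
// Sketch: decorate only when a linkage type applies; std::nullopt now means
// what the old HasLinkageTy=false meant.
if (std::optional<SPIRV::LinkageType::LinkageType> LnkTy =
        getSpirvLinkageTypeFor(ST, GV))
  buildOpDecorate(Reg, MIRBuilder, SPIRV::Decoration::LinkageAttributes,
                  {static_cast<uint32_t>(*LnkTy)}, GV.getName());
```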
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
index ed54404d..7840620 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
@@ -1583,11 +1583,9 @@ def : Pat<(v4i32 (partial_reduce_umla (v4i32 V128:$acc), (v8i16 V128:$lhs),
// MLA: v16i8 -> v4i32
def : Pat<(v4i32 (partial_reduce_smla (v4i32 V128:$acc), (v16i8 V128:$lhs),
(v16i8 V128:$rhs))),
- (ADD_I32x4 (ADD_I32x4 (DOT (extend_low_s_I16x8 $lhs),
- (extend_low_s_I16x8 $rhs)),
- (DOT (extend_high_s_I16x8 $lhs),
- (extend_high_s_I16x8 $rhs))),
- $acc)>;
+ (ADD_I32x4 (ADD_I32x4 (extadd_pairwise_s_I32x4 (EXTMUL_LOW_S_I16x8 $lhs, $rhs)),
+ (extadd_pairwise_s_I32x4 (EXTMUL_HIGH_S_I16x8 $lhs, $rhs))),
+ $acc)>;
def : Pat<(v4i32 (partial_reduce_umla (v4i32 V128:$acc), (v16i8 V128:$lhs),
(v16i8 V128:$rhs))),
(ADD_I32x4 (ADD_I32x4 (extadd_pairwise_u_I32x4 (EXTMUL_LOW_U_I16x8 $lhs, $rhs)),
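
The signed v16i8 MLA pattern now matches the unsigned one that follows it: widening multiplies plus pairwise additions replace two explicit sign-extends feeding a dot product, saving the extends per half. Lane for lane the two forms agree, which this small scalar check illustrates (a toy model of the low half, not the SIMD code):

```cpp
#include <cstdint>
#include <cstdio>

int main() {
  int8_t a[8], b[8];
  for (int i = 0; i < 8; ++i) {
    a[i] = static_cast<int8_t>(i - 4);
    b[i] = static_cast<int8_t>(5 - i);
  }
  for (int lane = 0; lane < 4; ++lane) {
    // Old: sign-extend to i16, then dot; each i32 lane sums two products.
    int32_t dot = int32_t(a[2 * lane]) * b[2 * lane] +
                  int32_t(a[2 * lane + 1]) * b[2 * lane + 1];
    // New: extmul produces the i16 products directly; extadd_pairwise then
    // sums adjacent pairs into i32, giving the same lane value.
    int16_t p0 = int16_t(int16_t(a[2 * lane]) * b[2 * lane]);
    int16_t p1 = int16_t(int16_t(a[2 * lane + 1]) * b[2 * lane + 1]);
    int32_t pairwise = int32_t(p0) + int32_t(p1);
    std::printf("lane %d: dot=%d pairwise=%d\n", lane, dot, pairwise);
  }
  return 0;
}
```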
diff --git a/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp b/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp
index 100f1ec..53ec712 100644
--- a/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp
+++ b/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp
@@ -1879,28 +1879,34 @@ bool X86InstructionSelector::selectSelect(MachineInstr &I,
unsigned OpCmp;
LLT Ty = MRI.getType(DstReg);
- switch (Ty.getSizeInBits()) {
- default:
- return false;
- case 8:
- OpCmp = X86::CMOV_GR8;
- break;
- case 16:
- OpCmp = STI.canUseCMOV() ? X86::CMOV16rr : X86::CMOV_GR16;
- break;
- case 32:
- OpCmp = STI.canUseCMOV() ? X86::CMOV32rr : X86::CMOV_GR32;
- break;
- case 64:
- assert(STI.is64Bit() && STI.canUseCMOV());
- OpCmp = X86::CMOV64rr;
- break;
+ if (Ty.getSizeInBits() == 80) {
+ BuildMI(*Sel.getParent(), Sel, Sel.getDebugLoc(), TII.get(X86::CMOVE_Fp80),
+ DstReg)
+ .addReg(Sel.getTrueReg())
+ .addReg(Sel.getFalseReg());
+ } else {
+ switch (Ty.getSizeInBits()) {
+ default:
+ return false;
+ case 8:
+ OpCmp = X86::CMOV_GR8;
+ break;
+ case 16:
+ OpCmp = STI.canUseCMOV() ? X86::CMOV16rr : X86::CMOV_GR16;
+ break;
+ case 32:
+ OpCmp = STI.canUseCMOV() ? X86::CMOV32rr : X86::CMOV_GR32;
+ break;
+ case 64:
+ assert(STI.is64Bit() && STI.canUseCMOV());
+ OpCmp = X86::CMOV64rr;
+ break;
+ }
+ BuildMI(*Sel.getParent(), Sel, Sel.getDebugLoc(), TII.get(OpCmp), DstReg)
+ .addReg(Sel.getTrueReg())
+ .addReg(Sel.getFalseReg())
+ .addImm(X86::COND_E);
}
- BuildMI(*Sel.getParent(), Sel, Sel.getDebugLoc(), TII.get(OpCmp), DstReg)
- .addReg(Sel.getTrueReg())
- .addReg(Sel.getFalseReg())
- .addImm(X86::COND_E);
-
const TargetRegisterClass *DstRC = getRegClass(Ty, DstReg, MRI);
if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
LLVM_DEBUG(dbgs() << "Failed to constrain CMOV\n");
diff --git a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp
index 28fa2cd..e792b1b 100644
--- a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp
+++ b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp
@@ -575,10 +575,13 @@ X86LegalizerInfo::X86LegalizerInfo(const X86Subtarget &STI,
// todo: vectors and address spaces
getActionDefinitionsBuilder(G_SELECT)
- .legalFor({{s8, s32}, {s16, s32}, {s32, s32}, {s64, s32}, {p0, s32}})
+ .legalFor({{s16, s32}, {s32, s32}, {p0, s32}})
+ .legalFor(!HasCMOV, {{s8, s32}})
+ .legalFor(Is64Bit, {{s64, s32}})
+ .legalFor(UseX87, {{s80, s32}})
+ .clampScalar(1, s32, s32)
.widenScalarToNextPow2(0, /*Min=*/8)
- .clampScalar(0, HasCMOV ? s16 : s8, sMaxScalar)
- .clampScalar(1, s32, s32);
+ .clampScalar(0, HasCMOV ? s16 : s8, sMaxScalar);
// memory intrinsics
getActionDefinitionsBuilder({G_MEMCPY, G_MEMMOVE, G_MEMSET}).libcall();
diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td
index 8e08d16..a1fd366 100644
--- a/llvm/lib/Target/X86/X86.td
+++ b/llvm/lib/Target/X86/X86.td
@@ -1164,7 +1164,6 @@ def ProcessorFeatures {
FeatureAVXNECONVERT,
FeatureAVXVNNIINT8,
FeatureAVXVNNIINT16,
- FeatureUSERMSR,
FeatureSHA512,
FeatureSM3,
FeatureEGPR,
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index b05d7c7..b5f8ee5 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -41846,7 +41846,7 @@ static SDValue combineCommutableSHUFP(SDValue N, MVT VT, const SDLoc &DL,
if (!X86::mayFoldLoad(peekThroughOneUseBitcasts(N0), Subtarget) ||
X86::mayFoldLoad(peekThroughOneUseBitcasts(N1), Subtarget))
return SDValue();
- Imm = ((Imm & 0x0F) << 4) | ((Imm & 0xF0) >> 4);
+ Imm = llvm::rotl<uint8_t>(Imm, 4);
return DAG.getNode(X86ISD::SHUFP, DL, VT, N1, N0,
DAG.getTargetConstant(Imm, DL, MVT::i8));
};
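
The SHUFP immediate fix-up is a nibble swap, which is exactly an 8-bit rotate by 4. A quick standalone check, using std::rotl, which llvm::rotl mirrors for this case:

```cpp
#include <bit>      // std::rotl (C++20)
#include <cstdint>
#include <cstdio>

int main() {
  uint8_t Imm = 0xAB;
  uint8_t Old =
      static_cast<uint8_t>(((Imm & 0x0F) << 4) | ((Imm & 0xF0) >> 4));
  uint8_t New = std::rotl(Imm, 4);
  std::printf("%02X %02X\n", Old, New); // prints: BA BA
  return 0;
}
```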
diff --git a/llvm/lib/TargetParser/ARMTargetParser.cpp b/llvm/lib/TargetParser/ARMTargetParser.cpp
index 08944e6..7882045 100644
--- a/llvm/lib/TargetParser/ARMTargetParser.cpp
+++ b/llvm/lib/TargetParser/ARMTargetParser.cpp
@@ -235,16 +235,16 @@ ARM::NeonSupportLevel ARM::getFPUNeonSupportLevel(ARM::FPUKind FPUKind) {
StringRef ARM::getFPUSynonym(StringRef FPU) {
return StringSwitch<StringRef>(FPU)
- .Cases("fpa", "fpe2", "fpe3", "maverick", "invalid") // Unsupported
+ .Cases({"fpa", "fpe2", "fpe3", "maverick"}, "invalid") // Unsupported
.Case("vfp2", "vfpv2")
.Case("vfp3", "vfpv3")
.Case("vfp4", "vfpv4")
.Case("vfp3-d16", "vfpv3-d16")
.Case("vfp4-d16", "vfpv4-d16")
- .Cases("fp4-sp-d16", "vfpv4-sp-d16", "fpv4-sp-d16")
- .Cases("fp4-dp-d16", "fpv4-dp-d16", "vfpv4-d16")
+ .Cases({"fp4-sp-d16", "vfpv4-sp-d16"}, "fpv4-sp-d16")
+ .Cases({"fp4-dp-d16", "fpv4-dp-d16"}, "vfpv4-d16")
.Case("fp5-sp-d16", "fpv5-sp-d16")
- .Cases("fp5-dp-d16", "fpv5-dp-d16", "fpv5-d16")
+ .Cases({"fp5-dp-d16", "fpv5-dp-d16"}, "fpv5-d16")
// FIXME: Clang uses it, but it's bogus, since neon defaults to vfpv3.
.Case("neon-vfpv3", "neon")
.Default(FPU);
diff --git a/llvm/lib/TargetParser/X86TargetParser.cpp b/llvm/lib/TargetParser/X86TargetParser.cpp
index dd13ce3..b13c795 100644
--- a/llvm/lib/TargetParser/X86TargetParser.cpp
+++ b/llvm/lib/TargetParser/X86TargetParser.cpp
@@ -143,8 +143,7 @@ constexpr FeatureBitset FeaturesDiamondRapids =
FeatureAVXVNNIINT8 | FeatureAVXVNNIINT16 | FeatureSHA512 | FeatureSM3 |
FeatureSM4 | FeatureEGPR | FeatureZU | FeatureCCMP | FeaturePush2Pop2 |
FeaturePPX | FeatureNDD | FeatureNF | FeatureMOVRS | FeatureAMX_MOVRS |
- FeatureAMX_AVX512 | FeatureAMX_FP8 | FeatureAMX_TF32 |
- FeatureAMX_TRANSPOSE | FeatureUSERMSR;
+ FeatureAMX_AVX512 | FeatureAMX_FP8 | FeatureAMX_TF32 | FeatureAMX_TRANSPOSE;
// Intel Atom processors.
// Bonnell has feature parity with Core2 and adds MOVBE.
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index e1e24a9..dab200d 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -289,12 +289,11 @@ Instruction *InstCombinerImpl::SimplifyAnyMemSet(AnyMemSetInst *MI) {
// * Narrow width by halfs excluding zero/undef lanes
Value *InstCombinerImpl::simplifyMaskedLoad(IntrinsicInst &II) {
Value *LoadPtr = II.getArgOperand(0);
- const Align Alignment =
- cast<ConstantInt>(II.getArgOperand(1))->getAlignValue();
+ const Align Alignment = II.getParamAlign(0).valueOrOne();
// If the mask is all ones or undefs, this is a plain vector load of the 1st
// argument.
- if (maskIsAllOneOrUndef(II.getArgOperand(2))) {
+ if (maskIsAllOneOrUndef(II.getArgOperand(1))) {
LoadInst *L = Builder.CreateAlignedLoad(II.getType(), LoadPtr, Alignment,
"unmaskedload");
L->copyMetadata(II);
@@ -308,7 +307,7 @@ Value *InstCombinerImpl::simplifyMaskedLoad(IntrinsicInst &II) {
LoadInst *LI = Builder.CreateAlignedLoad(II.getType(), LoadPtr, Alignment,
"unmaskedload");
LI->copyMetadata(II);
- return Builder.CreateSelect(II.getArgOperand(2), LI, II.getArgOperand(3));
+ return Builder.CreateSelect(II.getArgOperand(1), LI, II.getArgOperand(2));
}
return nullptr;
@@ -319,8 +318,8 @@ Value *InstCombinerImpl::simplifyMaskedLoad(IntrinsicInst &II) {
// * Narrow width by halfs excluding zero/undef lanes
Instruction *InstCombinerImpl::simplifyMaskedStore(IntrinsicInst &II) {
Value *StorePtr = II.getArgOperand(1);
- Align Alignment = cast<ConstantInt>(II.getArgOperand(2))->getAlignValue();
- auto *ConstMask = dyn_cast<Constant>(II.getArgOperand(3));
+ Align Alignment = II.getParamAlign(1).valueOrOne();
+ auto *ConstMask = dyn_cast<Constant>(II.getArgOperand(2));
if (!ConstMask)
return nullptr;
@@ -356,7 +355,7 @@ Instruction *InstCombinerImpl::simplifyMaskedStore(IntrinsicInst &II) {
// * Narrow width by halfs excluding zero/undef lanes
// * Vector incrementing address -> vector masked load
Instruction *InstCombinerImpl::simplifyMaskedGather(IntrinsicInst &II) {
- auto *ConstMask = dyn_cast<Constant>(II.getArgOperand(2));
+ auto *ConstMask = dyn_cast<Constant>(II.getArgOperand(1));
if (!ConstMask)
return nullptr;
@@ -366,8 +365,7 @@ Instruction *InstCombinerImpl::simplifyMaskedGather(IntrinsicInst &II) {
if (ConstMask->isAllOnesValue())
if (auto *SplatPtr = getSplatValue(II.getArgOperand(0))) {
auto *VecTy = cast<VectorType>(II.getType());
- const Align Alignment =
- cast<ConstantInt>(II.getArgOperand(1))->getAlignValue();
+ const Align Alignment = II.getParamAlign(0).valueOrOne();
LoadInst *L = Builder.CreateAlignedLoad(VecTy->getElementType(), SplatPtr,
Alignment, "load.scalar");
Value *Shuf =
@@ -384,7 +382,7 @@ Instruction *InstCombinerImpl::simplifyMaskedGather(IntrinsicInst &II) {
// * Narrow store width by halfs excluding zero/undef lanes
// * Vector incrementing address -> vector masked store
Instruction *InstCombinerImpl::simplifyMaskedScatter(IntrinsicInst &II) {
- auto *ConstMask = dyn_cast<Constant>(II.getArgOperand(3));
+ auto *ConstMask = dyn_cast<Constant>(II.getArgOperand(2));
if (!ConstMask)
return nullptr;
@@ -397,8 +395,7 @@ Instruction *InstCombinerImpl::simplifyMaskedScatter(IntrinsicInst &II) {
// scatter(splat(value), splat(ptr), non-zero-mask) -> store value, ptr
if (auto *SplatValue = getSplatValue(II.getArgOperand(0))) {
if (maskContainsAllOneOrUndef(ConstMask)) {
- Align Alignment =
- cast<ConstantInt>(II.getArgOperand(2))->getAlignValue();
+ Align Alignment = II.getParamAlign(1).valueOrOne();
StoreInst *S = new StoreInst(SplatValue, SplatPtr, /*IsVolatile=*/false,
Alignment);
S->copyMetadata(II);
@@ -408,7 +405,7 @@ Instruction *InstCombinerImpl::simplifyMaskedScatter(IntrinsicInst &II) {
// scatter(vector, splat(ptr), splat(true)) -> store extract(vector,
// lastlane), ptr
if (ConstMask->isAllOnesValue()) {
- Align Alignment = cast<ConstantInt>(II.getArgOperand(2))->getAlignValue();
+ Align Alignment = II.getParamAlign(1).valueOrOne();
VectorType *WideLoadTy = cast<VectorType>(II.getArgOperand(1)->getType());
ElementCount VF = WideLoadTy->getElementCount();
Value *RunTimeVF = Builder.CreateElementCount(Builder.getInt32Ty(), VF);
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index a8eb9b9..975498f 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -4501,24 +4501,24 @@ Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) {
if (Value *V = foldSelectIntoAddConstant(SI, Builder))
return replaceInstUsesWith(SI, V);
- // select(mask, mload(,,mask,0), 0) -> mload(,,mask,0)
+ // select(mask, mload(ptr,mask,0), 0) -> mload(ptr,mask,0)
// Load inst is intentionally not checked for hasOneUse()
if (match(FalseVal, m_Zero()) &&
- (match(TrueVal, m_MaskedLoad(m_Value(), m_Value(), m_Specific(CondVal),
+ (match(TrueVal, m_MaskedLoad(m_Value(), m_Specific(CondVal),
m_CombineOr(m_Undef(), m_Zero()))) ||
- match(TrueVal, m_MaskedGather(m_Value(), m_Value(), m_Specific(CondVal),
+ match(TrueVal, m_MaskedGather(m_Value(), m_Specific(CondVal),
m_CombineOr(m_Undef(), m_Zero()))))) {
auto *MaskedInst = cast<IntrinsicInst>(TrueVal);
- if (isa<UndefValue>(MaskedInst->getArgOperand(3)))
- MaskedInst->setArgOperand(3, FalseVal /* Zero */);
+ if (isa<UndefValue>(MaskedInst->getArgOperand(2)))
+ MaskedInst->setArgOperand(2, FalseVal /* Zero */);
return replaceInstUsesWith(SI, MaskedInst);
}
Value *Mask;
if (match(TrueVal, m_Zero()) &&
- (match(FalseVal, m_MaskedLoad(m_Value(), m_Value(), m_Value(Mask),
+ (match(FalseVal, m_MaskedLoad(m_Value(), m_Value(Mask),
m_CombineOr(m_Undef(), m_Zero()))) ||
- match(FalseVal, m_MaskedGather(m_Value(), m_Value(), m_Value(Mask),
+ match(FalseVal, m_MaskedGather(m_Value(), m_Value(Mask),
m_CombineOr(m_Undef(), m_Zero())))) &&
(CondVal->getType() == Mask->getType())) {
// We can remove the select by ensuring the load zeros all lanes the
@@ -4531,8 +4531,8 @@ Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) {
if (CanMergeSelectIntoLoad) {
auto *MaskedInst = cast<IntrinsicInst>(FalseVal);
- if (isa<UndefValue>(MaskedInst->getArgOperand(3)))
- MaskedInst->setArgOperand(3, TrueVal /* Zero */);
+ if (isa<UndefValue>(MaskedInst->getArgOperand(2)))
+ MaskedInst->setArgOperand(2, TrueVal /* Zero */);
return replaceInstUsesWith(SI, MaskedInst);
}
}
@@ -4671,14 +4671,13 @@ Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) {
}
Value *MaskedLoadPtr;
- const APInt *MaskedLoadAlignment;
if (match(TrueVal, m_OneUse(m_MaskedLoad(m_Value(MaskedLoadPtr),
- m_APInt(MaskedLoadAlignment),
m_Specific(CondVal), m_Value()))))
return replaceInstUsesWith(
- SI, Builder.CreateMaskedLoad(TrueVal->getType(), MaskedLoadPtr,
- Align(MaskedLoadAlignment->getZExtValue()),
- CondVal, FalseVal));
+ SI, Builder.CreateMaskedLoad(
+ TrueVal->getType(), MaskedLoadPtr,
+ cast<IntrinsicInst>(TrueVal)->getParamAlign(0).valueOrOne(),
+ CondVal, FalseVal));
return nullptr;
}
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index a330bb7..651e305 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -1892,7 +1892,7 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V,
// segfaults which didn't exist in the original program.
APInt DemandedPtrs(APInt::getAllOnes(VWidth)),
DemandedPassThrough(DemandedElts);
- if (auto *CMask = dyn_cast<Constant>(II->getOperand(2))) {
+ if (auto *CMask = dyn_cast<Constant>(II->getOperand(1))) {
for (unsigned i = 0; i < VWidth; i++) {
if (Constant *CElt = CMask->getAggregateElement(i)) {
if (CElt->isNullValue())
@@ -1905,7 +1905,7 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V,
if (II->getIntrinsicID() == Intrinsic::masked_gather)
simplifyAndSetOp(II, 0, DemandedPtrs, PoisonElts2);
- simplifyAndSetOp(II, 3, DemandedPassThrough, PoisonElts3);
+ simplifyAndSetOp(II, 2, DemandedPassThrough, PoisonElts3);
// Output elements are undefined if the element from both sources are.
// TODO: can strengthen via mask as well.
diff --git a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index 2646334..cb6ca72 100644
--- a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -1494,11 +1494,8 @@ void AddressSanitizer::getInterestingMemoryOperands(
if (ignoreAccess(I, BasePtr))
return;
Type *Ty = IsWrite ? CI->getArgOperand(0)->getType() : CI->getType();
- MaybeAlign Alignment = Align(1);
- // Otherwise no alignment guarantees. We probably got Undef.
- if (auto *Op = dyn_cast<ConstantInt>(CI->getOperand(1 + OpOffset)))
- Alignment = Op->getMaybeAlignValue();
- Value *Mask = CI->getOperand(2 + OpOffset);
+ MaybeAlign Alignment = CI->getParamAlign(0);
+ Value *Mask = CI->getOperand(1 + OpOffset);
Interesting.emplace_back(I, OpOffset, IsWrite, Ty, Alignment, Mask);
break;
}
diff --git a/llvm/lib/Transforms/Instrumentation/MemProfInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/MemProfInstrumentation.cpp
index 3ae771a..3c0f185 100644
--- a/llvm/lib/Transforms/Instrumentation/MemProfInstrumentation.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemProfInstrumentation.cpp
@@ -338,7 +338,7 @@ MemProfiler::isInterestingMemoryAccess(Instruction *I) const {
}
auto *BasePtr = CI->getOperand(0 + OpOffset);
- Access.MaybeMask = CI->getOperand(2 + OpOffset);
+ Access.MaybeMask = CI->getOperand(1 + OpOffset);
Access.Addr = BasePtr;
}
}
diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index eff6f0c..b6cbecb 100644
--- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -4191,10 +4191,9 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
void handleMaskedGather(IntrinsicInst &I) {
IRBuilder<> IRB(&I);
Value *Ptrs = I.getArgOperand(0);
- const Align Alignment(
- cast<ConstantInt>(I.getArgOperand(1))->getZExtValue());
- Value *Mask = I.getArgOperand(2);
- Value *PassThru = I.getArgOperand(3);
+ const Align Alignment = I.getParamAlign(0).valueOrOne();
+ Value *Mask = I.getArgOperand(1);
+ Value *PassThru = I.getArgOperand(2);
Type *PtrsShadowTy = getShadowTy(Ptrs);
if (ClCheckAccessAddress) {
@@ -4230,9 +4229,8 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
IRBuilder<> IRB(&I);
Value *Values = I.getArgOperand(0);
Value *Ptrs = I.getArgOperand(1);
- const Align Alignment(
- cast<ConstantInt>(I.getArgOperand(2))->getZExtValue());
- Value *Mask = I.getArgOperand(3);
+ const Align Alignment = I.getParamAlign(1).valueOrOne();
+ Value *Mask = I.getArgOperand(2);
Type *PtrsShadowTy = getShadowTy(Ptrs);
if (ClCheckAccessAddress) {
@@ -4262,9 +4260,8 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
IRBuilder<> IRB(&I);
Value *V = I.getArgOperand(0);
Value *Ptr = I.getArgOperand(1);
- const Align Alignment(
- cast<ConstantInt>(I.getArgOperand(2))->getZExtValue());
- Value *Mask = I.getArgOperand(3);
+ const Align Alignment = I.getParamAlign(1).valueOrOne();
+ Value *Mask = I.getArgOperand(2);
Value *Shadow = getShadow(V);
if (ClCheckAccessAddress) {
@@ -4295,10 +4292,9 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
void handleMaskedLoad(IntrinsicInst &I) {
IRBuilder<> IRB(&I);
Value *Ptr = I.getArgOperand(0);
- const Align Alignment(
- cast<ConstantInt>(I.getArgOperand(1))->getZExtValue());
- Value *Mask = I.getArgOperand(2);
- Value *PassThru = I.getArgOperand(3);
+ const Align Alignment = I.getParamAlign(0).valueOrOne();
+ Value *Mask = I.getArgOperand(1);
+ Value *PassThru = I.getArgOperand(2);
if (ClCheckAccessAddress) {
insertCheckShadowOf(Ptr, &I);
diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
index 6141b6d..4ac1321 100644
--- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -272,7 +272,7 @@ static OverwriteResult isMaskedStoreOverwrite(const Instruction *KillingI,
if (KillingII->getIntrinsicID() == Intrinsic::masked_store) {
// Masks.
// TODO: check that KillingII's mask is a superset of DeadII's mask.
- if (KillingII->getArgOperand(3) != DeadII->getArgOperand(3))
+ if (KillingII->getArgOperand(2) != DeadII->getArgOperand(2))
return OW_Unknown;
} else if (KillingII->getIntrinsicID() == Intrinsic::vp_store) {
// Masks.
diff --git a/llvm/lib/Transforms/Scalar/EarlyCSE.cpp b/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
index 2afa7b7..e30f306 100644
--- a/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
+++ b/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
@@ -1017,14 +1017,14 @@ private:
};
auto MaskOp = [](const IntrinsicInst *II) {
if (II->getIntrinsicID() == Intrinsic::masked_load)
- return II->getOperand(2);
+ return II->getOperand(1);
if (II->getIntrinsicID() == Intrinsic::masked_store)
- return II->getOperand(3);
+ return II->getOperand(2);
llvm_unreachable("Unexpected IntrinsicInst");
};
auto ThruOp = [](const IntrinsicInst *II) {
if (II->getIntrinsicID() == Intrinsic::masked_load)
- return II->getOperand(3);
+ return II->getOperand(2);
llvm_unreachable("Unexpected IntrinsicInst");
};
diff --git a/llvm/lib/Transforms/Scalar/GVN.cpp b/llvm/lib/Transforms/Scalar/GVN.cpp
index 42db424..72e1131 100644
--- a/llvm/lib/Transforms/Scalar/GVN.cpp
+++ b/llvm/lib/Transforms/Scalar/GVN.cpp
@@ -2212,11 +2212,11 @@ bool GVNPass::processMaskedLoad(IntrinsicInst *I) {
if (!DepInst || !Dep.isLocal() || !Dep.isDef())
return false;
- Value *Mask = I->getOperand(2);
- Value *Passthrough = I->getOperand(3);
+ Value *Mask = I->getOperand(1);
+ Value *Passthrough = I->getOperand(2);
Value *StoreVal;
- if (!match(DepInst, m_MaskedStore(m_Value(StoreVal), m_Value(), m_Value(),
- m_Specific(Mask))) ||
+ if (!match(DepInst,
+ m_MaskedStore(m_Value(StoreVal), m_Value(), m_Specific(Mask))) ||
StoreVal->getType() != I->getType())
return false;
diff --git a/llvm/lib/Transforms/Scalar/GVNSink.cpp b/llvm/lib/Transforms/Scalar/GVNSink.cpp
index b9534def..a06f832 100644
--- a/llvm/lib/Transforms/Scalar/GVNSink.cpp
+++ b/llvm/lib/Transforms/Scalar/GVNSink.cpp
@@ -430,6 +430,7 @@ public:
case Instruction::FPTrunc:
case Instruction::FPExt:
case Instruction::PtrToInt:
+ case Instruction::PtrToAddr:
case Instruction::IntToPtr:
case Instruction::BitCast:
case Instruction::AddrSpaceCast:
diff --git a/llvm/lib/Transforms/Scalar/InferAlignment.cpp b/llvm/lib/Transforms/Scalar/InferAlignment.cpp
index 995b803..39751c0 100644
--- a/llvm/lib/Transforms/Scalar/InferAlignment.cpp
+++ b/llvm/lib/Transforms/Scalar/InferAlignment.cpp
@@ -45,25 +45,20 @@ static bool tryToImproveAlign(
switch (II->getIntrinsicID()) {
case Intrinsic::masked_load:
case Intrinsic::masked_store: {
- int AlignOpIdx = II->getIntrinsicID() == Intrinsic::masked_load ? 1 : 2;
- Value *PtrOp = II->getIntrinsicID() == Intrinsic::masked_load
- ? II->getArgOperand(0)
- : II->getArgOperand(1);
+ unsigned PtrOpIdx = II->getIntrinsicID() == Intrinsic::masked_load ? 0 : 1;
+ Value *PtrOp = II->getArgOperand(PtrOpIdx);
Type *Type = II->getIntrinsicID() == Intrinsic::masked_load
? II->getType()
: II->getArgOperand(0)->getType();
- Align OldAlign =
- cast<ConstantInt>(II->getArgOperand(AlignOpIdx))->getAlignValue();
+ Align OldAlign = II->getParamAlign(PtrOpIdx).valueOrOne();
Align PrefAlign = DL.getPrefTypeAlign(Type);
Align NewAlign = Fn(PtrOp, OldAlign, PrefAlign);
- if (NewAlign <= OldAlign ||
- NewAlign.value() > std::numeric_limits<uint32_t>().max())
+ if (NewAlign <= OldAlign)
return false;
- Value *V =
- ConstantInt::get(Type::getInt32Ty(II->getContext()), NewAlign.value());
- II->setOperand(AlignOpIdx, V);
+ II->addParamAttr(PtrOpIdx,
+ Attribute::getWithAlignment(II->getContext(), NewAlign));
return true;
}
default:
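
The rewritten InferAlignment code also shows why the old `uint32_t` overflow guard disappears: the alignment used to be encoded as an i32 immediate, while an `align` attribute carries a full `Align` and needs no cap. A short sketch of the attribute round-trip the new code relies on, with names mirroring the hunk above:

    #include "llvm/IR/InstrTypes.h"
    #include "llvm/Support/Alignment.h"
    #include <cassert>
    using namespace llvm;

    static void setAndCheckAlign(CallBase *II, unsigned PtrOpIdx, Align NewAlign) {
      // Write the improved alignment as a parameter attribute...
      II->addParamAttr(PtrOpIdx,
                       Attribute::getWithAlignment(II->getContext(), NewAlign));
      // ...and read it back without decoding an i32 immediate operand.
      assert(II->getParamAlign(PtrOpIdx).valueOrOne() == NewAlign);
    }
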
diff --git a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
index 28ae4f0..9aaf6a5 100644
--- a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
@@ -43,6 +43,7 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar/LoopPassManager.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include <cassert>
#include <utility>
@@ -1872,6 +1873,51 @@ static void moveLCSSAPhis(BasicBlock *InnerExit, BasicBlock *InnerHeader,
InnerLatch->replacePhiUsesWith(InnerLatch, OuterLatch);
}
+/// This deals with a corner case where an LCSSA phi node appears in a
+/// non-exit block: the outer loop latch block need not be the exit block of
+/// the inner loop. Consider a loop that was in LCSSA form, but then some
+/// transformation like loop-unswitch comes along and creates an empty block,
+/// where BB5 in this example is the outer loop latch block:
+///
+/// BB4:
+/// br label %BB5
+/// BB5:
+/// %old.cond.lcssa = phi i16 [ %cond, %BB4 ]
+/// br label %outer.header
+///
+/// Interchange then brings it into LCSSA form again, resulting in this chain
+/// of single-input phi nodes:
+///
+/// BB4:
+/// %new.cond.lcssa = phi i16 [ %cond, %BB3 ]
+/// br label %BB5
+/// BB5:
+/// %old.cond.lcssa = phi i16 [ %new.cond.lcssa, %BB4 ]
+///
+/// The problem is that interchange can reorder blocks BB4 and BB5, placing
+/// the use before the def, if we don't check this. The solution is to
+/// simplify away (i.e. remove) such LCSSA phi nodes if they appear in
+/// non-exit blocks.
+///
+static void simplifyLCSSAPhis(Loop *OuterLoop, Loop *InnerLoop) {
+ BasicBlock *InnerLoopExit = InnerLoop->getExitBlock();
+ BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch();
+
+ // Do not modify LCSSA phis where they actually belong, i.e. in exit blocks.
+ if (OuterLoopLatch == InnerLoopExit)
+ return;
+
+ // Collect and remove phis in non-exit blocks if they have 1 input.
+ SmallVector<PHINode *, 8> Phis(
+ llvm::make_pointer_range(OuterLoopLatch->phis()));
+ for (PHINode *Phi : Phis) {
+ assert(Phi->getNumIncomingValues() == 1 && "Single input phi expected");
+ LLVM_DEBUG(dbgs() << "Removing 1-input phi in non-exit block: " << *Phi
+ << "\n");
+ Phi->replaceAllUsesWith(Phi->getIncomingValue(0));
+ Phi->eraseFromParent();
+ }
+}
+
bool LoopInterchangeTransform::adjustLoopBranches() {
LLVM_DEBUG(dbgs() << "adjustLoopBranches called\n");
std::vector<DominatorTree::UpdateType> DTUpdates;
@@ -1882,6 +1928,9 @@ bool LoopInterchangeTransform::adjustLoopBranches() {
assert(OuterLoopPreHeader != OuterLoop->getHeader() &&
InnerLoopPreHeader != InnerLoop->getHeader() && OuterLoopPreHeader &&
InnerLoopPreHeader && "Guaranteed by loop-simplify form");
+
+ simplifyLCSSAPhis(OuterLoop, InnerLoop);
+
// Ensure that both preheaders do not contain PHI nodes and have single
// predecessors. This allows us to move them easily. We use
// InsertPreHeaderForLoop to create an 'extra' preheader, if the existing
diff --git a/llvm/lib/Transforms/Scalar/ScalarizeMaskedMemIntrin.cpp b/llvm/lib/Transforms/Scalar/ScalarizeMaskedMemIntrin.cpp
index 42d6680..146e7d1 100644
--- a/llvm/lib/Transforms/Scalar/ScalarizeMaskedMemIntrin.cpp
+++ b/llvm/lib/Transforms/Scalar/ScalarizeMaskedMemIntrin.cpp
@@ -111,7 +111,7 @@ static unsigned adjustForEndian(const DataLayout &DL, unsigned VectorWidth,
}
// Translate a masked load intrinsic like
-// <16 x i32 > @llvm.masked.load( <16 x i32>* %addr, i32 align,
+// <16 x i32 > @llvm.masked.load( <16 x i32>* %addr,
// <16 x i1> %mask, <16 x i32> %passthru)
// to a chain of basic blocks, with loading element one-by-one if
// the appropriate mask bit is set
@@ -146,11 +146,10 @@ static void scalarizeMaskedLoad(const DataLayout &DL, bool HasBranchDivergence,
CallInst *CI, DomTreeUpdater *DTU,
bool &ModifiedDT) {
Value *Ptr = CI->getArgOperand(0);
- Value *Alignment = CI->getArgOperand(1);
- Value *Mask = CI->getArgOperand(2);
- Value *Src0 = CI->getArgOperand(3);
+ Value *Mask = CI->getArgOperand(1);
+ Value *Src0 = CI->getArgOperand(2);
- const Align AlignVal = cast<ConstantInt>(Alignment)->getAlignValue();
+ const Align AlignVal = CI->getParamAlign(0).valueOrOne();
VectorType *VecType = cast<FixedVectorType>(CI->getType());
Type *EltTy = VecType->getElementType();
@@ -290,7 +289,7 @@ static void scalarizeMaskedLoad(const DataLayout &DL, bool HasBranchDivergence,
}
// Translate a masked store intrinsic, like
-// void @llvm.masked.store(<16 x i32> %src, <16 x i32>* %addr, i32 align,
+// void @llvm.masked.store(<16 x i32> %src, <16 x i32>* %addr,
// <16 x i1> %mask)
// to a chain of basic blocks, that stores element one-by-one if
// the appropriate mask bit is set
@@ -320,10 +319,9 @@ static void scalarizeMaskedStore(const DataLayout &DL, bool HasBranchDivergence,
bool &ModifiedDT) {
Value *Src = CI->getArgOperand(0);
Value *Ptr = CI->getArgOperand(1);
- Value *Alignment = CI->getArgOperand(2);
- Value *Mask = CI->getArgOperand(3);
+ Value *Mask = CI->getArgOperand(2);
- const Align AlignVal = cast<ConstantInt>(Alignment)->getAlignValue();
+ const Align AlignVal = CI->getParamAlign(1).valueOrOne();
auto *VecType = cast<VectorType>(Src->getType());
Type *EltTy = VecType->getElementType();
@@ -472,9 +470,8 @@ static void scalarizeMaskedGather(const DataLayout &DL,
bool HasBranchDivergence, CallInst *CI,
DomTreeUpdater *DTU, bool &ModifiedDT) {
Value *Ptrs = CI->getArgOperand(0);
- Value *Alignment = CI->getArgOperand(1);
- Value *Mask = CI->getArgOperand(2);
- Value *Src0 = CI->getArgOperand(3);
+ Value *Mask = CI->getArgOperand(1);
+ Value *Src0 = CI->getArgOperand(2);
auto *VecType = cast<FixedVectorType>(CI->getType());
Type *EltTy = VecType->getElementType();
@@ -483,7 +480,7 @@ static void scalarizeMaskedGather(const DataLayout &DL,
Instruction *InsertPt = CI;
BasicBlock *IfBlock = CI->getParent();
Builder.SetInsertPoint(InsertPt);
- MaybeAlign AlignVal = cast<ConstantInt>(Alignment)->getMaybeAlignValue();
+ Align AlignVal = CI->getParamAlign(0).valueOrOne();
Builder.SetCurrentDebugLocation(CI->getDebugLoc());
@@ -608,8 +605,7 @@ static void scalarizeMaskedScatter(const DataLayout &DL,
DomTreeUpdater *DTU, bool &ModifiedDT) {
Value *Src = CI->getArgOperand(0);
Value *Ptrs = CI->getArgOperand(1);
- Value *Alignment = CI->getArgOperand(2);
- Value *Mask = CI->getArgOperand(3);
+ Value *Mask = CI->getArgOperand(2);
auto *SrcFVTy = cast<FixedVectorType>(Src->getType());
@@ -623,7 +619,7 @@ static void scalarizeMaskedScatter(const DataLayout &DL,
Builder.SetInsertPoint(InsertPt);
Builder.SetCurrentDebugLocation(CI->getDebugLoc());
- MaybeAlign AlignVal = cast<ConstantInt>(Alignment)->getMaybeAlignValue();
+ Align AlignVal = CI->getParamAlign(1).valueOrOne();
unsigned VectorWidth = SrcFVTy->getNumElements();
// Shorten the way if the mask is a vector of constants.
@@ -1125,8 +1121,7 @@ static bool optimizeCallInst(CallInst *CI, bool &ModifiedDT,
case Intrinsic::masked_load:
// Scalarize unsupported vector masked load
if (TTI.isLegalMaskedLoad(
- CI->getType(),
- cast<ConstantInt>(CI->getArgOperand(1))->getAlignValue(),
+ CI->getType(), CI->getParamAlign(0).valueOrOne(),
cast<PointerType>(CI->getArgOperand(0)->getType())
->getAddressSpace()))
return false;
@@ -1135,18 +1130,15 @@ static bool optimizeCallInst(CallInst *CI, bool &ModifiedDT,
case Intrinsic::masked_store:
if (TTI.isLegalMaskedStore(
CI->getArgOperand(0)->getType(),
- cast<ConstantInt>(CI->getArgOperand(2))->getAlignValue(),
+ CI->getParamAlign(1).valueOrOne(),
cast<PointerType>(CI->getArgOperand(1)->getType())
->getAddressSpace()))
return false;
scalarizeMaskedStore(DL, HasBranchDivergence, CI, DTU, ModifiedDT);
return true;
case Intrinsic::masked_gather: {
- MaybeAlign MA =
- cast<ConstantInt>(CI->getArgOperand(1))->getMaybeAlignValue();
+ Align Alignment = CI->getParamAlign(0).valueOrOne();
Type *LoadTy = CI->getType();
- Align Alignment = DL.getValueOrABITypeAlignment(MA,
- LoadTy->getScalarType());
if (TTI.isLegalMaskedGather(LoadTy, Alignment) &&
!TTI.forceScalarizeMaskedGather(cast<VectorType>(LoadTy), Alignment))
return false;
@@ -1154,11 +1146,8 @@ static bool optimizeCallInst(CallInst *CI, bool &ModifiedDT,
return true;
}
case Intrinsic::masked_scatter: {
- MaybeAlign MA =
- cast<ConstantInt>(CI->getArgOperand(2))->getMaybeAlignValue();
+ Align Alignment = CI->getParamAlign(1).valueOrOne();
Type *StoreTy = CI->getArgOperand(0)->getType();
- Align Alignment = DL.getValueOrABITypeAlignment(MA,
- StoreTy->getScalarType());
if (TTI.isLegalMaskedScatter(StoreTy, Alignment) &&
!TTI.forceScalarizeMaskedScatter(cast<VectorType>(StoreTy),
Alignment))
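
Producers of these intrinsics are largely unaffected by the layout change, since IRBuilder's masked-memory helpers already accept an `Align` directly; presumably the builder now attaches it as the parameter attribute rather than as an i32 operand. A hedged sketch, with all values as placeholders:

    #include "llvm/IR/IRBuilder.h"
    using namespace llvm;

    static void buildMaskedOps(IRBuilder<> &B, Type *Ty, Value *Ptr,
                               Value *Mask, Value *PassThru, Value *Val) {
      // The Align argument, not an explicit operand, conveys the alignment.
      Value *Ld = B.CreateMaskedLoad(Ty, Ptr, Align(16), Mask, PassThru);
      B.CreateMaskedStore(Val, Ptr, Align(16), Mask);
      (void)Ld;
    }
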
diff --git a/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp b/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp
index fa66a03..23e1243 100644
--- a/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp
+++ b/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp
@@ -227,6 +227,7 @@ static InstructionCost ComputeSpeculationCost(const Instruction *I,
case Instruction::Call:
case Instruction::BitCast:
case Instruction::PtrToInt:
+ case Instruction::PtrToAddr:
case Instruction::IntToPtr:
case Instruction::AddrSpaceCast:
case Instruction::FPToUI:
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
index 7651ba1..3fed003 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
@@ -325,6 +325,8 @@ public:
VPIRFlags Flags;
if (Opcode == Instruction::Trunc)
Flags = VPIRFlags::TruncFlagsTy(false, false);
+ else if (Opcode == Instruction::ZExt)
+ Flags = VPIRFlags::NonNegFlagsTy(false);
return tryInsertInstruction(
new VPWidenCastRecipe(Opcode, Op, ResultTy, Flags));
}
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 280eb20..febdc54 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -7192,7 +7192,8 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
// TODO: Move to VPlan transform stage once the transition to the VPlan-based
// cost model is complete for better cost estimates.
VPlanTransforms::runPass(VPlanTransforms::unrollByUF, BestVPlan, BestUF);
- VPlanTransforms::runPass(VPlanTransforms::materializeBuildVectors, BestVPlan);
+ VPlanTransforms::runPass(VPlanTransforms::materializePacksAndUnpacks,
+ BestVPlan);
VPlanTransforms::runPass(VPlanTransforms::materializeBroadcasts, BestVPlan);
VPlanTransforms::runPass(VPlanTransforms::replicateByVF, BestVPlan, BestVF);
bool HasBranchWeights =
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 9cd52da..3f18bd7 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -5343,7 +5343,7 @@ private:
unsigned &OpCnt =
OrderedEntriesCount.try_emplace(TE, 0).first->getSecond();
EdgeInfo EI(TE, U.getOperandNo());
- if (!getScheduleCopyableData(EI, Op) && OpCnt < NumOps)
+ if (!getScheduleCopyableData(EI, Op))
continue;
// Found copyable operand - continue.
++OpCnt;
@@ -10546,8 +10546,11 @@ static bool tryToFindDuplicates(SmallVectorImpl<Value *> &VL,
PoisonValue::get(UniqueValues.front()->getType()));
// Check that operations extended with poisons/copyable elements are still
// valid for vectorization (div/rem are not allowed).
- if (!S.areInstructionsWithCopyableElements() &&
- !getSameOpcode(PaddedUniqueValues, TLI).valid()) {
+ if ((!S.areInstructionsWithCopyableElements() &&
+ !getSameOpcode(PaddedUniqueValues, TLI).valid()) ||
+ (S.areInstructionsWithCopyableElements() && S.isMulDivLikeOp() &&
+ (S.getMainOp()->isIntDivRem() || S.getMainOp()->isFPDivRem() ||
+ isa<CallInst>(S.getMainOp())))) {
LLVM_DEBUG(dbgs() << "SLP: Scalar used twice in bundle.\n");
ReuseShuffleIndices.clear();
return false;
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 84d2ea6..fed04eb 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -1007,6 +1007,11 @@ public:
/// Creates a fixed-width vector containing all operands. The number of
/// operands matches the vector element count.
BuildVector,
+ /// Extracts all lanes from its (non-scalable) vector operand. This is an
+ /// abstract VPInstruction whose single defined VPValue represents VF
+ /// scalars extracted from a vector, to be replaced by VF ExtractElement
+ /// VPInstructions.
+ Unpack,
/// Compute the final result of a AnyOf reduction with select(cmp(),x,y),
/// where one of (x,y) is loop invariant, and both x and y are integer type.
ComputeAnyOfResult,
@@ -2715,6 +2720,15 @@ public:
return R && classof(R);
}
+ static inline bool classof(const VPValue *VPV) {
+ const VPRecipeBase *R = VPV->getDefiningRecipe();
+ return R && classof(R);
+ }
+
+ static inline bool classof(const VPSingleDefRecipe *R) {
+ return classof(static_cast<const VPRecipeBase *>(R));
+ }
+
/// Generate the reduction in the loop.
void execute(VPTransformState &State) override;
@@ -3100,6 +3114,9 @@ public:
/// Returns true if this expression contains recipes that may have side
/// effects.
bool mayHaveSideEffects() const;
+
+ /// Returns true if the result of this VPExpressionRecipe is a single-scalar.
+ bool isSingleScalar() const;
};
/// VPPredInstPHIRecipe is a recipe for generating the phi nodes needed when
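
The Unpack opcode added above is easiest to read with a concrete picture. Illustrative only; the printed-form spelling follows the `unpack` case added to VPInstruction::print further below:

    // Illustrative only: with VF = 4, one abstract Unpack
    //   vp<%s> = unpack vp<%v>
    // stands for the VF scalars of %v and is later replaced (see
    // replicateByVF in VPlanUnroll.cpp) by per-lane extracts:
    //   vp<%s0> = extractelement vp<%v>, 0
    //   vp<%s1> = extractelement vp<%v>, 1
    //   vp<%s2> = extractelement vp<%v>, 2
    //   vp<%s3> = extractelement vp<%v>, 3
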
diff --git a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
index 7e074c1..80a2e4b 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
@@ -110,6 +110,7 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) {
case VPInstruction::AnyOf:
case VPInstruction::BuildStructVector:
case VPInstruction::BuildVector:
+ case VPInstruction::Unpack:
return SetResultTyFromOp();
case VPInstruction::ExtractLane:
return inferScalarType(R->getOperand(1));
diff --git a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
index d8203e2..b5b98c6 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
@@ -388,6 +388,12 @@ m_ExtractLastElement(const Op0_t &Op0) {
return m_VPInstruction<VPInstruction::ExtractLastElement>(Op0);
}
+template <typename Op0_t, typename Op1_t>
+inline VPInstruction_match<Instruction::ExtractElement, Op0_t, Op1_t>
+m_ExtractElement(const Op0_t &Op0, const Op1_t &Op1) {
+ return m_VPInstruction<Instruction::ExtractElement>(Op0, Op1);
+}
+
template <typename Op0_t>
inline VPInstruction_match<VPInstruction::ExtractLastLanePerPart, Op0_t>
m_ExtractLastLanePerPart(const Op0_t &Op0) {
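
The new matcher's first use appears further down in this same patch, in simplifyRecipe (VPlanTransforms.cpp), where an extract of a just-built vector folds back to the scalar that lane was built from:

    // From the simplifyRecipe hunk below: extractelement(buildvector(...), k)
    // folds to the k-th scalar operand of the BuildVector.
    uint64_t Idx;
    if (match(&R, m_ExtractElement(m_BuildVector(), m_ConstantInt(Idx)))) {
      auto *BuildVector = cast<VPInstruction>(R.getOperand(0));
      Def->replaceAllUsesWith(BuildVector->getOperand(Idx));
      return;
    }
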
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index d1e67e6b..1f1b42b 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -515,6 +515,7 @@ unsigned VPInstruction::getNumOperandsForOpcode(unsigned Opcode) {
case VPInstruction::ExtractPenultimateElement:
case VPInstruction::FirstActiveLane:
case VPInstruction::Not:
+ case VPInstruction::Unpack:
return 1;
case Instruction::ICmp:
case Instruction::FCmp:
@@ -1246,6 +1247,7 @@ bool VPInstruction::opcodeMayReadOrWriteFromMemory() const {
case VPInstruction::StepVector:
case VPInstruction::ReductionStartVector:
case VPInstruction::VScale:
+ case VPInstruction::Unpack:
return false;
default:
return true;
@@ -1290,7 +1292,8 @@ bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const {
case VPInstruction::PtrAdd:
return Op == getOperand(0) || vputils::onlyFirstLaneUsed(this);
case VPInstruction::WidePtrAdd:
- return Op == getOperand(0);
+ // WidePtrAdd supports scalar and vector base addresses.
+ return false;
case VPInstruction::ComputeAnyOfResult:
case VPInstruction::ComputeFindIVResult:
return Op == getOperand(1);
@@ -1417,6 +1420,9 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent,
case VPInstruction::ResumeForEpilogue:
O << "resume-for-epilogue";
break;
+ case VPInstruction::Unpack:
+ O << "unpack";
+ break;
default:
O << Instruction::getOpcodeName(getOpcode());
}
@@ -2888,6 +2894,13 @@ bool VPExpressionRecipe::mayHaveSideEffects() const {
return false;
}
+bool VPExpressionRecipe::isSingleScalar() const {
+ // Cannot use vputils::isSingleScalar(), because all external operands
+ // of the expression will be live-ins while bundled.
+ return isa<VPReductionRecipe>(ExpressionRecipes.back()) &&
+ !isa<VPPartialReductionRecipe>(ExpressionRecipes.back());
+}
+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPExpressionRecipe::print(raw_ostream &O, const Twine &Indent,
@@ -3149,7 +3162,17 @@ static bool isUsedByLoadStoreAddress(const VPUser *V) {
while (!WorkList.empty()) {
auto *Cur = dyn_cast<VPSingleDefRecipe>(WorkList.pop_back_val());
- if (!Cur || !Seen.insert(Cur).second || isa<VPBlendRecipe>(Cur))
+ if (!Cur || !Seen.insert(Cur).second)
+ continue;
+
+ auto *Blend = dyn_cast<VPBlendRecipe>(Cur);
+ // Skip blends that use V only through a compare by checking if any incoming
+ // value was already visited.
+ if (Blend && none_of(seq<unsigned>(0, Blend->getNumIncomingValues()),
+ [&](unsigned I) {
+ return Seen.contains(
+ Blend->getIncomingValue(I)->getDefiningRecipe());
+ }))
continue;
for (VPUser *U : Cur->users()) {
@@ -3170,7 +3193,13 @@ static bool isUsedByLoadStoreAddress(const VPUser *V) {
}
}
- append_range(WorkList, cast<VPSingleDefRecipe>(Cur)->users());
+ // The legacy cost model only supports scalarization of loads/stores with
+ // phi addresses if the phi is directly used as the load/store address.
+ // Don't traverse further for blends.
+ if (Blend)
+ continue;
+
+ append_range(WorkList, Cur->users());
}
return false;
}
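
To make the blend handling above concrete, here is the shape of the case it distinguishes, sketched as comments with illustrative operand spellings:

    // Walking users of %v:
    //   %c = icmp eq %v, 0        ; visited on the walk from %v
    //   %b = BLEND %x, %y / %c    ; %v reaches %b only through the mask %c
    //   store %val, addr %b
    // Neither incoming value (%x, %y) was visited, so the walk skips %b and
    // %v is not treated as a load/store address. If %v did feed an incoming
    // value, the blend's own use as an address is still checked, but the walk
    // stops there, matching what the legacy cost model supports.
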
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index f5f616f..e060e70 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -106,7 +106,7 @@ bool VPlanTransforms::tryToConvertVPInstructionsToVPRecipes(
return false;
NewRecipe = new VPWidenIntrinsicRecipe(
*CI, getVectorIntrinsicIDForCall(CI, &TLI),
- {Ingredient.op_begin(), Ingredient.op_end() - 1}, CI->getType(),
+ drop_end(Ingredient.operands()), CI->getType(),
CI->getDebugLoc());
} else if (SelectInst *SI = dyn_cast<SelectInst>(Inst)) {
NewRecipe = new VPWidenSelectRecipe(*SI, Ingredient.operands());
@@ -356,8 +356,7 @@ static VPRegionBlock *createReplicateRegion(VPReplicateRecipe *PredRecipe,
// Replace predicated replicate recipe with a replicate recipe without a
// mask but in the replicate region.
auto *RecipeWithoutMask = new VPReplicateRecipe(
- PredRecipe->getUnderlyingInstr(),
- make_range(PredRecipe->op_begin(), std::prev(PredRecipe->op_end())),
+ PredRecipe->getUnderlyingInstr(), drop_end(PredRecipe->operands()),
PredRecipe->isSingleScalar(), nullptr /*Mask*/, *PredRecipe);
auto *Pred =
Plan.createVPBasicBlock(Twine(RegionName) + ".if", RecipeWithoutMask);
@@ -939,7 +938,7 @@ static void recursivelyDeleteDeadRecipes(VPValue *V) {
continue;
if (!isDeadRecipe(*R))
continue;
- WorkList.append(R->op_begin(), R->op_end());
+ append_range(WorkList, R->operands());
R->eraseFromParent();
}
}
@@ -1224,6 +1223,13 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
return;
}
+ uint64_t Idx;
+ if (match(&R, m_ExtractElement(m_BuildVector(), m_ConstantInt(Idx)))) {
+ auto *BuildVector = cast<VPInstruction>(R.getOperand(0));
+ Def->replaceAllUsesWith(BuildVector->getOperand(Idx));
+ return;
+ }
+
if (auto *Phi = dyn_cast<VPPhi>(Def)) {
if (Phi->getNumOperands() == 1)
Phi->replaceAllUsesWith(Phi->getOperand(0));
@@ -2006,7 +2012,7 @@ struct VPCSEDenseMapInfo : public DenseMapInfo<VPSingleDefRecipe *> {
.Case<VPWidenIntrinsicRecipe>([](auto *I) {
return std::make_pair(true, I->getVectorIntrinsicID());
})
- .Case<VPVectorPointerRecipe>([](auto *I) {
+ .Case<VPVectorPointerRecipe, VPPredInstPHIRecipe>([](auto *I) {
// For recipes that do not directly map to LLVM IR instructions,
// assign opcodes after the last VPInstruction opcode (which is also
// after the last IR Instruction opcode), based on the VPDefID.
@@ -2083,6 +2089,15 @@ struct VPCSEDenseMapInfo : public DenseMapInfo<VPSingleDefRecipe *> {
LFlags->getPredicate() !=
cast<VPRecipeWithIRFlags>(R)->getPredicate())
return false;
+ // Recipes in replicate regions implicitly depend on the predicate. If either
+ // recipe is in a replicate region, only consider them equal if both have
+ // the same parent.
+ const VPRegionBlock *RegionL = L->getParent()->getParent();
+ const VPRegionBlock *RegionR = R->getParent()->getParent();
+ if (((RegionL && RegionL->isReplicator()) ||
+ (RegionR && RegionR->isReplicator())) &&
+ L->getParent() != R->getParent())
+ return false;
const VPlan *Plan = L->getParent()->getPlan();
VPTypeAnalysis TypeInfo(*Plan);
return TypeInfo.inferScalarType(L) == TypeInfo.inferScalarType(R);
@@ -3780,7 +3795,7 @@ void VPlanTransforms::materializeBackedgeTakenCount(VPlan &Plan,
BTC->replaceAllUsesWith(TCMO);
}
-void VPlanTransforms::materializeBuildVectors(VPlan &Plan) {
+void VPlanTransforms::materializePacksAndUnpacks(VPlan &Plan) {
if (Plan.hasScalarVFOnly())
return;
@@ -3828,6 +3843,50 @@ void VPlanTransforms::materializeBuildVectors(VPlan &Plan) {
});
}
}
+
+ // Create explicit VPInstructions to convert vectors to scalars. The current
+ // implementation is conservative: it may skip some defs that may or may not
+ // be vector values. TODO: introduce Unpacks speculatively and remove them
+ // later if they are known to operate on scalar values.
+ for (VPBasicBlock *VPBB : VPBBsInsideLoopRegion) {
+ for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
+ if (isa<VPReplicateRecipe, VPInstruction, VPScalarIVStepsRecipe,
+ VPDerivedIVRecipe, VPCanonicalIVPHIRecipe>(&R))
+ continue;
+ for (VPValue *Def : R.definedValues()) {
+ // Skip recipes that are single-scalar or only have their first lane
+ // used.
+ // TODO: The Defs skipped here may or may not be vector values.
+ // Introduce Unpacks, and remove them later, if they are guaranteed to
+ // produce scalar values.
+ if (vputils::isSingleScalar(Def) || vputils::onlyFirstLaneUsed(Def))
+ continue;
+
+ // At the moment, we create unpacks only for scalar users outside
+ // replicate regions. Recipes inside replicate regions still extract the
+ // required lanes implicitly.
+ // TODO: Remove once replicate regions are unrolled completely.
+ auto IsCandidateUnpackUser = [Def](VPUser *U) {
+ VPRegionBlock *ParentRegion =
+ cast<VPRecipeBase>(U)->getParent()->getParent();
+ return U->usesScalars(Def) &&
+ (!ParentRegion || !ParentRegion->isReplicator());
+ };
+ if (none_of(Def->users(), IsCandidateUnpackUser))
+ continue;
+
+ auto *Unpack = new VPInstruction(VPInstruction::Unpack, {Def});
+ if (R.isPhi())
+ Unpack->insertBefore(*VPBB, VPBB->getFirstNonPhi());
+ else
+ Unpack->insertAfter(&R);
+ Def->replaceUsesWithIf(Unpack,
+ [&IsCandidateUnpackUser](VPUser &U, unsigned) {
+ return IsCandidateUnpackUser(&U);
+ });
+ }
+ }
+ }
}
void VPlanTransforms::materializeVectorTripCount(VPlan &Plan,
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
index 5a8a2bb..b28559b 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
@@ -325,9 +325,10 @@ struct VPlanTransforms {
static void materializeBackedgeTakenCount(VPlan &Plan,
VPBasicBlock *VectorPH);
- /// Add explicit Build[Struct]Vector recipes that combine multiple scalar
- /// values into single vectors.
- static void materializeBuildVectors(VPlan &Plan);
+ /// Add explicit Build[Struct]Vector recipes to Pack multiple scalar values
+ /// into vectors and Unpack recipes to extract scalars from vectors as
+ /// needed.
+ static void materializePacksAndUnpacks(VPlan &Plan);
/// Materialize VF and VFxUF to be computed explicitly using VPInstructions.
static void materializeVFAndVFxUF(VPlan &Plan, VPBasicBlock *VectorPH,
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
index 5aeda3e..cfd1a74 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
@@ -465,10 +465,21 @@ void VPlanTransforms::unrollByUF(VPlan &Plan, unsigned UF) {
/// Create a single-scalar clone of \p DefR (must be a VPReplicateRecipe or
/// VPInstruction) for lane \p Lane. Use \p Def2LaneDefs to look up scalar
/// definitions for operands of \p DefR.
-static VPRecipeWithIRFlags *
+static VPValue *
cloneForLane(VPlan &Plan, VPBuilder &Builder, Type *IdxTy,
VPRecipeWithIRFlags *DefR, VPLane Lane,
const DenseMap<VPValue *, SmallVector<VPValue *>> &Def2LaneDefs) {
+ VPValue *Op;
+ if (match(DefR, m_VPInstruction<VPInstruction::Unpack>(m_VPValue(Op)))) {
+ auto LaneDefs = Def2LaneDefs.find(Op);
+ if (LaneDefs != Def2LaneDefs.end())
+ return LaneDefs->second[Lane.getKnownLane()];
+
+ VPValue *Idx =
+ Plan.getOrAddLiveIn(ConstantInt::get(IdxTy, Lane.getKnownLane()));
+ return Builder.createNaryOp(Instruction::ExtractElement, {Op, Idx});
+ }
+
// Collect the operands at Lane, creating extracts as needed.
SmallVector<VPValue *> NewOps;
for (VPValue *Op : DefR->operands()) {
@@ -480,6 +491,10 @@ cloneForLane(VPlan &Plan, VPBuilder &Builder, Type *IdxTy,
continue;
}
if (Lane.getKind() == VPLane::Kind::ScalableLast) {
+ // Look through mandatory Unpack.
+ [[maybe_unused]] bool Matched =
+ match(Op, m_VPInstruction<VPInstruction::Unpack>(m_VPValue(Op)));
+ assert(Matched && "original op must have been Unpack");
NewOps.push_back(
Builder.createNaryOp(VPInstruction::ExtractLastElement, {Op}));
continue;
@@ -547,7 +562,8 @@ void VPlanTransforms::replicateByVF(VPlan &Plan, ElementCount VF) {
(isa<VPReplicateRecipe>(&R) &&
cast<VPReplicateRecipe>(&R)->isSingleScalar()) ||
(isa<VPInstruction>(&R) &&
- !cast<VPInstruction>(&R)->doesGeneratePerAllLanes()))
+ !cast<VPInstruction>(&R)->doesGeneratePerAllLanes() &&
+ cast<VPInstruction>(&R)->getOpcode() != VPInstruction::Unpack))
continue;
auto *DefR = cast<VPRecipeWithIRFlags>(&R);
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
index 8b1b0e5..10801c0 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
@@ -113,12 +113,12 @@ bool vputils::isUniformAcrossVFsAndUFs(VPValue *V) {
return TypeSwitch<const VPRecipeBase *, bool>(R)
.Case<VPDerivedIVRecipe>([](const auto *R) { return true; })
.Case<VPReplicateRecipe>([](const auto *R) {
- // Loads and stores that are uniform across VF lanes are handled by
- // VPReplicateRecipe.IsUniform. They are also uniform across UF parts if
- // all their operands are invariant.
- // TODO: Further relax the restrictions.
+ // Be conservative about side-effects, except for the
+ // known-side-effecting assumes and stores, which we know will be
+ // uniform.
return R->isSingleScalar() &&
- (isa<LoadInst, StoreInst>(R->getUnderlyingValue())) &&
+ (!R->mayHaveSideEffects() ||
+ isa<AssumeInst, StoreInst>(R->getUnderlyingInstr())) &&
all_of(R->operands(), isUniformAcrossVFsAndUFs);
})
.Case<VPInstruction>([](const auto *VPI) {
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.h b/llvm/lib/Transforms/Vectorize/VPlanUtils.h
index 9a2497e..840a5b9 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUtils.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.h
@@ -84,6 +84,12 @@ inline bool isSingleScalar(const VPValue *VPV) {
return VPI->isSingleScalar() || VPI->isVectorToScalar() ||
(PreservesUniformity(VPI->getOpcode()) &&
all_of(VPI->operands(), isSingleScalar));
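+  // A partial reduction is also a VPReductionRecipe, so rule it out first:
+  // it produces a vector rather than a single scalar.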
+  if (isa<VPPartialReductionRecipe>(VPV))
+    return false;
+  if (isa<VPReductionRecipe>(VPV))
+    return true;
+  if (auto *Expr = dyn_cast<VPExpressionRecipe>(VPV))
+    return Expr->isSingleScalar();
// VPExpandSCEVRecipes must be placed in the entry and are always uniform.
return isa<VPExpandSCEVRecipe>(VPV);
diff --git a/llvm/lib/WindowsDriver/MSVCPaths.cpp b/llvm/lib/WindowsDriver/MSVCPaths.cpp
index 1fc8974..09468da 100644
--- a/llvm/lib/WindowsDriver/MSVCPaths.cpp
+++ b/llvm/lib/WindowsDriver/MSVCPaths.cpp
@@ -259,9 +259,7 @@ static bool getSystemRegistryString(const char *keyPath, const char *valueName,
#endif // _WIN32
}
-namespace llvm {
-
-const char *archToWindowsSDKArch(Triple::ArchType Arch) {
+const char *llvm::archToWindowsSDKArch(Triple::ArchType Arch) {
switch (Arch) {
case Triple::ArchType::x86:
return "x86";
@@ -277,7 +275,7 @@ const char *archToWindowsSDKArch(Triple::ArchType Arch) {
}
}
-const char *archToLegacyVCArch(Triple::ArchType Arch) {
+const char *llvm::archToLegacyVCArch(Triple::ArchType Arch) {
switch (Arch) {
case Triple::ArchType::x86:
// x86 is default in legacy VC toolchains.
@@ -295,7 +293,7 @@ const char *archToLegacyVCArch(Triple::ArchType Arch) {
}
}
-const char *archToDevDivInternalArch(Triple::ArchType Arch) {
+const char *llvm::archToDevDivInternalArch(Triple::ArchType Arch) {
switch (Arch) {
case Triple::ArchType::x86:
return "i386";
@@ -311,8 +309,9 @@ const char *archToDevDivInternalArch(Triple::ArchType Arch) {
}
}
-bool appendArchToWindowsSDKLibPath(int SDKMajor, SmallString<128> LibPath,
- Triple::ArchType Arch, std::string &path) {
+bool llvm::appendArchToWindowsSDKLibPath(int SDKMajor, SmallString<128> LibPath,
+ Triple::ArchType Arch,
+ std::string &path) {
if (SDKMajor >= 8) {
sys::path::append(LibPath, archToWindowsSDKArch(Arch));
} else {
@@ -336,10 +335,11 @@ bool appendArchToWindowsSDKLibPath(int SDKMajor, SmallString<128> LibPath,
return true;
}
-std::string getSubDirectoryPath(SubDirectoryType Type, ToolsetLayout VSLayout,
- const std::string &VCToolChainPath,
- Triple::ArchType TargetArch,
- StringRef SubdirParent) {
+std::string llvm::getSubDirectoryPath(SubDirectoryType Type,
+ ToolsetLayout VSLayout,
+ const std::string &VCToolChainPath,
+ Triple::ArchType TargetArch,
+ StringRef SubdirParent) {
const char *SubdirName;
const char *IncludeName;
switch (VSLayout) {
@@ -390,19 +390,22 @@ std::string getSubDirectoryPath(SubDirectoryType Type, ToolsetLayout VSLayout,
return std::string(Path);
}
-bool useUniversalCRT(ToolsetLayout VSLayout, const std::string &VCToolChainPath,
- Triple::ArchType TargetArch, vfs::FileSystem &VFS) {
+bool llvm::useUniversalCRT(ToolsetLayout VSLayout,
+ const std::string &VCToolChainPath,
+ Triple::ArchType TargetArch, vfs::FileSystem &VFS) {
SmallString<128> TestPath(getSubDirectoryPath(
SubDirectoryType::Include, VSLayout, VCToolChainPath, TargetArch));
sys::path::append(TestPath, "stdlib.h");
return !VFS.exists(TestPath);
}
-bool getWindowsSDKDir(vfs::FileSystem &VFS, std::optional<StringRef> WinSdkDir,
- std::optional<StringRef> WinSdkVersion,
- std::optional<StringRef> WinSysRoot, std::string &Path,
- int &Major, std::string &WindowsSDKIncludeVersion,
- std::string &WindowsSDKLibVersion) {
+bool llvm::getWindowsSDKDir(vfs::FileSystem &VFS,
+ std::optional<StringRef> WinSdkDir,
+ std::optional<StringRef> WinSdkVersion,
+ std::optional<StringRef> WinSysRoot,
+ std::string &Path, int &Major,
+ std::string &WindowsSDKIncludeVersion,
+ std::string &WindowsSDKLibVersion) {
// Trust /winsdkdir and /winsdkversion if present.
if (getWindowsSDKDirViaCommandLine(VFS, WinSdkDir, WinSdkVersion, WinSysRoot,
Path, Major, WindowsSDKIncludeVersion)) {
@@ -460,11 +463,11 @@ bool getWindowsSDKDir(vfs::FileSystem &VFS, std::optional<StringRef> WinSdkDir,
return false;
}
-bool getUniversalCRTSdkDir(vfs::FileSystem &VFS,
- std::optional<StringRef> WinSdkDir,
- std::optional<StringRef> WinSdkVersion,
- std::optional<StringRef> WinSysRoot,
- std::string &Path, std::string &UCRTVersion) {
+bool llvm::getUniversalCRTSdkDir(vfs::FileSystem &VFS,
+ std::optional<StringRef> WinSdkDir,
+ std::optional<StringRef> WinSdkVersion,
+ std::optional<StringRef> WinSysRoot,
+ std::string &Path, std::string &UCRTVersion) {
// If /winsdkdir is passed, use it as location for the UCRT too.
// FIXME: Should there be a dedicated /ucrtdir to override /winsdkdir?
int Major;
@@ -491,11 +494,11 @@ bool getUniversalCRTSdkDir(vfs::FileSystem &VFS,
return getWindows10SDKVersionFromPath(VFS, Path, UCRTVersion);
}
-bool findVCToolChainViaCommandLine(vfs::FileSystem &VFS,
- std::optional<StringRef> VCToolsDir,
- std::optional<StringRef> VCToolsVersion,
- std::optional<StringRef> WinSysRoot,
- std::string &Path, ToolsetLayout &VSLayout) {
+bool llvm::findVCToolChainViaCommandLine(
+ vfs::FileSystem &VFS, std::optional<StringRef> VCToolsDir,
+ std::optional<StringRef> VCToolsVersion,
+ std::optional<StringRef> WinSysRoot, std::string &Path,
+ ToolsetLayout &VSLayout) {
// Don't validate the input; trust the value supplied by the user.
// The primary motivation is to prevent unnecessary file and registry access.
if (VCToolsDir || WinSysRoot) {
@@ -518,8 +521,9 @@ bool findVCToolChainViaCommandLine(vfs::FileSystem &VFS,
return false;
}
-bool findVCToolChainViaEnvironment(vfs::FileSystem &VFS, std::string &Path,
- ToolsetLayout &VSLayout) {
+bool llvm::findVCToolChainViaEnvironment(vfs::FileSystem &VFS,
+ std::string &Path,
+ ToolsetLayout &VSLayout) {
// These variables are typically set by vcvarsall.bat
// when launching a developer command prompt.
if (std::optional<std::string> VCToolsInstallDir =
@@ -627,9 +631,9 @@ bool findVCToolChainViaEnvironment(vfs::FileSystem &VFS, std::string &Path,
return false;
}
-bool findVCToolChainViaSetupConfig(vfs::FileSystem &VFS,
- std::optional<StringRef> VCToolsVersion,
- std::string &Path, ToolsetLayout &VSLayout) {
+bool llvm::findVCToolChainViaSetupConfig(
+ vfs::FileSystem &VFS, std::optional<StringRef> VCToolsVersion,
+ std::string &Path, ToolsetLayout &VSLayout) {
#if !defined(USE_MSVC_SETUP_API)
return false;
#else
@@ -724,7 +728,8 @@ bool findVCToolChainViaSetupConfig(vfs::FileSystem &VFS,
#endif
}
-bool findVCToolChainViaRegistry(std::string &Path, ToolsetLayout &VSLayout) {
+bool llvm::findVCToolChainViaRegistry(std::string &Path,
+ ToolsetLayout &VSLayout) {
std::string VSInstallPath;
if (getSystemRegistryString(R"(SOFTWARE\Microsoft\VisualStudio\$VERSION)",
"InstallDir", VSInstallPath, nullptr) ||
@@ -744,5 +749,3 @@ bool findVCToolChainViaRegistry(std::string &Path, ToolsetLayout &VSLayout) {
}
return false;
}
-
-} // namespace llvm
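
The MSVCPaths.cpp change is a pure refactor following the LLVM convention of defining previously declared functions with a qualified name instead of reopening the namespace; a minimal illustration of the pattern:

    // Before: definitions wrapped in the namespace.
    namespace llvm {
    const char *archToWindowsSDKArch(Triple::ArchType Arch) { /* ... */ }
    } // namespace llvm

    // After: qualified definition. The compiler now rejects a definition
    // that does not match a prior declaration in namespace llvm, catching
    // signature drift between header and implementation.
    const char *llvm::archToWindowsSDKArch(Triple::ArchType Arch) { /* ... */ }
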