Diffstat (limited to 'llvm/lib')
 llvm/lib/Analysis/DependenceAnalysis.cpp | 37
 llvm/lib/Analysis/MemoryDependenceAnalysis.cpp | 10
 llvm/lib/Analysis/MemoryLocation.cpp | 2
 llvm/lib/Analysis/StackLifetime.cpp | 2
 llvm/lib/CodeGen/AtomicExpandPass.cpp | 13
 llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp | 2
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 13
 llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 18
 llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 2
 llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp | 18
 llvm/lib/IR/AutoUpgrade.cpp | 26
 llvm/lib/IR/IRBuilder.cpp | 20
 llvm/lib/IR/Verifier.cpp | 2
 llvm/lib/MC/MCObjectStreamer.cpp | 2
 llvm/lib/MC/MCSection.cpp | 2
 llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp | 10
 llvm/lib/Target/AArch64/AArch64InstrFormats.td | 10
 llvm/lib/Target/AArch64/AArch64InstrInfo.td | 4
 llvm/lib/Target/AArch64/AArch64StackTagging.cpp | 3
 llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp | 13
 llvm/lib/Target/ARM/ARMISelLowering.cpp | 140
 llvm/lib/Target/ARM/ARMISelLowering.h | 3
 llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp | 3
 llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp | 17
 llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp | 6
 llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp | 30
 llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h | 3
 llvm/lib/Target/SPIRV/SPIRVPrepareFunctions.cpp | 29
 llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp | 14
 llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h | 3
 llvm/lib/Transforms/Coroutines/CoroFrame.cpp | 2
 llvm/lib/Transforms/IPO/ExpandVariadics.cpp | 21
 llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp | 48
 llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp | 20
 llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp | 16
 llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp | 2
 llvm/lib/Transforms/Instrumentation/TypeSanitizer.cpp | 4
 llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp | 2
 llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp | 4
 llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp | 2
 llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp | 6
 llvm/lib/Transforms/Scalar/NewGVN.cpp | 2
 llvm/lib/Transforms/Scalar/SROA.cpp | 27
 llvm/lib/Transforms/Utils/CodeExtractor.cpp | 6
 llvm/lib/Transforms/Utils/InlineFunction.cpp | 30
 llvm/lib/Transforms/Utils/Local.cpp | 2
 llvm/lib/Transforms/Utils/MemoryTaggingSupport.cpp | 2
 47 files changed, 391 insertions(+), 262 deletions(-)
diff --git a/llvm/lib/Analysis/DependenceAnalysis.cpp b/llvm/lib/Analysis/DependenceAnalysis.cpp
index 256befa..835e270 100644
--- a/llvm/lib/Analysis/DependenceAnalysis.cpp
+++ b/llvm/lib/Analysis/DependenceAnalysis.cpp
@@ -1074,7 +1074,7 @@ bool DependenceInfo::isKnownPredicate(ICmpInst::Predicate Pred, const SCEV *X,
/// Compare to see if S is less than Size, using
///
-/// isKnownNegative(S - max(Size, 1))
+/// isKnownNegative(S - Size)
///
/// with some extra checking if S is an AddRec and we can prove less-than using
/// the loop bounds.
@@ -1090,21 +1090,34 @@ bool DependenceInfo::isKnownLessThan(const SCEV *S, const SCEV *Size) const {
Size = SE->getTruncateOrZeroExtend(Size, MaxType);
// Special check for addrecs using BE taken count
- const SCEV *Bound = SE->getMinusSCEV(S, Size);
- if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Bound)) {
- if (AddRec->isAffine()) {
+ if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(S))
+ if (AddRec->isAffine() && AddRec->hasNoSignedWrap()) {
const SCEV *BECount = SE->getBackedgeTakenCount(AddRec->getLoop());
- if (!isa<SCEVCouldNotCompute>(BECount)) {
- const SCEV *Limit = AddRec->evaluateAtIteration(BECount, *SE);
- if (SE->isKnownNegative(Limit))
- return true;
- }
+ const SCEV *Start = AddRec->getStart();
+ const SCEV *Step = AddRec->getStepRecurrence(*SE);
+ const SCEV *End = AddRec->evaluateAtIteration(BECount, *SE);
+ const SCEV *Diff0 = SE->getMinusSCEV(Start, Size);
+ const SCEV *Diff1 = SE->getMinusSCEV(End, Size);
+
+      // If the value of Step is non-negative and the AddRec does not wrap,
+      // it reaches its maximum at the last iteration. So it's enough to
+      // check whether End - Size is negative.
+ if (SE->isKnownNonNegative(Step) && SE->isKnownNegative(Diff1))
+ return true;
+
+      // If the value of Step is non-positive and the AddRec does not wrap,
+      // the initial value is its maximum.
+ if (SE->isKnownNonPositive(Step) && SE->isKnownNegative(Diff0))
+ return true;
+
+      // Even if we don't know the sign of Step, either Start or End must be
+      // the maximum value of the AddRec since it does not wrap.
+ if (SE->isKnownNegative(Diff0) && SE->isKnownNegative(Diff1))
+ return true;
}
- }
// Check using normal isKnownNegative
- const SCEV *LimitedBound =
- SE->getMinusSCEV(S, SE->getSMaxExpr(Size, SE->getOne(Size->getType())));
+ const SCEV *LimitedBound = SE->getMinusSCEV(S, Size);
return SE->isKnownNegative(LimitedBound);
}
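
The new bound check can be read as follows: an affine AddRec {Start,+,Step} that does not wrap attains its maximum over iterations 0..BECount at one of the two endpoints. A minimal standalone sketch on plain integers (illustrative only; Start, Step, BTC and Size stand in for the SCEV expressions above):

    // Is {Start,+,Step} < Size at every iteration 0..BTC, assuming the
    // recurrence does not wrap? In SCEV the sign of Step may be unknown,
    // in which case both endpoints must be checked, as the code above does.
    bool knownLessThan(long Start, long Step, long BTC, long Size) {
      long End = Start + Step * BTC; // what evaluateAtIteration(BTC) computes
      // A non-wrapping affine AddRec peaks at an endpoint: the last
      // iteration if Step >= 0, the first iteration if Step <= 0.
      long Max = Step >= 0 ? End : Start;
      return Max < Size;
    }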
diff --git a/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp b/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
index 2b0f212..67c2cfa 100644
--- a/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
+++ b/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -150,6 +150,10 @@ static ModRefInfo GetLocation(const Instruction *Inst, MemoryLocation &Loc,
switch (II->getIntrinsicID()) {
case Intrinsic::lifetime_start:
case Intrinsic::lifetime_end:
+ Loc = MemoryLocation::getForArgument(II, 0, TLI);
+ // These intrinsics don't really modify the memory, but returning Mod
+ // will allow them to be handled conservatively.
+ return ModRefInfo::Mod;
case Intrinsic::invariant_start:
Loc = MemoryLocation::getForArgument(II, 1, TLI);
// These intrinsics don't really modify the memory, but returning Mod
@@ -441,11 +445,7 @@ MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom(
Intrinsic::ID ID = II->getIntrinsicID();
switch (ID) {
case Intrinsic::lifetime_start: {
- // FIXME: This only considers queries directly on the invariant-tagged
- // pointer, not on query pointers that are indexed off of them. It'd
- // be nice to handle that at some point (the right approach is to use
- // GetPointerBaseWithConstantOffset).
- MemoryLocation ArgLoc = MemoryLocation::getAfter(II->getArgOperand(1));
+ MemoryLocation ArgLoc = MemoryLocation::getAfter(II->getArgOperand(0));
if (BatchAA.isMustAlias(ArgLoc, MemLoc))
return MemDepResult::getDef(II);
continue;
diff --git a/llvm/lib/Analysis/MemoryLocation.cpp b/llvm/lib/Analysis/MemoryLocation.cpp
index 28a2640..72b643c 100644
--- a/llvm/lib/Analysis/MemoryLocation.cpp
+++ b/llvm/lib/Analysis/MemoryLocation.cpp
@@ -191,7 +191,7 @@ MemoryLocation MemoryLocation::getForArgument(const CallBase *Call,
case Intrinsic::lifetime_start:
case Intrinsic::lifetime_end: {
- assert(ArgIdx == 1 && "Invalid argument index");
+ assert(ArgIdx == 0 && "Invalid argument index");
auto *AI = dyn_cast<AllocaInst>(Arg);
if (!AI)
// lifetime of poison value.
diff --git a/llvm/lib/Analysis/StackLifetime.cpp b/llvm/lib/Analysis/StackLifetime.cpp
index abe4985..1e20fca 100644
--- a/llvm/lib/Analysis/StackLifetime.cpp
+++ b/llvm/lib/Analysis/StackLifetime.cpp
@@ -70,7 +70,7 @@ void StackLifetime::collectMarkers() {
const IntrinsicInst *II = dyn_cast<IntrinsicInst>(&I);
if (!II || !II->isLifetimeStartOrEnd())
continue;
- const AllocaInst *AI = dyn_cast<AllocaInst>(II->getArgOperand(1));
+ const AllocaInst *AI = dyn_cast<AllocaInst>(II->getArgOperand(0));
if (!AI)
continue;
auto It = AllocaNumbering.find(AI);
diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp b/llvm/lib/CodeGen/AtomicExpandPass.cpp
index 3f3d5dc9..278dd65 100644
--- a/llvm/lib/CodeGen/AtomicExpandPass.cpp
+++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp
@@ -1915,7 +1915,6 @@ bool AtomicExpandImpl::expandAtomicOpToLibcall(
// TODO: the "order" argument type is "int", not int32. So
// getInt32Ty may be wrong if the arch uses e.g. 16-bit ints.
- ConstantInt *SizeVal64 = ConstantInt::get(Type::getInt64Ty(Ctx), Size);
assert(Ordering != AtomicOrdering::NotAtomic && "expect atomic MO");
Constant *OrderingVal =
ConstantInt::get(Type::getInt32Ty(Ctx), (int)toCABI(Ordering));
@@ -2012,7 +2011,7 @@ bool AtomicExpandImpl::expandAtomicOpToLibcall(
if (CASExpected) {
AllocaCASExpected = AllocaBuilder.CreateAlloca(CASExpected->getType());
AllocaCASExpected->setAlignment(AllocaAlignment);
- Builder.CreateLifetimeStart(AllocaCASExpected, SizeVal64);
+ Builder.CreateLifetimeStart(AllocaCASExpected);
Builder.CreateAlignedStore(CASExpected, AllocaCASExpected, AllocaAlignment);
Args.push_back(AllocaCASExpected);
}
@@ -2026,7 +2025,7 @@ bool AtomicExpandImpl::expandAtomicOpToLibcall(
} else {
AllocaValue = AllocaBuilder.CreateAlloca(ValueOperand->getType());
AllocaValue->setAlignment(AllocaAlignment);
- Builder.CreateLifetimeStart(AllocaValue, SizeVal64);
+ Builder.CreateLifetimeStart(AllocaValue);
Builder.CreateAlignedStore(ValueOperand, AllocaValue, AllocaAlignment);
Args.push_back(AllocaValue);
}
@@ -2036,7 +2035,7 @@ bool AtomicExpandImpl::expandAtomicOpToLibcall(
if (!CASExpected && HasResult && !UseSizedLibcall) {
AllocaResult = AllocaBuilder.CreateAlloca(I->getType());
AllocaResult->setAlignment(AllocaAlignment);
- Builder.CreateLifetimeStart(AllocaResult, SizeVal64);
+ Builder.CreateLifetimeStart(AllocaResult);
Args.push_back(AllocaResult);
}
@@ -2069,7 +2068,7 @@ bool AtomicExpandImpl::expandAtomicOpToLibcall(
// And then, extract the results...
if (ValueOperand && !UseSizedLibcall)
- Builder.CreateLifetimeEnd(AllocaValue, SizeVal64);
+ Builder.CreateLifetimeEnd(AllocaValue);
if (CASExpected) {
// The final result from the CAS is {load of 'expected' alloca, bool result
@@ -2078,7 +2077,7 @@ bool AtomicExpandImpl::expandAtomicOpToLibcall(
Value *V = PoisonValue::get(FinalResultTy);
Value *ExpectedOut = Builder.CreateAlignedLoad(
CASExpected->getType(), AllocaCASExpected, AllocaAlignment);
- Builder.CreateLifetimeEnd(AllocaCASExpected, SizeVal64);
+ Builder.CreateLifetimeEnd(AllocaCASExpected);
V = Builder.CreateInsertValue(V, ExpectedOut, 0);
V = Builder.CreateInsertValue(V, Result, 1);
I->replaceAllUsesWith(V);
@@ -2089,7 +2088,7 @@ bool AtomicExpandImpl::expandAtomicOpToLibcall(
else {
V = Builder.CreateAlignedLoad(I->getType(), AllocaResult,
AllocaAlignment);
- Builder.CreateLifetimeEnd(AllocaResult, SizeVal64);
+ Builder.CreateLifetimeEnd(AllocaResult);
}
I->replaceAllUsesWith(V);
}
diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
index bbfae57..d30dfa7 100644
--- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -2209,7 +2209,7 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
unsigned Op = ID == Intrinsic::lifetime_start ? TargetOpcode::LIFETIME_START
: TargetOpcode::LIFETIME_END;
- const AllocaInst *AI = dyn_cast<AllocaInst>(CI.getArgOperand(1));
+ const AllocaInst *AI = dyn_cast<AllocaInst>(CI.getArgOperand(0));
if (!AI || !AI->isStaticAlloca())
return true;
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 7341914..5f1e38a 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -12843,22 +12843,21 @@ SDValue DAGCombiner::visitMHISTOGRAM(SDNode *N) {
SDLoc DL(HG);
EVT MemVT = HG->getMemoryVT();
+ EVT DataVT = Index.getValueType();
MachineMemOperand *MMO = HG->getMemOperand();
ISD::MemIndexType IndexType = HG->getIndexType();
if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
return Chain;
- SDValue Ops[] = {Chain, Inc, Mask, BasePtr, Index,
- HG->getScale(), HG->getIntID()};
- if (refineUniformBase(BasePtr, Index, HG->isIndexScaled(), DAG, DL))
+ if (refineUniformBase(BasePtr, Index, HG->isIndexScaled(), DAG, DL) ||
+ refineIndexType(Index, IndexType, DataVT, DAG)) {
+ SDValue Ops[] = {Chain, Inc, Mask, BasePtr, Index,
+ HG->getScale(), HG->getIntID()};
return DAG.getMaskedHistogram(DAG.getVTList(MVT::Other), MemVT, DL, Ops,
MMO, IndexType);
+ }
- EVT DataVT = Index.getValueType();
- if (refineIndexType(Index, IndexType, DataVT, DAG))
- return DAG.getMaskedHistogram(DAG.getVTList(MVT::Other), MemVT, DL, Ops,
- MMO, IndexType);
return SDValue();
}
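
The restructuring above also fixes a stale-operand issue: Ops was previously built from BasePtr and Index before refineUniformBase and refineIndexType could update them by reference, so the rebuilt histogram node could see the unrefined values. A generic sketch of that pattern (illustrative names, not DAG code):

    #include <array>

    bool refine(int &Index) { ++Index; return true; } // may rewrite Index

    int demo(int Index) {
      // Buggy shape: std::array<int, 1> Ops{Index}; built here would capture
      // the old value before refine() runs.
      if (refine(Index)) {
        std::array<int, 1> Ops{Index}; // build operands after refinement
        return Ops[0];
      }
      return -1;
    }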
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index b9e72c9..5ef1746 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -1371,7 +1371,7 @@ void SelectionDAG::init(MachineFunction &NewMF,
const TargetLibraryInfo *LibraryInfo,
UniformityInfo *NewUA, ProfileSummaryInfo *PSIin,
BlockFrequencyInfo *BFIin, MachineModuleInfo &MMIin,
- FunctionVarLocs const *VarLocs, bool HasDivergency) {
+ FunctionVarLocs const *VarLocs) {
MF = &NewMF;
SDAGISelPass = PassPtr;
ORE = &NewORE;
@@ -1384,7 +1384,6 @@ void SelectionDAG::init(MachineFunction &NewMF,
BFI = BFIin;
MMI = &MMIin;
FnVarLocs = VarLocs;
- DivergentTarget = HasDivergency;
}
SelectionDAG::~SelectionDAG() {
@@ -2331,8 +2330,7 @@ SDValue SelectionDAG::getRegister(Register Reg, EVT VT) {
return SDValue(E, 0);
auto *N = newSDNode<RegisterSDNode>(Reg, VTs);
- N->SDNodeBits.IsDivergent =
- DivergentTarget && TLI->isSDNodeSourceOfDivergence(N, FLI, UA);
+ N->SDNodeBits.IsDivergent = TLI->isSDNodeSourceOfDivergence(N, FLI, UA);
CSEMap.InsertNode(N, IP);
InsertNode(N);
return SDValue(N, 0);
@@ -12264,8 +12262,6 @@ static bool gluePropagatesDivergence(const SDNode *Node) {
}
bool SelectionDAG::calculateDivergence(SDNode *N) {
- if (!DivergentTarget)
- return false;
if (TLI->isSDNodeAlwaysUniform(N)) {
assert(!TLI->isSDNodeSourceOfDivergence(N, FLI, UA) &&
"Conflicting divergence information!");
@@ -12285,8 +12281,6 @@ bool SelectionDAG::calculateDivergence(SDNode *N) {
}
void SelectionDAG::updateDivergence(SDNode *N) {
- if (!DivergentTarget)
- return;
SmallVector<SDNode *, 16> Worklist(1, N);
do {
N = Worklist.pop_back_val();
@@ -13847,20 +13841,16 @@ void SelectionDAG::createOperands(SDNode *Node, ArrayRef<SDValue> Vals) {
Ops[I].setInitial(Vals[I]);
EVT VT = Ops[I].getValueType();
- // Take care of the Node's operands iff target has divergence
// Skip Chain. It does not carry divergence.
- if (DivergentTarget && VT != MVT::Other &&
+ if (VT != MVT::Other &&
(VT != MVT::Glue || gluePropagatesDivergence(Ops[I].getNode())) &&
Ops[I].getNode()->isDivergent()) {
- // Node is going to be divergent if at least one of its operand is
- // divergent, unless it belongs to the "AlwaysUniform" exemptions.
IsDivergent = true;
}
}
Node->NumOperands = Vals.size();
Node->OperandList = Ops;
- // Check the divergence of the Node itself.
- if (DivergentTarget && !TLI->isSDNodeAlwaysUniform(Node)) {
+ if (!TLI->isSDNodeAlwaysUniform(Node)) {
IsDivergent |= TLI->isSDNodeSourceOfDivergence(Node, FLI, UA);
Node->SDNodeBits.IsDivergent = IsDivergent;
}
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index f5f5c14..0d1e954 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -7597,7 +7597,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
if (TM.getOptLevel() == CodeGenOptLevel::None)
return;
- const AllocaInst *LifetimeObject = dyn_cast<AllocaInst>(I.getArgOperand(1));
+ const AllocaInst *LifetimeObject = dyn_cast<AllocaInst>(I.getArgOperand(0));
if (!LifetimeObject)
return;
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 26071ed..ece50ed 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -480,10 +480,7 @@ void SelectionDAGISel::initializeAnalysisResults(
MachineModuleInfo &MMI =
MAMP.getCachedResult<MachineModuleAnalysis>(*Fn.getParent())->getMMI();
- TTI = &FAM.getResult<TargetIRAnalysis>(Fn);
-
- CurDAG->init(*MF, *ORE, MFAM, LibInfo, UA, PSI, BFI, MMI, FnVarLocs,
- TTI->hasBranchDivergence(&Fn));
+ CurDAG->init(*MF, *ORE, MFAM, LibInfo, UA, PSI, BFI, MMI, FnVarLocs);
// Now get the optional analyses if we want to.
// This is based on the possibly changed OptLevel (after optnone is taken
@@ -501,6 +498,10 @@ void SelectionDAGISel::initializeAnalysisResults(
BatchAA = std::nullopt;
SP = &FAM.getResult<SSPLayoutAnalysis>(Fn);
+
+#if !defined(NDEBUG) && LLVM_ENABLE_ABI_BREAKING_CHECKS
+ TTI = &FAM.getResult<TargetIRAnalysis>(Fn);
+#endif
}
void SelectionDAGISel::initializeAnalysisResults(MachineFunctionPass &MFP) {
@@ -536,10 +537,7 @@ void SelectionDAGISel::initializeAnalysisResults(MachineFunctionPass &MFP) {
MachineModuleInfo &MMI =
MFP.getAnalysis<MachineModuleInfoWrapperPass>().getMMI();
- TTI = &MFP.getAnalysis<TargetTransformInfoWrapperPass>().getTTI(Fn);
-
- CurDAG->init(*MF, *ORE, &MFP, LibInfo, UA, PSI, BFI, MMI, FnVarLocs,
- TTI->hasBranchDivergence(&Fn));
+ CurDAG->init(*MF, *ORE, &MFP, LibInfo, UA, PSI, BFI, MMI, FnVarLocs);
// Now get the optional analyses if we want to.
// This is based on the possibly changed OptLevel (after optnone is taken
@@ -558,6 +556,10 @@ void SelectionDAGISel::initializeAnalysisResults(MachineFunctionPass &MFP) {
BatchAA = std::nullopt;
SP = &MFP.getAnalysis<StackProtector>().getLayoutInfo();
+
+#if !defined(NDEBUG) && LLVM_ENABLE_ABI_BREAKING_CHECKS
+ TTI = &MFP.getAnalysis<TargetTransformInfoWrapperPass>().getTTI(Fn);
+#endif
}
bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp
index 7159107..35f00ae 100644
--- a/llvm/lib/IR/AutoUpgrade.cpp
+++ b/llvm/lib/IR/AutoUpgrade.cpp
@@ -1311,14 +1311,15 @@ static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn,
}
break;
case 'l':
- if (Name.starts_with("lifetime.start") ||
- Name.starts_with("lifetime.end")) {
- // Unless remangling is required, do not upgrade the function declaration,
- // but do upgrade the calls.
- if (auto Result = llvm::Intrinsic::remangleIntrinsicFunction(F))
- NewFn = *Result;
- else
- NewFn = F;
+ if ((Name.starts_with("lifetime.start") ||
+ Name.starts_with("lifetime.end")) &&
+ F->arg_size() == 2) {
+ rename(F);
+ NewFn = Intrinsic::getOrInsertDeclaration(
+ F->getParent(),
+ Name.starts_with("lifetime.start") ? Intrinsic::lifetime_start
+ : Intrinsic::lifetime_end,
+ F->getArg(0)->getType());
return true;
}
break;
@@ -5133,21 +5134,20 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
case Intrinsic::lifetime_start:
case Intrinsic::lifetime_end: {
- Value *Size = CI->getArgOperand(0);
- Value *Ptr = CI->getArgOperand(1);
- if (isa<AllocaInst>(Ptr)) {
+ if (CI->arg_size() != 2) {
DefaultCase();
return;
}
+ Value *Ptr = CI->getArgOperand(1);
// Try to strip pointer casts, such that the lifetime works on an alloca.
Ptr = Ptr->stripPointerCasts();
if (isa<AllocaInst>(Ptr)) {
// Don't use NewFn, as we might have looked through an addrspacecast.
if (NewFn->getIntrinsicID() == Intrinsic::lifetime_start)
- NewCall = Builder.CreateLifetimeStart(Ptr, cast<ConstantInt>(Size));
+ NewCall = Builder.CreateLifetimeStart(Ptr);
else
- NewCall = Builder.CreateLifetimeEnd(Ptr, cast<ConstantInt>(Size));
+ NewCall = Builder.CreateLifetimeEnd(Ptr);
break;
}
diff --git a/llvm/lib/IR/IRBuilder.cpp b/llvm/lib/IR/IRBuilder.cpp
index 49c6dc7..614c3a9 100644
--- a/llvm/lib/IR/IRBuilder.cpp
+++ b/llvm/lib/IR/IRBuilder.cpp
@@ -411,28 +411,16 @@ CallInst *IRBuilderBase::CreateFPMinimumReduce(Value *Src) {
return getReductionIntrinsic(Intrinsic::vector_reduce_fminimum, Src);
}
-CallInst *IRBuilderBase::CreateLifetimeStart(Value *Ptr, ConstantInt *Size) {
+CallInst *IRBuilderBase::CreateLifetimeStart(Value *Ptr) {
assert(isa<PointerType>(Ptr->getType()) &&
"lifetime.start only applies to pointers.");
- if (!Size)
- Size = getInt64(-1);
- else
- assert(Size->getType() == getInt64Ty() &&
- "lifetime.start requires the size to be an i64");
- Value *Ops[] = { Size, Ptr };
- return CreateIntrinsic(Intrinsic::lifetime_start, {Ptr->getType()}, Ops);
+ return CreateIntrinsic(Intrinsic::lifetime_start, {Ptr->getType()}, {Ptr});
}
-CallInst *IRBuilderBase::CreateLifetimeEnd(Value *Ptr, ConstantInt *Size) {
+CallInst *IRBuilderBase::CreateLifetimeEnd(Value *Ptr) {
assert(isa<PointerType>(Ptr->getType()) &&
"lifetime.end only applies to pointers.");
- if (!Size)
- Size = getInt64(-1);
- else
- assert(Size->getType() == getInt64Ty() &&
- "lifetime.end requires the size to be an i64");
- Value *Ops[] = { Size, Ptr };
- return CreateIntrinsic(Intrinsic::lifetime_end, {Ptr->getType()}, Ops);
+ return CreateIntrinsic(Intrinsic::lifetime_end, {Ptr->getType()}, {Ptr});
}
CallInst *IRBuilderBase::CreateInvariantStart(Value *Ptr, ConstantInt *Size) {
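
For reference, a hedged usage sketch of the simplified lifetime API after this change; the emitted IR, which no longer carries a size operand, is shown in comments:

    // Assuming an IRBuilder<> Builder positioned inside a function:
    AllocaInst *Tmp = Builder.CreateAlloca(Builder.getInt32Ty());
    Builder.CreateLifetimeStart(Tmp); // call void @llvm.lifetime.start.p0(ptr %Tmp)
    // ... use Tmp ...
    Builder.CreateLifetimeEnd(Tmp);   // call void @llvm.lifetime.end.p0(ptr %Tmp)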
diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
index ca3f148..f5dcb5e 100644
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -6770,7 +6770,7 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
}
case Intrinsic::lifetime_start:
case Intrinsic::lifetime_end: {
- Value *Ptr = Call.getArgOperand(1);
+ Value *Ptr = Call.getArgOperand(0);
Check(isa<AllocaInst>(Ptr) || isa<PoisonValue>(Ptr),
"llvm.lifetime.start/end can only be used on alloca or poison",
&Call);
diff --git a/llvm/lib/MC/MCObjectStreamer.cpp b/llvm/lib/MC/MCObjectStreamer.cpp
index 8c27958..d0c6144 100644
--- a/llvm/lib/MC/MCObjectStreamer.cpp
+++ b/llvm/lib/MC/MCObjectStreamer.cpp
@@ -443,7 +443,7 @@ void MCObjectStreamer::emitInstToData(const MCInst &Inst,
// MCAssembler::relaxAlign.
auto *Sec = F->getParent();
if (!Sec->isLinkerRelaxable())
- Sec->setLinkerRelaxable();
+ Sec->setFirstLinkerRelaxable(F->getLayoutOrder());
// Do not add data after a linker-relaxable instruction. The difference
// between a new label and a label at or before the linker-relaxable
// instruction cannot be resolved at assemble-time.
diff --git a/llvm/lib/MC/MCSection.cpp b/llvm/lib/MC/MCSection.cpp
index 27ca131..9ed6fd1 100644
--- a/llvm/lib/MC/MCSection.cpp
+++ b/llvm/lib/MC/MCSection.cpp
@@ -20,7 +20,7 @@ using namespace llvm;
MCSection::MCSection(StringRef Name, bool IsText, bool IsBss, MCSymbol *Begin)
: Begin(Begin), HasInstructions(false), IsRegistered(false), IsText(IsText),
- IsBss(IsBss), LinkerRelaxable(false), Name(Name) {
+ IsBss(IsBss), Name(Name) {
DummyFragment.setParent(this);
}
diff --git a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
index 201bfe0..d6a3d59 100644
--- a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
@@ -1236,14 +1236,20 @@ bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
.add(MI.getOperand(3));
transferImpOps(MI, I, I);
} else {
+ unsigned RegState =
+ getRenamableRegState(MI.getOperand(1).isRenamable()) |
+ getKillRegState(
+ MI.getOperand(1).isKill() &&
+ MI.getOperand(1).getReg() != MI.getOperand(2).getReg() &&
+ MI.getOperand(1).getReg() != MI.getOperand(3).getReg());
BuildMI(MBB, MBBI, MI.getDebugLoc(),
TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::ORRv8i8
: AArch64::ORRv16i8))
.addReg(DstReg,
RegState::Define |
getRenamableRegState(MI.getOperand(0).isRenamable()))
- .add(MI.getOperand(1))
- .add(MI.getOperand(1));
+ .addReg(MI.getOperand(1).getReg(), RegState)
+ .addReg(MI.getOperand(1).getReg(), RegState);
auto I2 =
BuildMI(MBB, MBBI, MI.getDebugLoc(),
TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BSLv8i8
diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index d068a12..b033f88 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -7362,7 +7362,9 @@ multiclass SIMDDifferentThreeVectorBD<bit U, bits<4> opc, string asm,
[(set (v8i16 V128:$Rd), (OpNode (v8i8 V64:$Rn), (v8i8 V64:$Rm)))]>;
def v16i8 : BaseSIMDDifferentThreeVector<U, 0b001, opc,
V128, V128, V128,
- asm#"2", ".8h", ".16b", ".16b", []>;
+ asm#"2", ".8h", ".16b", ".16b",
+ [(set (v8i16 V128:$Rd), (OpNode (v8i8 (extract_high_v16i8 (v16i8 V128:$Rn))),
+ (v8i8 (extract_high_v16i8 (v16i8 V128:$Rm)))))]>;
let Predicates = [HasAES] in {
def v1i64 : BaseSIMDDifferentThreeVector<U, 0b110, opc,
V128, V64, V64,
@@ -7374,10 +7376,6 @@ multiclass SIMDDifferentThreeVectorBD<bit U, bits<4> opc, string asm,
[(set (v16i8 V128:$Rd), (OpNode (extract_high_v2i64 (v2i64 V128:$Rn)),
(extract_high_v2i64 (v2i64 V128:$Rm))))]>;
}
-
- def : Pat<(v8i16 (OpNode (v8i8 (extract_high_v16i8 (v16i8 V128:$Rn))),
- (v8i8 (extract_high_v16i8 (v16i8 V128:$Rm))))),
- (!cast<Instruction>(NAME#"v16i8") V128:$Rn, V128:$Rm)>;
}
multiclass SIMDLongThreeVectorHS<bit U, bits<4> opc, string asm,
@@ -7402,6 +7400,7 @@ multiclass SIMDLongThreeVectorHS<bit U, bits<4> opc, string asm,
(extract_high_v4i32 (v4i32 V128:$Rm))))]>;
}
+let isCommutable = 1 in
multiclass SIMDLongThreeVectorBHSabdl<bit U, bits<4> opc, string asm,
SDPatternOperator OpNode = null_frag> {
def v8i8_v8i16 : BaseSIMDDifferentThreeVector<U, 0b000, opc,
@@ -7483,6 +7482,7 @@ multiclass SIMDLongThreeVectorTiedBHSabal<bit U, bits<4> opc,
(extract_high_v4i32 (v4i32 V128:$Rm)))))))]>;
}
+let isCommutable = 1 in
multiclass SIMDLongThreeVectorBHS<bit U, bits<4> opc, string asm,
SDPatternOperator OpNode = null_frag> {
def v8i8_v8i16 : BaseSIMDDifferentThreeVector<U, 0b000, opc,
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index ac31236..8cfbff9 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -6055,6 +6055,7 @@ defm MLA : SIMDThreeSameVectorBHSTied<0, 0b10010, "mla", null_frag>;
defm MLS : SIMDThreeSameVectorBHSTied<1, 0b10010, "mls", null_frag>;
defm MUL : SIMDThreeSameVectorBHS<0, 0b10011, "mul", mul>;
+let isCommutable = 1 in
defm PMUL : SIMDThreeSameVectorB<1, 0b10011, "pmul", int_aarch64_neon_pmul>;
defm SABA : SIMDThreeSameVectorBHSTied<0, 0b01111, "saba",
TriOpFrag<(add node:$LHS, (abds node:$MHS, node:$RHS))> >;
@@ -6806,6 +6807,7 @@ defm ADDHN : SIMDNarrowThreeVectorBHS<0,0b0100,"addhn", int_aarch64_neon_addhn>
defm SUBHN : SIMDNarrowThreeVectorBHS<0,0b0110,"subhn", int_aarch64_neon_subhn>;
defm RADDHN : SIMDNarrowThreeVectorBHS<1,0b0100,"raddhn",int_aarch64_neon_raddhn>;
defm RSUBHN : SIMDNarrowThreeVectorBHS<1,0b0110,"rsubhn",int_aarch64_neon_rsubhn>;
+let isCommutable = 1 in
defm PMULL : SIMDDifferentThreeVectorBD<0,0b1110,"pmull", AArch64pmull>;
defm SABAL : SIMDLongThreeVectorTiedBHSabal<0,0b0101,"sabal", abds>;
defm SABDL : SIMDLongThreeVectorBHSabdl<0, 0b0111, "sabdl", abds>;
@@ -6822,6 +6824,7 @@ defm SQDMLAL : SIMDLongThreeVectorSQDMLXTiedHS<0, 0b1001, "sqdmlal", saddsat>;
defm SQDMLSL : SIMDLongThreeVectorSQDMLXTiedHS<0, 0b1011, "sqdmlsl", ssubsat>;
defm SQDMULL : SIMDLongThreeVectorHS<0, 0b1101, "sqdmull",
int_aarch64_neon_sqdmull>;
+let isCommutable = 0 in
defm SSUBL : SIMDLongThreeVectorBHS<0, 0b0010, "ssubl",
BinOpFrag<(sub (sext node:$LHS), (sext node:$RHS))>>;
defm SSUBW : SIMDWideThreeVectorBHS<0, 0b0011, "ssubw",
@@ -6836,6 +6839,7 @@ defm UMLAL : SIMDLongThreeVectorTiedBHS<1, 0b1000, "umlal",
defm UMLSL : SIMDLongThreeVectorTiedBHS<1, 0b1010, "umlsl",
TriOpFrag<(sub node:$LHS, (AArch64umull node:$MHS, node:$RHS))>>;
defm UMULL : SIMDLongThreeVectorBHS<1, 0b1100, "umull", AArch64umull>;
+let isCommutable = 0 in
defm USUBL : SIMDLongThreeVectorBHS<1, 0b0010, "usubl",
BinOpFrag<(sub (zanyext node:$LHS), (zanyext node:$RHS))>>;
defm USUBW : SIMDWideThreeVectorBHS< 1, 0b0011, "usubw",
diff --git a/llvm/lib/Target/AArch64/AArch64StackTagging.cpp b/llvm/lib/Target/AArch64/AArch64StackTagging.cpp
index f136a184..a67bd42 100644
--- a/llvm/lib/Target/AArch64/AArch64StackTagging.cpp
+++ b/llvm/lib/Target/AArch64/AArch64StackTagging.cpp
@@ -585,8 +585,7 @@ bool AArch64StackTagging::runOnFunction(Function &Fn) {
ClMaxLifetimes);
if (StandardLifetime) {
IntrinsicInst *Start = Info.LifetimeStart[0];
- uint64_t Size =
- cast<ConstantInt>(Start->getArgOperand(0))->getZExtValue();
+ uint64_t Size = *Info.AI->getAllocationSize(*DL);
Size = alignTo(Size, kTagGranuleSize);
tagAlloca(AI, Start->getNextNode(), TagPCall, Size);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp b/llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp
index f580f43..c21a9a1 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp
@@ -109,12 +109,17 @@ bool AMDGPURewriteAGPRCopyMFMAImpl::run(MachineFunction &MF) const {
// Find AV_* registers assigned to AGPRs.
const TargetRegisterClass *VirtRegRC = MRI.getRegClass(VReg);
- if (!TRI.isVectorSuperClass(VirtRegRC))
+ if (!TRI.hasAGPRs(VirtRegRC))
continue;
- const TargetRegisterClass *AssignedRC = TRI.getPhysRegBaseClass(PhysReg);
- if (!TRI.isAGPRClass(AssignedRC))
- continue;
+ const TargetRegisterClass *AssignedRC = VirtRegRC;
+ if (TRI.hasVGPRs(VirtRegRC)) {
+ // If this is an AV register, we have to check if the actual assignment is
+ // to an AGPR
+ AssignedRC = TRI.getPhysRegBaseClass(PhysReg);
+ if (!TRI.isAGPRClass(AssignedRC))
+ continue;
+ }
LiveInterval &LI = LIS.getInterval(VReg);
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index ea99cc4..75d3cfa 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -802,6 +802,12 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM_,
setOperationAction(ISD::BSWAP, VT, Expand);
}
+ if (!Subtarget->isThumb1Only() && !Subtarget->hasV8_1MMainlineOps())
+ setOperationAction(ISD::SCMP, MVT::i32, Custom);
+
+ if (!Subtarget->hasV8_1MMainlineOps())
+ setOperationAction(ISD::UCMP, MVT::i32, Custom);
+
setOperationAction(ISD::ConstantFP, MVT::f32, Custom);
setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
@@ -1634,6 +1640,10 @@ bool ARMTargetLowering::useSoftFloat() const {
return Subtarget->useSoftFloat();
}
+bool ARMTargetLowering::shouldExpandCmpUsingSelects(EVT VT) const {
+ return !Subtarget->isThumb1Only() && VT.getSizeInBits() <= 32;
+}
+
// FIXME: It might make sense to define the representative register class as the
// nearest super-register that has a non-null superset. For example, DPR_VFP2 is
// a super-register of SPR, and DPR is a superset of DPR_VFP2. Consequently,
@@ -10612,6 +10622,133 @@ SDValue ARMTargetLowering::LowerFP_TO_BF16(SDValue Op,
return DAG.getBitcast(MVT::i32, Res);
}
+SDValue ARMTargetLowering::LowerCMP(SDValue Op, SelectionDAG &DAG) const {
+ SDLoc dl(Op);
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+
+  // Determine whether this is a signed or an unsigned comparison
+ bool IsSigned = (Op.getOpcode() == ISD::SCMP);
+
+ // Special case for Thumb1 UCMP only
+ if (!IsSigned && Subtarget->isThumb1Only()) {
+ // For Thumb unsigned comparison, use this sequence:
+ // subs r2, r0, r1 ; r2 = LHS - RHS, sets flags
+ // sbc r2, r2 ; r2 = r2 - r2 - !carry
+ // cmp r1, r0 ; compare RHS with LHS
+ // sbc r1, r1 ; r1 = r1 - r1 - !carry
+ // subs r0, r2, r1 ; r0 = r2 - r1 (final result)
+
+ // First subtraction: LHS - RHS
+ SDValue Sub1WithFlags = DAG.getNode(
+ ARMISD::SUBC, dl, DAG.getVTList(MVT::i32, FlagsVT), LHS, RHS);
+ SDValue Sub1Result = Sub1WithFlags.getValue(0);
+ SDValue Flags1 = Sub1WithFlags.getValue(1);
+
+ // SUBE: Sub1Result - Sub1Result - !carry
+ // This gives 0 if LHS >= RHS (unsigned), -1 if LHS < RHS (unsigned)
+ SDValue Sbc1 =
+ DAG.getNode(ARMISD::SUBE, dl, DAG.getVTList(MVT::i32, FlagsVT),
+ Sub1Result, Sub1Result, Flags1);
+ SDValue Sbc1Result = Sbc1.getValue(0);
+
+ // Second comparison: RHS vs LHS (reverse comparison)
+ SDValue CmpFlags = DAG.getNode(ARMISD::CMP, dl, FlagsVT, RHS, LHS);
+
+ // SUBE: RHS - RHS - !carry
+    // This gives 0 if RHS >= LHS (unsigned), -1 if RHS < LHS (unsigned)
+ SDValue Sbc2 = DAG.getNode(
+ ARMISD::SUBE, dl, DAG.getVTList(MVT::i32, FlagsVT), RHS, RHS, CmpFlags);
+ SDValue Sbc2Result = Sbc2.getValue(0);
+
+ // Final subtraction: Sbc1Result - Sbc2Result (no flags needed)
+ SDValue Result =
+ DAG.getNode(ISD::SUB, dl, MVT::i32, Sbc1Result, Sbc2Result);
+ if (Op.getValueType() != MVT::i32)
+ Result = DAG.getSExtOrTrunc(Result, dl, Op.getValueType());
+
+ return Result;
+ }
+
+ // For the ARM assembly pattern:
+  // subs r0, r0, r1   ; subtract RHS from LHS and set flags
+  // movgt r0, #1      ; if LHS > RHS, set result to 1
+  //                   ;   (GT for signed, HI for unsigned)
+  // mvnlt r0, #0      ; if LHS < RHS, set result to -1
+  //                   ;   (LT for signed, LO for unsigned)
+  //                   ; if LHS == RHS, result remains 0 from the subs
+
+ // Optimization: if RHS is a subtraction against 0, use ADDC instead of SUBC
+ unsigned Opcode = ARMISD::SUBC;
+
+ // Check if RHS is a subtraction against 0: (0 - X)
+ if (RHS.getOpcode() == ISD::SUB) {
+ SDValue SubLHS = RHS.getOperand(0);
+ SDValue SubRHS = RHS.getOperand(1);
+
+ // Check if it's 0 - X
+ if (isNullConstant(SubLHS)) {
+ bool CanUseAdd = false;
+ if (IsSigned) {
+ // For SCMP: only if X is known to never be INT_MIN (to avoid overflow)
+ if (RHS->getFlags().hasNoSignedWrap() || !DAG.computeKnownBits(SubRHS)
+ .getSignedMinValue()
+ .isMinSignedValue()) {
+ CanUseAdd = true;
+ }
+ } else {
+ // For UCMP: only if X is known to never be zero
+ if (DAG.isKnownNeverZero(SubRHS)) {
+ CanUseAdd = true;
+ }
+ }
+
+ if (CanUseAdd) {
+ Opcode = ARMISD::ADDC;
+ RHS = SubRHS; // Replace RHS with X, so we do LHS + X instead of
+ // LHS - (0 - X)
+ }
+ }
+ }
+
+ // Generate the operation with flags
+ SDValue OpWithFlags;
+ if (Opcode == ARMISD::ADDC) {
+ // Use ADDC: LHS + RHS (where RHS was 0 - X, now X)
+ OpWithFlags = DAG.getNode(ARMISD::ADDC, dl,
+ DAG.getVTList(MVT::i32, FlagsVT), LHS, RHS);
+ } else {
+ // Use ARMISD::SUBC to generate SUBS instruction (subtract with flags)
+ OpWithFlags = DAG.getNode(ARMISD::SUBC, dl,
+ DAG.getVTList(MVT::i32, FlagsVT), LHS, RHS);
+ }
+
+ SDValue OpResult = OpWithFlags.getValue(0); // The operation result
+ SDValue Flags = OpWithFlags.getValue(1); // The flags
+
+ // Constants for conditional moves
+ SDValue One = DAG.getConstant(1, dl, MVT::i32);
+ SDValue MinusOne = DAG.getAllOnesConstant(dl, MVT::i32);
+
+ // Select condition codes based on signed vs unsigned
+ ARMCC::CondCodes GTCond = IsSigned ? ARMCC::GT : ARMCC::HI;
+ ARMCC::CondCodes LTCond = IsSigned ? ARMCC::LT : ARMCC::LO;
+
+ // First conditional move: if greater than, set to 1
+ SDValue GTCondValue = DAG.getConstant(GTCond, dl, MVT::i32);
+ SDValue Result1 = DAG.getNode(ARMISD::CMOV, dl, MVT::i32, OpResult, One,
+ GTCondValue, Flags);
+
+ // Second conditional move: if less than, set to -1
+ SDValue LTCondValue = DAG.getConstant(LTCond, dl, MVT::i32);
+ SDValue Result2 = DAG.getNode(ARMISD::CMOV, dl, MVT::i32, Result1, MinusOne,
+ LTCondValue, Flags);
+
+ if (Op.getValueType() != MVT::i32)
+ Result2 = DAG.getSExtOrTrunc(Result2, dl, Op.getValueType());
+
+ return Result2;
+}
+
SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
LLVM_DEBUG(dbgs() << "Lowering node: "; Op.dump());
switch (Op.getOpcode()) {
@@ -10740,6 +10877,9 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::FP_TO_BF16:
return LowerFP_TO_BF16(Op, DAG);
case ARMISD::WIN__DBZCHK: return SDValue();
+ case ISD::UCMP:
+ case ISD::SCMP:
+ return LowerCMP(Op, DAG);
}
}
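
As a cross-check of the Thumb1 UCMP sequence above, the same branch-free computation on plain integers (a sketch of the semantics, not the generated code):

    // A is -1 iff LHS < RHS (borrow out of subs+sbc), else 0.
    // B is -1 iff RHS < LHS (borrow out of cmp+sbc), else 0.
    int ucmpSketch(unsigned LHS, unsigned RHS) {
      int A = LHS < RHS ? -1 : 0; // subs r2, r0, r1; sbc r2, r2
      int B = RHS < LHS ? -1 : 0; // cmp r1, r0; sbc r1, r1
      return A - B;               // -1 if LHS < RHS, 0 if equal, 1 if LHS > RHS
    }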
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h
index 825145d..a84a3cb 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -607,6 +607,8 @@ class VectorType;
bool preferZeroCompareBranch() const override { return true; }
+ bool shouldExpandCmpUsingSelects(EVT VT) const override;
+
bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;
bool hasAndNotCompare(SDValue V) const override {
@@ -904,6 +906,7 @@ class VectorType;
void LowerLOAD(SDNode *N, SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG) const;
SDValue LowerFP_TO_BF16(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerCMP(SDValue Op, SelectionDAG &DAG) const;
Register getRegisterByName(const char* RegName, LLT VT,
const MachineFunction &MF) const override;
diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp
index fda9d97..ca5d27d 100644
--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp
+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp
@@ -254,7 +254,8 @@ bool LoongArchAsmBackend::relaxAlign(MCFragment &F, unsigned &Size) {
MCFixup::create(0, Expr, FirstLiteralRelocationKind + ELF::R_LARCH_ALIGN);
F.setVarFixups({Fixup});
F.setLinkerRelaxable();
- F.getParent()->setLinkerRelaxable();
+ if (!F.getParent()->isLinkerRelaxable())
+ F.getParent()->setFirstLinkerRelaxable(F.getLayoutOrder());
return true;
}
diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
index 76dca47..f123040 100644
--- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -1102,13 +1102,20 @@ void PPCRegisterInfo::lowerCRBitSpilling(MachineBasicBlock::iterator II,
SpillsKnownBit = true;
break;
default:
+ // When spilling a CR bit, the super register may not be explicitly defined
+ // (i.e. it can be defined by a CR-logical that only defines the subreg) so
+ // we state that the CR field is undef. Also, in order to preserve the kill
+ // flag on the CR bit, we add it as an implicit use.
+
// On Power10, we can use SETNBC to spill all CR bits. SETNBC will set all
// bits (specifically, it produces a -1 if the CR bit is set). Ultimately,
// the bit that is of importance to us is bit 32 (bit 0 of a 32-bit
// register), and SETNBC will set this.
if (Subtarget.isISA3_1()) {
BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::SETNBC8 : PPC::SETNBC), Reg)
- .addReg(SrcReg, RegState::Undef);
+ .addReg(SrcReg, RegState::Undef)
+ .addReg(SrcReg, RegState::Implicit |
+ getKillRegState(MI.getOperand(0).isKill()));
break;
}
@@ -1122,16 +1129,14 @@ void PPCRegisterInfo::lowerCRBitSpilling(MachineBasicBlock::iterator II,
SrcReg == PPC::CR4LT || SrcReg == PPC::CR5LT ||
SrcReg == PPC::CR6LT || SrcReg == PPC::CR7LT) {
BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::SETB8 : PPC::SETB), Reg)
- .addReg(getCRFromCRBit(SrcReg), RegState::Undef);
+ .addReg(getCRFromCRBit(SrcReg), RegState::Undef)
+ .addReg(SrcReg, RegState::Implicit |
+ getKillRegState(MI.getOperand(0).isKill()));
break;
}
}
// We need to move the CR field that contains the CR bit we are spilling.
- // The super register may not be explicitly defined (i.e. it can be defined
- // by a CR-logical that only defines the subreg) so we state that the CR
- // field is undef. Also, in order to preserve the kill flag on the CR bit,
- // we add it as an implicit use.
BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::MFOCRF8 : PPC::MFOCRF), Reg)
.addReg(getCRFromCRBit(SrcReg), RegState::Undef)
.addReg(SrcReg,
diff --git a/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp b/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
index 67cc01e..e0ac591 100644
--- a/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
+++ b/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
@@ -674,6 +674,9 @@ static constexpr FeatureBitset XAndesGroup = {
static constexpr DecoderListEntry DecoderList32[]{
// Vendor Extensions
+ {DecoderTableXCV32, XCVFeatureGroup, "CORE-V extensions"},
+ {DecoderTableXRivos32, XRivosFeatureGroup, "Rivos"},
+ {DecoderTableXqci32, XqciFeatureGroup, "Qualcomm uC Extensions"},
{DecoderTableXVentana32,
{RISCV::FeatureVendorXVentanaCondOps},
"XVentanaCondOps"},
@@ -690,9 +693,6 @@ static constexpr DecoderListEntry DecoderList32[]{
"MIPS mips.pref"},
{DecoderTableXAndes32, XAndesGroup, "Andes extensions"},
// Standard Extensions
- {DecoderTableXCV32, XCVFeatureGroup, "CORE-V extensions"},
- {DecoderTableXqci32, XqciFeatureGroup, "Qualcomm uC Extensions"},
- {DecoderTableXRivos32, XRivosFeatureGroup, "Rivos"},
{DecoderTable32, {}, "standard 32-bit instructions"},
{DecoderTableRV32Only32, {}, "RV32-only standard 32-bit instructions"},
{DecoderTableZfinx32, {}, "Zfinx (Float in Integer)"},
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp
index a997ea5..8d956ce 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp
@@ -32,6 +32,11 @@ static cl::opt<bool> ULEB128Reloc(
"riscv-uleb128-reloc", cl::init(true), cl::Hidden,
cl::desc("Emit R_RISCV_SET_ULEB128/R_RISCV_SUB_ULEB128 if appropriate"));
+static cl::opt<bool>
+ AlignRvc("riscv-align-rvc", cl::init(true), cl::Hidden,
+ cl::desc("When generating R_RISCV_ALIGN, insert $alignment-2 "
+ "bytes of NOPs even in norvc code"));
+
RISCVAsmBackend::RISCVAsmBackend(const MCSubtargetInfo &STI, uint8_t OSABI,
bool Is64Bit, const MCTargetOptions &Options)
: MCAsmBackend(llvm::endianness::little), STI(STI), OSABI(OSABI),
@@ -306,12 +311,21 @@ void RISCVAsmBackend::relaxInstruction(MCInst &Inst,
// If conditions are met, compute the padding size and create a fixup encoding
// the padding size in the addend.
bool RISCVAsmBackend::relaxAlign(MCFragment &F, unsigned &Size) {
- // Use default handling unless linker relaxation is enabled and the alignment
- // is larger than the nop size.
- const MCSubtargetInfo *STI = F.getSubtargetInfo();
- if (!STI->hasFeature(RISCV::FeatureRelax))
+ // Alignments before the first linker-relaxable instruction have fixed sizes
+ // and do not require relocations. Alignments after a linker-relaxable
+ // instruction require a relocation, even if the STI specifies norelax.
+ //
+ // firstLinkerRelaxable is the layout order within the subsection, which may
+ // be smaller than the section's order. Therefore, alignments in a
+ // lower-numbered subsection may be unnecessarily treated as linker-relaxable.
+ auto *Sec = F.getParent();
+ if (F.getLayoutOrder() <= Sec->firstLinkerRelaxable())
return false;
- unsigned MinNopLen = STI->hasFeature(RISCV::FeatureStdExtZca) ? 2 : 4;
+
+ // Use default handling unless the alignment is larger than the nop size.
+ const MCSubtargetInfo *STI = F.getSubtargetInfo();
+ unsigned MinNopLen =
+ AlignRvc || STI->hasFeature(RISCV::FeatureStdExtZca) ? 2 : 4;
if (F.getAlignment() <= MinNopLen)
return false;
@@ -321,7 +335,6 @@ bool RISCVAsmBackend::relaxAlign(MCFragment &F, unsigned &Size) {
MCFixup::create(0, Expr, FirstLiteralRelocationKind + ELF::R_RISCV_ALIGN);
F.setVarFixups({Fixup});
F.setLinkerRelaxable();
- F.getParent()->setLinkerRelaxable();
return true;
}
@@ -474,8 +487,9 @@ bool RISCVAsmBackend::writeNopData(raw_ostream &OS, uint64_t Count,
// TODO: emit a mapping symbol right here
if (Count % 4 == 2) {
- // The canonical nop with Zca is c.nop.
- OS.write(STI->hasFeature(RISCV::FeatureStdExtZca) ? "\x01\0" : "\0\0", 2);
+ // The canonical nop with Zca is c.nop. For .balign 4, we generate a 2-byte
+ // c.nop even in a norvc region.
+ OS.write("\x01\0", 2);
Count -= 2;
}
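
A sketch of the padding writeNopData now produces (assumed encodings: c.nop = 0x0001 and nop = addi x0, x0, 0 = 0x00000013, both little-endian):

    #include <cstdint>
    #include <string>

    void writeNops(std::string &Out, uint64_t Count) {
      if (Count % 4 == 2) {            // odd half-word: lead with one c.nop,
        Out.append("\x01\x00", 2);     // now emitted even in norvc regions
        Count -= 2;
      }
      for (; Count >= 4; Count -= 4)   // then canonical 4-byte nops
        Out.append("\x13\x00\x00\x00", 4);
    }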
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
index 05d504c..6a1f4b3 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
@@ -114,6 +114,9 @@ public:
bool enableScalableVectorization() const override {
return ST->hasVInstructions();
}
+ bool preferPredicateOverEpilogue(TailFoldingInfo *TFI) const override {
+ return ST->hasVInstructions();
+ }
TailFoldingStyle
getPreferredTailFoldingStyle(bool IVUpdateMayOverflow) const override {
return ST->hasVInstructions() ? TailFoldingStyle::DataWithEVL
diff --git a/llvm/lib/Target/SPIRV/SPIRVPrepareFunctions.cpp b/llvm/lib/Target/SPIRV/SPIRVPrepareFunctions.cpp
index 74aec4f..2b34f61 100644
--- a/llvm/lib/Target/SPIRV/SPIRVPrepareFunctions.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVPrepareFunctions.cpp
@@ -359,18 +359,15 @@ static void lowerExpectAssume(IntrinsicInst *II) {
}
}
-static bool toSpvOverloadedIntrinsic(IntrinsicInst *II, Intrinsic::ID NewID,
- ArrayRef<unsigned> OpNos) {
- Function *F = nullptr;
- if (OpNos.empty()) {
- F = Intrinsic::getOrInsertDeclaration(II->getModule(), NewID);
- } else {
- SmallVector<Type *, 4> Tys;
- for (unsigned OpNo : OpNos)
- Tys.push_back(II->getOperand(OpNo)->getType());
- F = Intrinsic::getOrInsertDeclaration(II->getModule(), NewID, Tys);
- }
- II->setCalledFunction(F);
+static bool toSpvLifetimeIntrinsic(IntrinsicInst *II, Intrinsic::ID NewID) {
+ IRBuilder<> Builder(II);
+ auto *Alloca = cast<AllocaInst>(II->getArgOperand(0));
+ std::optional<TypeSize> Size =
+ Alloca->getAllocationSize(Alloca->getDataLayout());
+ Value *SizeVal = Builder.getInt64(Size ? *Size : -1);
+ Builder.CreateIntrinsic(NewID, Alloca->getType(),
+ {SizeVal, II->getArgOperand(0)});
+ II->eraseFromParent();
return true;
}
@@ -406,8 +403,8 @@ bool SPIRVPrepareFunctions::substituteIntrinsicCalls(Function *F) {
break;
case Intrinsic::lifetime_start:
if (!STI.isShader()) {
- Changed |= toSpvOverloadedIntrinsic(
- II, Intrinsic::SPVIntrinsics::spv_lifetime_start, {1});
+ Changed |= toSpvLifetimeIntrinsic(
+ II, Intrinsic::SPVIntrinsics::spv_lifetime_start);
} else {
II->eraseFromParent();
Changed = true;
@@ -415,8 +412,8 @@ bool SPIRVPrepareFunctions::substituteIntrinsicCalls(Function *F) {
break;
case Intrinsic::lifetime_end:
if (!STI.isShader()) {
- Changed |= toSpvOverloadedIntrinsic(
- II, Intrinsic::SPVIntrinsics::spv_lifetime_end, {1});
+ Changed |= toSpvLifetimeIntrinsic(
+ II, Intrinsic::SPVIntrinsics::spv_lifetime_end);
} else {
II->eraseFromParent();
Changed = true;
diff --git a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
index f32c9bd..2611c29 100644
--- a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
@@ -436,20 +436,6 @@ bool SystemZTTIImpl::isLSRCostLess(
C2.ScaleCost, C2.SetupCost);
}
-bool SystemZTTIImpl::areInlineCompatible(const Function *Caller,
- const Function *Callee) const {
- const TargetMachine &TM = getTLI()->getTargetMachine();
-
- const FeatureBitset &CallerBits =
- TM.getSubtargetImpl(*Caller)->getFeatureBits();
- const FeatureBitset &CalleeBits =
- TM.getSubtargetImpl(*Callee)->getFeatureBits();
-
- // Support only equal feature bitsets. Restriction should be relaxed in the
- // future to allow inlining when callee's bits are subset of the caller's.
- return CallerBits == CalleeBits;
-}
-
unsigned SystemZTTIImpl::getNumberOfRegisters(unsigned ClassID) const {
bool Vector = (ClassID == 1);
if (!Vector)
diff --git a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h
index dc5736e..fc681de 100644
--- a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h
+++ b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h
@@ -65,9 +65,6 @@ public:
bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1,
const TargetTransformInfo::LSRCost &C2) const override;
- bool areInlineCompatible(const Function *Caller,
- const Function *Callee) const override;
-
/// @}
/// \name Vector TTI Implementations
diff --git a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
index 3320508..b775c43 100644
--- a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
+++ b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
@@ -1821,7 +1821,7 @@ static void sinkLifetimeStartMarkers(Function &F, coro::Shape &Shape,
// only used outside the region.
if (Valid && Lifetimes.size() != 0) {
auto *NewLifetime = Lifetimes[0]->clone();
- NewLifetime->replaceUsesOfWith(NewLifetime->getOperand(1), AI);
+ NewLifetime->replaceUsesOfWith(NewLifetime->getOperand(0), AI);
NewLifetime->insertBefore(DomBB->getTerminator()->getIterator());
// All the outsided lifetime.start markers are no longer necessary.
diff --git a/llvm/lib/Transforms/IPO/ExpandVariadics.cpp b/llvm/lib/Transforms/IPO/ExpandVariadics.cpp
index da60f52..6ed3b62 100644
--- a/llvm/lib/Transforms/IPO/ExpandVariadics.cpp
+++ b/llvm/lib/Transforms/IPO/ExpandVariadics.cpp
@@ -226,13 +226,6 @@ public:
/*IsVarArgs=*/false);
}
- static ConstantInt *sizeOfAlloca(LLVMContext &Ctx, const DataLayout &DL,
- AllocaInst *Alloced) {
- std::optional<TypeSize> AllocaTypeSize = Alloced->getAllocationSize(DL);
- uint64_t AsInt = AllocaTypeSize ? AllocaTypeSize->getFixedValue() : 0;
- return ConstantInt::get(Type::getInt64Ty(Ctx), AsInt);
- }
-
bool expansionApplicableToFunction(Module &M, Function *F) {
if (F->isIntrinsic() || !F->isVarArg() ||
F->hasFnAttribute(Attribute::Naked))
@@ -577,8 +570,7 @@ ExpandVariadics::defineVariadicWrapper(Module &M, IRBuilder<> &Builder,
AllocaInst *VaListInstance =
Builder.CreateAlloca(VaListTy, nullptr, "va_start");
- Builder.CreateLifetimeStart(VaListInstance,
- sizeOfAlloca(Ctx, DL, VaListInstance));
+ Builder.CreateLifetimeStart(VaListInstance);
Builder.CreateIntrinsic(Intrinsic::vastart, {DL.getAllocaPtrType(Ctx)},
{VaListInstance});
@@ -595,8 +587,7 @@ ExpandVariadics::defineVariadicWrapper(Module &M, IRBuilder<> &Builder,
Builder.CreateIntrinsic(Intrinsic::vaend, {DL.getAllocaPtrType(Ctx)},
{VaListInstance});
- Builder.CreateLifetimeEnd(VaListInstance,
- sizeOfAlloca(Ctx, DL, VaListInstance));
+ Builder.CreateLifetimeEnd(VaListInstance);
if (Result->getType()->isVoidTy())
Builder.CreateRetVoid();
@@ -746,7 +737,7 @@ bool ExpandVariadics::expandCall(Module &M, IRBuilder<> &Builder, CallBase *CB,
// Initialize the fields in the struct
Builder.SetInsertPoint(CB);
- Builder.CreateLifetimeStart(Alloced, sizeOfAlloca(Ctx, DL, Alloced));
+ Builder.CreateLifetimeStart(Alloced);
Frame.initializeStructAlloca(DL, Builder, Alloced);
const unsigned NumArgs = FuncType->getNumParams();
@@ -762,7 +753,7 @@ bool ExpandVariadics::expandCall(Module &M, IRBuilder<> &Builder, CallBase *CB,
Builder.SetCurrentDebugLocation(CB->getStableDebugLoc());
VaList = Builder.CreateAlloca(VaListTy, nullptr, "va_argument");
Builder.SetInsertPoint(CB);
- Builder.CreateLifetimeStart(VaList, sizeOfAlloca(Ctx, DL, VaList));
+ Builder.CreateLifetimeStart(VaList);
}
Builder.SetInsertPoint(CB);
Args.push_back(ABI->initializeVaList(M, Ctx, Builder, VaList, Alloced));
@@ -802,9 +793,9 @@ bool ExpandVariadics::expandCall(Module &M, IRBuilder<> &Builder, CallBase *CB,
}
if (VaList)
- Builder.CreateLifetimeEnd(VaList, sizeOfAlloca(Ctx, DL, VaList));
+ Builder.CreateLifetimeEnd(VaList);
- Builder.CreateLifetimeEnd(Alloced, sizeOfAlloca(Ctx, DL, Alloced));
+ Builder.CreateLifetimeEnd(Alloced);
NewCB->setAttributes(PAL);
NewCB->takeName(CB);
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 47e017e..d7a2ef7 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -1532,6 +1532,51 @@ static Instruction *foldBitOrderCrossLogicOp(Value *V,
return nullptr;
}
+/// Helper to match idempotent binary intrinsics, namely, intrinsics where
+/// `f(f(x, y), y) == f(x, y)` holds.
+static bool isIdempotentBinaryIntrinsic(Intrinsic::ID IID) {
+ switch (IID) {
+ case Intrinsic::smax:
+ case Intrinsic::smin:
+ case Intrinsic::umax:
+ case Intrinsic::umin:
+ case Intrinsic::maximum:
+ case Intrinsic::minimum:
+ case Intrinsic::maximumnum:
+ case Intrinsic::minimumnum:
+ case Intrinsic::maxnum:
+ case Intrinsic::minnum:
+ return true;
+ default:
+ return false;
+ }
+}
+
+/// Attempt to simplify value-accumulating recurrences of kind:
+/// %umax.acc = phi i8 [ %umax, %backedge ], [ %a, %entry ]
+/// %umax = call i8 @llvm.umax.i8(i8 %umax.acc, i8 %b)
+/// When the operands are known to be loop-invariant, the idempotent binary
+/// intrinsic can be hoisted out of the loop.
+static Value *foldIdempotentBinaryIntrinsicRecurrence(InstCombinerImpl &IC,
+ IntrinsicInst *II) {
+ PHINode *PN;
+ Value *Init, *OtherOp;
+
+ // A binary intrinsic recurrence with loop-invariant operands is equivalent to
+ // `call @llvm.binary.intrinsic(Init, OtherOp)`.
+ auto IID = II->getIntrinsicID();
+ if (!isIdempotentBinaryIntrinsic(IID) ||
+ !matchSimpleBinaryIntrinsicRecurrence(II, PN, Init, OtherOp) ||
+ !IC.getDominatorTree().dominates(OtherOp, PN))
+ return nullptr;
+
+ auto *InvariantBinaryInst =
+ IC.Builder.CreateBinaryIntrinsic(IID, Init, OtherOp);
+ if (isa<FPMathOperator>(InvariantBinaryInst))
+ cast<Instruction>(InvariantBinaryInst)->copyFastMathFlags(II);
+ return InvariantBinaryInst;
+}
+
static Value *simplifyReductionOperand(Value *Arg, bool CanReorderLanes) {
if (!CanReorderLanes)
return nullptr;
@@ -3912,6 +3957,9 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
if (Value *Reverse = foldReversedIntrinsicOperands(II))
return replaceInstUsesWith(*II, Reverse);
+ if (Value *Res = foldIdempotentBinaryIntrinsicRecurrence(*this, II))
+ return replaceInstUsesWith(*II, Res);
+
// Some intrinsics (like experimental_gc_statepoint) can be used in invoke
// context, so it is handled in visitCallBase and we should trigger it.
return visitCallBase(*II);
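
The fold above is justified by idempotence alone: since f(f(x, y), y) == f(x, y), iterating the recurrence with a loop-invariant second operand converges after a single application. A standalone sketch with umax (illustrative names):

    #include <algorithm>

    // Loop form of the recurrence: Acc = umax(Acc, B), repeated N times.
    unsigned loopForm(unsigned Init, unsigned B, int N) {
      unsigned Acc = Init;
      for (int I = 0; I < N; ++I)
        Acc = std::max(Acc, B); // f(f(x, y), y) == f(x, y)
      return Acc;               // equals std::max(Init, B) for any N >= 1
    }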
diff --git a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index 8da65c5..50258af 100644
--- a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -1211,23 +1211,19 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> {
return;
if (!II.isLifetimeStartOrEnd())
return;
- // Found lifetime intrinsic, add ASan instrumentation if necessary.
- auto *Size = cast<ConstantInt>(II.getArgOperand(0));
- // If size argument is undefined, don't do anything.
- if (Size->isMinusOne()) return;
- // Check that size doesn't saturate uint64_t and can
- // be stored in IntptrTy.
- const uint64_t SizeValue = Size->getValue().getLimitedValue();
- if (SizeValue == ~0ULL ||
- !ConstantInt::isValueValidForType(IntptrTy, SizeValue))
- return;
// Find alloca instruction that corresponds to llvm.lifetime argument.
- AllocaInst *AI = dyn_cast<AllocaInst>(II.getArgOperand(1));
+ AllocaInst *AI = dyn_cast<AllocaInst>(II.getArgOperand(0));
// We're interested only in allocas we can handle.
if (!AI || !ASan.isInterestingAlloca(*AI))
return;
+
+ std::optional<TypeSize> Size = AI->getAllocationSize(AI->getDataLayout());
+ // Check that size is known and can be stored in IntptrTy.
+ if (!Size || !ConstantInt::isValueValidForType(IntptrTy, *Size))
+ return;
+
bool DoPoison = (ID == Intrinsic::lifetime_end);
- AllocaPoisonCall APC = {&II, AI, SizeValue, DoPoison};
+ AllocaPoisonCall APC = {&II, AI, *Size, DoPoison};
if (AI->isStaticAlloca())
StaticAllocaPoisonCallVec.push_back(APC);
else if (ClInstrumentDynamicAllocas)
diff --git a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
index bcb90d6..fc34d14 100644
--- a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
@@ -1469,22 +1469,6 @@ void HWAddressSanitizer::instrumentStack(memtag::StackInfo &SInfo,
size_t Size = memtag::getAllocaSizeInBytes(*AI);
size_t AlignedSize = alignTo(Size, Mapping.getObjectAlignment());
- auto HandleLifetime = [&](IntrinsicInst *II) {
- // Set the lifetime intrinsic to cover the whole alloca. This reduces the
- // set of assumptions we need to make about the lifetime. Without this we
- // would need to ensure that we can track the lifetime pointer to a
- // constant offset from the alloca, and would still need to change the
- // size to include the extra alignment we use for the untagging to make
- // the size consistent.
- //
- // The check for standard lifetime below makes sure that we have exactly
- // one set of start / end in any execution (i.e. the ends are not
- // reachable from each other), so this will not cause any problems.
- II->setArgOperand(0, ConstantInt::get(Int64Ty, AlignedSize));
- };
- llvm::for_each(Info.LifetimeStart, HandleLifetime);
- llvm::for_each(Info.LifetimeEnd, HandleLifetime);
-
AI->replaceUsesWithIf(Replacement, [AILong](const Use &U) {
auto *User = U.getUser();
return User != AILong && !isa<LifetimeIntrinsic>(User);
diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index 7d3c940..6e81387 100644
--- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -3301,7 +3301,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
void handleLifetimeStart(IntrinsicInst &I) {
if (!PoisonStack)
return;
- AllocaInst *AI = dyn_cast<AllocaInst>(I.getArgOperand(1));
+ AllocaInst *AI = dyn_cast<AllocaInst>(I.getArgOperand(0));
if (AI)
LifetimeStartList.push_back(std::make_pair(&I, AI));
}
diff --git a/llvm/lib/Transforms/Instrumentation/TypeSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/TypeSanitizer.cpp
index 4edf25c..9471ae3 100644
--- a/llvm/lib/Transforms/Instrumentation/TypeSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/TypeSanitizer.cpp
@@ -818,12 +818,12 @@ bool TypeSanitizer::instrumentMemInst(Value *V, Instruction *ShadowBase,
}
}
} else if (auto *II = dyn_cast<LifetimeIntrinsic>(I)) {
- auto *AI = dyn_cast<AllocaInst>(II->getArgOperand(1));
+ auto *AI = dyn_cast<AllocaInst>(II->getArgOperand(0));
if (!AI)
return false;
Size = GetAllocaSize(AI);
- Dest = II->getArgOperand(1);
+ Dest = II->getArgOperand(0);
} else if (auto *AI = dyn_cast<AllocaInst>(I)) {
// We need to clear the types for new stack allocations (or else we might
// read stale type information from a previous function execution).
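The MSan and TySan hunks are the same mechanical shift seen throughout this commit: with the size operand gone, the pointer moves from argument 1 to argument 0. Sketched once as a standalone helper (hypothetical name):

    #include "llvm/IR/IntrinsicInst.h"
    #include <cassert>

    using namespace llvm;

    // The pointer is now the lifetime intrinsic's only argument; before this
    // change it sat at index 1, behind the i64 size.
    static AllocaInst *lifetimeAlloca(IntrinsicInst *II) {
      assert(II->isLifetimeStartOrEnd() && "expected a lifetime marker");
      return dyn_cast<AllocaInst>(II->getArgOperand(0));
    }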
diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
index 9b87180..f46d54b 100644
--- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -1363,7 +1363,7 @@ struct DSEState {
if (auto *CB = dyn_cast<CallBase>(I)) {
if (CB->getIntrinsicID() == Intrinsic::lifetime_end)
return {
- std::make_pair(MemoryLocation::getForArgument(CB, 1, &TLI), false)};
+ std::make_pair(MemoryLocation::getForArgument(CB, 0, &TLI), false)};
if (Value *FreedOp = getFreedOperand(CB, &TLI))
return {std::make_pair(MemoryLocation::getAfter(FreedOp), true)};
}
diff --git a/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp b/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
index 85ee824..a097d33 100644
--- a/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
+++ b/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
@@ -434,7 +434,7 @@ bool InferAddressSpacesImpl::rewriteIntrinsicOperands(IntrinsicInst *II,
NewV = NewV->stripPointerCasts();
Function *NewDecl = Intrinsic::getOrInsertDeclaration(
M, II->getIntrinsicID(), {NewV->getType()});
- II->setArgOperand(1, NewV);
+ II->setArgOperand(0, NewV);
II->setCalledFunction(NewDecl);
return true;
}
@@ -491,7 +491,7 @@ void InferAddressSpacesImpl::collectRewritableIntrinsicOperands(
}
case Intrinsic::lifetime_start:
case Intrinsic::lifetime_end: {
- appendsFlatAddressExpressionToPostorderStack(II->getArgOperand(1),
+ appendsFlatAddressExpressionToPostorderStack(II->getArgOperand(0),
PostorderStack, Visited);
break;
}
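Because the lifetime intrinsics are overloaded only on their pointer parameter, retargeting a pointer to an inferred address space means remangling the declaration and updating argument 0 together. The rewrite path above, condensed into one sketch:

    #include "llvm/IR/IntrinsicInst.h"
    #include "llvm/IR/Intrinsics.h"
    #include "llvm/IR/Module.h"

    using namespace llvm;

    static void retargetLifetime(Module *M, IntrinsicInst *II, Value *NewV) {
      Function *NewDecl = Intrinsic::getOrInsertDeclaration(
          M, II->getIntrinsicID(), {NewV->getType()});
      II->setArgOperand(0, NewV); // the pointer is argument 0 in the new form
      II->setCalledFunction(NewDecl);
    }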
diff --git a/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp b/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
index b3bffeb..fcdb8a9 100644
--- a/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
+++ b/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
@@ -2166,7 +2166,7 @@ public:
// If the loads don't alias the lifetime.end, it won't interfere with
// fusion.
- MemoryLocation EndLoc = MemoryLocation::getForArgument(End, 1, nullptr);
+ MemoryLocation EndLoc = MemoryLocation::getForArgument(End, 0, nullptr);
if (!EndLoc.Ptr)
continue;
if (AA->isNoAlias(Load0Loc, EndLoc) && AA->isNoAlias(Load1Loc, EndLoc))
diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 79721dc..f237322 100644
--- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -915,7 +915,7 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad,
// move the bitcast as well, which we don't handle.
if (SkippedLifetimeStart) {
auto *LifetimeArg =
- dyn_cast<Instruction>(SkippedLifetimeStart->getOperand(1));
+ dyn_cast<Instruction>(SkippedLifetimeStart->getOperand(0));
if (LifetimeArg && LifetimeArg->getParent() == C->getParent() &&
C->comesBefore(LifetimeArg))
return false;
@@ -1010,7 +1010,7 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad,
// Lifetime of srcAlloca ends at lifetime.end.
if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
if (II->getIntrinsicID() == Intrinsic::lifetime_end &&
- II->getArgOperand(1) == srcAlloca)
+ II->getArgOperand(0) == srcAlloca)
break;
}
@@ -1393,7 +1393,7 @@ static bool hasUndefContents(MemorySSA *MSSA, BatchAAResults &AA, Value *V,
if (auto *II = dyn_cast_or_null<IntrinsicInst>(Def->getMemoryInst()))
if (II->getIntrinsicID() == Intrinsic::lifetime_start)
if (auto *Alloca = dyn_cast<AllocaInst>(getUnderlyingObject(V)))
- return II->getArgOperand(1) == Alloca;
+ return II->getArgOperand(0) == Alloca;
return false;
}
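All three MemCpyOpt hunks key their analysis off the marker's pointer operand. The recurring test, written out as a hypothetical standalone predicate:

    #include "llvm/IR/IntrinsicInst.h"

    using namespace llvm;

    // Does I end the lifetime of SrcAlloca? Only the pointer operand
    // identifies the region now; there is no size to compare.
    static bool endsLifetimeOf(const Instruction &I,
                               const AllocaInst *SrcAlloca) {
      if (auto *II = dyn_cast<IntrinsicInst>(&I))
        return II->getIntrinsicID() == Intrinsic::lifetime_end &&
               II->getArgOperand(0) == SrcAlloca;
      return false;
    }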
diff --git a/llvm/lib/Transforms/Scalar/NewGVN.cpp b/llvm/lib/Transforms/Scalar/NewGVN.cpp
index 1a52af1..40eeeb2 100644
--- a/llvm/lib/Transforms/Scalar/NewGVN.cpp
+++ b/llvm/lib/Transforms/Scalar/NewGVN.cpp
@@ -1535,7 +1535,7 @@ NewGVN::performSymbolicLoadCoercion(Type *LoadType, Value *LoadPtr,
if (auto *II = dyn_cast<IntrinsicInst>(DepInst)) {
if (II->getIntrinsicID() == Intrinsic::lifetime_start) {
- auto *LifetimePtr = II->getOperand(1);
+ auto *LifetimePtr = II->getOperand(0);
if (LoadPtr == lookupOperandLeader(LifetimePtr) ||
AA->isMustAlias(LoadPtr, LifetimePtr))
return createConstantExpression(UndefValue::get(LoadType));
diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp
index 03d9f32..d6e27aa 100644
--- a/llvm/lib/Transforms/Scalar/SROA.cpp
+++ b/llvm/lib/Transforms/Scalar/SROA.cpp
@@ -1260,10 +1260,7 @@ private:
return PI.setAborted(&II);
if (II.isLifetimeStartOrEnd()) {
- ConstantInt *Length = cast<ConstantInt>(II.getArgOperand(0));
- uint64_t Size = std::min(AllocSize - Offset.getLimitedValue(),
- Length->getLimitedValue());
- insertUse(II, Offset, Size, true);
+ insertUse(II, Offset, AllocSize, true);
return;
}
@@ -3614,30 +3611,14 @@ private:
return true;
}
- assert(II.getArgOperand(1) == OldPtr);
- // Lifetime intrinsics are only promotable if they cover the whole alloca.
- // Therefore, we drop lifetime intrinsics which don't cover the whole
- // alloca.
- // (In theory, intrinsics which partially cover an alloca could be
- // promoted, but PromoteMemToReg doesn't handle that case.)
- // FIXME: Check whether the alloca is promotable before dropping the
- // lifetime intrinsics?
- if (NewBeginOffset != NewAllocaBeginOffset ||
- NewEndOffset != NewAllocaEndOffset)
- return true;
-
- ConstantInt *Size =
- ConstantInt::get(cast<IntegerType>(II.getArgOperand(0)->getType()),
- NewEndOffset - NewBeginOffset);
- // Lifetime intrinsics always expect an i8* so directly get such a pointer
- // for the new alloca slice.
+ assert(II.getArgOperand(0) == OldPtr);
Type *PointerTy = IRB.getPtrTy(OldPtr->getType()->getPointerAddressSpace());
Value *Ptr = getNewAllocaSlicePtr(IRB, PointerTy);
Value *New;
if (II.getIntrinsicID() == Intrinsic::lifetime_start)
- New = IRB.CreateLifetimeStart(Ptr, Size);
+ New = IRB.CreateLifetimeStart(Ptr);
else
- New = IRB.CreateLifetimeEnd(Ptr, Size);
+ New = IRB.CreateLifetimeEnd(Ptr);
(void)New;
LLVM_DEBUG(dbgs() << " to: " << *New << "\n");
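The builder API now takes only the pointer, which is what lets SROA drop the whole-alloca-coverage bookkeeping above. A sketch of emitting a matched pair with the new signatures, assuming AI is a static alloca and LastUse a non-terminator instruction (both hypothetical):

    #include "llvm/IR/IRBuilder.h"

    using namespace llvm;

    static void wrapWithLifetime(AllocaInst *AI, Instruction *LastUse) {
      IRBuilder<> B(AI->getNextNode()); // start right after the alloca
      B.CreateLifetimeStart(AI);        // was: CreateLifetimeStart(AI, Size)
      B.SetInsertPoint(LastUse->getNextNode());
      B.CreateLifetimeEnd(AI);          // was: CreateLifetimeEnd(AI, Size)
    }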
diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp
index 7a9dd37..bbd1ed6 100644
--- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp
+++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp
@@ -1099,7 +1099,7 @@ static void eraseLifetimeMarkersOnInputs(const SetVector<BasicBlock *> &Blocks,
// Get the memory operand of the lifetime marker. If the underlying
// object is a sunk alloca, or is otherwise defined in the extraction
// region, the lifetime marker must not be erased.
- Value *Mem = II->getOperand(1)->stripInBoundsOffsets();
+ Value *Mem = II->getOperand(0);
if (SunkAllocas.count(Mem) || definedInRegion(Blocks, Mem))
continue;
@@ -1115,8 +1115,6 @@ static void eraseLifetimeMarkersOnInputs(const SetVector<BasicBlock *> &Blocks,
static void insertLifetimeMarkersSurroundingCall(
Module *M, ArrayRef<Value *> LifetimesStart, ArrayRef<Value *> LifetimesEnd,
CallInst *TheCall) {
- LLVMContext &Ctx = M->getContext();
- auto NegativeOne = ConstantInt::getSigned(Type::getInt64Ty(Ctx), -1);
Instruction *Term = TheCall->getParent()->getTerminator();
// Emit lifetime markers for the pointers given in \p Objects. Insert the
@@ -1130,7 +1128,7 @@ static void insertLifetimeMarkersSurroundingCall(
Function *Func =
Intrinsic::getOrInsertDeclaration(M, MarkerFunc, Mem->getType());
- auto Marker = CallInst::Create(Func, {NegativeOne, Mem});
+ auto Marker = CallInst::Create(Func, Mem);
if (InsertBefore)
Marker->insertBefore(TheCall->getIterator());
else
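The deleted NegativeOne constant was the old i64 -1 "unknown size" sentinel; the one-operand form has no sentinel to pass. Marker insertion, condensed from the hunk above and assuming Func is the llvm.lifetime.start/.end declaration matching Mem's pointer type:

    #include "llvm/IR/Instructions.h"

    using namespace llvm;

    static void insertMarkerBefore(Function *Func, Value *Mem,
                                   Instruction *InsertPt) {
      CallInst *Marker = CallInst::Create(Func, Mem);
      Marker->insertBefore(InsertPt->getIterator());
    }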
diff --git a/llvm/lib/Transforms/Utils/InlineFunction.cpp b/llvm/lib/Transforms/Utils/InlineFunction.cpp
index 59a47a9..fa3c467 100644
--- a/llvm/lib/Transforms/Utils/InlineFunction.cpp
+++ b/llvm/lib/Transforms/Utils/InlineFunction.cpp
@@ -3004,31 +3004,11 @@ void llvm::InlineFunctionImpl(CallBase &CB, InlineFunctionInfo &IFI,
if (hasLifetimeMarkers(AI))
continue;
- // Try to determine the size of the allocation.
- ConstantInt *AllocaSize = nullptr;
- if (ConstantInt *AIArraySize =
- dyn_cast<ConstantInt>(AI->getArraySize())) {
- auto &DL = Caller->getDataLayout();
- Type *AllocaType = AI->getAllocatedType();
- TypeSize AllocaTypeSize = DL.getTypeAllocSize(AllocaType);
- uint64_t AllocaArraySize = AIArraySize->getLimitedValue();
-
- // Don't add markers for zero-sized allocas.
- if (AllocaArraySize == 0)
- continue;
-
- // Check that array size doesn't saturate uint64_t and doesn't
- // overflow when it's multiplied by type size.
- if (!AllocaTypeSize.isScalable() &&
- AllocaArraySize != std::numeric_limits<uint64_t>::max() &&
- std::numeric_limits<uint64_t>::max() / AllocaArraySize >=
- AllocaTypeSize.getFixedValue()) {
- AllocaSize = ConstantInt::get(Type::getInt64Ty(AI->getContext()),
- AllocaArraySize * AllocaTypeSize);
- }
- }
+ std::optional<TypeSize> Size = AI->getAllocationSize(AI->getDataLayout());
+ if (Size && Size->isZero())
+ continue;
- builder.CreateLifetimeStart(AI, AllocaSize);
+ builder.CreateLifetimeStart(AI);
for (ReturnInst *RI : Returns) {
// Don't insert llvm.lifetime.end calls between a musttail or deoptimize
// call and a return. The return kills all local allocas.
@@ -3038,7 +3018,7 @@ void llvm::InlineFunctionImpl(CallBase &CB, InlineFunctionInfo &IFI,
if (InlinedDeoptimizeCalls &&
RI->getParent()->getTerminatingDeoptimizeCall())
continue;
- IRBuilder<>(RI).CreateLifetimeEnd(AI, AllocaSize);
+ IRBuilder<>(RI).CreateLifetimeEnd(AI);
}
}
}
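The deleted inliner block computed the alloca size by hand purely to materialize the i64 argument. For comparison, a sketch of what that removed logic did; AllocaInst::getAllocationSize subsumes every case (non-constant counts, scalable element types, and uint64_t overflow all come back as "unknown"):

    #include "llvm/IR/Constants.h"
    #include "llvm/IR/DataLayout.h"
    #include "llvm/IR/Instructions.h"
    #include <limits>
    #include <optional>

    using namespace llvm;

    static std::optional<uint64_t> manualAllocaSize(const AllocaInst *AI,
                                                    const DataLayout &DL) {
      auto *Count = dyn_cast<ConstantInt>(AI->getArraySize());
      if (!Count)
        return std::nullopt;
      TypeSize ElemSize = DL.getTypeAllocSize(AI->getAllocatedType());
      if (ElemSize.isScalable())
        return std::nullopt;
      uint64_t N = Count->getLimitedValue();
      // Reject the saturated value and products that overflow uint64_t.
      if (N == std::numeric_limits<uint64_t>::max() ||
          (N != 0 &&
           std::numeric_limits<uint64_t>::max() / N < ElemSize.getFixedValue()))
        return std::nullopt;
      return N * ElemSize.getFixedValue();
    }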
diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp
index 2619e73..b559212 100644
--- a/llvm/lib/Transforms/Utils/Local.cpp
+++ b/llvm/lib/Transforms/Utils/Local.cpp
@@ -481,7 +481,7 @@ bool llvm::wouldInstructionBeTriviallyDead(const Instruction *I,
return true;
if (II->isLifetimeStartOrEnd()) {
- auto *Arg = II->getArgOperand(1);
+ auto *Arg = II->getArgOperand(0);
if (isa<PoisonValue>(Arg))
return true;
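With the pointer as the only operand, a marker whose pointer has been replaced by poison carries no information at all. The dead-marker test above, condensed into a hypothetical predicate:

    #include "llvm/IR/Constants.h"
    #include "llvm/IR/IntrinsicInst.h"

    using namespace llvm;

    // Lifetime markers on a poison pointer are trivially dead and can be
    // dropped by generic DCE.
    static bool isDeadLifetimeMarker(const IntrinsicInst *II) {
      return II->isLifetimeStartOrEnd() &&
             isa<PoisonValue>(II->getArgOperand(0));
    }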
diff --git a/llvm/lib/Transforms/Utils/MemoryTaggingSupport.cpp b/llvm/lib/Transforms/Utils/MemoryTaggingSupport.cpp
index 472c03f..1f59b17 100644
--- a/llvm/lib/Transforms/Utils/MemoryTaggingSupport.cpp
+++ b/llvm/lib/Transforms/Utils/MemoryTaggingSupport.cpp
@@ -155,7 +155,7 @@ void StackInfoBuilder::visit(OptimizationRemarkEmitter &ORE,
return;
}
if (auto *II = dyn_cast<LifetimeIntrinsic>(&Inst)) {
- AllocaInst *AI = dyn_cast<AllocaInst>(II->getArgOperand(1));
+ AllocaInst *AI = dyn_cast<AllocaInst>(II->getArgOperand(0));
if (!AI ||
getAllocaInterestingness(*AI) != AllocaInterestingness::kInteresting)
return;