Diffstat (limited to 'llvm/lib/Analysis')
-rw-r--r--  llvm/lib/Analysis/CMakeLists.txt               |   1
-rw-r--r--  llvm/lib/Analysis/ConstantFolding.cpp          | 185
-rw-r--r--  llvm/lib/Analysis/DXILResource.cpp             | 135
-rw-r--r--  llvm/lib/Analysis/DependenceAnalysis.cpp       |  17
-rw-r--r--  llvm/lib/Analysis/InstructionSimplify.cpp      |   2
-rw-r--r--  llvm/lib/Analysis/Loads.cpp                    |   2
-rw-r--r--  llvm/lib/Analysis/LoopAccessAnalysis.cpp       |  99
-rw-r--r--  llvm/lib/Analysis/MemoryDependenceAnalysis.cpp |  44
-rw-r--r--  llvm/lib/Analysis/ProfileSummaryInfo.cpp       |  14
-rw-r--r--  llvm/lib/Analysis/ScalarEvolution.cpp          |  35
-rw-r--r--  llvm/lib/Analysis/StackLifetime.cpp            |   5
-rw-r--r--  llvm/lib/Analysis/TargetLibraryInfo.cpp        |  28
-rw-r--r--  llvm/lib/Analysis/TargetTransformInfo.cpp      |   6
-rw-r--r--  llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp   |   4
-rw-r--r--  llvm/lib/Analysis/UniformityAnalysis.cpp       |   1
-rw-r--r--  llvm/lib/Analysis/ValueTracking.cpp            |  18
-rw-r--r--  llvm/lib/Analysis/VectorUtils.cpp              |  30
17 files changed, 384 insertions, 242 deletions
diff --git a/llvm/lib/Analysis/CMakeLists.txt b/llvm/lib/Analysis/CMakeLists.txt
index cfde787..16dd6f8 100644
--- a/llvm/lib/Analysis/CMakeLists.txt
+++ b/llvm/lib/Analysis/CMakeLists.txt
@@ -175,6 +175,7 @@ add_llvm_component_library(LLVMAnalysis
LINK_COMPONENTS
BinaryFormat
Core
+ FrontendHLSL
Object
ProfileData
Support
diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp
index 9c1c2c6..dd98b62 100644
--- a/llvm/lib/Analysis/ConstantFolding.cpp
+++ b/llvm/lib/Analysis/ConstantFolding.cpp
@@ -929,12 +929,11 @@ Constant *SymbolicallyEvaluateGEP(const GEPOperator *GEP,
if (!AllConstantInt)
break;
- // TODO: Try to intersect two inrange attributes?
- if (!InRange) {
- InRange = GEP->getInRange();
- if (InRange)
- // Adjust inrange by offset until now.
- InRange = InRange->sextOrTrunc(BitWidth).subtract(Offset);
+ // Adjust inrange offset and intersect inrange attributes
+ if (auto GEPRange = GEP->getInRange()) {
+ auto AdjustedGEPRange = GEPRange->sextOrTrunc(BitWidth).subtract(Offset);
+ InRange =
+ InRange ? InRange->intersectWith(AdjustedGEPRange) : AdjustedGEPRange;
}
Ptr = cast<Constant>(GEP->getOperand(0));
@@ -1374,7 +1373,7 @@ Constant *llvm::FlushFPConstant(Constant *Operand, const Instruction *Inst,
if (ConstantFP *CFP = dyn_cast<ConstantFP>(Operand))
return flushDenormalConstantFP(CFP, Inst, IsOutput);
- if (isa<ConstantAggregateZero, UndefValue, ConstantExpr>(Operand))
+ if (isa<ConstantAggregateZero, UndefValue>(Operand))
return Operand;
Type *Ty = Operand->getType();
@@ -1390,6 +1389,9 @@ Constant *llvm::FlushFPConstant(Constant *Operand, const Instruction *Inst,
Ty = VecTy->getElementType();
}
+ if (isa<ConstantExpr>(Operand))
+ return Operand;
+
if (const auto *CV = dyn_cast<ConstantVector>(Operand)) {
SmallVector<Constant *, 16> NewElts;
for (unsigned i = 0, e = CV->getNumOperands(); i != e; ++i) {
@@ -1801,6 +1803,44 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) {
case Intrinsic::nvvm_d2ull_rn:
case Intrinsic::nvvm_d2ull_rp:
case Intrinsic::nvvm_d2ull_rz:
+
+ // NVVM math intrinsics:
+ case Intrinsic::nvvm_ceil_d:
+ case Intrinsic::nvvm_ceil_f:
+ case Intrinsic::nvvm_ceil_ftz_f:
+
+ case Intrinsic::nvvm_fabs:
+ case Intrinsic::nvvm_fabs_ftz:
+
+ case Intrinsic::nvvm_floor_d:
+ case Intrinsic::nvvm_floor_f:
+ case Intrinsic::nvvm_floor_ftz_f:
+
+ case Intrinsic::nvvm_rcp_rm_d:
+ case Intrinsic::nvvm_rcp_rm_f:
+ case Intrinsic::nvvm_rcp_rm_ftz_f:
+ case Intrinsic::nvvm_rcp_rn_d:
+ case Intrinsic::nvvm_rcp_rn_f:
+ case Intrinsic::nvvm_rcp_rn_ftz_f:
+ case Intrinsic::nvvm_rcp_rp_d:
+ case Intrinsic::nvvm_rcp_rp_f:
+ case Intrinsic::nvvm_rcp_rp_ftz_f:
+ case Intrinsic::nvvm_rcp_rz_d:
+ case Intrinsic::nvvm_rcp_rz_f:
+ case Intrinsic::nvvm_rcp_rz_ftz_f:
+
+ case Intrinsic::nvvm_round_d:
+ case Intrinsic::nvvm_round_f:
+ case Intrinsic::nvvm_round_ftz_f:
+
+ case Intrinsic::nvvm_saturate_d:
+ case Intrinsic::nvvm_saturate_f:
+ case Intrinsic::nvvm_saturate_ftz_f:
+
+ case Intrinsic::nvvm_sqrt_f:
+ case Intrinsic::nvvm_sqrt_rn_d:
+ case Intrinsic::nvvm_sqrt_rn_f:
+ case Intrinsic::nvvm_sqrt_rn_ftz_f:
return !Call->isStrictFP();
// Sign operations are actually bitwise operations, they do not raise
@@ -1818,6 +1858,7 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) {
case Intrinsic::nearbyint:
case Intrinsic::rint:
case Intrinsic::canonicalize:
+
// Constrained intrinsics can be folded if FP environment is known
// to compiler.
case Intrinsic::experimental_constrained_fma:
@@ -1971,16 +2012,49 @@ static APFloat FTZPreserveSign(const APFloat &V) {
return V;
}
-Constant *ConstantFoldFP(double (*NativeFP)(double), const APFloat &V,
- Type *Ty) {
+static APFloat FlushToPositiveZero(const APFloat &V) {
+ if (V.isDenormal())
+ return APFloat::getZero(V.getSemantics(), false);
+ return V;
+}
+
+static APFloat FlushWithDenormKind(const APFloat &V,
+ DenormalMode::DenormalModeKind DenormKind) {
+ assert(DenormKind != DenormalMode::DenormalModeKind::Invalid &&
+ DenormKind != DenormalMode::DenormalModeKind::Dynamic);
+ switch (DenormKind) {
+ case DenormalMode::DenormalModeKind::IEEE:
+ return V;
+ case DenormalMode::DenormalModeKind::PreserveSign:
+ return FTZPreserveSign(V);
+ case DenormalMode::DenormalModeKind::PositiveZero:
+ return FlushToPositiveZero(V);
+ default:
+ llvm_unreachable("Invalid denormal mode!");
+ }
+}
+
+Constant *ConstantFoldFP(double (*NativeFP)(double), const APFloat &V, Type *Ty,
+ DenormalMode DenormMode = DenormalMode::getIEEE()) {
+ if (!DenormMode.isValid() ||
+ DenormMode.Input == DenormalMode::DenormalModeKind::Dynamic ||
+ DenormMode.Output == DenormalMode::DenormalModeKind::Dynamic)
+ return nullptr;
+
llvm_fenv_clearexcept();
- double Result = NativeFP(V.convertToDouble());
+ auto Input = FlushWithDenormKind(V, DenormMode.Input);
+ double Result = NativeFP(Input.convertToDouble());
if (llvm_fenv_testexcept()) {
llvm_fenv_clearexcept();
return nullptr;
}
- return GetConstantFoldFPValue(Result, Ty);
+ Constant *Output = GetConstantFoldFPValue(Result, Ty);
+ if (DenormMode.Output == DenormalMode::DenormalModeKind::IEEE)
+ return Output;
+ const auto *CFP = static_cast<ConstantFP *>(Output);
+ const auto Res = FlushWithDenormKind(CFP->getValueAPF(), DenormMode.Output);
+ return ConstantFP::get(Ty->getContext(), Res);
}
#if defined(HAS_IEE754_FLOAT128) && defined(HAS_LOGF128)
@@ -2550,6 +2624,95 @@ static Constant *ConstantFoldScalarCall1(StringRef Name,
return ConstantFoldFP(atan, APF, Ty);
case Intrinsic::sqrt:
return ConstantFoldFP(sqrt, APF, Ty);
+
+ // NVVM Intrinsics:
+ case Intrinsic::nvvm_ceil_ftz_f:
+ case Intrinsic::nvvm_ceil_f:
+ case Intrinsic::nvvm_ceil_d:
+ return ConstantFoldFP(
+ ceil, APF, Ty,
+ nvvm::GetNVVMDenormMode(
+ nvvm::UnaryMathIntrinsicShouldFTZ(IntrinsicID)));
+
+ case Intrinsic::nvvm_fabs_ftz:
+ case Intrinsic::nvvm_fabs:
+ return ConstantFoldFP(
+ fabs, APF, Ty,
+ nvvm::GetNVVMDenormMode(
+ nvvm::UnaryMathIntrinsicShouldFTZ(IntrinsicID)));
+
+ case Intrinsic::nvvm_floor_ftz_f:
+ case Intrinsic::nvvm_floor_f:
+ case Intrinsic::nvvm_floor_d:
+ return ConstantFoldFP(
+ floor, APF, Ty,
+ nvvm::GetNVVMDenormMode(
+ nvvm::UnaryMathIntrinsicShouldFTZ(IntrinsicID)));
+
+ case Intrinsic::nvvm_rcp_rm_ftz_f:
+ case Intrinsic::nvvm_rcp_rn_ftz_f:
+ case Intrinsic::nvvm_rcp_rp_ftz_f:
+ case Intrinsic::nvvm_rcp_rz_ftz_f:
+ case Intrinsic::nvvm_rcp_rm_d:
+ case Intrinsic::nvvm_rcp_rm_f:
+ case Intrinsic::nvvm_rcp_rn_d:
+ case Intrinsic::nvvm_rcp_rn_f:
+ case Intrinsic::nvvm_rcp_rp_d:
+ case Intrinsic::nvvm_rcp_rp_f:
+ case Intrinsic::nvvm_rcp_rz_d:
+ case Intrinsic::nvvm_rcp_rz_f: {
+ APFloat::roundingMode RoundMode = nvvm::GetRCPRoundingMode(IntrinsicID);
+ bool IsFTZ = nvvm::RCPShouldFTZ(IntrinsicID);
+
+ auto Denominator = IsFTZ ? FTZPreserveSign(APF) : APF;
+ APFloat Res = APFloat::getOne(APF.getSemantics());
+ APFloat::opStatus Status = Res.divide(Denominator, RoundMode);
+
+ if (Status == APFloat::opOK || Status == APFloat::opInexact) {
+ if (IsFTZ)
+ Res = FTZPreserveSign(Res);
+ return ConstantFP::get(Ty->getContext(), Res);
+ }
+ return nullptr;
+ }
+
+ case Intrinsic::nvvm_round_ftz_f:
+ case Intrinsic::nvvm_round_f:
+ case Intrinsic::nvvm_round_d: {
+ // nvvm_round is lowered to PTX cvt.rni, which will round to nearest
+ // integer, choosing even integer if source is equidistant between two
+ // integers, so the semantics are closer to "rint" rather than "round".
+ bool IsFTZ = nvvm::UnaryMathIntrinsicShouldFTZ(IntrinsicID);
+ auto V = IsFTZ ? FTZPreserveSign(APF) : APF;
+ V.roundToIntegral(APFloat::rmNearestTiesToEven);
+ return ConstantFP::get(Ty->getContext(), V);
+ }
+
+ case Intrinsic::nvvm_saturate_ftz_f:
+ case Intrinsic::nvvm_saturate_d:
+ case Intrinsic::nvvm_saturate_f: {
+ bool IsFTZ = nvvm::UnaryMathIntrinsicShouldFTZ(IntrinsicID);
+ auto V = IsFTZ ? FTZPreserveSign(APF) : APF;
+ if (V.isNegative() || V.isZero() || V.isNaN())
+ return ConstantFP::getZero(Ty);
+ APFloat One = APFloat::getOne(APF.getSemantics());
+ if (V > One)
+ return ConstantFP::get(Ty->getContext(), One);
+ return ConstantFP::get(Ty->getContext(), APF);
+ }
+
+ case Intrinsic::nvvm_sqrt_rn_ftz_f:
+ case Intrinsic::nvvm_sqrt_f:
+ case Intrinsic::nvvm_sqrt_rn_d:
+ case Intrinsic::nvvm_sqrt_rn_f:
+ if (APF.isNegative())
+ return nullptr;
+ return ConstantFoldFP(
+ sqrt, APF, Ty,
+ nvvm::GetNVVMDenormMode(
+ nvvm::UnaryMathIntrinsicShouldFTZ(IntrinsicID)));
+
+ // AMDGCN Intrinsics:
case Intrinsic::amdgcn_cos:
case Intrinsic::amdgcn_sin: {
double V = getValueAsDouble(Op);
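The NVVM folding above depends on flush-to-zero (FTZ) handling of denormal inputs and outputs. A minimal standalone sketch of that flushing behaviour, assuming plain doubles in place of APFloat and using hypothetical helper names:

#include <cmath>
#include <cstdio>

// Denormal (subnormal) values are replaced by zero; PreserveSign keeps the
// sign of the original value, PositiveZero always yields +0.0.
static double flushPreserveSign(double V) {
  return std::fpclassify(V) == FP_SUBNORMAL ? std::copysign(0.0, V) : V;
}

static double flushToPositiveZero(double V) {
  return std::fpclassify(V) == FP_SUBNORMAL ? 0.0 : V;
}

int main() {
  double Denorm = -4.9406564584124654e-324; // smallest-magnitude double, subnormal
  std::printf("%g %g\n", flushPreserveSign(Denorm), flushToPositiveZero(Denorm));
  // Prints "-0 0": the first flush keeps the negative sign, the second does not.
  return 0;
}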
diff --git a/llvm/lib/Analysis/DXILResource.cpp b/llvm/lib/Analysis/DXILResource.cpp
index 2da6468..629fa7cd 100644
--- a/llvm/lib/Analysis/DXILResource.cpp
+++ b/llvm/lib/Analysis/DXILResource.cpp
@@ -995,18 +995,7 @@ SmallVector<dxil::ResourceInfo *> DXILResourceMap::findByUse(const Value *Key) {
//===----------------------------------------------------------------------===//
void DXILResourceBindingInfo::populate(Module &M, DXILResourceTypeMap &DRTM) {
- struct Binding {
- ResourceClass RC;
- uint32_t Space;
- uint32_t LowerBound;
- uint32_t UpperBound;
- Value *Name;
- Binding(ResourceClass RC, uint32_t Space, uint32_t LowerBound,
- uint32_t UpperBound, Value *Name)
- : RC(RC), Space(Space), LowerBound(LowerBound), UpperBound(UpperBound),
- Name(Name) {}
- };
- SmallVector<Binding> Bindings;
+ hlsl::BindingInfoBuilder Builder;
// collect all of the llvm.dx.resource.handlefrombinding calls;
// make a note if there is llvm.dx.resource.handlefromimplicitbinding
@@ -1036,132 +1025,20 @@ void DXILResourceBindingInfo::populate(Module &M, DXILResourceTypeMap &DRTM) {
assert((Size < 0 || (unsigned)LowerBound + Size - 1 <= UINT32_MAX) &&
"upper bound register overflow");
uint32_t UpperBound = Size < 0 ? UINT32_MAX : LowerBound + Size - 1;
- Bindings.emplace_back(RTI.getResourceClass(), Space, LowerBound,
- UpperBound, Name);
+ Builder.trackBinding(RTI.getResourceClass(), Space, LowerBound,
+ UpperBound, Name);
}
break;
}
case Intrinsic::dx_resource_handlefromimplicitbinding: {
- ImplicitBinding = true;
+ HasImplicitBinding = true;
break;
}
}
}
- // sort all the collected bindings
- llvm::stable_sort(Bindings, [](auto &LHS, auto &RHS) {
- return std::tie(LHS.RC, LHS.Space, LHS.LowerBound) <
- std::tie(RHS.RC, RHS.Space, RHS.LowerBound);
- });
-
- // remove duplicates
- Binding *NewEnd = llvm::unique(Bindings, [](auto &LHS, auto &RHS) {
- return std::tie(LHS.RC, LHS.Space, LHS.LowerBound, LHS.UpperBound,
- LHS.Name) == std::tie(RHS.RC, RHS.Space, RHS.LowerBound,
- RHS.UpperBound, RHS.Name);
- });
- if (NewEnd != Bindings.end())
- Bindings.erase(NewEnd);
-
- // Go over the sorted bindings and build up lists of free register ranges
- // for each binding type and used spaces. Bindings are sorted by resource
- // class, space, and lower bound register slot.
- BindingSpaces *BS = &SRVSpaces;
- for (const Binding &B : Bindings) {
- if (BS->RC != B.RC)
- // move to the next resource class spaces
- BS = &getBindingSpaces(B.RC);
-
- RegisterSpace *S = BS->Spaces.empty() ? &BS->Spaces.emplace_back(B.Space)
- : &BS->Spaces.back();
- assert(S->Space <= B.Space && "bindings not sorted correctly?");
- if (B.Space != S->Space)
- // add new space
- S = &BS->Spaces.emplace_back(B.Space);
-
- // the space is full - set flag to report overlapping binding later
- if (S->FreeRanges.empty()) {
- OverlappingBinding = true;
- continue;
- }
-
- // adjust the last free range lower bound, split it in two, or remove it
- BindingRange &LastFreeRange = S->FreeRanges.back();
- assert(LastFreeRange.UpperBound == UINT32_MAX);
- if (LastFreeRange.LowerBound == B.LowerBound) {
- if (B.UpperBound < UINT32_MAX)
- LastFreeRange.LowerBound = B.UpperBound + 1;
- else
- S->FreeRanges.pop_back();
- } else if (LastFreeRange.LowerBound < B.LowerBound) {
- LastFreeRange.UpperBound = B.LowerBound - 1;
- if (B.UpperBound < UINT32_MAX)
- S->FreeRanges.emplace_back(B.UpperBound + 1, UINT32_MAX);
- } else {
- OverlappingBinding = true;
- if (B.UpperBound < UINT32_MAX)
- LastFreeRange.LowerBound =
- std::max(LastFreeRange.LowerBound, B.UpperBound + 1);
- else
- S->FreeRanges.pop_back();
- }
- }
-}
-
-// returns std::nulopt if binding could not be found in given space
-std::optional<uint32_t>
-DXILResourceBindingInfo::findAvailableBinding(dxil::ResourceClass RC,
- uint32_t Space, int32_t Size) {
- BindingSpaces &BS = getBindingSpaces(RC);
- RegisterSpace &RS = BS.getOrInsertSpace(Space);
- return RS.findAvailableBinding(Size);
-}
-
-DXILResourceBindingInfo::RegisterSpace &
-DXILResourceBindingInfo::BindingSpaces::getOrInsertSpace(uint32_t Space) {
- for (auto *I = Spaces.begin(); I != Spaces.end(); ++I) {
- if (I->Space == Space)
- return *I;
- if (I->Space < Space)
- continue;
- return *Spaces.insert(I, Space);
- }
- return Spaces.emplace_back(Space);
-}
-
-std::optional<uint32_t>
-DXILResourceBindingInfo::RegisterSpace::findAvailableBinding(int32_t Size) {
- assert((Size == -1 || Size > 0) && "invalid size");
-
- if (FreeRanges.empty())
- return std::nullopt;
-
- // unbounded array
- if (Size == -1) {
- BindingRange &Last = FreeRanges.back();
- if (Last.UpperBound != UINT32_MAX)
- // this space is already occupied by an unbounded array
- return std::nullopt;
- uint32_t RegSlot = Last.LowerBound;
- FreeRanges.pop_back();
- return RegSlot;
- }
-
- // single resource or fixed-size array
- for (BindingRange &R : FreeRanges) {
- // compare the size as uint64_t to prevent overflow for range (0,
- // UINT32_MAX)
- if ((uint64_t)R.UpperBound - R.LowerBound + 1 < (uint64_t)Size)
- continue;
- uint32_t RegSlot = R.LowerBound;
- // This might create a range where (LowerBound == UpperBound + 1). When
- // that happens, the next time this function is called the range will
- // skipped over by the check above (at this point Size is always > 0).
- R.LowerBound += Size;
- return RegSlot;
- }
-
- return std::nullopt;
+ Bindings = Builder.calculateBindingInfo(
+ [this](auto, auto) { this->HasOverlappingBinding = true; });
}
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Analysis/DependenceAnalysis.cpp b/llvm/lib/Analysis/DependenceAnalysis.cpp
index dd9a44b..f1473b2 100644
--- a/llvm/lib/Analysis/DependenceAnalysis.cpp
+++ b/llvm/lib/Analysis/DependenceAnalysis.cpp
@@ -3383,6 +3383,10 @@ bool DependenceInfo::tryDelinearize(Instruction *Src, Instruction *Dst,
SrcSubscripts, DstSubscripts))
return false;
+ assert(isLoopInvariant(SrcBase, SrcLoop) &&
+ isLoopInvariant(DstBase, DstLoop) &&
+ "Expected SrcBase and DstBase to be loop invariant");
+
int Size = SrcSubscripts.size();
LLVM_DEBUG({
dbgs() << "\nSrcSubscripts: ";
@@ -3666,6 +3670,19 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst,
SCEVUnionPredicate(Assume, *SE));
}
+  // Even if the base pointers are the same, they may not be loop-invariant,
+  // which could lead to incorrect results, as we're analyzing loop-carried
+  // dependencies. Src and Dst can be in different loops, so we need to check
+  // that the base pointer is invariant in both loops.
+ Loop *SrcLoop = LI->getLoopFor(Src->getParent());
+ Loop *DstLoop = LI->getLoopFor(Dst->getParent());
+ if (!isLoopInvariant(SrcBase, SrcLoop) ||
+ !isLoopInvariant(DstBase, DstLoop)) {
+ LLVM_DEBUG(dbgs() << "The base pointer is not loop invariant.\n");
+ return std::make_unique<Dependence>(Src, Dst,
+ SCEVUnionPredicate(Assume, *SE));
+ }
+
uint64_t EltSize = SrcLoc.Size.toRaw();
const SCEV *SrcEv = SE->getMinusSCEV(SrcSCEV, SrcBase);
const SCEV *DstEv = SE->getMinusSCEV(DstSCEV, DstBase);
diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp
index 82530e7..5907e21 100644
--- a/llvm/lib/Analysis/InstructionSimplify.cpp
+++ b/llvm/lib/Analysis/InstructionSimplify.cpp
@@ -5366,7 +5366,7 @@ static Value *simplifyCastInst(unsigned CastOpc, Value *Op, Type *Ty,
Type *MidTy = CI->getType();
Type *DstTy = Ty;
if (Src->getType() == Ty) {
- auto FirstOp = static_cast<Instruction::CastOps>(CI->getOpcode());
+ auto FirstOp = CI->getOpcode();
auto SecondOp = static_cast<Instruction::CastOps>(CastOpc);
Type *SrcIntPtrTy =
SrcTy->isPtrOrPtrVectorTy() ? Q.DL.getIntPtrType(SrcTy) : nullptr;
diff --git a/llvm/lib/Analysis/Loads.cpp b/llvm/lib/Analysis/Loads.cpp
index 393f264..6fc81d787 100644
--- a/llvm/lib/Analysis/Loads.cpp
+++ b/llvm/lib/Analysis/Loads.cpp
@@ -342,7 +342,7 @@ bool llvm::isDereferenceableAndAlignedInLoop(
: SE.getConstantMaxBackedgeTakenCount(L);
}
const auto &[AccessStart, AccessEnd] = getStartAndEndForAccess(
- L, PtrScev, LI->getType(), BECount, MaxBECount, &SE, nullptr);
+ L, PtrScev, LI->getType(), BECount, MaxBECount, &SE, nullptr, &DT, AC);
if (isa<SCEVCouldNotCompute>(AccessStart) ||
isa<SCEVCouldNotCompute>(AccessEnd))
return false;
diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
index f3a32d3..a553533 100644
--- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -23,6 +23,8 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AliasSetTracker.h"
+#include "llvm/Analysis/AssumeBundleQueries.h"
+#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/LoopAnalysisManager.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopIterator.h"
@@ -208,28 +210,46 @@ static const SCEV *mulSCEVOverflow(const SCEV *A, const SCEV *B,
/// Return true, if evaluating \p AR at \p MaxBTC cannot wrap, because \p AR at
/// \p MaxBTC is guaranteed inbounds of the accessed object.
-static bool evaluatePtrAddRecAtMaxBTCWillNotWrap(const SCEVAddRecExpr *AR,
- const SCEV *MaxBTC,
- const SCEV *EltSize,
- ScalarEvolution &SE,
- const DataLayout &DL) {
+static bool
+evaluatePtrAddRecAtMaxBTCWillNotWrap(const SCEVAddRecExpr *AR,
+ const SCEV *MaxBTC, const SCEV *EltSize,
+ ScalarEvolution &SE, const DataLayout &DL,
+ DominatorTree *DT, AssumptionCache *AC) {
auto *PointerBase = SE.getPointerBase(AR->getStart());
auto *StartPtr = dyn_cast<SCEVUnknown>(PointerBase);
if (!StartPtr)
return false;
+ const Loop *L = AR->getLoop();
bool CheckForNonNull, CheckForFreed;
- uint64_t DerefBytes = StartPtr->getValue()->getPointerDereferenceableBytes(
+ Value *StartPtrV = StartPtr->getValue();
+ uint64_t DerefBytes = StartPtrV->getPointerDereferenceableBytes(
DL, CheckForNonNull, CheckForFreed);
- if (CheckForNonNull || CheckForFreed)
+ if (DerefBytes && (CheckForNonNull || CheckForFreed))
return false;
const SCEV *Step = AR->getStepRecurrence(SE);
+ Type *WiderTy = SE.getWiderType(MaxBTC->getType(), Step->getType());
+ const SCEV *DerefBytesSCEV = SE.getConstant(WiderTy, DerefBytes);
+
+  // Check if we have a suitable dereferenceable assumption we can use.
+ if (!StartPtrV->canBeFreed()) {
+ RetainedKnowledge DerefRK = getKnowledgeValidInContext(
+ StartPtrV, {Attribute::Dereferenceable}, *AC,
+ L->getLoopPredecessor()->getTerminator(), DT);
+ if (DerefRK) {
+ DerefBytesSCEV = SE.getUMaxExpr(
+ DerefBytesSCEV, SE.getConstant(WiderTy, DerefRK.ArgValue));
+ }
+ }
+
+ if (DerefBytesSCEV->isZero())
+ return false;
+
bool IsKnownNonNegative = SE.isKnownNonNegative(Step);
if (!IsKnownNonNegative && !SE.isKnownNegative(Step))
return false;
- Type *WiderTy = SE.getWiderType(MaxBTC->getType(), Step->getType());
Step = SE.getNoopOrSignExtend(Step, WiderTy);
MaxBTC = SE.getNoopOrZeroExtend(MaxBTC, WiderTy);
@@ -256,8 +276,7 @@ static bool evaluatePtrAddRecAtMaxBTCWillNotWrap(const SCEVAddRecExpr *AR,
const SCEV *EndBytes = addSCEVNoOverflow(StartOffset, OffsetEndBytes, SE);
if (!EndBytes)
return false;
- return SE.isKnownPredicate(CmpInst::ICMP_ULE, EndBytes,
- SE.getConstant(WiderTy, DerefBytes));
+ return SE.isKnownPredicate(CmpInst::ICMP_ULE, EndBytes, DerefBytesSCEV);
}
// For negative steps check if
@@ -265,15 +284,15 @@ static bool evaluatePtrAddRecAtMaxBTCWillNotWrap(const SCEVAddRecExpr *AR,
// * StartOffset <= DerefBytes.
assert(SE.isKnownNegative(Step) && "must be known negative");
return SE.isKnownPredicate(CmpInst::ICMP_SGE, StartOffset, OffsetEndBytes) &&
- SE.isKnownPredicate(CmpInst::ICMP_ULE, StartOffset,
- SE.getConstant(WiderTy, DerefBytes));
+ SE.isKnownPredicate(CmpInst::ICMP_ULE, StartOffset, DerefBytesSCEV);
}
std::pair<const SCEV *, const SCEV *> llvm::getStartAndEndForAccess(
const Loop *Lp, const SCEV *PtrExpr, Type *AccessTy, const SCEV *BTC,
const SCEV *MaxBTC, ScalarEvolution *SE,
DenseMap<std::pair<const SCEV *, Type *>,
- std::pair<const SCEV *, const SCEV *>> *PointerBounds) {
+ std::pair<const SCEV *, const SCEV *>> *PointerBounds,
+ DominatorTree *DT, AssumptionCache *AC) {
std::pair<const SCEV *, const SCEV *> *PtrBoundsPair;
if (PointerBounds) {
auto [Iter, Ins] = PointerBounds->insert(
@@ -308,8 +327,8 @@ std::pair<const SCEV *, const SCEV *> llvm::getStartAndEndForAccess(
// sets ScEnd to the maximum unsigned value for the type. Note that LAA
// separately checks that accesses cannot not wrap, so unsigned max
// represents an upper bound.
- if (evaluatePtrAddRecAtMaxBTCWillNotWrap(AR, MaxBTC, EltSizeSCEV, *SE,
- DL)) {
+ if (evaluatePtrAddRecAtMaxBTCWillNotWrap(AR, MaxBTC, EltSizeSCEV, *SE, DL,
+ DT, AC)) {
ScEnd = AR->evaluateAtIteration(MaxBTC, *SE);
} else {
ScEnd = SE->getAddExpr(
@@ -356,9 +375,9 @@ void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, const SCEV *PtrExpr,
bool NeedsFreeze) {
const SCEV *SymbolicMaxBTC = PSE.getSymbolicMaxBackedgeTakenCount();
const SCEV *BTC = PSE.getBackedgeTakenCount();
- const auto &[ScStart, ScEnd] =
- getStartAndEndForAccess(Lp, PtrExpr, AccessTy, BTC, SymbolicMaxBTC,
- PSE.getSE(), &DC.getPointerBounds());
+ const auto &[ScStart, ScEnd] = getStartAndEndForAccess(
+ Lp, PtrExpr, AccessTy, BTC, SymbolicMaxBTC, PSE.getSE(),
+ &DC.getPointerBounds(), DC.getDT(), DC.getAC());
assert(!isa<SCEVCouldNotCompute>(ScStart) &&
!isa<SCEVCouldNotCompute>(ScEnd) &&
"must be able to compute both start and end expressions");
@@ -589,11 +608,11 @@ void RuntimePointerChecking::groupChecks(
// dependence. Not grouping the checks for a[i] and a[i + 9000] allows
// us to perform an accurate check in this case.
//
- // The above case requires that we have an UnknownDependence between
- // accesses to the same underlying object. This cannot happen unless
- // FoundNonConstantDistanceDependence is set, and therefore UseDependencies
- // is also false. In this case we will use the fallback path and create
- // separate checking groups for all pointers.
+ // In the above case, we have a non-constant distance and an Unknown
+ // dependence between accesses to the same underlying object, and could retry
+ // with runtime checks. Therefore UseDependencies is false. In this case we
+ // will use the fallback path and create separate checking groups for all
+ // pointers.
// If we don't have the dependency partitions, construct a new
// checking pointer group for each pointer. This is also required
@@ -819,7 +838,7 @@ public:
/// perform dependency checking.
///
/// Note that this can later be cleared if we retry memcheck analysis without
- /// dependency checking (i.e. FoundNonConstantDistanceDependence).
+ /// dependency checking (i.e. ShouldRetryWithRuntimeChecks).
bool isDependencyCheckNeeded() const { return !CheckDeps.empty(); }
/// We decided that no dependence analysis would be used. Reset the state.
@@ -896,7 +915,7 @@ private:
///
/// Note that, this is different from isDependencyCheckNeeded. When we retry
/// memcheck analysis without dependency checking
- /// (i.e. FoundNonConstantDistanceDependence), isDependencyCheckNeeded is
+ /// (i.e. ShouldRetryWithRuntimeChecks), isDependencyCheckNeeded is
/// cleared while this remains set if we have potentially dependent accesses.
bool IsRTCheckAnalysisNeeded = false;
@@ -1961,13 +1980,15 @@ bool MemoryDepChecker::areAccessesCompletelyBeforeOrAfter(const SCEV *Src,
const SCEV *BTC = PSE.getBackedgeTakenCount();
const SCEV *SymbolicMaxBTC = PSE.getSymbolicMaxBackedgeTakenCount();
ScalarEvolution &SE = *PSE.getSE();
- const auto &[SrcStart_, SrcEnd_] = getStartAndEndForAccess(
- InnermostLoop, Src, SrcTy, BTC, SymbolicMaxBTC, &SE, &PointerBounds);
+ const auto &[SrcStart_, SrcEnd_] =
+ getStartAndEndForAccess(InnermostLoop, Src, SrcTy, BTC, SymbolicMaxBTC,
+ &SE, &PointerBounds, DT, AC);
if (isa<SCEVCouldNotCompute>(SrcStart_) || isa<SCEVCouldNotCompute>(SrcEnd_))
return false;
- const auto &[SinkStart_, SinkEnd_] = getStartAndEndForAccess(
- InnermostLoop, Sink, SinkTy, BTC, SymbolicMaxBTC, &SE, &PointerBounds);
+ const auto &[SinkStart_, SinkEnd_] =
+ getStartAndEndForAccess(InnermostLoop, Sink, SinkTy, BTC, SymbolicMaxBTC,
+ &SE, &PointerBounds, DT, AC);
if (isa<SCEVCouldNotCompute>(SinkStart_) ||
isa<SCEVCouldNotCompute>(SinkEnd_))
return false;
@@ -2079,11 +2100,10 @@ MemoryDepChecker::getDependenceDistanceStrideAndSize(
if (StrideAScaled == StrideBScaled)
CommonStride = StrideAScaled;
- // TODO: FoundNonConstantDistanceDependence is used as a necessary condition
- // to consider retrying with runtime checks. Historically, we did not set it
- // when (unscaled) strides were different but there is no inherent reason to.
+ // TODO: Historically, we didn't retry with runtime checks when (unscaled)
+ // strides were different but there is no inherent reason to.
if (!isa<SCEVConstant>(Dist))
- FoundNonConstantDistanceDependence |= StrideAPtrInt == StrideBPtrInt;
+ ShouldRetryWithRuntimeChecks |= StrideAPtrInt == StrideBPtrInt;
// If distance is a SCEVCouldNotCompute, return Unknown immediately.
if (isa<SCEVCouldNotCompute>(Dist)) {
@@ -2712,7 +2732,7 @@ bool LoopAccessInfo::analyzeLoop(AAResults *AA, const LoopInfo *LI,
DepsAreSafe =
DepChecker->areDepsSafe(DepCands, Accesses.getDependenciesToCheck());
- if (!DepsAreSafe && DepChecker->shouldRetryWithRuntimeCheck()) {
+ if (!DepsAreSafe && DepChecker->shouldRetryWithRuntimeChecks()) {
LLVM_DEBUG(dbgs() << "LAA: Retrying with memory checks\n");
// Clear the dependency checks. We assume they are not needed.
@@ -3003,7 +3023,7 @@ LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE,
const TargetTransformInfo *TTI,
const TargetLibraryInfo *TLI, AAResults *AA,
DominatorTree *DT, LoopInfo *LI,
- bool AllowPartial)
+ AssumptionCache *AC, bool AllowPartial)
: PSE(std::make_unique<PredicatedScalarEvolution>(*SE, *L)),
PtrRtChecking(nullptr), TheLoop(L), AllowPartial(AllowPartial) {
unsigned MaxTargetVectorWidthInBits = std::numeric_limits<unsigned>::max();
@@ -3013,8 +3033,8 @@ LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE,
MaxTargetVectorWidthInBits =
TTI->getRegisterBitWidth(TargetTransformInfo::RGK_FixedWidthVector) * 2;
- DepChecker = std::make_unique<MemoryDepChecker>(*PSE, L, SymbolicStrides,
- MaxTargetVectorWidthInBits);
+ DepChecker = std::make_unique<MemoryDepChecker>(
+ *PSE, AC, DT, L, SymbolicStrides, MaxTargetVectorWidthInBits);
PtrRtChecking = std::make_unique<RuntimePointerChecking>(*DepChecker, SE);
if (canAnalyzeLoop())
CanVecMem = analyzeLoop(AA, LI, TLI, DT);
@@ -3083,7 +3103,7 @@ const LoopAccessInfo &LoopAccessInfoManager::getInfo(Loop &L,
// or if it was created with a different value of AllowPartial.
if (Inserted || It->second->hasAllowPartial() != AllowPartial)
It->second = std::make_unique<LoopAccessInfo>(&L, &SE, TTI, TLI, &AA, &DT,
- &LI, AllowPartial);
+ &LI, AC, AllowPartial);
return *It->second;
}
@@ -3126,7 +3146,8 @@ LoopAccessInfoManager LoopAccessAnalysis::run(Function &F,
auto &LI = FAM.getResult<LoopAnalysis>(F);
auto &TTI = FAM.getResult<TargetIRAnalysis>(F);
auto &TLI = FAM.getResult<TargetLibraryAnalysis>(F);
- return LoopAccessInfoManager(SE, AA, DT, LI, &TTI, &TLI);
+ auto &AC = FAM.getResult<AssumptionAnalysis>(F);
+ return LoopAccessInfoManager(SE, AA, DT, LI, &TTI, &TLI, &AC);
}
AnalysisKey LoopAccessAnalysis::Key;
diff --git a/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp b/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
index 3aa9909..2b0f212 100644
--- a/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
+++ b/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -983,33 +983,37 @@ MemDepResult MemoryDependenceResults::getNonLocalInfoForBlock(
static void
SortNonLocalDepInfoCache(MemoryDependenceResults::NonLocalDepInfo &Cache,
unsigned NumSortedEntries) {
- switch (Cache.size() - NumSortedEntries) {
- case 0:
- // done, no new entries.
- break;
- case 2: {
- // Two new entries, insert the last one into place.
- NonLocalDepEntry Val = Cache.back();
- Cache.pop_back();
- MemoryDependenceResults::NonLocalDepInfo::iterator Entry =
- std::upper_bound(Cache.begin(), Cache.end() - 1, Val);
- Cache.insert(Entry, Val);
- [[fallthrough]];
+
+ // If only one entry, don't sort.
+ if (Cache.size() < 2)
+ return;
+
+ unsigned s = Cache.size() - NumSortedEntries;
+
+ // If the cache is already sorted, don't sort it again.
+ if (s == 0)
+ return;
+
+ // If no entry is sorted, sort the whole cache.
+ if (NumSortedEntries == 0) {
+ llvm::sort(Cache);
+ return;
}
- case 1:
- // One new entry, Just insert the new value at the appropriate position.
- if (Cache.size() != 1) {
+
+  // If the number of unsorted entries is small and the cache size is big,
+  // using insertion sort is faster. Here we use Log2_32 to cheaply choose
+  // the sort method.
+ if (s < Log2_32(Cache.size())) {
+ while (s > 0) {
NonLocalDepEntry Val = Cache.back();
Cache.pop_back();
MemoryDependenceResults::NonLocalDepInfo::iterator Entry =
- llvm::upper_bound(Cache, Val);
+ std::upper_bound(Cache.begin(), Cache.end() - s + 1, Val);
Cache.insert(Entry, Val);
+ s--;
}
- break;
- default:
- // Added many values, do a full scale sort.
+ } else {
llvm::sort(Cache);
- break;
}
}
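The rewritten sorting routine above picks between inserting the few new entries into the already-sorted prefix and re-sorting everything. A standalone sketch of the same heuristic, assuming a plain std::vector<int> in place of NonLocalDepInfo and a hand-rolled floor-log2 in place of Log2_32:

#include <algorithm>
#include <vector>

// Sort a cache whose first NumSorted entries are already sorted and whose
// remaining entries were appended unsorted at the end.
void sortCache(std::vector<int> &Cache, unsigned NumSorted) {
  if (Cache.size() < 2)
    return;
  unsigned Unsorted = Cache.size() - NumSorted;
  if (Unsorted == 0)
    return; // already fully sorted
  if (NumSorted == 0) {
    std::sort(Cache.begin(), Cache.end()); // nothing sorted yet: full sort
    return;
  }
  // floor(log2(size)), standing in for Log2_32.
  unsigned Log2Size = 0;
  for (size_t N = Cache.size(); N > 1; N >>= 1)
    ++Log2Size;
  if (Unsorted < Log2Size) {
    // Few new entries: insert each one into the sorted prefix by binary search.
    while (Unsorted > 0) {
      int Val = Cache.back();
      Cache.pop_back();
      auto Pos = std::upper_bound(Cache.begin(), Cache.end() - (Unsorted - 1), Val);
      Cache.insert(Pos, Val);
      --Unsorted;
    }
  } else {
    std::sort(Cache.begin(), Cache.end()); // many new entries: full sort wins
  }
}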
diff --git a/llvm/lib/Analysis/ProfileSummaryInfo.cpp b/llvm/lib/Analysis/ProfileSummaryInfo.cpp
index e8d4e37..f1c3155 100644
--- a/llvm/lib/Analysis/ProfileSummaryInfo.cpp
+++ b/llvm/lib/Analysis/ProfileSummaryInfo.cpp
@@ -121,8 +121,18 @@ void ProfileSummaryInfo::computeThresholds() {
ProfileSummaryBuilder::getHotCountThreshold(DetailedSummary);
ColdCountThreshold =
ProfileSummaryBuilder::getColdCountThreshold(DetailedSummary);
- assert(ColdCountThreshold <= HotCountThreshold &&
- "Cold count threshold cannot exceed hot count threshold!");
+ // When the hot and cold thresholds are identical, we would classify
+ // a count value as both hot and cold since we are doing an inclusive check
+  // (see ::is{Hot|Cold}Count()). To avoid this undesirable overlap, ensure the
+ // thresholds are distinct.
+ if (HotCountThreshold == ColdCountThreshold) {
+ if (ColdCountThreshold > 0)
+ (*ColdCountThreshold)--;
+ else
+ (*HotCountThreshold)++;
+ }
+ assert(ColdCountThreshold < HotCountThreshold &&
+ "Cold count threshold should be less than hot count threshold!");
if (!hasPartialSampleProfile() || !ScalePartialSampleProfileWorkingSetSize) {
HasHugeWorkingSetSize =
HotEntry.NumCounts > ProfileSummaryHugeWorkingSetSizeThreshold;
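A minimal sketch of the threshold adjustment above, assuming free-standing std::optional thresholds rather than the ProfileSummaryInfo members:

#include <cassert>
#include <cstdint>
#include <optional>

// If the computed hot and cold thresholds coincide, nudge them apart so the
// inclusive isHotCount/isColdCount style checks cannot both succeed for the
// same count value.
void separateThresholds(std::optional<uint64_t> &Hot, std::optional<uint64_t> &Cold) {
  if (*Hot == *Cold) {
    if (*Cold > 0)
      --(*Cold);
    else
      ++(*Hot);
  }
  assert(*Cold < *Hot && "cold threshold must stay below hot threshold");
}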
diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index 24adfa3..477e477 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -2682,6 +2682,20 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
return getAddExpr(NewOps, PreservedFlags);
}
}
+
+ // Try to push the constant operand into a ZExt: A + zext (-A + B) -> zext
+ // (B), if trunc (A) + -A + B does not unsigned-wrap.
+ const SCEVAddExpr *InnerAdd;
+ if (match(B, m_scev_ZExt(m_scev_Add(InnerAdd)))) {
+ const SCEV *NarrowA = getTruncateExpr(A, InnerAdd->getType());
+ if (NarrowA == getNegativeSCEV(InnerAdd->getOperand(0)) &&
+ getZeroExtendExpr(NarrowA, B->getType()) == A &&
+ hasFlags(StrengthenNoWrapFlags(this, scAddExpr, {NarrowA, InnerAdd},
+ SCEV::FlagAnyWrap),
+ SCEV::FlagNUW)) {
+ return getZeroExtendExpr(getAddExpr(NarrowA, InnerAdd), B->getType());
+ }
+ }
}
// Canonicalize (-1 * urem X, Y) + X --> (Y * X/Y)
@@ -11418,8 +11432,7 @@ bool ScalarEvolution::isKnownPredicateViaNoOverflow(CmpPredicate Pred,
XNonConstOp = X;
XFlagsPresent = ExpectedFlags;
}
- if (!isa<SCEVConstant>(XConstOp) ||
- (XFlagsPresent & ExpectedFlags) != ExpectedFlags)
+ if (!isa<SCEVConstant>(XConstOp))
return false;
if (!splitBinaryAdd(Y, YConstOp, YNonConstOp, YFlagsPresent)) {
@@ -11428,13 +11441,21 @@ bool ScalarEvolution::isKnownPredicateViaNoOverflow(CmpPredicate Pred,
YFlagsPresent = ExpectedFlags;
}
- if (!isa<SCEVConstant>(YConstOp) ||
- (YFlagsPresent & ExpectedFlags) != ExpectedFlags)
+ if (YNonConstOp != XNonConstOp)
return false;
- if (YNonConstOp != XNonConstOp)
+ if (!isa<SCEVConstant>(YConstOp))
return false;
+ // When matching ADDs with NUW flags (and unsigned predicates), only the
+ // second ADD (with the larger constant) requires NUW.
+ if ((YFlagsPresent & ExpectedFlags) != ExpectedFlags)
+ return false;
+ if (ExpectedFlags != SCEV::FlagNUW &&
+ (XFlagsPresent & ExpectedFlags) != ExpectedFlags) {
+ return false;
+ }
+
OutC1 = cast<SCEVConstant>(XConstOp)->getAPInt();
OutC2 = cast<SCEVConstant>(YConstOp)->getAPInt();
@@ -11472,7 +11493,7 @@ bool ScalarEvolution::isKnownPredicateViaNoOverflow(CmpPredicate Pred,
std::swap(LHS, RHS);
[[fallthrough]];
case ICmpInst::ICMP_ULE:
- // (X + C1)<nuw> u<= (X + C2)<nuw> for C1 u<= C2.
+ // (X + C1) u<= (X + C2)<nuw> for C1 u<= C2.
if (MatchBinaryAddToConst(LHS, RHS, C1, C2, SCEV::FlagNUW) && C1.ule(C2))
return true;
@@ -11482,7 +11503,7 @@ bool ScalarEvolution::isKnownPredicateViaNoOverflow(CmpPredicate Pred,
std::swap(LHS, RHS);
[[fallthrough]];
case ICmpInst::ICMP_ULT:
- // (X + C1)<nuw> u< (X + C2)<nuw> if C1 u< C2.
+ // (X + C1) u< (X + C2)<nuw> if C1 u< C2.
if (MatchBinaryAddToConst(LHS, RHS, C1, C2, SCEV::FlagNUW) && C1.ult(C2))
return true;
break;
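The relaxed matching above only requires the <nuw> flag on the add with the larger constant. A small worked example of why that flag is still needed there, using uint8_t wrap-around as a stand-in for unsigned SCEV arithmetic:

#include <cstdint>
#include <cstdio>

int main() {
  uint8_t X = 250, C1 = 1, C2 = 10;
  uint8_t A = X + C1; // 251, does not wrap
  uint8_t B = X + C2; // 260 wraps to 4
  // Without knowing X + C2 cannot wrap, C1 u< C2 does not imply A u< B:
  std::printf("A=%d B=%d A<B=%d\n", A, B, A < B); // A=251 B=4 A<B=0
  // If X + C2 is known not to wrap (nuw), then X + C1 with C1 u<= C2 cannot
  // wrap either, so (X + C1) u< (X + C2) follows from C1 u< C2.
  return 0;
}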
diff --git a/llvm/lib/Analysis/StackLifetime.cpp b/llvm/lib/Analysis/StackLifetime.cpp
index 21f54c7..34a7a04 100644
--- a/llvm/lib/Analysis/StackLifetime.cpp
+++ b/llvm/lib/Analysis/StackLifetime.cpp
@@ -63,10 +63,7 @@ bool StackLifetime::isAliveAfter(const AllocaInst *AI,
// markers has the same size and points to the alloca start.
static const AllocaInst *findMatchingAlloca(const IntrinsicInst &II,
const DataLayout &DL) {
- const AllocaInst *AI = findAllocaForValue(II.getArgOperand(1), true);
- if (!AI)
- return nullptr;
-
+ const AllocaInst *AI = cast<AllocaInst>(II.getArgOperand(1));
auto AllocaSize = AI->getAllocationSize(DL);
if (!AllocaSize)
return nullptr;
diff --git a/llvm/lib/Analysis/TargetLibraryInfo.cpp b/llvm/lib/Analysis/TargetLibraryInfo.cpp
index e475be2..6e92766 100644
--- a/llvm/lib/Analysis/TargetLibraryInfo.cpp
+++ b/llvm/lib/Analysis/TargetLibraryInfo.cpp
@@ -875,6 +875,34 @@ static void initializeLibCalls(TargetLibraryInfoImpl &TLI, const Triple &T,
TLI.setUnavailable(LibFunc_toascii);
}
+ if (T.isOSFreeBSD()) {
+ TLI.setAvailable(LibFunc_dunder_strtok_r);
+ TLI.setAvailable(LibFunc_memalign);
+ TLI.setAvailable(LibFunc_fputc_unlocked);
+ TLI.setAvailable(LibFunc_fputs_unlocked);
+ TLI.setAvailable(LibFunc_fread_unlocked);
+ TLI.setAvailable(LibFunc_fwrite_unlocked);
+ TLI.setAvailable(LibFunc_getc_unlocked);
+ TLI.setAvailable(LibFunc_getchar_unlocked);
+ TLI.setAvailable(LibFunc_putc_unlocked);
+ TLI.setAvailable(LibFunc_putchar_unlocked);
+
+ TLI.setUnavailable(LibFunc___kmpc_alloc_shared);
+ TLI.setUnavailable(LibFunc___kmpc_free_shared);
+ TLI.setUnavailable(LibFunc_dunder_strndup);
+ TLI.setUnavailable(LibFunc_memccpy_chk);
+ TLI.setUnavailable(LibFunc_strlen_chk);
+ TLI.setUnavailable(LibFunc_fmaximum_num);
+ TLI.setUnavailable(LibFunc_fmaximum_numf);
+ TLI.setUnavailable(LibFunc_fmaximum_numl);
+ TLI.setUnavailable(LibFunc_fminimum_num);
+ TLI.setUnavailable(LibFunc_fminimum_numf);
+ TLI.setUnavailable(LibFunc_fminimum_numl);
+ TLI.setUnavailable(LibFunc_roundeven);
+ TLI.setUnavailable(LibFunc_roundevenf);
+ TLI.setUnavailable(LibFunc_roundevenl);
+ }
+
// As currently implemented in clang, NVPTX code has no standard library to
// speak of. Headers provide a standard-ish library implementation, but many
// of the signatures are wrong -- for example, many libm functions are not
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index 8a470eb..c7eb2ec 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -1423,7 +1423,7 @@ bool TargetTransformInfo::hasArmWideBranch(bool Thumb) const {
return TTIImpl->hasArmWideBranch(Thumb);
}
-uint64_t TargetTransformInfo::getFeatureMask(const Function &F) const {
+APInt TargetTransformInfo::getFeatureMask(const Function &F) const {
return TTIImpl->getFeatureMask(F);
}
@@ -1486,6 +1486,10 @@ void TargetTransformInfo::collectKernelLaunchBounds(
return TTIImpl->collectKernelLaunchBounds(F, LB);
}
+bool TargetTransformInfo::allowVectorElementIndexingUsingGEP() const {
+ return TTIImpl->allowVectorElementIndexingUsingGEP();
+}
+
TargetTransformInfoImplBase::~TargetTransformInfoImplBase() = default;
TargetIRAnalysis::TargetIRAnalysis() : TTICallback(&getDefaultTTI) {}
diff --git a/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp b/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp
index c871070..7025b83 100644
--- a/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp
+++ b/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp
@@ -525,6 +525,8 @@ AAMDNodes AAMDNodes::merge(const AAMDNodes &Other) const {
Result.TBAAStruct = nullptr;
Result.Scope = MDNode::getMostGenericAliasScope(Scope, Other.Scope);
Result.NoAlias = MDNode::intersect(NoAlias, Other.NoAlias);
+ Result.NoAliasAddrSpace = MDNode::getMostGenericNoaliasAddrspace(
+ NoAliasAddrSpace, Other.NoAliasAddrSpace);
return Result;
}
@@ -533,6 +535,8 @@ AAMDNodes AAMDNodes::concat(const AAMDNodes &Other) const {
Result.TBAA = Result.TBAAStruct = nullptr;
Result.Scope = MDNode::getMostGenericAliasScope(Scope, Other.Scope);
Result.NoAlias = MDNode::intersect(NoAlias, Other.NoAlias);
+ Result.NoAliasAddrSpace = MDNode::getMostGenericNoaliasAddrspace(
+ NoAliasAddrSpace, Other.NoAliasAddrSpace);
return Result;
}
diff --git a/llvm/lib/Analysis/UniformityAnalysis.cpp b/llvm/lib/Analysis/UniformityAnalysis.cpp
index 15107c2..2e4063f 100644
--- a/llvm/lib/Analysis/UniformityAnalysis.cpp
+++ b/llvm/lib/Analysis/UniformityAnalysis.cpp
@@ -178,6 +178,7 @@ bool UniformityInfoWrapperPass::runOnFunction(Function &F) {
void UniformityInfoWrapperPass::print(raw_ostream &OS, const Module *) const {
OS << "UniformityInfo for function '" << m_function->getName() << "':\n";
+ m_uniformityInfo.print(OS);
}
void UniformityInfoWrapperPass::releaseMemory() {
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index 61a322b..af85ce4 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -7912,6 +7912,8 @@ bool llvm::intrinsicPropagatesPoison(Intrinsic::ID IID) {
case Intrinsic::ushl_sat:
case Intrinsic::smul_fix:
case Intrinsic::smul_fix_sat:
+ case Intrinsic::umul_fix:
+ case Intrinsic::umul_fix_sat:
case Intrinsic::pow:
case Intrinsic::powi:
case Intrinsic::sin:
@@ -7928,6 +7930,22 @@ bool llvm::intrinsicPropagatesPoison(Intrinsic::ID IID) {
case Intrinsic::atan2:
case Intrinsic::canonicalize:
case Intrinsic::sqrt:
+ case Intrinsic::exp:
+ case Intrinsic::exp2:
+ case Intrinsic::exp10:
+ case Intrinsic::log:
+ case Intrinsic::log2:
+ case Intrinsic::log10:
+ case Intrinsic::modf:
+ case Intrinsic::floor:
+ case Intrinsic::ceil:
+ case Intrinsic::trunc:
+ case Intrinsic::rint:
+ case Intrinsic::nearbyint:
+ case Intrinsic::round:
+ case Intrinsic::roundeven:
+ case Intrinsic::lrint:
+ case Intrinsic::llrint:
return true;
default:
return false;
diff --git a/llvm/lib/Analysis/VectorUtils.cpp b/llvm/lib/Analysis/VectorUtils.cpp
index 1b3da59..b3b4c37 100644
--- a/llvm/lib/Analysis/VectorUtils.cpp
+++ b/llvm/lib/Analysis/VectorUtils.cpp
@@ -240,30 +240,6 @@ Intrinsic::ID llvm::getVectorIntrinsicIDForCall(const CallInst *CI,
return Intrinsic::not_intrinsic;
}
-struct InterleaveIntrinsic {
- Intrinsic::ID Interleave, Deinterleave;
-};
-
-static InterleaveIntrinsic InterleaveIntrinsics[] = {
- {Intrinsic::vector_interleave2, Intrinsic::vector_deinterleave2},
- {Intrinsic::vector_interleave3, Intrinsic::vector_deinterleave3},
- {Intrinsic::vector_interleave4, Intrinsic::vector_deinterleave4},
- {Intrinsic::vector_interleave5, Intrinsic::vector_deinterleave5},
- {Intrinsic::vector_interleave6, Intrinsic::vector_deinterleave6},
- {Intrinsic::vector_interleave7, Intrinsic::vector_deinterleave7},
- {Intrinsic::vector_interleave8, Intrinsic::vector_deinterleave8},
-};
-
-Intrinsic::ID llvm::getInterleaveIntrinsicID(unsigned Factor) {
- assert(Factor >= 2 && Factor <= 8 && "Unexpected factor");
- return InterleaveIntrinsics[Factor - 2].Interleave;
-}
-
-Intrinsic::ID llvm::getDeinterleaveIntrinsicID(unsigned Factor) {
- assert(Factor >= 2 && Factor <= 8 && "Unexpected factor");
- return InterleaveIntrinsics[Factor - 2].Deinterleave;
-}
-
unsigned llvm::getInterleaveIntrinsicFactor(Intrinsic::ID ID) {
switch (ID) {
case Intrinsic::vector_interleave2:
@@ -1141,7 +1117,7 @@ Constant *
llvm::createBitMaskForGaps(IRBuilderBase &Builder, unsigned VF,
const InterleaveGroup<Instruction> &Group) {
// All 1's means mask is not needed.
- if (Group.getNumMembers() == Group.getFactor())
+ if (Group.isFull())
return nullptr;
// TODO: support reversed access.
@@ -1687,7 +1663,7 @@ void InterleavedAccessInfo::analyzeInterleaving(
// Case 1: A full group. Can Skip the checks; For full groups, if the wide
// load would wrap around the address space we would do a memory access at
// nullptr even without the transformation.
- if (Group->getNumMembers() == Group->getFactor())
+ if (Group->isFull())
continue;
// Case 2: If first and last members of the group don't wrap this implies
@@ -1722,7 +1698,7 @@ void InterleavedAccessInfo::analyzeInterleaving(
// Case 1: A full group. Can Skip the checks; For full groups, if the wide
// store would wrap around the address space we would do a memory access at
// nullptr even without the transformation.
- if (Group->getNumMembers() == Group->getFactor())
+ if (Group->isFull())
continue;
// Interleave-store-group with gaps is implemented using masked wide store.