aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Transforms
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Transforms')
-rw-r--r--llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp12
-rw-r--r--llvm/lib/Transforms/IPO/LowerTypeTests.cpp2
-rw-r--r--llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp78
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp25
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp23
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineInternal.h3
-rw-r--r--llvm/lib/Transforms/InstCombine/InstructionCombining.cpp6
-rw-r--r--llvm/lib/Transforms/Instrumentation/AllocToken.cpp75
-rw-r--r--llvm/lib/Transforms/Instrumentation/NumericalStabilitySanitizer.cpp1
-rw-r--r--llvm/lib/Transforms/Utils/BasicBlockUtils.cpp10
-rw-r--r--llvm/lib/Transforms/Utils/PredicateInfo.cpp13
-rw-r--r--llvm/lib/Transforms/Vectorize/LoopVectorize.cpp19
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlan.cpp1
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlan.h12
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp118
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlanTransforms.h22
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlanValue.h10
17 files changed, 223 insertions, 207 deletions
diff --git a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
index bbbac45..7a95df4 100644
--- a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
+++ b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
@@ -907,10 +907,20 @@ static bool mergeConsecutivePartStores(ArrayRef<PartStore> Parts,
StoreInst *Store = Builder.CreateAlignedStore(
Val, First.Store->getPointerOperand(), First.Store->getAlign());
+ // Merge various metadata onto the new store.
AAMDNodes AATags = First.Store->getAAMetadata();
- for (const PartStore &Part : drop_begin(Parts))
+ SmallVector<Instruction *> Stores = {First.Store};
+ Stores.reserve(Parts.size());
+ SmallVector<DebugLoc> DbgLocs = {First.Store->getDebugLoc()};
+ DbgLocs.reserve(Parts.size());
+ for (const PartStore &Part : drop_begin(Parts)) {
AATags = AATags.concat(Part.Store->getAAMetadata());
+ Stores.push_back(Part.Store);
+ DbgLocs.push_back(Part.Store->getDebugLoc());
+ }
Store->setAAMetadata(AATags);
+ Store->mergeDIAssignID(Stores);
+ Store->setDebugLoc(DebugLoc::getMergedLocations(DbgLocs));
// Remove the old stores.
for (const PartStore &Part : Parts)
diff --git a/llvm/lib/Transforms/IPO/LowerTypeTests.cpp b/llvm/lib/Transforms/IPO/LowerTypeTests.cpp
index 46fb567..aa1346d 100644
--- a/llvm/lib/Transforms/IPO/LowerTypeTests.cpp
+++ b/llvm/lib/Transforms/IPO/LowerTypeTests.cpp
@@ -1271,7 +1271,7 @@ bool LowerTypeTestsModule::hasBranchTargetEnforcement() {
// the module flags.
if (const auto *BTE = mdconst::extract_or_null<ConstantInt>(
M.getModuleFlag("branch-target-enforcement")))
- HasBranchTargetEnforcement = (BTE->getZExtValue() != 0);
+ HasBranchTargetEnforcement = !BTE->isZero();
else
HasBranchTargetEnforcement = 0;
}
diff --git a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
index 76e588b..a0f7ec6 100644
--- a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
+++ b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
@@ -24,7 +24,8 @@
// returns 0, or a single vtable's function returns 1, replace each virtual
// call with a comparison of the vptr against that vtable's address.
//
-// This pass is intended to be used during the regular and thin LTO pipelines:
+// This pass is intended to be used during the regular/thin and non-LTO
+// pipelines:
//
// During regular LTO, the pass determines the best optimization for each
// virtual call and applies the resolutions directly to virtual calls that are
@@ -48,6 +49,14 @@
// is supported.
// - Import phase: (same as with hybrid case above).
//
+// During speculative devirtualization mode (not restricted to LTO):
+// - The pass applies speculative devirtualization without requiring any type of
+// visibility.
+// - Skips other features like virtual constant propagation, uniform return
+// value optimization, unique return value optimization and branch funnels as
+// they need LTO.
+// - This mode is enabled via 'devirtualize-speculatively' flag.
+//
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/IPO/WholeProgramDevirt.h"
@@ -61,7 +70,9 @@
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/Analysis/ModuleSummaryAnalysis.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/TypeMetadataUtils.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/Bitcode/BitcodeWriter.h"
@@ -145,6 +156,13 @@ static cl::opt<std::string> ClWriteSummary(
"bitcode, otherwise YAML"),
cl::Hidden);
+// TODO: This option should eventually support any public visibility vtables
+// with/out LTO.
+static cl::opt<bool> ClDevirtualizeSpeculatively(
+ "devirtualize-speculatively",
+ cl::desc("Enable speculative devirtualization optimization"),
+ cl::init(false));
+
static cl::opt<unsigned>
ClThreshold("wholeprogramdevirt-branch-funnel-threshold", cl::Hidden,
cl::init(10),
@@ -892,6 +910,8 @@ void llvm::updatePublicTypeTestCalls(Module &M,
CI->eraseFromParent();
}
} else {
+ // TODO: Don't replace public type tests when speculative devirtualization
+ // gets enabled in LTO mode.
auto *True = ConstantInt::getTrue(M.getContext());
for (Use &U : make_early_inc_range(PublicTypeTestFunc->uses())) {
auto *CI = cast<CallInst>(U.getUser());
@@ -1083,10 +1103,10 @@ bool DevirtModule::tryFindVirtualCallTargets(
if (!TM.Bits->GV->isConstant())
return false;
- // We cannot perform whole program devirtualization analysis on a vtable
- // with public LTO visibility.
- if (TM.Bits->GV->getVCallVisibility() ==
- GlobalObject::VCallVisibilityPublic)
+ // Without ClDevirtualizeSpeculatively, we cannot perform whole program
+ // devirtualization analysis on a vtable with public LTO visibility.
+ if (!ClDevirtualizeSpeculatively && TM.Bits->GV->getVCallVisibility() ==
+ GlobalObject::VCallVisibilityPublic)
return false;
Function *Fn = nullptr;
@@ -1105,6 +1125,12 @@ bool DevirtModule::tryFindVirtualCallTargets(
if (Fn->getName() == "__cxa_pure_virtual")
continue;
+ // In most cases empty functions will be overridden by the
+ // implementation of the derived class, so we can skip them.
+ if (ClDevirtualizeSpeculatively && Fn->getReturnType()->isVoidTy() &&
+ Fn->getInstructionCount() <= 1)
+ continue;
+
// We can disregard unreachable functions as possible call targets, as
// unreachable functions shouldn't be called.
if (mustBeUnreachableFunction(Fn, ExportSummary))
@@ -1223,10 +1249,12 @@ void DevirtModule::applySingleImplDevirt(VTableSlotInfo &SlotInfo,
CallTrap->setDebugLoc(CB.getDebugLoc());
}
- // If fallback checking is enabled, add support to compare the virtual
- // function pointer to the devirtualized target. In case of a mismatch,
- // fall back to indirect call.
- if (DevirtCheckMode == WPDCheckMode::Fallback) {
+  // If fallback checking or speculative devirtualization is enabled,
+ // add support to compare the virtual function pointer to the
+ // devirtualized target. In case of a mismatch, fall back to indirect
+ // call.
+ if (DevirtCheckMode == WPDCheckMode::Fallback ||
+ ClDevirtualizeSpeculatively) {
MDNode *Weights = MDBuilder(M.getContext()).createLikelyBranchWeights();
// Version the indirect call site. If the called value is equal to the
// given callee, 'NewInst' will be executed, otherwise the original call
@@ -2057,15 +2085,15 @@ void DevirtModule::scanTypeTestUsers(
Function *TypeTestFunc,
DenseMap<Metadata *, std::set<TypeMemberInfo>> &TypeIdMap) {
// Find all virtual calls via a virtual table pointer %p under an assumption
- // of the form llvm.assume(llvm.type.test(%p, %md)). This indicates that %p
- // points to a member of the type identifier %md. Group calls by (type ID,
- // offset) pair (effectively the identity of the virtual function) and store
- // to CallSlots.
+ // of the form llvm.assume(llvm.type.test(%p, %md)) or
+ // llvm.assume(llvm.public.type.test(%p, %md)).
+ // This indicates that %p points to a member of the type identifier %md.
+ // Group calls by (type ID, offset) pair (effectively the identity of the
+ // virtual function) and store to CallSlots.
for (Use &U : llvm::make_early_inc_range(TypeTestFunc->uses())) {
auto *CI = dyn_cast<CallInst>(U.getUser());
if (!CI)
continue;
-
// Search for virtual calls based on %p and add them to DevirtCalls.
SmallVector<DevirtCallSite, 1> DevirtCalls;
SmallVector<CallInst *, 1> Assumes;
@@ -2348,6 +2376,12 @@ bool DevirtModule::run() {
(ImportSummary && ImportSummary->partiallySplitLTOUnits()))
return false;
+ Function *PublicTypeTestFunc = nullptr;
+ // If we are in speculative devirtualization mode, we can work on the public
+ // type test intrinsics.
+ if (ClDevirtualizeSpeculatively)
+ PublicTypeTestFunc =
+ Intrinsic::getDeclarationIfExists(&M, Intrinsic::public_type_test);
Function *TypeTestFunc =
Intrinsic::getDeclarationIfExists(&M, Intrinsic::type_test);
Function *TypeCheckedLoadFunc =
@@ -2361,8 +2395,9 @@ bool DevirtModule::run() {
// module, this pass has nothing to do. But if we are exporting, we also need
// to handle any users that appear only in the function summaries.
if (!ExportSummary &&
- (!TypeTestFunc || TypeTestFunc->use_empty() || !AssumeFunc ||
- AssumeFunc->use_empty()) &&
+ (((!PublicTypeTestFunc || PublicTypeTestFunc->use_empty()) &&
+ (!TypeTestFunc || TypeTestFunc->use_empty())) ||
+ !AssumeFunc || AssumeFunc->use_empty()) &&
(!TypeCheckedLoadFunc || TypeCheckedLoadFunc->use_empty()) &&
(!TypeCheckedLoadRelativeFunc ||
TypeCheckedLoadRelativeFunc->use_empty()))
@@ -2373,6 +2408,9 @@ bool DevirtModule::run() {
DenseMap<Metadata *, std::set<TypeMemberInfo>> TypeIdMap;
buildTypeIdentifierMap(Bits, TypeIdMap);
+ if (PublicTypeTestFunc && AssumeFunc)
+ scanTypeTestUsers(PublicTypeTestFunc, TypeIdMap);
+
if (TypeTestFunc && AssumeFunc)
scanTypeTestUsers(TypeTestFunc, TypeIdMap);
@@ -2472,8 +2510,12 @@ bool DevirtModule::run() {
.WPDRes[S.first.ByteOffset];
if (tryFindVirtualCallTargets(TargetsForSlot, TypeMemberInfos,
S.first.ByteOffset, ExportSummary)) {
-
- if (!trySingleImplDevirt(ExportSummary, TargetsForSlot, S.second, Res)) {
+ bool SingleImplDevirt =
+ trySingleImplDevirt(ExportSummary, TargetsForSlot, S.second, Res);
+      // Outside of speculative devirtualization mode, try to apply virtual
+      // constant propagation or branch funneling.
+ // TODO: This should eventually be enabled for non-public type tests.
+ if (!SingleImplDevirt && !ClDevirtualizeSpeculatively) {
DidVirtualConstProp |=
tryVirtualConstProp(TargetsForSlot, S.second, Res, S.first);
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index 73ec451..9bee523 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -2760,21 +2760,34 @@ Instruction *InstCombinerImpl::visitSub(BinaryOperator &I) {
// Optimize pointer differences into the same array into a size. Consider:
// &A[10] - &A[0]: we should compile this to "10".
Value *LHSOp, *RHSOp;
- if (match(Op0, m_PtrToInt(m_Value(LHSOp))) &&
- match(Op1, m_PtrToInt(m_Value(RHSOp))))
+ if (match(Op0, m_PtrToIntOrAddr(m_Value(LHSOp))) &&
+ match(Op1, m_PtrToIntOrAddr(m_Value(RHSOp))))
if (Value *Res = OptimizePointerDifference(LHSOp, RHSOp, I.getType(),
I.hasNoUnsignedWrap()))
return replaceInstUsesWith(I, Res);
// trunc(p)-trunc(q) -> trunc(p-q)
- if (match(Op0, m_Trunc(m_PtrToInt(m_Value(LHSOp)))) &&
- match(Op1, m_Trunc(m_PtrToInt(m_Value(RHSOp)))))
+ if (match(Op0, m_Trunc(m_PtrToIntOrAddr(m_Value(LHSOp)))) &&
+ match(Op1, m_Trunc(m_PtrToIntOrAddr(m_Value(RHSOp)))))
if (Value *Res = OptimizePointerDifference(LHSOp, RHSOp, I.getType(),
/* IsNUW */ false))
return replaceInstUsesWith(I, Res);
- if (match(Op0, m_ZExt(m_PtrToIntSameSize(DL, m_Value(LHSOp)))) &&
- match(Op1, m_ZExtOrSelf(m_PtrToInt(m_Value(RHSOp))))) {
+ auto MatchSubOfZExtOfPtrToIntOrAddr = [&]() {
+ if (match(Op0, m_ZExt(m_PtrToIntSameSize(DL, m_Value(LHSOp)))) &&
+ match(Op1, m_ZExt(m_PtrToIntSameSize(DL, m_Value(RHSOp)))))
+ return true;
+ if (match(Op0, m_ZExt(m_PtrToAddr(m_Value(LHSOp)))) &&
+ match(Op1, m_ZExt(m_PtrToAddr(m_Value(RHSOp)))))
+ return true;
+ // Special case for non-canonical ptrtoint in constant expression,
+ // where the zext has been folded into the ptrtoint.
+ if (match(Op0, m_ZExt(m_PtrToIntSameSize(DL, m_Value(LHSOp)))) &&
+ match(Op1, m_PtrToInt(m_Value(RHSOp))))
+ return true;
+ return false;
+ };
+ if (MatchSubOfZExtOfPtrToIntOrAddr()) {
if (auto *GEP = dyn_cast<GEPOperator>(LHSOp)) {
if (GEP->getPointerOperand() == RHSOp) {
if (GEP->hasNoUnsignedWrap() || GEP->hasNoUnsignedSignedWrap()) {
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index dab200d..669d4f0 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -4003,18 +4003,29 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
// Try to fold intrinsic into select/phi operands. This is legal if:
// * The intrinsic is speculatable.
- // * The select condition is not a vector, or the intrinsic does not
- // perform cross-lane operations.
- if (isSafeToSpeculativelyExecuteWithVariableReplaced(&CI) &&
- isNotCrossLaneOperation(II))
+ // * The operand is one of the following:
+ // - a phi.
+ // - a select with a scalar condition.
+ // - a select with a vector condition and II is not a cross lane operation.
+ if (isSafeToSpeculativelyExecuteWithVariableReplaced(&CI)) {
for (Value *Op : II->args()) {
- if (auto *Sel = dyn_cast<SelectInst>(Op))
- if (Instruction *R = FoldOpIntoSelect(*II, Sel))
+ if (auto *Sel = dyn_cast<SelectInst>(Op)) {
+ bool IsVectorCond = Sel->getCondition()->getType()->isVectorTy();
+ if (IsVectorCond && !isNotCrossLaneOperation(II))
+ continue;
+ // Don't replace a scalar select with a more expensive vector select if
+ // we can't simplify both arms of the select.
+ bool SimplifyBothArms =
+ !Op->getType()->isVectorTy() && II->getType()->isVectorTy();
+ if (Instruction *R = FoldOpIntoSelect(
+ *II, Sel, /*FoldWithMultiUse=*/false, SimplifyBothArms))
return R;
+ }
if (auto *Phi = dyn_cast<PHINode>(Op))
if (Instruction *R = foldOpIntoPhi(*II, Phi))
return R;
}
+ }
if (Instruction *Shuf = foldShuffledIntrinsicOperands(II))
return Shuf;
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
index 943c223..ede73f8 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -664,7 +664,8 @@ public:
/// This also works for Cast instructions, which obviously do not have a
/// second operand.
Instruction *FoldOpIntoSelect(Instruction &Op, SelectInst *SI,
- bool FoldWithMultiUse = false);
+ bool FoldWithMultiUse = false,
+ bool SimplifyBothArms = false);
/// This is a convenience wrapper function for the above two functions.
Instruction *foldBinOpIntoSelectOrPhi(BinaryOperator &I);
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 3f11cae..67e2aae 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -1777,7 +1777,8 @@ static Value *foldOperationIntoSelectOperand(Instruction &I, SelectInst *SI,
}
Instruction *InstCombinerImpl::FoldOpIntoSelect(Instruction &Op, SelectInst *SI,
- bool FoldWithMultiUse) {
+ bool FoldWithMultiUse,
+ bool SimplifyBothArms) {
// Don't modify shared select instructions unless set FoldWithMultiUse
if (!SI->hasOneUse() && !FoldWithMultiUse)
return nullptr;
@@ -1821,6 +1822,9 @@ Instruction *InstCombinerImpl::FoldOpIntoSelect(Instruction &Op, SelectInst *SI,
if (!NewTV && !NewFV)
return nullptr;
+ if (SimplifyBothArms && !(NewTV && NewFV))
+ return nullptr;
+
// Create an instruction for the arm that did not fold.
if (!NewTV)
NewTV = foldOperationIntoSelectOperand(Op, SI, TV, *this);
diff --git a/llvm/lib/Transforms/Instrumentation/AllocToken.cpp b/llvm/lib/Transforms/Instrumentation/AllocToken.cpp
index 29968b8..8181e4e 100644
--- a/llvm/lib/Transforms/Instrumentation/AllocToken.cpp
+++ b/llvm/lib/Transforms/Instrumentation/AllocToken.cpp
@@ -36,6 +36,7 @@
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Type.h"
+#include "llvm/Support/AllocToken.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
@@ -54,47 +55,14 @@
#include <variant>
using namespace llvm;
+using TokenMode = AllocTokenMode;
#define DEBUG_TYPE "alloc-token"
namespace {
-//===--- Constants --------------------------------------------------------===//
-
-enum class TokenMode : unsigned {
- /// Incrementally increasing token ID.
- Increment = 0,
-
- /// Simple mode that returns a statically-assigned random token ID.
- Random = 1,
-
- /// Token ID based on allocated type hash.
- TypeHash = 2,
-
- /// Token ID based on allocated type hash, where the top half ID-space is
- /// reserved for types that contain pointers and the bottom half for types
- /// that do not contain pointers.
- TypeHashPointerSplit = 3,
-};
-
//===--- Command-line options ---------------------------------------------===//
-cl::opt<TokenMode> ClMode(
- "alloc-token-mode", cl::Hidden, cl::desc("Token assignment mode"),
- cl::init(TokenMode::TypeHashPointerSplit),
- cl::values(
- clEnumValN(TokenMode::Increment, "increment",
- "Incrementally increasing token ID"),
- clEnumValN(TokenMode::Random, "random",
- "Statically-assigned random token ID"),
- clEnumValN(TokenMode::TypeHash, "typehash",
- "Token ID based on allocated type hash"),
- clEnumValN(
- TokenMode::TypeHashPointerSplit, "typehashpointersplit",
- "Token ID based on allocated type hash, where the top half "
- "ID-space is reserved for types that contain pointers and the "
- "bottom half for types that do not contain pointers. ")));
-
cl::opt<std::string> ClFuncPrefix("alloc-token-prefix",
cl::desc("The allocation function prefix"),
cl::Hidden, cl::init("__alloc_token_"));
@@ -217,22 +185,19 @@ public:
using ModeBase::ModeBase;
uint64_t operator()(const CallBase &CB, OptimizationRemarkEmitter &ORE) {
- const auto [N, H] = getHash(CB, ORE);
- return N ? boundedToken(H) : H;
- }
-protected:
- std::pair<MDNode *, uint64_t> getHash(const CallBase &CB,
- OptimizationRemarkEmitter &ORE) {
if (MDNode *N = getAllocTokenMetadata(CB)) {
MDString *S = cast<MDString>(N->getOperand(0));
- return {N, getStableSipHash(S->getString())};
+ AllocTokenMetadata Metadata{S->getString(), containsPointer(N)};
+ if (auto Token = getAllocToken(TokenMode::TypeHash, Metadata, MaxTokens))
+ return *Token;
}
// Fallback.
remarkNoMetadata(CB, ORE);
- return {nullptr, ClFallbackToken};
+ return ClFallbackToken;
}
+protected:
/// Remark that there was no precise type information.
static void remarkNoMetadata(const CallBase &CB,
OptimizationRemarkEmitter &ORE) {
@@ -253,20 +218,18 @@ public:
using TypeHashMode::TypeHashMode;
uint64_t operator()(const CallBase &CB, OptimizationRemarkEmitter &ORE) {
- if (MaxTokens == 1)
- return 0;
- const uint64_t HalfTokens = MaxTokens / 2;
- const auto [N, H] = getHash(CB, ORE);
- if (!N) {
- // Pick the fallback token (ClFallbackToken), which by default is 0,
- // meaning it'll fall into the pointer-less bucket. Override by setting
- // -alloc-token-fallback if that is the wrong choice.
- return H;
+ if (MDNode *N = getAllocTokenMetadata(CB)) {
+ MDString *S = cast<MDString>(N->getOperand(0));
+ AllocTokenMetadata Metadata{S->getString(), containsPointer(N)};
+ if (auto Token = getAllocToken(TokenMode::TypeHashPointerSplit, Metadata,
+ MaxTokens))
+ return *Token;
}
- uint64_t Hash = H % HalfTokens; // base hash
- if (containsPointer(N))
- Hash += HalfTokens;
- return Hash;
+ // Pick the fallback token (ClFallbackToken), which by default is 0, meaning
+ // it'll fall into the pointer-less bucket. Override by setting
+ // -alloc-token-fallback if that is the wrong choice.
+ remarkNoMetadata(CB, ORE);
+ return ClFallbackToken;
}
};
@@ -286,7 +249,7 @@ public:
: Options(transformOptionsFromCl(std::move(Opts))), Mod(M),
FAM(MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager()),
Mode(IncrementMode(*IntPtrTy, *Options.MaxTokens)) {
- switch (ClMode.getValue()) {
+ switch (Options.Mode) {
case TokenMode::Increment:
break;
case TokenMode::Random:
diff --git a/llvm/lib/Transforms/Instrumentation/NumericalStabilitySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/NumericalStabilitySanitizer.cpp
index d18c0d0..80e77e09 100644
--- a/llvm/lib/Transforms/Instrumentation/NumericalStabilitySanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/NumericalStabilitySanitizer.cpp
@@ -2020,7 +2020,6 @@ static void moveFastMathFlags(Function &F,
F.removeFnAttr(attr); \
FMF.set##setter(); \
}
- MOVE_FLAG("unsafe-fp-math", Fast)
MOVE_FLAG("no-infs-fp-math", NoInfs)
MOVE_FLAG("no-nans-fp-math", NoNaNs)
MOVE_FLAG("no-signed-zeros-fp-math", NoSignedZeros)
diff --git a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
index 8714741a..9829d4d 100644
--- a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -1793,3 +1793,13 @@ bool llvm::hasOnlySimpleTerminator(const Function &F) {
}
return true;
}
+
+Printable llvm::printBasicBlock(const BasicBlock *BB) {
+ return Printable([BB](raw_ostream &OS) {
+ if (!BB) {
+ OS << "<nullptr>";
+ return;
+ }
+ BB->printAsOperand(OS);
+ });
+}
diff --git a/llvm/lib/Transforms/Utils/PredicateInfo.cpp b/llvm/lib/Transforms/Utils/PredicateInfo.cpp
index 978d5a2..371d9e6 100644
--- a/llvm/lib/Transforms/Utils/PredicateInfo.cpp
+++ b/llvm/lib/Transforms/Utils/PredicateInfo.cpp
@@ -260,9 +260,16 @@ bool PredicateInfoBuilder::stackIsInScope(const ValueDFSStack &Stack,
// next to the defs they must go with so that we can know it's time to pop
// the stack when we hit the end of the phi uses for a given def.
const ValueDFS &Top = *Stack.back().V;
- if (Top.LocalNum == LN_Last && Top.PInfo) {
- if (!VDUse.U)
- return false;
+ assert(Top.PInfo && "RenameStack should only contain predicate infos (defs)");
+ if (Top.LocalNum == LN_Last) {
+ if (!VDUse.U) {
+ assert(VDUse.PInfo && "A non-use VDUse should have a predicate info");
+ // We should reserve adjacent LN_Last defs for the same phi use.
+ return VDUse.LocalNum == LN_Last &&
+ // If the two phi defs have the same edge, they must be designated
+ // for the same succ BB.
+ getBlockEdge(Top.PInfo) == getBlockEdge(VDUse.PInfo);
+ }
auto *PHI = dyn_cast<PHINode>(VDUse.U->getUser());
if (!PHI)
return false;
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index adf27be..3356516 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -7231,6 +7231,9 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
return DenseMap<const SCEV *, Value *>();
}
+ VPlanTransforms::narrowInterleaveGroups(
+ BestVPlan, BestVF,
+ TTI.getRegisterBitWidth(TargetTransformInfo::RGK_FixedWidthVector));
VPlanTransforms::removeDeadRecipes(BestVPlan);
VPlanTransforms::convertToConcreteRecipes(BestVPlan);
@@ -8199,10 +8202,6 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF,
if (CM.foldTailWithEVL())
VPlanTransforms::runPass(VPlanTransforms::addExplicitVectorLength,
*Plan, CM.getMaxSafeElements());
-
- if (auto P = VPlanTransforms::narrowInterleaveGroups(*Plan, TTI))
- VPlans.push_back(std::move(P));
-
assert(verifyVPlanIsValid(*Plan) && "VPlan is invalid");
VPlans.push_back(std::move(Plan));
}
@@ -9860,6 +9859,8 @@ bool LoopVectorizePass::processLoop(Loop *L) {
// Get user vectorization factor and interleave count.
ElementCount UserVF = Hints.getWidth();
unsigned UserIC = Hints.getInterleave();
+ if (UserIC > 1 && !LVL.isSafeForAnyVectorWidth())
+ UserIC = 1;
// Plan how to best vectorize.
LVP.plan(UserVF, UserIC);
@@ -9924,7 +9925,15 @@ bool LoopVectorizePass::processLoop(Loop *L) {
VectorizeLoop = false;
}
- if (!LVP.hasPlanWithVF(VF.Width) && UserIC > 1) {
+ if (UserIC == 1 && Hints.getInterleave() > 1) {
+ assert(!LVL.isSafeForAnyVectorWidth() &&
+ "UserIC should only be ignored due to unsafe dependencies");
+ LLVM_DEBUG(dbgs() << "LV: Ignoring user-specified interleave count.\n");
+ IntDiagMsg = {"InterleavingUnsafe",
+ "Ignoring user-specified interleave count due to possibly "
+ "unsafe dependencies in the loop."};
+ InterleaveLoop = false;
+ } else if (!LVP.hasPlanWithVF(VF.Width) && UserIC > 1) {
// Tell the user interleaving was avoided up-front, despite being explicitly
// requested.
LLVM_DEBUG(dbgs() << "LV: Ignoring UserIC, because vectorization and "
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index c95c887..428a8f4 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -1191,7 +1191,6 @@ VPlan *VPlan::duplicate() {
}
Old2NewVPValues[&VectorTripCount] = &NewPlan->VectorTripCount;
Old2NewVPValues[&VF] = &NewPlan->VF;
- Old2NewVPValues[&UF] = &NewPlan->UF;
Old2NewVPValues[&VFxUF] = &NewPlan->VFxUF;
if (BackedgeTakenCount) {
NewPlan->BackedgeTakenCount = new VPValue();
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 167ba55..06bea2f 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -4152,9 +4152,6 @@ class VPlan {
/// Represents the vectorization factor of the loop.
VPValue VF;
- /// Represents the symbolic unroll factor of the loop.
- VPValue UF;
-
/// Represents the loop-invariant VF * UF of the vector loop region.
VPValue VFxUF;
@@ -4308,9 +4305,6 @@ public:
VPValue &getVF() { return VF; };
const VPValue &getVF() const { return VF; };
- /// Returns the symbolic UF of the vector loop region.
- VPValue &getSymbolicUF() { return UF; };
-
/// Returns VF * UF of the vector loop region.
VPValue &getVFxUF() { return VFxUF; }
@@ -4320,12 +4314,6 @@ public:
void addVF(ElementCount VF) { VFs.insert(VF); }
- /// Remove \p VF from the plan.
- void removeVF(ElementCount VF) {
- assert(hasVF(VF) && "tried to remove VF not present in plan");
- VFs.remove(VF);
- }
-
void setVF(ElementCount VF) {
assert(hasVF(VF) && "Cannot set VF not already in plan");
VFs.clear();
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index ff25ef5..f5a3af4 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -3956,9 +3956,6 @@ void VPlanTransforms::materializeVFAndVFxUF(VPlan &Plan, VPBasicBlock *VectorPH,
// used.
// TODO: Assert that they aren't used.
- VPValue *UF = Plan.getOrAddLiveIn(ConstantInt::get(TCTy, Plan.getUF()));
- Plan.getSymbolicUF().replaceAllUsesWith(UF);
-
// If there are no users of the runtime VF, compute VFxUF by constant folding
// the multiplication of VF and UF.
if (VF.getNumUsers() == 0) {
@@ -3978,6 +3975,7 @@ void VPlanTransforms::materializeVFAndVFxUF(VPlan &Plan, VPBasicBlock *VectorPH,
}
VF.replaceAllUsesWith(RuntimeVF);
+ VPValue *UF = Plan.getOrAddLiveIn(ConstantInt::get(TCTy, Plan.getUF()));
VPValue *MulByUF = Builder.createNaryOp(Instruction::Mul, {RuntimeVF, UF});
VFxUF.replaceAllUsesWith(MulByUF);
}
@@ -4045,14 +4043,14 @@ static bool canNarrowLoad(VPWidenRecipe *WideMember0, unsigned OpIdx,
return false;
}
-/// Returns VF from \p VFs if \p IR is a full interleave group with factor and
-/// number of members both equal to VF. The interleave group must also access
-/// the full vector width.
-static std::optional<ElementCount> isConsecutiveInterleaveGroup(
- VPInterleaveRecipe *InterleaveR, ArrayRef<ElementCount> VFs,
- VPTypeAnalysis &TypeInfo, const TargetTransformInfo &TTI) {
- if (!InterleaveR)
- return std::nullopt;
+/// Returns true if \p IR is a full interleave group with factor and number of
+/// members both equal to \p VF. The interleave group must also access the full
+/// vector width \p VectorRegWidth.
+static bool isConsecutiveInterleaveGroup(VPInterleaveRecipe *InterleaveR,
+ unsigned VF, VPTypeAnalysis &TypeInfo,
+ unsigned VectorRegWidth) {
+ if (!InterleaveR || InterleaveR->getMask())
+ return false;
Type *GroupElementTy = nullptr;
if (InterleaveR->getStoredValues().empty()) {
@@ -4061,7 +4059,7 @@ static std::optional<ElementCount> isConsecutiveInterleaveGroup(
[&TypeInfo, GroupElementTy](VPValue *Op) {
return TypeInfo.inferScalarType(Op) == GroupElementTy;
}))
- return std::nullopt;
+ return false;
} else {
GroupElementTy =
TypeInfo.inferScalarType(InterleaveR->getStoredValues()[0]);
@@ -4069,27 +4067,13 @@ static std::optional<ElementCount> isConsecutiveInterleaveGroup(
[&TypeInfo, GroupElementTy](VPValue *Op) {
return TypeInfo.inferScalarType(Op) == GroupElementTy;
}))
- return std::nullopt;
+ return false;
}
- auto GetVectorWidthForVF = [&TTI](ElementCount VF) {
- TypeSize Size = TTI.getRegisterBitWidth(
- VF.isFixed() ? TargetTransformInfo::RGK_FixedWidthVector
- : TargetTransformInfo::RGK_ScalableVector);
- assert(Size.isScalable() == VF.isScalable() &&
- "if Size is scalable, VF must to and vice versa");
- return Size.getKnownMinValue();
- };
-
- for (ElementCount VF : VFs) {
- unsigned MinVal = VF.getKnownMinValue();
- unsigned GroupSize = GroupElementTy->getScalarSizeInBits() * MinVal;
- auto IG = InterleaveR->getInterleaveGroup();
- if (IG->getFactor() == MinVal && IG->getNumMembers() == MinVal &&
- GroupSize == GetVectorWidthForVF(VF))
- return {VF};
- }
- return std::nullopt;
+ unsigned GroupSize = GroupElementTy->getScalarSizeInBits() * VF;
+ auto IG = InterleaveR->getInterleaveGroup();
+ return IG->getFactor() == VF && IG->getNumMembers() == VF &&
+ GroupSize == VectorRegWidth;
}
/// Returns true if \p VPValue is a narrow VPValue.
@@ -4100,18 +4084,16 @@ static bool isAlreadyNarrow(VPValue *VPV) {
return RepR && RepR->isSingleScalar();
}
-std::unique_ptr<VPlan>
-VPlanTransforms::narrowInterleaveGroups(VPlan &Plan,
- const TargetTransformInfo &TTI) {
- using namespace llvm::VPlanPatternMatch;
+void VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, ElementCount VF,
+ unsigned VectorRegWidth) {
VPRegionBlock *VectorLoop = Plan.getVectorLoopRegion();
-
if (!VectorLoop)
- return nullptr;
+ return;
VPTypeAnalysis TypeInfo(Plan);
+
+ unsigned VFMinVal = VF.getKnownMinValue();
SmallVector<VPInterleaveRecipe *> StoreGroups;
- std::optional<ElementCount> VFToOptimize;
for (auto &R : *VectorLoop->getEntryBasicBlock()) {
if (isa<VPCanonicalIVPHIRecipe>(&R) || match(&R, m_BranchOnCount()))
continue;
@@ -4125,33 +4107,30 @@ VPlanTransforms::narrowInterleaveGroups(VPlan &Plan,
// * recipes writing to memory except interleave groups
// Only support plans with a canonical induction phi.
if (R.isPhi())
- return nullptr;
+ return;
auto *InterleaveR = dyn_cast<VPInterleaveRecipe>(&R);
if (R.mayWriteToMemory() && !InterleaveR)
- return nullptr;
+ return;
+
+ // Do not narrow interleave groups if there are VectorPointer recipes and
+ // the plan was unrolled. The recipe implicitly uses VF from
+ // VPTransformState.
+ // TODO: Remove restriction once the VF for the VectorPointer offset is
+ // modeled explicitly as operand.
+ if (isa<VPVectorPointerRecipe>(&R) && Plan.getUF() > 1)
+ return;
// All other ops are allowed, but we reject uses that cannot be converted
// when checking all allowed consumers (store interleave groups) below.
if (!InterleaveR)
continue;
- // Try to find a single VF, where all interleave groups are consecutive and
- // saturate the full vector width. If we already have a candidate VF, check
- // if it is applicable for the current InterleaveR, otherwise look for a
- // suitable VF across the Plans VFs.
- //
- if (VFToOptimize) {
- if (!isConsecutiveInterleaveGroup(InterleaveR, {*VFToOptimize}, TypeInfo,
- TTI))
- return nullptr;
- } else {
- if (auto VF = isConsecutiveInterleaveGroup(
- InterleaveR, to_vector(Plan.vectorFactors()), TypeInfo, TTI))
- VFToOptimize = *VF;
- else
- return nullptr;
- }
+ // Bail out on non-consecutive interleave groups.
+ if (!isConsecutiveInterleaveGroup(InterleaveR, VFMinVal, TypeInfo,
+ VectorRegWidth))
+ return;
+
// Skip read interleave groups.
if (InterleaveR->getStoredValues().empty())
continue;
@@ -4185,34 +4164,24 @@ VPlanTransforms::narrowInterleaveGroups(VPlan &Plan,
auto *WideMember0 = dyn_cast_or_null<VPWidenRecipe>(
InterleaveR->getStoredValues()[0]->getDefiningRecipe());
if (!WideMember0)
- return nullptr;
+ return;
for (const auto &[I, V] : enumerate(InterleaveR->getStoredValues())) {
auto *R = dyn_cast_or_null<VPWidenRecipe>(V->getDefiningRecipe());
if (!R || R->getOpcode() != WideMember0->getOpcode() ||
R->getNumOperands() > 2)
- return nullptr;
+ return;
if (any_of(enumerate(R->operands()),
[WideMember0, Idx = I](const auto &P) {
const auto &[OpIdx, OpV] = P;
return !canNarrowLoad(WideMember0, OpIdx, OpV, Idx);
}))
- return nullptr;
+ return;
}
StoreGroups.push_back(InterleaveR);
}
if (StoreGroups.empty())
- return nullptr;
-
- // All interleave groups in Plan can be narrowed for VFToOptimize. Split the
- // original Plan into 2: a) a new clone which contains all VFs of Plan, except
- // VFToOptimize, and b) the original Plan with VFToOptimize as single VF.
- std::unique_ptr<VPlan> NewPlan;
- if (size(Plan.vectorFactors()) != 1) {
- NewPlan = std::unique_ptr<VPlan>(Plan.duplicate());
- Plan.setVF(*VFToOptimize);
- NewPlan->removeVF(*VFToOptimize);
- }
+ return;
// Convert InterleaveGroup \p R to a single VPWidenLoadRecipe.
SmallPtrSet<VPValue *, 4> NarrowedOps;
@@ -4283,8 +4252,9 @@ VPlanTransforms::narrowInterleaveGroups(VPlan &Plan,
auto *Inc = cast<VPInstruction>(CanIV->getBackedgeValue());
VPBuilder PHBuilder(Plan.getVectorPreheader());
- VPValue *UF = &Plan.getSymbolicUF();
- if (VFToOptimize->isScalable()) {
+ VPValue *UF = Plan.getOrAddLiveIn(
+ ConstantInt::get(CanIV->getScalarType(), 1 * Plan.getUF()));
+ if (VF.isScalable()) {
VPValue *VScale = PHBuilder.createElementCount(
CanIV->getScalarType(), ElementCount::getScalable(1));
VPValue *VScaleUF = PHBuilder.createNaryOp(Instruction::Mul, {VScale, UF});
@@ -4296,10 +4266,6 @@ VPlanTransforms::narrowInterleaveGroups(VPlan &Plan,
Plan.getOrAddLiveIn(ConstantInt::get(CanIV->getScalarType(), 1)));
}
removeDeadRecipes(Plan);
- assert(none_of(*VectorLoop->getEntryBasicBlock(),
- IsaPred<VPVectorPointerRecipe>) &&
- "All VPVectorPointerRecipes should have been removed");
- return NewPlan;
}
/// Add branch weight metadata, if the \p Plan's middle block is terminated by a
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
index ca8d956..b28559b 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
@@ -341,20 +341,14 @@ struct VPlanTransforms {
static DenseMap<const SCEV *, Value *> expandSCEVs(VPlan &Plan,
ScalarEvolution &SE);
- /// Try to find a single VF among \p Plan's VFs for which all interleave
- /// groups (with known minimum VF elements) can be replaced by wide loads and
- /// stores processing VF elements, if all transformed interleave groups access
- /// the full vector width (checked via the maximum vector register width). If
- /// the transformation can be applied, the original \p Plan will be split in
- /// 2:
- /// 1. The original Plan with the single VF containing the optimized recipes
- /// using wide loads instead of interleave groups.
- /// 2. A new clone which contains all VFs of Plan except the optimized VF.
- ///
- /// This effectively is a very simple form of loop-aware SLP, where we use
- /// interleave groups to identify candidates.
- static std::unique_ptr<VPlan>
- narrowInterleaveGroups(VPlan &Plan, const TargetTransformInfo &TTI);
+ /// Try to convert a plan with interleave groups with VF elements to a plan
+ /// with the interleave groups replaced by wide loads and stores processing VF
+ /// elements, if all transformed interleave groups access the full vector
+ /// width (checked via \p VectorRegWidth). This effectively is a very simple
+ /// form of loop-aware SLP, where we use interleave groups to identify
+ /// candidates.
+ static void narrowInterleaveGroups(VPlan &Plan, ElementCount VF,
+ unsigned VectorRegWidth);
/// Predicate and linearize the control-flow in the only loop region of
/// \p Plan. If \p FoldTail is true, create a mask guarding the loop
diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h
index 0678bc90..83e3fca 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanValue.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h
@@ -41,10 +41,10 @@ class VPRecipeBase;
class VPInterleaveBase;
class VPPhiAccessors;
-// This is the base class of the VPlan Def/Use graph, used for modeling the data
-// flow into, within and out of the VPlan. VPValues can stand for live-ins
-// coming from the input IR and instructions which VPlan will generate if
-// executed.
+/// This is the base class of the VPlan Def/Use graph, used for modeling the
+/// data flow into, within and out of the VPlan. VPValues can stand for live-ins
+/// coming from the input IR and instructions which VPlan will generate if
+/// executed.
class LLVM_ABI_FOR_TEST VPValue {
friend class VPDef;
friend struct VPDoubleValueDef;
@@ -57,7 +57,7 @@ class LLVM_ABI_FOR_TEST VPValue {
SmallVector<VPUser *, 1> Users;
protected:
- // Hold the underlying Value, if any, attached to this VPValue.
+ /// Hold the underlying Value, if any, attached to this VPValue.
Value *UnderlyingVal;
/// Pointer to the VPDef that defines this VPValue. If it is nullptr, the