about | summary | refs | log | tree | commit | diff
path: root/llvm/lib/Transforms
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Transforms')
-rw-r--r-- llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp | 12
-rw-r--r-- llvm/lib/Transforms/IPO/LowerTypeTests.cpp | 2
-rw-r--r-- llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp | 78
-rw-r--r-- llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp | 30
-rw-r--r-- llvm/lib/Transforms/Instrumentation/AllocToken.cpp | 148
-rw-r--r-- llvm/lib/Transforms/Instrumentation/NumericalStabilitySanitizer.cpp | 1
-rw-r--r-- llvm/lib/Transforms/Utils/BasicBlockUtils.cpp | 10
-rw-r--r-- llvm/lib/Transforms/Utils/PredicateInfo.cpp | 13
-rw-r--r-- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 12
-rw-r--r-- llvm/lib/Transforms/Vectorize/VPlan.cpp | 34
-rw-r--r-- llvm/lib/Transforms/Vectorize/VPlan.h | 26
-rw-r--r-- llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp | 2
-rw-r--r-- llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 4
-rw-r--r-- llvm/lib/Transforms/Vectorize/VPlanUtils.cpp | 27
-rw-r--r-- llvm/lib/Transforms/Vectorize/VPlanValue.h | 10
15 files changed, 260 insertions, 149 deletions
diff --git a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
index bbbac45..7a95df4 100644
--- a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
+++ b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
@@ -907,10 +907,20 @@ static bool mergeConsecutivePartStores(ArrayRef<PartStore> Parts,
StoreInst *Store = Builder.CreateAlignedStore(
Val, First.Store->getPointerOperand(), First.Store->getAlign());
+ // Merge various metadata onto the new store.
AAMDNodes AATags = First.Store->getAAMetadata();
- for (const PartStore &Part : drop_begin(Parts))
+ SmallVector<Instruction *> Stores = {First.Store};
+ Stores.reserve(Parts.size());
+ SmallVector<DebugLoc> DbgLocs = {First.Store->getDebugLoc()};
+ DbgLocs.reserve(Parts.size());
+ for (const PartStore &Part : drop_begin(Parts)) {
AATags = AATags.concat(Part.Store->getAAMetadata());
+ Stores.push_back(Part.Store);
+ DbgLocs.push_back(Part.Store->getDebugLoc());
+ }
Store->setAAMetadata(AATags);
+ Store->mergeDIAssignID(Stores);
+ Store->setDebugLoc(DebugLoc::getMergedLocations(DbgLocs));
// Remove the old stores.
for (const PartStore &Part : Parts)
diff --git a/llvm/lib/Transforms/IPO/LowerTypeTests.cpp b/llvm/lib/Transforms/IPO/LowerTypeTests.cpp
index 46fb567..aa1346d 100644
--- a/llvm/lib/Transforms/IPO/LowerTypeTests.cpp
+++ b/llvm/lib/Transforms/IPO/LowerTypeTests.cpp
@@ -1271,7 +1271,7 @@ bool LowerTypeTestsModule::hasBranchTargetEnforcement() {
// the module flags.
if (const auto *BTE = mdconst::extract_or_null<ConstantInt>(
M.getModuleFlag("branch-target-enforcement")))
- HasBranchTargetEnforcement = (BTE->getZExtValue() != 0);
+ HasBranchTargetEnforcement = !BTE->isZero();
else
HasBranchTargetEnforcement = 0;
}
diff --git a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
index 76e588b..a0f7ec6 100644
--- a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
+++ b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
@@ -24,7 +24,8 @@
// returns 0, or a single vtable's function returns 1, replace each virtual
// call with a comparison of the vptr against that vtable's address.
//
-// This pass is intended to be used during the regular and thin LTO pipelines:
+// This pass is intended to be used during the regular/thin and non-LTO
+// pipelines:
//
// During regular LTO, the pass determines the best optimization for each
// virtual call and applies the resolutions directly to virtual calls that are
@@ -48,6 +49,14 @@
// is supported.
// - Import phase: (same as with hybrid case above).
//
+// During speculative devirtualization mode (not restricted to LTO):
+// - The pass applies speculative devirtualization without requiring any type of
+// visibility.
+// - Skips other features like virtual constant propagation, uniform return
+// value optimization, unique return value optimization and branch funnels as
+// they need LTO.
+// - This mode is enabled via 'devirtualize-speculatively' flag.
+//
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/IPO/WholeProgramDevirt.h"
@@ -61,7 +70,9 @@
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/Analysis/ModuleSummaryAnalysis.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/TypeMetadataUtils.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/Bitcode/BitcodeWriter.h"
@@ -145,6 +156,13 @@ static cl::opt<std::string> ClWriteSummary(
"bitcode, otherwise YAML"),
cl::Hidden);
+// TODO: This option should eventually support any public visibility vtables,
+// with or without LTO.
+static cl::opt<bool> ClDevirtualizeSpeculatively(
+ "devirtualize-speculatively",
+ cl::desc("Enable speculative devirtualization optimization"),
+ cl::init(false));
+
static cl::opt<unsigned>
ClThreshold("wholeprogramdevirt-branch-funnel-threshold", cl::Hidden,
cl::init(10),
@@ -892,6 +910,8 @@ void llvm::updatePublicTypeTestCalls(Module &M,
CI->eraseFromParent();
}
} else {
+ // TODO: Don't replace public type tests when speculative devirtualization
+ // gets enabled in LTO mode.
auto *True = ConstantInt::getTrue(M.getContext());
for (Use &U : make_early_inc_range(PublicTypeTestFunc->uses())) {
auto *CI = cast<CallInst>(U.getUser());
@@ -1083,10 +1103,10 @@ bool DevirtModule::tryFindVirtualCallTargets(
if (!TM.Bits->GV->isConstant())
return false;
- // We cannot perform whole program devirtualization analysis on a vtable
- // with public LTO visibility.
- if (TM.Bits->GV->getVCallVisibility() ==
- GlobalObject::VCallVisibilityPublic)
+ // Without ClDevirtualizeSpeculatively, we cannot perform whole program
+ // devirtualization analysis on a vtable with public LTO visibility.
+ if (!ClDevirtualizeSpeculatively && TM.Bits->GV->getVCallVisibility() ==
+ GlobalObject::VCallVisibilityPublic)
return false;
Function *Fn = nullptr;
@@ -1105,6 +1125,12 @@ bool DevirtModule::tryFindVirtualCallTargets(
if (Fn->getName() == "__cxa_pure_virtual")
continue;
+ // In most cases empty functions will be overridden by the
+ // implementation of the derived class, so we can skip them.
+ if (ClDevirtualizeSpeculatively && Fn->getReturnType()->isVoidTy() &&
+ Fn->getInstructionCount() <= 1)
+ continue;
+
// We can disregard unreachable functions as possible call targets, as
// unreachable functions shouldn't be called.
if (mustBeUnreachableFunction(Fn, ExportSummary))
@@ -1223,10 +1249,12 @@ void DevirtModule::applySingleImplDevirt(VTableSlotInfo &SlotInfo,
CallTrap->setDebugLoc(CB.getDebugLoc());
}
- // If fallback checking is enabled, add support to compare the virtual
- // function pointer to the devirtualized target. In case of a mismatch,
- // fall back to indirect call.
- if (DevirtCheckMode == WPDCheckMode::Fallback) {
+ // If fallback checking or speculative devirtualization are enabled,
+ // add support to compare the virtual function pointer to the
+ // devirtualized target. In case of a mismatch, fall back to indirect
+ // call.
+ if (DevirtCheckMode == WPDCheckMode::Fallback ||
+ ClDevirtualizeSpeculatively) {
MDNode *Weights = MDBuilder(M.getContext()).createLikelyBranchWeights();
// Version the indirect call site. If the called value is equal to the
// given callee, 'NewInst' will be executed, otherwise the original call
@@ -2057,15 +2085,15 @@ void DevirtModule::scanTypeTestUsers(
Function *TypeTestFunc,
DenseMap<Metadata *, std::set<TypeMemberInfo>> &TypeIdMap) {
// Find all virtual calls via a virtual table pointer %p under an assumption
- // of the form llvm.assume(llvm.type.test(%p, %md)). This indicates that %p
- // points to a member of the type identifier %md. Group calls by (type ID,
- // offset) pair (effectively the identity of the virtual function) and store
- // to CallSlots.
+ // of the form llvm.assume(llvm.type.test(%p, %md)) or
+ // llvm.assume(llvm.public.type.test(%p, %md)).
+ // This indicates that %p points to a member of the type identifier %md.
+ // Group calls by (type ID, offset) pair (effectively the identity of the
+ // virtual function) and store to CallSlots.
for (Use &U : llvm::make_early_inc_range(TypeTestFunc->uses())) {
auto *CI = dyn_cast<CallInst>(U.getUser());
if (!CI)
continue;
-
// Search for virtual calls based on %p and add them to DevirtCalls.
SmallVector<DevirtCallSite, 1> DevirtCalls;
SmallVector<CallInst *, 1> Assumes;
@@ -2348,6 +2376,12 @@ bool DevirtModule::run() {
(ImportSummary && ImportSummary->partiallySplitLTOUnits()))
return false;
+ Function *PublicTypeTestFunc = nullptr;
+ // If we are in speculative devirtualization mode, we can work on the public
+ // type test intrinsics.
+ if (ClDevirtualizeSpeculatively)
+ PublicTypeTestFunc =
+ Intrinsic::getDeclarationIfExists(&M, Intrinsic::public_type_test);
Function *TypeTestFunc =
Intrinsic::getDeclarationIfExists(&M, Intrinsic::type_test);
Function *TypeCheckedLoadFunc =
@@ -2361,8 +2395,9 @@ bool DevirtModule::run() {
// module, this pass has nothing to do. But if we are exporting, we also need
// to handle any users that appear only in the function summaries.
if (!ExportSummary &&
- (!TypeTestFunc || TypeTestFunc->use_empty() || !AssumeFunc ||
- AssumeFunc->use_empty()) &&
+ (((!PublicTypeTestFunc || PublicTypeTestFunc->use_empty()) &&
+ (!TypeTestFunc || TypeTestFunc->use_empty())) ||
+ !AssumeFunc || AssumeFunc->use_empty()) &&
(!TypeCheckedLoadFunc || TypeCheckedLoadFunc->use_empty()) &&
(!TypeCheckedLoadRelativeFunc ||
TypeCheckedLoadRelativeFunc->use_empty()))
@@ -2373,6 +2408,9 @@ bool DevirtModule::run() {
DenseMap<Metadata *, std::set<TypeMemberInfo>> TypeIdMap;
buildTypeIdentifierMap(Bits, TypeIdMap);
+ if (PublicTypeTestFunc && AssumeFunc)
+ scanTypeTestUsers(PublicTypeTestFunc, TypeIdMap);
+
if (TypeTestFunc && AssumeFunc)
scanTypeTestUsers(TypeTestFunc, TypeIdMap);
@@ -2472,8 +2510,12 @@ bool DevirtModule::run() {
.WPDRes[S.first.ByteOffset];
if (tryFindVirtualCallTargets(TargetsForSlot, TypeMemberInfos,
S.first.ByteOffset, ExportSummary)) {
-
- if (!trySingleImplDevirt(ExportSummary, TargetsForSlot, S.second, Res)) {
+ bool SingleImplDevirt =
+ trySingleImplDevirt(ExportSummary, TargetsForSlot, S.second, Res);
+ // Outside speculative devirtualization mode, try to apply virtual constant
+ // propagation or branch funneling.
+ // TODO: This should eventually be enabled for non-public type tests.
+ if (!SingleImplDevirt && !ClDevirtualizeSpeculatively) {
DidVirtualConstProp |=
tryVirtualConstProp(TargetsForSlot, S.second, Res, S.first);
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 975498f..5aa8de3 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -3455,27 +3455,45 @@ Instruction *InstCombinerImpl::foldSelectOfBools(SelectInst &SI) {
// select a, false, b -> select !a, b, false
if (match(TrueVal, m_Specific(Zero))) {
Value *NotCond = Builder.CreateNot(CondVal, "not." + CondVal->getName());
- return SelectInst::Create(NotCond, FalseVal, Zero);
+ Instruction *MDFrom = ProfcheckDisableMetadataFixes ? nullptr : &SI;
+ SelectInst *NewSI =
+ SelectInst::Create(NotCond, FalseVal, Zero, "", nullptr, MDFrom);
+ NewSI->swapProfMetadata();
+ return NewSI;
}
// select a, b, true -> select !a, true, b
if (match(FalseVal, m_Specific(One))) {
Value *NotCond = Builder.CreateNot(CondVal, "not." + CondVal->getName());
- return SelectInst::Create(NotCond, One, TrueVal);
+ Instruction *MDFrom = ProfcheckDisableMetadataFixes ? nullptr : &SI;
+ SelectInst *NewSI =
+ SelectInst::Create(NotCond, One, TrueVal, "", nullptr, MDFrom);
+ NewSI->swapProfMetadata();
+ return NewSI;
}
// DeMorgan in select form: !a && !b --> !(a || b)
// select !a, !b, false --> not (select a, true, b)
if (match(&SI, m_LogicalAnd(m_Not(m_Value(A)), m_Not(m_Value(B)))) &&
(CondVal->hasOneUse() || TrueVal->hasOneUse()) &&
- !match(A, m_ConstantExpr()) && !match(B, m_ConstantExpr()))
- return BinaryOperator::CreateNot(Builder.CreateSelect(A, One, B));
+ !match(A, m_ConstantExpr()) && !match(B, m_ConstantExpr())) {
+ Instruction *MDFrom = ProfcheckDisableMetadataFixes ? nullptr : &SI;
+ SelectInst *NewSI =
+ cast<SelectInst>(Builder.CreateSelect(A, One, B, "", MDFrom));
+ NewSI->swapProfMetadata();
+ return BinaryOperator::CreateNot(NewSI);
+ }
// DeMorgan in select form: !a || !b --> !(a && b)
// select !a, true, !b --> not (select a, b, false)
if (match(&SI, m_LogicalOr(m_Not(m_Value(A)), m_Not(m_Value(B)))) &&
(CondVal->hasOneUse() || FalseVal->hasOneUse()) &&
- !match(A, m_ConstantExpr()) && !match(B, m_ConstantExpr()))
- return BinaryOperator::CreateNot(Builder.CreateSelect(A, B, Zero));
+ !match(A, m_ConstantExpr()) && !match(B, m_ConstantExpr())) {
+ Instruction *MDFrom = ProfcheckDisableMetadataFixes ? nullptr : &SI;
+ SelectInst *NewSI =
+ cast<SelectInst>(Builder.CreateSelect(A, B, Zero, "", MDFrom));
+ NewSI->swapProfMetadata();
+ return BinaryOperator::CreateNot(NewSI);
+ }
// select (select a, true, b), true, b -> select a, true, b
if (match(CondVal, m_Select(m_Value(A), m_One(), m_Value(B))) &&
diff --git a/llvm/lib/Transforms/Instrumentation/AllocToken.cpp b/llvm/lib/Transforms/Instrumentation/AllocToken.cpp
index 40720ae..8181e4e 100644
--- a/llvm/lib/Transforms/Instrumentation/AllocToken.cpp
+++ b/llvm/lib/Transforms/Instrumentation/AllocToken.cpp
@@ -31,10 +31,12 @@
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Type.h"
+#include "llvm/Support/AllocToken.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
@@ -53,47 +55,14 @@
#include <variant>
using namespace llvm;
+using TokenMode = AllocTokenMode;
#define DEBUG_TYPE "alloc-token"
namespace {
-//===--- Constants --------------------------------------------------------===//
-
-enum class TokenMode : unsigned {
- /// Incrementally increasing token ID.
- Increment = 0,
-
- /// Simple mode that returns a statically-assigned random token ID.
- Random = 1,
-
- /// Token ID based on allocated type hash.
- TypeHash = 2,
-
- /// Token ID based on allocated type hash, where the top half ID-space is
- /// reserved for types that contain pointers and the bottom half for types
- /// that do not contain pointers.
- TypeHashPointerSplit = 3,
-};
-
//===--- Command-line options ---------------------------------------------===//
-cl::opt<TokenMode> ClMode(
- "alloc-token-mode", cl::Hidden, cl::desc("Token assignment mode"),
- cl::init(TokenMode::TypeHashPointerSplit),
- cl::values(
- clEnumValN(TokenMode::Increment, "increment",
- "Incrementally increasing token ID"),
- clEnumValN(TokenMode::Random, "random",
- "Statically-assigned random token ID"),
- clEnumValN(TokenMode::TypeHash, "typehash",
- "Token ID based on allocated type hash"),
- clEnumValN(
- TokenMode::TypeHashPointerSplit, "typehashpointersplit",
- "Token ID based on allocated type hash, where the top half "
- "ID-space is reserved for types that contain pointers and the "
- "bottom half for types that do not contain pointers. ")));
-
cl::opt<std::string> ClFuncPrefix("alloc-token-prefix",
cl::desc("The allocation function prefix"),
cl::Hidden, cl::init("__alloc_token_"));
@@ -131,7 +100,7 @@ cl::opt<uint64_t> ClFallbackToken(
//===--- Statistics -------------------------------------------------------===//
-STATISTIC(NumFunctionsInstrumented, "Functions instrumented");
+STATISTIC(NumFunctionsModified, "Functions modified");
STATISTIC(NumAllocationsInstrumented, "Allocations instrumented");
//===----------------------------------------------------------------------===//
@@ -140,9 +109,19 @@ STATISTIC(NumAllocationsInstrumented, "Allocations instrumented");
///
/// Expected format is: !{<type-name>, <contains-pointer>}
MDNode *getAllocTokenMetadata(const CallBase &CB) {
- MDNode *Ret = CB.getMetadata(LLVMContext::MD_alloc_token);
- if (!Ret)
- return nullptr;
+ MDNode *Ret = nullptr;
+ if (auto *II = dyn_cast<IntrinsicInst>(&CB);
+ II && II->getIntrinsicID() == Intrinsic::alloc_token_id) {
+ auto *MDV = cast<MetadataAsValue>(II->getArgOperand(0));
+ Ret = cast<MDNode>(MDV->getMetadata());
+ // If the intrinsic has an empty MDNode, type inference failed.
+ if (Ret->getNumOperands() == 0)
+ return nullptr;
+ } else {
+ Ret = CB.getMetadata(LLVMContext::MD_alloc_token);
+ if (!Ret)
+ return nullptr;
+ }
assert(Ret->getNumOperands() == 2 && "bad !alloc_token");
assert(isa<MDString>(Ret->getOperand(0)));
assert(isa<ConstantAsMetadata>(Ret->getOperand(1)));
@@ -206,22 +185,19 @@ public:
using ModeBase::ModeBase;
uint64_t operator()(const CallBase &CB, OptimizationRemarkEmitter &ORE) {
- const auto [N, H] = getHash(CB, ORE);
- return N ? boundedToken(H) : H;
- }
-protected:
- std::pair<MDNode *, uint64_t> getHash(const CallBase &CB,
- OptimizationRemarkEmitter &ORE) {
if (MDNode *N = getAllocTokenMetadata(CB)) {
MDString *S = cast<MDString>(N->getOperand(0));
- return {N, getStableSipHash(S->getString())};
+ AllocTokenMetadata Metadata{S->getString(), containsPointer(N)};
+ if (auto Token = getAllocToken(TokenMode::TypeHash, Metadata, MaxTokens))
+ return *Token;
}
// Fallback.
remarkNoMetadata(CB, ORE);
- return {nullptr, ClFallbackToken};
+ return ClFallbackToken;
}
+protected:
/// Remark that there was no precise type information.
static void remarkNoMetadata(const CallBase &CB,
OptimizationRemarkEmitter &ORE) {
@@ -242,20 +218,18 @@ public:
using TypeHashMode::TypeHashMode;
uint64_t operator()(const CallBase &CB, OptimizationRemarkEmitter &ORE) {
- if (MaxTokens == 1)
- return 0;
- const uint64_t HalfTokens = MaxTokens / 2;
- const auto [N, H] = getHash(CB, ORE);
- if (!N) {
- // Pick the fallback token (ClFallbackToken), which by default is 0,
- // meaning it'll fall into the pointer-less bucket. Override by setting
- // -alloc-token-fallback if that is the wrong choice.
- return H;
+ if (MDNode *N = getAllocTokenMetadata(CB)) {
+ MDString *S = cast<MDString>(N->getOperand(0));
+ AllocTokenMetadata Metadata{S->getString(), containsPointer(N)};
+ if (auto Token = getAllocToken(TokenMode::TypeHashPointerSplit, Metadata,
+ MaxTokens))
+ return *Token;
}
- uint64_t Hash = H % HalfTokens; // base hash
- if (containsPointer(N))
- Hash += HalfTokens;
- return Hash;
+ // Pick the fallback token (ClFallbackToken), which by default is 0, meaning
+ // it'll fall into the pointer-less bucket. Override by setting
+ // -alloc-token-fallback if that is the wrong choice.
+ remarkNoMetadata(CB, ORE);
+ return ClFallbackToken;
}
};
@@ -275,7 +249,7 @@ public:
: Options(transformOptionsFromCl(std::move(Opts))), Mod(M),
FAM(MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager()),
Mode(IncrementMode(*IntPtrTy, *Options.MaxTokens)) {
- switch (ClMode.getValue()) {
+ switch (Options.Mode) {
case TokenMode::Increment:
break;
case TokenMode::Random:
@@ -315,6 +289,9 @@ private:
FunctionCallee getTokenAllocFunction(const CallBase &CB, uint64_t TokenID,
LibFunc OriginalFunc);
+ /// Lower alloc_token_* intrinsics.
+ void replaceIntrinsicInst(IntrinsicInst *II, OptimizationRemarkEmitter &ORE);
+
/// Return the token ID from metadata in the call.
uint64_t getToken(const CallBase &CB, OptimizationRemarkEmitter &ORE) {
return std::visit([&](auto &&Mode) { return Mode(CB, ORE); }, Mode);
@@ -336,21 +313,32 @@ bool AllocToken::instrumentFunction(Function &F) {
// Do not apply any instrumentation for naked functions.
if (F.hasFnAttribute(Attribute::Naked))
return false;
- if (F.hasFnAttribute(Attribute::DisableSanitizerInstrumentation))
- return false;
// Don't touch available_externally functions, their actual body is elsewhere.
if (F.getLinkage() == GlobalValue::AvailableExternallyLinkage)
return false;
- // Only instrument functions that have the sanitize_alloc_token attribute.
- if (!F.hasFnAttribute(Attribute::SanitizeAllocToken))
- return false;
auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
auto &TLI = FAM.getResult<TargetLibraryAnalysis>(F);
SmallVector<std::pair<CallBase *, LibFunc>, 4> AllocCalls;
+ SmallVector<IntrinsicInst *, 4> IntrinsicInsts;
+
+ // Only instrument functions that have the sanitize_alloc_token attribute.
+ const bool InstrumentFunction =
+ F.hasFnAttribute(Attribute::SanitizeAllocToken) &&
+ !F.hasFnAttribute(Attribute::DisableSanitizerInstrumentation);
// Collect all allocation calls to avoid iterator invalidation.
for (Instruction &I : instructions(F)) {
+ // Collect all alloc_token_* intrinsics.
+ if (auto *II = dyn_cast<IntrinsicInst>(&I);
+ II && II->getIntrinsicID() == Intrinsic::alloc_token_id) {
+ IntrinsicInsts.emplace_back(II);
+ continue;
+ }
+
+ if (!InstrumentFunction)
+ continue;
+
auto *CB = dyn_cast<CallBase>(&I);
if (!CB)
continue;
@@ -359,11 +347,21 @@ bool AllocToken::instrumentFunction(Function &F) {
}
bool Modified = false;
- for (auto &[CB, Func] : AllocCalls)
- Modified |= replaceAllocationCall(CB, Func, ORE, TLI);
- if (Modified)
- NumFunctionsInstrumented++;
+  // Rewrite allocation calls; replaceAllocationCall reports whether it did.
+  for (auto &[CB, Func] : AllocCalls)
+    Modified |= replaceAllocationCall(CB, Func, ORE, TLI);
+
+  // Lowering an alloc_token_* intrinsic always changes the function.
+  for (auto *II : IntrinsicInsts)
+    replaceIntrinsicInst(II, ORE);
+  Modified |= !IntrinsicInsts.empty();
+
+  // Count each modified function exactly once, even when both allocation
+  // calls and intrinsics were rewritten in it.
+  if (Modified)
+    NumFunctionsModified++;
+
return Modified;
}
@@ -381,7 +379,7 @@ AllocToken::shouldInstrumentCall(const CallBase &CB,
if (TLI.getLibFunc(*Callee, Func)) {
if (isInstrumentableLibFunc(Func, CB, TLI))
return Func;
- } else if (Options.Extended && getAllocTokenMetadata(CB)) {
+ } else if (Options.Extended && CB.getMetadata(LLVMContext::MD_alloc_token)) {
return NotLibFunc;
}
@@ -528,6 +526,16 @@ FunctionCallee AllocToken::getTokenAllocFunction(const CallBase &CB,
return TokenAlloc;
}
+void AllocToken::replaceIntrinsicInst(IntrinsicInst *II,
+                                      OptimizationRemarkEmitter &ORE) {
+  assert(II->getIntrinsicID() == Intrinsic::alloc_token_id);
+
+  // Substitute every use of the intrinsic with its token ID, materialized as
+  // an IntPtrTy constant, and then remove the now-unused call.
+  II->replaceAllUsesWith(ConstantInt::get(IntPtrTy, getToken(*II, ORE)));
+  II->eraseFromParent();
+}
+
} // namespace
AllocTokenPass::AllocTokenPass(AllocTokenOptions Opts)
diff --git a/llvm/lib/Transforms/Instrumentation/NumericalStabilitySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/NumericalStabilitySanitizer.cpp
index d18c0d0..80e77e09 100644
--- a/llvm/lib/Transforms/Instrumentation/NumericalStabilitySanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/NumericalStabilitySanitizer.cpp
@@ -2020,7 +2020,6 @@ static void moveFastMathFlags(Function &F,
F.removeFnAttr(attr); \
FMF.set##setter(); \
}
- MOVE_FLAG("unsafe-fp-math", Fast)
MOVE_FLAG("no-infs-fp-math", NoInfs)
MOVE_FLAG("no-nans-fp-math", NoNaNs)
MOVE_FLAG("no-signed-zeros-fp-math", NoSignedZeros)
diff --git a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
index 8714741a..9829d4d 100644
--- a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -1793,3 +1793,13 @@ bool llvm::hasOnlySimpleTerminator(const Function &F) {
}
return true;
}
+
+Printable llvm::printBasicBlock(const BasicBlock *BB) {
+  // A null block is rendered as "<nullptr>"; any other block as an operand.
+  return Printable([BB](raw_ostream &OS) {
+    if (BB)
+      BB->printAsOperand(OS);
+    else
+      OS << "<nullptr>";
+  });
+}
diff --git a/llvm/lib/Transforms/Utils/PredicateInfo.cpp b/llvm/lib/Transforms/Utils/PredicateInfo.cpp
index 978d5a2..371d9e6 100644
--- a/llvm/lib/Transforms/Utils/PredicateInfo.cpp
+++ b/llvm/lib/Transforms/Utils/PredicateInfo.cpp
@@ -260,9 +260,16 @@ bool PredicateInfoBuilder::stackIsInScope(const ValueDFSStack &Stack,
// next to the defs they must go with so that we can know it's time to pop
// the stack when we hit the end of the phi uses for a given def.
const ValueDFS &Top = *Stack.back().V;
- if (Top.LocalNum == LN_Last && Top.PInfo) {
- if (!VDUse.U)
- return false;
+ assert(Top.PInfo && "RenameStack should only contain predicate infos (defs)");
+ if (Top.LocalNum == LN_Last) {
+ if (!VDUse.U) {
+ assert(VDUse.PInfo && "A non-use VDUse should have a predicate info");
+ // We should reserve adjacent LN_Last defs for the same phi use.
+ return VDUse.LocalNum == LN_Last &&
+ // If the two phi defs have the same edge, they must be designated
+ // for the same succ BB.
+ getBlockEdge(Top.PInfo) == getBlockEdge(VDUse.PInfo);
+ }
auto *PHI = dyn_cast<PHINode>(VDUse.U->getUser());
if (!PHI)
return false;
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index adf27be..d2c100c9 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -9860,6 +9860,8 @@ bool LoopVectorizePass::processLoop(Loop *L) {
// Get user vectorization factor and interleave count.
ElementCount UserVF = Hints.getWidth();
unsigned UserIC = Hints.getInterleave();
+ if (UserIC > 1 && !LVL.isSafeForAnyVectorWidth())
+ UserIC = 1;
// Plan how to best vectorize.
LVP.plan(UserVF, UserIC);
@@ -9924,7 +9926,15 @@ bool LoopVectorizePass::processLoop(Loop *L) {
VectorizeLoop = false;
}
- if (!LVP.hasPlanWithVF(VF.Width) && UserIC > 1) {
+ if (UserIC == 1 && Hints.getInterleave() > 1) {
+ assert(!LVL.isSafeForAnyVectorWidth() &&
+ "UserIC should only be ignored due to unsafe dependencies");
+ LLVM_DEBUG(dbgs() << "LV: Ignoring user-specified interleave count.\n");
+ IntDiagMsg = {"InterleavingUnsafe",
+ "Ignoring user-specified interleave count due to possibly "
+ "unsafe dependencies in the loop."};
+ InterleaveLoop = false;
+ } else if (!LVP.hasPlanWithVF(VF.Width) && UserIC > 1) {
// Tell the user interleaving was avoided up-front, despite being explicitly
// requested.
LLVM_DEBUG(dbgs() << "LV: Ignoring UserIC, because vectorization and "
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index b4e4dc2..c95c887 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -217,32 +217,6 @@ VPBlockBase *VPBlockBase::getEnclosingBlockWithPredecessors() {
return Parent->getEnclosingBlockWithPredecessors();
}
-bool VPBlockUtils::isHeader(const VPBlockBase *VPB,
- const VPDominatorTree &VPDT) {
- auto *VPBB = dyn_cast<VPBasicBlock>(VPB);
- if (!VPBB)
- return false;
-
- // If VPBB is in a region R, VPBB is a loop header if R is a loop region with
- // VPBB as its entry, i.e., free of predecessors.
- if (auto *R = VPBB->getParent())
- return !R->isReplicator() && !VPBB->hasPredecessors();
-
- // A header dominates its second predecessor (the latch), with the other
- // predecessor being the preheader
- return VPB->getPredecessors().size() == 2 &&
- VPDT.dominates(VPB, VPB->getPredecessors()[1]);
-}
-
-bool VPBlockUtils::isLatch(const VPBlockBase *VPB,
- const VPDominatorTree &VPDT) {
- // A latch has a header as its second successor, with its other successor
- // leaving the loop. A preheader OTOH has a header as its first (and only)
- // successor.
- return VPB->getNumSuccessors() == 2 &&
- VPBlockUtils::isHeader(VPB->getSuccessors()[1], VPDT);
-}
-
VPBasicBlock::iterator VPBasicBlock::getFirstNonPhi() {
iterator It = begin();
while (It != end() && It->isPhi())
@@ -768,8 +742,12 @@ static std::pair<VPBlockBase *, VPBlockBase *> cloneFrom(VPBlockBase *Entry) {
VPRegionBlock *VPRegionBlock::clone() {
const auto &[NewEntry, NewExiting] = cloneFrom(getEntry());
- auto *NewRegion = getPlan()->createVPRegionBlock(NewEntry, NewExiting,
- getName(), isReplicator());
+ VPlan &Plan = *getPlan();
+ VPRegionBlock *NewRegion =
+ isReplicator()
+ ? Plan.createReplicateRegion(NewEntry, NewExiting, getName())
+ : Plan.createLoopRegion(getName(), NewEntry, NewExiting);
+
for (VPBlockBase *Block : vp_depth_first_shallow(NewEntry))
Block->setParent(NewRegion);
return NewRegion;
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 8274431..167ba55 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -4450,22 +4450,24 @@ public:
return VPB;
}
- /// Create a new VPRegionBlock with \p Entry, \p Exiting and \p Name. If \p
- /// IsReplicator is true, the region is a replicate region. The returned block
- /// is owned by the VPlan and deleted once the VPlan is destroyed.
- VPRegionBlock *createVPRegionBlock(VPBlockBase *Entry, VPBlockBase *Exiting,
- const std::string &Name = "",
- bool IsReplicator = false) {
- auto *VPB = new VPRegionBlock(Entry, Exiting, Name, IsReplicator);
+ /// Create a new loop region with \p Name and entry and exiting blocks set
+ /// to \p Entry and \p Exiting respectively, if set. The returned block is
+ /// owned by the VPlan and deleted once the VPlan is destroyed.
+ VPRegionBlock *createLoopRegion(const std::string &Name = "",
+ VPBlockBase *Entry = nullptr,
+ VPBlockBase *Exiting = nullptr) {
+ auto *VPB = Entry ? new VPRegionBlock(Entry, Exiting, Name)
+ : new VPRegionBlock(Name);
CreatedBlocks.push_back(VPB);
return VPB;
}
- /// Create a new loop VPRegionBlock with \p Name and entry and exiting blocks set
- /// to nullptr. The returned block is owned by the VPlan and deleted once the
- /// VPlan is destroyed.
- VPRegionBlock *createVPRegionBlock(const std::string &Name = "") {
- auto *VPB = new VPRegionBlock(Name);
+ /// Create a new replicate region with \p Entry, \p Exiting and \p Name. The
+ /// returned block is owned by the VPlan and deleted once the VPlan is
+ /// destroyed.
+ VPRegionBlock *createReplicateRegion(VPBlockBase *Entry, VPBlockBase *Exiting,
+ const std::string &Name = "") {
+ auto *VPB = new VPRegionBlock(Entry, Exiting, Name, true);
CreatedBlocks.push_back(VPB);
return VPB;
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
index 332791a..65688a3 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
@@ -406,7 +406,7 @@ static void createLoopRegion(VPlan &Plan, VPBlockBase *HeaderVPB) {
// LatchExitVPB, taking care to preserve the original predecessor & successor
// order of blocks. Set region entry and exiting after both HeaderVPB and
// LatchVPBB have been disconnected from their predecessors/successors.
- auto *R = Plan.createVPRegionBlock();
+ auto *R = Plan.createLoopRegion();
VPBlockUtils::insertOnEdge(LatchVPBB, LatchExitVPB, R);
VPBlockUtils::disconnectBlocks(LatchVPBB, R);
VPBlockUtils::connectBlocks(PreheaderVPBB, R);
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 7bf8d83..48cf763 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -372,7 +372,7 @@ static VPRegionBlock *createReplicateRegion(VPReplicateRecipe *PredRecipe,
auto *Exiting =
Plan.createVPBasicBlock(Twine(RegionName) + ".continue", PHIRecipe);
VPRegionBlock *Region =
- Plan.createVPRegionBlock(Entry, Exiting, RegionName, true);
+ Plan.createReplicateRegion(Entry, Exiting, RegionName);
// Note: first set Entry as region entry and then connect successors starting
// from it in order, to propagate the "parent" of each VPBasicBlock.
@@ -4051,7 +4051,7 @@ static bool canNarrowLoad(VPWidenRecipe *WideMember0, unsigned OpIdx,
static std::optional<ElementCount> isConsecutiveInterleaveGroup(
VPInterleaveRecipe *InterleaveR, ArrayRef<ElementCount> VFs,
VPTypeAnalysis &TypeInfo, const TargetTransformInfo &TTI) {
- if (!InterleaveR)
+ if (!InterleaveR || InterleaveR->getMask())
return std::nullopt;
Type *GroupElementTy = nullptr;
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
index 10801c0..32e4b88 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
@@ -8,6 +8,7 @@
#include "VPlanUtils.h"
#include "VPlanCFG.h"
+#include "VPlanDominatorTree.h"
#include "VPlanPatternMatch.h"
#include "llvm/ADT/TypeSwitch.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
@@ -253,3 +254,29 @@ vputils::getRecipesForUncountableExit(VPlan &Plan,
return UncountableCondition;
}
+
+bool VPBlockUtils::isHeader(const VPBlockBase *VPB,
+                            const VPDominatorTree &VPDT) {
+  // Only a VPBasicBlock can be a loop header.
+  const auto *Block = dyn_cast<VPBasicBlock>(VPB);
+  if (!Block)
+    return false;
+
+  // Inside a region, the header is the predecessor-free entry block of a
+  // loop (i.e. non-replicate) region.
+  if (const auto *Region = Block->getParent()) {
+    if (Region->isReplicator())
+      return false;
+    return !Block->hasPredecessors();
+  }
+
+  // Outside of regions, a header has exactly two predecessors: the preheader
+  // first and the latch second, and it dominates the latch.
+  const auto &Preds = VPB->getPredecessors();
+  if (Preds.size() != 2)
+    return false;
+  return VPDT.dominates(VPB, Preds[1]);
+}
+
+bool VPBlockUtils::isLatch(const VPBlockBase *VPB,
+                           const VPDominatorTree &VPDT) {
+  // A latch has two successors: one leaves the loop and the second one is the
+  // header. A preheader, by contrast, has the header as its only successor.
+  if (VPB->getNumSuccessors() != 2)
+    return false;
+  return VPBlockUtils::isHeader(VPB->getSuccessors()[1], VPDT);
+}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h
index 0678bc90..83e3fca 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanValue.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h
@@ -41,10 +41,10 @@ class VPRecipeBase;
class VPInterleaveBase;
class VPPhiAccessors;
-// This is the base class of the VPlan Def/Use graph, used for modeling the data
-// flow into, within and out of the VPlan. VPValues can stand for live-ins
-// coming from the input IR and instructions which VPlan will generate if
-// executed.
+/// This is the base class of the VPlan Def/Use graph, used for modeling the
+/// data flow into, within and out of the VPlan. VPValues can stand for live-ins
+/// coming from the input IR and instructions which VPlan will generate if
+/// executed.
class LLVM_ABI_FOR_TEST VPValue {
friend class VPDef;
friend struct VPDoubleValueDef;
@@ -57,7 +57,7 @@ class LLVM_ABI_FOR_TEST VPValue {
SmallVector<VPUser *, 1> Users;
protected:
- // Hold the underlying Value, if any, attached to this VPValue.
+ /// Hold the underlying Value, if any, attached to this VPValue.
Value *UnderlyingVal;
/// Pointer to the VPDef that defines this VPValue. If it is nullptr, the