aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Transforms
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Transforms')
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp5
-rw-r--r--llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp10
-rw-r--r--llvm/lib/Transforms/Instrumentation/AllocToken.cpp494
-rw-r--r--llvm/lib/Transforms/Instrumentation/CMakeLists.txt1
-rw-r--r--llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp14
-rw-r--r--llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp108
-rw-r--r--llvm/lib/Transforms/Utils/CodeExtractor.cpp1
-rw-r--r--llvm/lib/Transforms/Utils/Local.cpp7
-rw-r--r--llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp92
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlan.h3
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp5
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp6
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlanUtils.cpp4
13 files changed, 658 insertions, 92 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
index 9ca8194..56194fe 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -137,13 +137,10 @@ InstCombinerImpl::isEliminableCastPair(const CastInst *CI1,
Instruction::CastOps secondOp = CI2->getOpcode();
Type *SrcIntPtrTy =
SrcTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(SrcTy) : nullptr;
- Type *MidIntPtrTy =
- MidTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(MidTy) : nullptr;
Type *DstIntPtrTy =
DstTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(DstTy) : nullptr;
unsigned Res = CastInst::isEliminableCastPair(firstOp, secondOp, SrcTy, MidTy,
- DstTy, SrcIntPtrTy, MidIntPtrTy,
- DstIntPtrTy);
+ DstTy, &DL);
// We don't want to form an inttoptr or ptrtoint that converts to an integer
// type that differs from the pointer size.
diff --git a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index cdae9a7..3704ad7 100644
--- a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -2662,7 +2662,7 @@ void ModuleAddressSanitizer::instrumentGlobals(IRBuilder<> &IRB,
G->eraseFromParent();
NewGlobals[i] = NewGlobal;
- Constant *ODRIndicator = ConstantPointerNull::get(PtrTy);
+ Constant *ODRIndicator = Constant::getNullValue(IntptrTy);
GlobalValue *InstrumentedGlobal = NewGlobal;
bool CanUsePrivateAliases =
@@ -2677,8 +2677,7 @@ void ModuleAddressSanitizer::instrumentGlobals(IRBuilder<> &IRB,
// ODR should not happen for local linkage.
if (NewGlobal->hasLocalLinkage()) {
- ODRIndicator =
- ConstantExpr::getIntToPtr(ConstantInt::get(IntptrTy, -1), PtrTy);
+ ODRIndicator = ConstantInt::get(IntptrTy, -1);
} else if (UseOdrIndicator) {
// With local aliases, we need to provide another externally visible
// symbol __odr_asan_XXX to detect ODR violation.
@@ -2692,7 +2691,7 @@ void ModuleAddressSanitizer::instrumentGlobals(IRBuilder<> &IRB,
ODRIndicatorSym->setVisibility(NewGlobal->getVisibility());
ODRIndicatorSym->setDLLStorageClass(NewGlobal->getDLLStorageClass());
ODRIndicatorSym->setAlignment(Align(1));
- ODRIndicator = ODRIndicatorSym;
+ ODRIndicator = ConstantExpr::getPtrToInt(ODRIndicatorSym, IntptrTy);
}
Constant *Initializer = ConstantStruct::get(
@@ -2703,8 +2702,7 @@ void ModuleAddressSanitizer::instrumentGlobals(IRBuilder<> &IRB,
ConstantExpr::getPointerCast(Name, IntptrTy),
ConstantExpr::getPointerCast(getOrCreateModuleName(), IntptrTy),
ConstantInt::get(IntptrTy, MD.IsDynInit),
- Constant::getNullValue(IntptrTy),
- ConstantExpr::getPointerCast(ODRIndicator, IntptrTy));
+ Constant::getNullValue(IntptrTy), ODRIndicator);
LLVM_DEBUG(dbgs() << "NEW GLOBAL: " << *NewGlobal << "\n");
diff --git a/llvm/lib/Transforms/Instrumentation/AllocToken.cpp b/llvm/lib/Transforms/Instrumentation/AllocToken.cpp
new file mode 100644
index 0000000..782d5a1
--- /dev/null
+++ b/llvm/lib/Transforms/Instrumentation/AllocToken.cpp
@@ -0,0 +1,494 @@
+//===- AllocToken.cpp - Allocation token instrumentation ------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements AllocToken, an instrumentation pass that
+// replaces allocation calls with token-enabled versions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Instrumentation/AllocToken.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/IR/Analysis.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/RandomNumberGenerator.h"
+#include "llvm/Support/SipHash.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <limits>
+#include <memory>
+#include <optional>
+#include <string>
+#include <utility>
+#include <variant>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "alloc-token"
+
+namespace {
+
+//===--- Constants --------------------------------------------------------===//
+
+enum class TokenMode : unsigned {
+ /// Incrementally increasing token ID.
+ Increment = 0,
+
+ /// Simple mode that returns a statically-assigned random token ID.
+ Random = 1,
+
+ /// Token ID based on allocated type hash.
+ TypeHash = 2,
+};
+
+//===--- Command-line options ---------------------------------------------===//
+
+cl::opt<TokenMode>
+ ClMode("alloc-token-mode", cl::Hidden, cl::desc("Token assignment mode"),
+ cl::init(TokenMode::TypeHash),
+ cl::values(clEnumValN(TokenMode::Increment, "increment",
+ "Incrementally increasing token ID"),
+ clEnumValN(TokenMode::Random, "random",
+ "Statically-assigned random token ID"),
+ clEnumValN(TokenMode::TypeHash, "typehash",
+ "Token ID based on allocated type hash")));
+
+cl::opt<std::string> ClFuncPrefix("alloc-token-prefix",
+ cl::desc("The allocation function prefix"),
+ cl::Hidden, cl::init("__alloc_token_"));
+
+cl::opt<uint64_t> ClMaxTokens("alloc-token-max",
+ cl::desc("Maximum number of tokens (0 = no max)"),
+ cl::Hidden, cl::init(0));
+
+cl::opt<bool>
+ ClFastABI("alloc-token-fast-abi",
+ cl::desc("The token ID is encoded in the function name"),
+ cl::Hidden, cl::init(false));
+
+// Instrument libcalls only by default - compatible allocators only need to take
+// care of providing standard allocation functions. With extended coverage, also
+// instrument non-libcall allocation function calls with !alloc_token
+// metadata.
+cl::opt<bool>
+ ClExtended("alloc-token-extended",
+ cl::desc("Extend coverage to custom allocation functions"),
+ cl::Hidden, cl::init(false));
+
+// C++ defines ::operator new (and variants) as replaceable (vs. standard
+// library versions), which are nobuiltin, and are therefore not covered by
+// isAllocationFn(). Cover by default, as users of AllocToken are already
+// required to provide token-aware allocation functions (no defaults).
+cl::opt<bool> ClCoverReplaceableNew("alloc-token-cover-replaceable-new",
+ cl::desc("Cover replaceable operator new"),
+ cl::Hidden, cl::init(true));
+
+cl::opt<uint64_t> ClFallbackToken(
+ "alloc-token-fallback",
+ cl::desc("The default fallback token where none could be determined"),
+ cl::Hidden, cl::init(0));
+
+//===--- Statistics -------------------------------------------------------===//
+
+STATISTIC(NumFunctionsInstrumented, "Functions instrumented");
+STATISTIC(NumAllocationsInstrumented, "Allocations instrumented");
+
+//===----------------------------------------------------------------------===//
+
+/// Returns the !alloc_token metadata if available.
+///
+/// Expected format is: !{<type-name>}
+MDNode *getAllocTokenMetadata(const CallBase &CB) {
+ MDNode *Ret = CB.getMetadata(LLVMContext::MD_alloc_token);
+ if (!Ret)
+ return nullptr;
+ assert(Ret->getNumOperands() == 1 && "bad !alloc_token");
+ assert(isa<MDString>(Ret->getOperand(0)));
+ return Ret;
+}
+
+class ModeBase {
+public:
+ explicit ModeBase(const IntegerType &TokenTy, uint64_t MaxTokens)
+ : MaxTokens(MaxTokens ? MaxTokens : TokenTy.getBitMask()) {
+ assert(MaxTokens <= TokenTy.getBitMask());
+ }
+
+protected:
+ uint64_t boundedToken(uint64_t Val) const {
+ assert(MaxTokens != 0);
+ return Val % MaxTokens;
+ }
+
+ const uint64_t MaxTokens;
+};
+
+/// Implementation for TokenMode::Increment.
+class IncrementMode : public ModeBase {
+public:
+ using ModeBase::ModeBase;
+
+ uint64_t operator()(const CallBase &CB, OptimizationRemarkEmitter &) {
+ return boundedToken(Counter++);
+ }
+
+private:
+ uint64_t Counter = 0;
+};
+
+/// Implementation for TokenMode::Random.
+class RandomMode : public ModeBase {
+public:
+ RandomMode(const IntegerType &TokenTy, uint64_t MaxTokens,
+ std::unique_ptr<RandomNumberGenerator> RNG)
+ : ModeBase(TokenTy, MaxTokens), RNG(std::move(RNG)) {}
+ uint64_t operator()(const CallBase &CB, OptimizationRemarkEmitter &) {
+ return boundedToken((*RNG)());
+ }
+
+private:
+ std::unique_ptr<RandomNumberGenerator> RNG;
+};
+
+/// Implementation for TokenMode::TypeHash. The implementation ensures
+/// hashes are stable across different compiler invocations. Uses SipHash as the
+/// hash function.
+class TypeHashMode : public ModeBase {
+public:
+ using ModeBase::ModeBase;
+
+ uint64_t operator()(const CallBase &CB, OptimizationRemarkEmitter &ORE) {
+ if (MDNode *N = getAllocTokenMetadata(CB)) {
+ MDString *S = cast<MDString>(N->getOperand(0));
+ return boundedToken(getStableSipHash(S->getString()));
+ }
+ remarkNoMetadata(CB, ORE);
+ return ClFallbackToken;
+ }
+
+ /// Remark that there was no precise type information.
+ static void remarkNoMetadata(const CallBase &CB,
+ OptimizationRemarkEmitter &ORE) {
+ ORE.emit([&] {
+ ore::NV FuncNV("Function", CB.getParent()->getParent());
+ const Function *Callee = CB.getCalledFunction();
+ ore::NV CalleeNV("Callee", Callee ? Callee->getName() : "<unknown>");
+ return OptimizationRemark(DEBUG_TYPE, "NoAllocToken", &CB)
+ << "Call to '" << CalleeNV << "' in '" << FuncNV
+ << "' without source-level type token";
+ });
+ }
+};
+
+// Apply opt overrides.
+AllocTokenOptions transformOptionsFromCl(AllocTokenOptions Opts) {
+ if (!Opts.MaxTokens.has_value())
+ Opts.MaxTokens = ClMaxTokens;
+ Opts.FastABI |= ClFastABI;
+ Opts.Extended |= ClExtended;
+ return Opts;
+}
+
+class AllocToken {
+public:
+ explicit AllocToken(AllocTokenOptions Opts, Module &M,
+ ModuleAnalysisManager &MAM)
+ : Options(transformOptionsFromCl(std::move(Opts))), Mod(M),
+ FAM(MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager()),
+ Mode(IncrementMode(*IntPtrTy, *Options.MaxTokens)) {
+ switch (ClMode.getValue()) {
+ case TokenMode::Increment:
+ break;
+ case TokenMode::Random:
+ Mode.emplace<RandomMode>(*IntPtrTy, *Options.MaxTokens,
+ M.createRNG(DEBUG_TYPE));
+ break;
+ case TokenMode::TypeHash:
+ Mode.emplace<TypeHashMode>(*IntPtrTy, *Options.MaxTokens);
+ break;
+ }
+ }
+
+ bool instrumentFunction(Function &F);
+
+private:
+ /// Returns the LibFunc (or NotLibFunc) if this call should be instrumented.
+ std::optional<LibFunc>
+ shouldInstrumentCall(const CallBase &CB, const TargetLibraryInfo &TLI) const;
+
+ /// Returns true for functions that are eligible for instrumentation.
+ static bool isInstrumentableLibFunc(LibFunc Func, const CallBase &CB,
+ const TargetLibraryInfo &TLI);
+
+ /// Returns true for isAllocationFn() functions that we should ignore.
+ static bool ignoreInstrumentableLibFunc(LibFunc Func);
+
+ /// Replace a call/invoke with a call/invoke to the allocation function
+ /// with token ID.
+ bool replaceAllocationCall(CallBase *CB, LibFunc Func,
+ OptimizationRemarkEmitter &ORE,
+ const TargetLibraryInfo &TLI);
+
+ /// Return replacement function for a LibFunc that takes a token ID.
+ FunctionCallee getTokenAllocFunction(const CallBase &CB, uint64_t TokenID,
+ LibFunc OriginalFunc);
+
+ /// Return the token ID from metadata in the call.
+ uint64_t getToken(const CallBase &CB, OptimizationRemarkEmitter &ORE) {
+ return std::visit([&](auto &&Mode) { return Mode(CB, ORE); }, Mode);
+ }
+
+ const AllocTokenOptions Options;
+ Module &Mod;
+ IntegerType *IntPtrTy = Mod.getDataLayout().getIntPtrType(Mod.getContext());
+ FunctionAnalysisManager &FAM;
+ // Cache for replacement functions.
+ DenseMap<std::pair<LibFunc, uint64_t>, FunctionCallee> TokenAllocFunctions;
+ // Selected mode.
+ std::variant<IncrementMode, RandomMode, TypeHashMode> Mode;
+};
+
+bool AllocToken::instrumentFunction(Function &F) {
+ // Do not apply any instrumentation for naked functions.
+ if (F.hasFnAttribute(Attribute::Naked))
+ return false;
+ if (F.hasFnAttribute(Attribute::DisableSanitizerInstrumentation))
+ return false;
+ // Don't touch available_externally functions, their actual body is elsewhere.
+ if (F.getLinkage() == GlobalValue::AvailableExternallyLinkage)
+ return false;
+ // Only instrument functions that have the sanitize_alloc_token attribute.
+ if (!F.hasFnAttribute(Attribute::SanitizeAllocToken))
+ return false;
+
+ auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
+ auto &TLI = FAM.getResult<TargetLibraryAnalysis>(F);
+ SmallVector<std::pair<CallBase *, LibFunc>, 4> AllocCalls;
+
+ // Collect all allocation calls to avoid iterator invalidation.
+ for (Instruction &I : instructions(F)) {
+ auto *CB = dyn_cast<CallBase>(&I);
+ if (!CB)
+ continue;
+ if (std::optional<LibFunc> Func = shouldInstrumentCall(*CB, TLI))
+ AllocCalls.emplace_back(CB, Func.value());
+ }
+
+ bool Modified = false;
+ for (auto &[CB, Func] : AllocCalls)
+ Modified |= replaceAllocationCall(CB, Func, ORE, TLI);
+
+ if (Modified)
+ NumFunctionsInstrumented++;
+ return Modified;
+}
+
+std::optional<LibFunc>
+AllocToken::shouldInstrumentCall(const CallBase &CB,
+ const TargetLibraryInfo &TLI) const {
+ const Function *Callee = CB.getCalledFunction();
+ if (!Callee)
+ return std::nullopt;
+
+ // Ignore nobuiltin of the CallBase, so that we can cover nobuiltin libcalls
+ // if requested via isInstrumentableLibFunc(). Note that isAllocationFn() is
+ // returning false for nobuiltin calls.
+ LibFunc Func;
+ if (TLI.getLibFunc(*Callee, Func)) {
+ if (isInstrumentableLibFunc(Func, CB, TLI))
+ return Func;
+ } else if (Options.Extended && getAllocTokenMetadata(CB)) {
+ return NotLibFunc;
+ }
+
+ return std::nullopt;
+}
+
+bool AllocToken::isInstrumentableLibFunc(LibFunc Func, const CallBase &CB,
+ const TargetLibraryInfo &TLI) {
+ if (ignoreInstrumentableLibFunc(Func))
+ return false;
+
+ if (isAllocationFn(&CB, &TLI))
+ return true;
+
+ switch (Func) {
+ // These libfuncs don't return normal pointers, and are therefore not handled
+ // by isAllocationFn().
+ case LibFunc_posix_memalign:
+ case LibFunc_size_returning_new:
+ case LibFunc_size_returning_new_hot_cold:
+ case LibFunc_size_returning_new_aligned:
+ case LibFunc_size_returning_new_aligned_hot_cold:
+ return true;
+
+ // See comment above ClCoverReplaceableNew.
+ case LibFunc_Znwj:
+ case LibFunc_ZnwjRKSt9nothrow_t:
+ case LibFunc_ZnwjSt11align_val_t:
+ case LibFunc_ZnwjSt11align_val_tRKSt9nothrow_t:
+ case LibFunc_Znwm:
+ case LibFunc_Znwm12__hot_cold_t:
+ case LibFunc_ZnwmRKSt9nothrow_t:
+ case LibFunc_ZnwmRKSt9nothrow_t12__hot_cold_t:
+ case LibFunc_ZnwmSt11align_val_t:
+ case LibFunc_ZnwmSt11align_val_t12__hot_cold_t:
+ case LibFunc_ZnwmSt11align_val_tRKSt9nothrow_t:
+ case LibFunc_ZnwmSt11align_val_tRKSt9nothrow_t12__hot_cold_t:
+ case LibFunc_Znaj:
+ case LibFunc_ZnajRKSt9nothrow_t:
+ case LibFunc_ZnajSt11align_val_t:
+ case LibFunc_ZnajSt11align_val_tRKSt9nothrow_t:
+ case LibFunc_Znam:
+ case LibFunc_Znam12__hot_cold_t:
+ case LibFunc_ZnamRKSt9nothrow_t:
+ case LibFunc_ZnamRKSt9nothrow_t12__hot_cold_t:
+ case LibFunc_ZnamSt11align_val_t:
+ case LibFunc_ZnamSt11align_val_t12__hot_cold_t:
+ case LibFunc_ZnamSt11align_val_tRKSt9nothrow_t:
+ case LibFunc_ZnamSt11align_val_tRKSt9nothrow_t12__hot_cold_t:
+ return ClCoverReplaceableNew;
+
+ default:
+ return false;
+ }
+}
+
+bool AllocToken::ignoreInstrumentableLibFunc(LibFunc Func) {
+ switch (Func) {
+ case LibFunc_strdup:
+ case LibFunc_dunder_strdup:
+ case LibFunc_strndup:
+ case LibFunc_dunder_strndup:
+ return true;
+ default:
+ return false;
+ }
+}
+
+bool AllocToken::replaceAllocationCall(CallBase *CB, LibFunc Func,
+ OptimizationRemarkEmitter &ORE,
+ const TargetLibraryInfo &TLI) {
+ uint64_t TokenID = getToken(*CB, ORE);
+
+ FunctionCallee TokenAlloc = getTokenAllocFunction(*CB, TokenID, Func);
+ if (!TokenAlloc)
+ return false;
+ NumAllocationsInstrumented++;
+
+ if (Options.FastABI) {
+ assert(TokenAlloc.getFunctionType()->getNumParams() == CB->arg_size());
+ CB->setCalledFunction(TokenAlloc);
+ return true;
+ }
+
+ IRBuilder<> IRB(CB);
+ // Original args.
+ SmallVector<Value *, 4> NewArgs{CB->args()};
+ // Add token ID, truncated to IntPtrTy width.
+ NewArgs.push_back(ConstantInt::get(IntPtrTy, TokenID));
+ assert(TokenAlloc.getFunctionType()->getNumParams() == NewArgs.size());
+
+ // Preserve invoke vs call semantics for exception handling.
+ CallBase *NewCall;
+ if (auto *II = dyn_cast<InvokeInst>(CB)) {
+ NewCall = IRB.CreateInvoke(TokenAlloc, II->getNormalDest(),
+ II->getUnwindDest(), NewArgs);
+ } else {
+ NewCall = IRB.CreateCall(TokenAlloc, NewArgs);
+ cast<CallInst>(NewCall)->setTailCall(CB->isTailCall());
+ }
+ NewCall->setCallingConv(CB->getCallingConv());
+ NewCall->copyMetadata(*CB);
+ NewCall->setAttributes(CB->getAttributes());
+
+ // Replace all uses and delete the old call.
+ CB->replaceAllUsesWith(NewCall);
+ CB->eraseFromParent();
+ return true;
+}
+
+FunctionCallee AllocToken::getTokenAllocFunction(const CallBase &CB,
+ uint64_t TokenID,
+ LibFunc OriginalFunc) {
+ std::optional<std::pair<LibFunc, uint64_t>> Key;
+ if (OriginalFunc != NotLibFunc) {
+ Key = std::make_pair(OriginalFunc, Options.FastABI ? TokenID : 0);
+ auto It = TokenAllocFunctions.find(*Key);
+ if (It != TokenAllocFunctions.end())
+ return It->second;
+ }
+
+ const Function *Callee = CB.getCalledFunction();
+ if (!Callee)
+ return FunctionCallee();
+ const FunctionType *OldFTy = Callee->getFunctionType();
+ if (OldFTy->isVarArg())
+ return FunctionCallee();
+ // Copy params, and append token ID type.
+ Type *RetTy = OldFTy->getReturnType();
+ SmallVector<Type *, 4> NewParams{OldFTy->params()};
+ std::string TokenAllocName = ClFuncPrefix;
+ if (Options.FastABI)
+ TokenAllocName += utostr(TokenID) + "_";
+ else
+ NewParams.push_back(IntPtrTy); // token ID
+ TokenAllocName += Callee->getName();
+ FunctionType *NewFTy = FunctionType::get(RetTy, NewParams, false);
+ FunctionCallee TokenAlloc = Mod.getOrInsertFunction(TokenAllocName, NewFTy);
+ if (Function *F = dyn_cast<Function>(TokenAlloc.getCallee()))
+ F->copyAttributesFrom(Callee); // preserve attrs
+
+ if (Key.has_value())
+ TokenAllocFunctions[*Key] = TokenAlloc;
+ return TokenAlloc;
+}
+
+} // namespace
+
+AllocTokenPass::AllocTokenPass(AllocTokenOptions Opts)
+ : Options(std::move(Opts)) {}
+
+PreservedAnalyses AllocTokenPass::run(Module &M, ModuleAnalysisManager &MAM) {
+ AllocToken Pass(Options, M, MAM);
+ bool Modified = false;
+
+ for (Function &F : M) {
+ if (F.empty())
+ continue; // declaration
+ Modified |= Pass.instrumentFunction(F);
+ }
+
+ return Modified ? PreservedAnalyses::none().preserveSet<CFGAnalyses>()
+ : PreservedAnalyses::all();
+}
diff --git a/llvm/lib/Transforms/Instrumentation/CMakeLists.txt b/llvm/lib/Transforms/Instrumentation/CMakeLists.txt
index 15fd421..80576c6 100644
--- a/llvm/lib/Transforms/Instrumentation/CMakeLists.txt
+++ b/llvm/lib/Transforms/Instrumentation/CMakeLists.txt
@@ -1,5 +1,6 @@
add_llvm_component_library(LLVMInstrumentation
AddressSanitizer.cpp
+ AllocToken.cpp
BoundsChecking.cpp
CGProfile.cpp
ControlHeightReduction.cpp
diff --git a/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
index 480ff4a..5ba2167 100644
--- a/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
@@ -261,6 +261,11 @@ static cl::opt<bool> ClIgnorePersonalityRoutine(
"list, do not create a wrapper for it."),
cl::Hidden, cl::init(false));
+static cl::opt<bool> ClAddGlobalNameSuffix(
+ "dfsan-add-global-name-suffix",
+ cl::desc("Whether to add .dfsan suffix to global names"), cl::Hidden,
+ cl::init(true));
+
static StringRef getGlobalTypeString(const GlobalValue &G) {
// Types of GlobalVariables are always pointer types.
Type *GType = G.getValueType();
@@ -1256,6 +1261,9 @@ DataFlowSanitizer::WrapperKind DataFlowSanitizer::getWrapperKind(Function *F) {
}
void DataFlowSanitizer::addGlobalNameSuffix(GlobalValue *GV) {
+ if (!ClAddGlobalNameSuffix)
+ return;
+
std::string GVName = std::string(GV->getName()), Suffix = ".dfsan";
GV->setName(GVName + Suffix);
@@ -1784,10 +1792,8 @@ bool DataFlowSanitizer::runImpl(
}
Value *DFSanFunction::getArgTLS(Type *T, unsigned ArgOffset, IRBuilder<> &IRB) {
- Value *Base = IRB.CreatePointerCast(DFS.ArgTLS, DFS.IntptrTy);
- if (ArgOffset)
- Base = IRB.CreateAdd(Base, ConstantInt::get(DFS.IntptrTy, ArgOffset));
- return IRB.CreateIntToPtr(Base, PointerType::get(*DFS.Ctx, 0), "_dfsarg");
+ return IRB.CreatePtrAdd(DFS.ArgTLS, ConstantInt::get(DFS.IntptrTy, ArgOffset),
+ "_dfsarg");
}
Value *DFSanFunction::getRetvalTLS(Type *T, IRBuilder<> &IRB) {
diff --git a/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp b/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp
index e9a3e98..584cdad 100644
--- a/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp
+++ b/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp
@@ -89,6 +89,7 @@ STATISTIC(NumTransforms, "Number of transformations done");
STATISTIC(NumCloned, "Number of blocks cloned");
STATISTIC(NumPaths, "Number of individual paths threaded");
+namespace llvm {
static cl::opt<bool>
ClViewCfgBefore("dfa-jump-view-cfg-before",
cl::desc("View the CFG before DFA Jump Threading"),
@@ -120,8 +121,17 @@ static cl::opt<unsigned>
cl::desc("Maximum cost accepted for the transformation"),
cl::Hidden, cl::init(50));
-namespace {
+extern cl::opt<bool> ProfcheckDisableMetadataFixes;
+
+} // namespace llvm
+
+static cl::opt<double> MaxClonedRate(
+ "dfa-max-cloned-rate",
+ cl::desc(
+ "Maximum cloned instructions rate accepted for the transformation"),
+ cl::Hidden, cl::init(7.5));
+namespace {
class SelectInstToUnfold {
SelectInst *SI;
PHINode *SIUse;
@@ -135,10 +145,6 @@ public:
explicit operator bool() const { return SI && SIUse; }
};
-void unfold(DomTreeUpdater *DTU, LoopInfo *LI, SelectInstToUnfold SIToUnfold,
- std::vector<SelectInstToUnfold> *NewSIsToUnfold,
- std::vector<BasicBlock *> *NewBBs);
-
class DFAJumpThreading {
public:
DFAJumpThreading(AssumptionCache *AC, DominatorTree *DT, LoopInfo *LI,
@@ -152,7 +158,8 @@ private:
void
unfoldSelectInstrs(DominatorTree *DT,
const SmallVector<SelectInstToUnfold, 4> &SelectInsts) {
- DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
+ // TODO: Have everything use a single lazy DTU
+ DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
SmallVector<SelectInstToUnfold, 4> Stack(SelectInsts);
while (!Stack.empty()) {
@@ -167,16 +174,18 @@ private:
}
}
+ static void unfold(DomTreeUpdater *DTU, LoopInfo *LI,
+ SelectInstToUnfold SIToUnfold,
+ std::vector<SelectInstToUnfold> *NewSIsToUnfold,
+ std::vector<BasicBlock *> *NewBBs);
+
AssumptionCache *AC;
DominatorTree *DT;
LoopInfo *LI;
TargetTransformInfo *TTI;
OptimizationRemarkEmitter *ORE;
};
-
-} // end anonymous namespace
-
-namespace {
+} // namespace
/// Unfold the select instruction held in \p SIToUnfold by replacing it with
/// control flow.
@@ -185,9 +194,10 @@ namespace {
/// created basic blocks into \p NewBBs.
///
/// TODO: merge it with CodeGenPrepare::optimizeSelectInst() if possible.
-void unfold(DomTreeUpdater *DTU, LoopInfo *LI, SelectInstToUnfold SIToUnfold,
- std::vector<SelectInstToUnfold> *NewSIsToUnfold,
- std::vector<BasicBlock *> *NewBBs) {
+void DFAJumpThreading::unfold(DomTreeUpdater *DTU, LoopInfo *LI,
+ SelectInstToUnfold SIToUnfold,
+ std::vector<SelectInstToUnfold> *NewSIsToUnfold,
+ std::vector<BasicBlock *> *NewBBs) {
SelectInst *SI = SIToUnfold.getInst();
PHINode *SIUse = SIToUnfold.getUse();
assert(SI->hasOneUse());
@@ -253,7 +263,11 @@ void unfold(DomTreeUpdater *DTU, LoopInfo *LI, SelectInstToUnfold SIToUnfold,
// Insert the real conditional branch based on the original condition.
StartBlockTerm->eraseFromParent();
- BranchInst::Create(EndBlock, NewBlock, SI->getCondition(), StartBlock);
+ auto *BI =
+ BranchInst::Create(EndBlock, NewBlock, SI->getCondition(), StartBlock);
+ if (!ProfcheckDisableMetadataFixes)
+ BI->setMetadata(LLVMContext::MD_prof,
+ SI->getMetadata(LLVMContext::MD_prof));
DTU->applyUpdates({{DominatorTree::Insert, StartBlock, EndBlock},
{DominatorTree::Insert, StartBlock, NewBlock}});
} else {
@@ -288,7 +302,11 @@ void unfold(DomTreeUpdater *DTU, LoopInfo *LI, SelectInstToUnfold SIToUnfold,
// (Use)
BranchInst::Create(EndBlock, NewBlockF);
// Insert the real conditional branch based on the original condition.
- BranchInst::Create(EndBlock, NewBlockF, SI->getCondition(), NewBlockT);
+ auto *BI =
+ BranchInst::Create(EndBlock, NewBlockF, SI->getCondition(), NewBlockT);
+ if (!ProfcheckDisableMetadataFixes)
+ BI->setMetadata(LLVMContext::MD_prof,
+ SI->getMetadata(LLVMContext::MD_prof));
DTU->applyUpdates({{DominatorTree::Insert, NewBlockT, NewBlockF},
{DominatorTree::Insert, NewBlockT, EndBlock},
{DominatorTree::Insert, NewBlockF, EndBlock}});
@@ -342,10 +360,12 @@ void unfold(DomTreeUpdater *DTU, LoopInfo *LI, SelectInstToUnfold SIToUnfold,
SI->eraseFromParent();
}
+namespace {
struct ClonedBlock {
BasicBlock *BB;
APInt State; ///< \p State corresponds to the next value of a switch stmnt.
};
+} // namespace
typedef std::deque<BasicBlock *> PathType;
typedef std::vector<PathType> PathsType;
@@ -375,6 +395,7 @@ inline raw_ostream &operator<<(raw_ostream &OS, const PathType &Path) {
return OS;
}
+namespace {
/// ThreadingPath is a path in the control flow of a loop that can be threaded
/// by cloning necessary basic blocks and replacing conditional branches with
/// unconditional ones. A threading path includes a list of basic blocks, the
@@ -814,11 +835,13 @@ struct TransformDFA {
: SwitchPaths(SwitchPaths), DT(DT), AC(AC), TTI(TTI), ORE(ORE),
EphValues(EphValues) {}
- void run() {
+ bool run() {
if (isLegalAndProfitableToTransform()) {
createAllExitPaths();
NumTransforms++;
+ return true;
}
+ return false;
}
private:
@@ -828,6 +851,7 @@ private:
/// also returns false if it is illegal to clone some required block.
bool isLegalAndProfitableToTransform() {
CodeMetrics Metrics;
+ uint64_t NumClonedInst = 0;
SwitchInst *Switch = SwitchPaths->getSwitchInst();
// Don't thread switch without multiple successors.
@@ -837,7 +861,6 @@ private:
// Note that DuplicateBlockMap is not being used as intended here. It is
// just being used to ensure (BB, State) pairs are only counted once.
DuplicateBlockMap DuplicateMap;
-
for (ThreadingPath &TPath : SwitchPaths->getThreadingPaths()) {
PathType PathBBs = TPath.getPath();
APInt NextState = TPath.getExitValue();
@@ -848,6 +871,7 @@ private:
BasicBlock *VisitedBB = getClonedBB(BB, NextState, DuplicateMap);
if (!VisitedBB) {
Metrics.analyzeBasicBlock(BB, *TTI, EphValues);
+ NumClonedInst += BB->sizeWithoutDebug();
DuplicateMap[BB].push_back({BB, NextState});
}
@@ -865,6 +889,7 @@ private:
if (VisitedBB)
continue;
Metrics.analyzeBasicBlock(BB, *TTI, EphValues);
+ NumClonedInst += BB->sizeWithoutDebug();
DuplicateMap[BB].push_back({BB, NextState});
}
@@ -901,6 +926,22 @@ private:
}
}
+ // Too much cloned instructions slow down later optimizations, especially
+ // SLPVectorizer.
+ // TODO: Thread the switch partially before reaching the threshold.
+ uint64_t NumOrigInst = 0;
+ for (auto *BB : DuplicateMap.keys())
+ NumOrigInst += BB->sizeWithoutDebug();
+ if (double(NumClonedInst) / double(NumOrigInst) > MaxClonedRate) {
+ LLVM_DEBUG(dbgs() << "DFA Jump Threading: Not jump threading, too much "
+ "instructions wll be cloned\n");
+ ORE->emit([&]() {
+ return OptimizationRemarkMissed(DEBUG_TYPE, "NotProfitable", Switch)
+ << "Too much instructions will be cloned.";
+ });
+ return false;
+ }
+
InstructionCost DuplicationCost = 0;
unsigned JumpTableSize = 0;
@@ -951,8 +992,6 @@ private:
/// Transform each threading path to effectively jump thread the DFA.
void createAllExitPaths() {
- DomTreeUpdater DTU(*DT, DomTreeUpdater::UpdateStrategy::Eager);
-
// Move the switch block to the end of the path, since it will be duplicated
BasicBlock *SwitchBlock = SwitchPaths->getSwitchBlock();
for (ThreadingPath &TPath : SwitchPaths->getThreadingPaths()) {
@@ -969,15 +1008,18 @@ private:
SmallPtrSet<BasicBlock *, 16> BlocksToClean;
BlocksToClean.insert_range(successors(SwitchBlock));
- for (ThreadingPath &TPath : SwitchPaths->getThreadingPaths()) {
- createExitPath(NewDefs, TPath, DuplicateMap, BlocksToClean, &DTU);
- NumPaths++;
- }
+ {
+ DomTreeUpdater DTU(*DT, DomTreeUpdater::UpdateStrategy::Lazy);
+ for (const ThreadingPath &TPath : SwitchPaths->getThreadingPaths()) {
+ createExitPath(NewDefs, TPath, DuplicateMap, BlocksToClean, &DTU);
+ NumPaths++;
+ }
- // After all paths are cloned, now update the last successor of the cloned
- // path so it skips over the switch statement
- for (ThreadingPath &TPath : SwitchPaths->getThreadingPaths())
- updateLastSuccessor(TPath, DuplicateMap, &DTU);
+ // After all paths are cloned, now update the last successor of the cloned
+ // path so it skips over the switch statement
+ for (const ThreadingPath &TPath : SwitchPaths->getThreadingPaths())
+ updateLastSuccessor(TPath, DuplicateMap, &DTU);
+ }
// For each instruction that was cloned and used outside, update its uses
updateSSA(NewDefs);
@@ -993,7 +1035,7 @@ private:
/// To remember the correct destination, we have to duplicate blocks
/// corresponding to each state. Also update the terminating instruction of
/// the predecessors, and phis in the successor blocks.
- void createExitPath(DefMap &NewDefs, ThreadingPath &Path,
+ void createExitPath(DefMap &NewDefs, const ThreadingPath &Path,
DuplicateBlockMap &DuplicateMap,
SmallPtrSet<BasicBlock *, 16> &BlocksToClean,
DomTreeUpdater *DTU) {
@@ -1239,7 +1281,7 @@ private:
///
/// Note that this is an optional step and would have been done in later
/// optimizations, but it makes the CFG significantly easier to work with.
- void updateLastSuccessor(ThreadingPath &TPath,
+ void updateLastSuccessor(const ThreadingPath &TPath,
DuplicateBlockMap &DuplicateMap,
DomTreeUpdater *DTU) {
APInt NextState = TPath.getExitValue();
@@ -1336,6 +1378,7 @@ private:
SmallPtrSet<const Value *, 32> EphValues;
std::vector<ThreadingPath> TPaths;
};
+} // namespace
bool DFAJumpThreading::run(Function &F) {
LLVM_DEBUG(dbgs() << "\nDFA Jump threading: " << F.getName() << "\n");
@@ -1402,9 +1445,8 @@ bool DFAJumpThreading::run(Function &F) {
for (AllSwitchPaths SwitchPaths : ThreadableLoops) {
TransformDFA Transform(&SwitchPaths, DT, AC, TTI, ORE, EphValues);
- Transform.run();
- MadeChanges = true;
- LoopInfoBroken = true;
+ if (Transform.run())
+ MadeChanges = LoopInfoBroken = true;
}
#ifdef EXPENSIVE_CHECKS
@@ -1415,8 +1457,6 @@ bool DFAJumpThreading::run(Function &F) {
return MadeChanges;
}
-} // end anonymous namespace
-
/// Integrate with the new Pass Manager
PreservedAnalyses DFAJumpThreadingPass::run(Function &F,
FunctionAnalysisManager &AM) {
diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp
index bbd1ed6..5ba6f95f 100644
--- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp
+++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp
@@ -970,6 +970,7 @@ Function *CodeExtractor::constructFunctionDeclaration(
case Attribute::SanitizeMemTag:
case Attribute::SanitizeRealtime:
case Attribute::SanitizeRealtimeBlocking:
+ case Attribute::SanitizeAllocToken:
case Attribute::SpeculativeLoadHardening:
case Attribute::StackProtect:
case Attribute::StackProtectReq:
diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp
index 21b2652..b6ca52e 100644
--- a/llvm/lib/Transforms/Utils/Local.cpp
+++ b/llvm/lib/Transforms/Utils/Local.cpp
@@ -3031,6 +3031,13 @@ static void combineMetadata(Instruction *K, const Instruction *J,
K->getContext(), MDNode::toCaptureComponents(JMD) |
MDNode::toCaptureComponents(KMD)));
break;
+ case LLVMContext::MD_alloc_token:
+ // Preserve !alloc_token if both K and J have it, and they are equal.
+ if (KMD == JMD)
+ K->setMetadata(Kind, JMD);
+ else
+ K->setMetadata(Kind, nullptr);
+ break;
}
}
// Set !invariant.group from J if J has it. If both instructions have it
diff --git a/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
index bf882d7..6312831 100644
--- a/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
@@ -201,18 +201,27 @@ static void ConnectProlog(Loop *L, Value *BECount, unsigned Count,
/// unroll count is non-zero.
///
/// This function performs the following:
-/// - Update PHI nodes at the unrolling loop exit and epilog loop exit
-/// - Create PHI nodes at the unrolling loop exit to combine
-/// values that exit the unrolling loop code and jump around it.
+/// - Update PHI nodes at the epilog loop exit
+/// - Create PHI nodes at the unrolling loop exit and epilog preheader to
+/// combine values that exit the unrolling loop code and jump around it.
/// - Update PHI operands in the epilog loop by the new PHI nodes
-/// - Branch around the epilog loop if extra iters (ModVal) is zero.
+/// - At the unrolling loop exit, branch around the epilog loop if extra iters
+// (ModVal) is zero.
+/// - At the epilog preheader, add an llvm.assume call that extra iters is
+/// non-zero. If the unrolling loop exit is the predecessor, the above new
+/// branch guarantees that assumption. If the unrolling loop preheader is the
+/// predecessor, then the required first iteration from the original loop has
+/// yet to be executed, so it must be executed in the epilog loop. If we
+/// later unroll the epilog loop, that llvm.assume call somehow enables
+/// ScalarEvolution to compute a epilog loop maximum trip count, which enables
+/// eliminating the branch at the end of the final unrolled epilog iteration.
///
static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit,
BasicBlock *Exit, BasicBlock *PreHeader,
BasicBlock *EpilogPreHeader, BasicBlock *NewPreHeader,
ValueToValueMapTy &VMap, DominatorTree *DT,
LoopInfo *LI, bool PreserveLCSSA, ScalarEvolution &SE,
- unsigned Count) {
+ unsigned Count, AssumptionCache &AC) {
BasicBlock *Latch = L->getLoopLatch();
assert(Latch && "Loop must have a latch");
BasicBlock *EpilogLatch = cast<BasicBlock>(VMap[Latch]);
@@ -231,7 +240,7 @@ static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit,
// EpilogLatch
// Exit (EpilogPN)
- // Update PHI nodes at NewExit and Exit.
+ // Update PHI nodes at Exit.
for (PHINode &PN : NewExit->phis()) {
// PN should be used in another PHI located in Exit block as
// Exit was split by SplitBlockPredecessors into Exit and NewExit
@@ -246,15 +255,11 @@ static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit,
// epilogue edges have already been added.
//
// There is EpilogPreHeader incoming block instead of NewExit as
- // NewExit was spilt 1 more time to get EpilogPreHeader.
+ // NewExit was split 1 more time to get EpilogPreHeader.
assert(PN.hasOneUse() && "The phi should have 1 use");
PHINode *EpilogPN = cast<PHINode>(PN.use_begin()->getUser());
assert(EpilogPN->getParent() == Exit && "EpilogPN should be in Exit block");
- // Add incoming PreHeader from branch around the Loop
- PN.addIncoming(PoisonValue::get(PN.getType()), PreHeader);
- SE.forgetValue(&PN);
-
Value *V = PN.getIncomingValueForBlock(Latch);
Instruction *I = dyn_cast<Instruction>(V);
if (I && L->contains(I))
@@ -271,35 +276,52 @@ static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit,
NewExit);
// Now PHIs should look like:
// NewExit:
- // PN = PHI [I, Latch], [poison, PreHeader]
+ // PN = PHI [I, Latch]
// ...
// Exit:
// EpilogPN = PHI [PN, NewExit], [VMap[I], EpilogLatch]
}
- // Create PHI nodes at NewExit (from the unrolling loop Latch and PreHeader).
- // Update corresponding PHI nodes in epilog loop.
+ // Create PHI nodes at NewExit (from the unrolling loop Latch) and at
+ // EpilogPreHeader (from PreHeader and NewExit). Update corresponding PHI
+ // nodes in epilog loop.
for (BasicBlock *Succ : successors(Latch)) {
// Skip this as we already updated phis in exit blocks.
if (!L->contains(Succ))
continue;
+
+ // Succ here appears to always be just L->getHeader(). Otherwise, how do we
+ // know its corresponding epilog block (from VMap) is EpilogHeader and thus
+ // EpilogPreHeader is the right incoming block for VPN, as set below?
+ // TODO: Can we thus avoid the enclosing loop over successors?
+ assert(Succ == L->getHeader() &&
+ "Expect the only in-loop successor of latch to be the loop header");
+
for (PHINode &PN : Succ->phis()) {
- // Add new PHI nodes to the loop exit block and update epilog
- // PHIs with the new PHI values.
- PHINode *NewPN = PHINode::Create(PN.getType(), 2, PN.getName() + ".unr");
- NewPN->insertBefore(NewExit->getFirstNonPHIIt());
- // Adding a value to the new PHI node from the unrolling loop preheader.
- NewPN->addIncoming(PN.getIncomingValueForBlock(NewPreHeader), PreHeader);
- // Adding a value to the new PHI node from the unrolling loop latch.
- NewPN->addIncoming(PN.getIncomingValueForBlock(Latch), Latch);
+ // Add new PHI nodes to the loop exit block.
+ PHINode *NewPN0 = PHINode::Create(PN.getType(), /*NumReservedValues=*/1,
+ PN.getName() + ".unr");
+ NewPN0->insertBefore(NewExit->getFirstNonPHIIt());
+ // Add value to the new PHI node from the unrolling loop latch.
+ NewPN0->addIncoming(PN.getIncomingValueForBlock(Latch), Latch);
+
+ // Add new PHI nodes to EpilogPreHeader.
+ PHINode *NewPN1 = PHINode::Create(PN.getType(), /*NumReservedValues=*/2,
+ PN.getName() + ".epil.init");
+ NewPN1->insertBefore(EpilogPreHeader->getFirstNonPHIIt());
+ // Add value to the new PHI node from the unrolling loop preheader.
+ NewPN1->addIncoming(PN.getIncomingValueForBlock(NewPreHeader), PreHeader);
+ // Add value to the new PHI node from the epilog loop guard.
+ NewPN1->addIncoming(NewPN0, NewExit);
// Update the existing PHI node operand with the value from the new PHI
// node. Corresponding instruction in epilog loop should be PHI.
PHINode *VPN = cast<PHINode>(VMap[&PN]);
- VPN->setIncomingValueForBlock(EpilogPreHeader, NewPN);
+ VPN->setIncomingValueForBlock(EpilogPreHeader, NewPN1);
}
}
+ // In NewExit, branch around the epilog loop if no extra iters.
Instruction *InsertPt = NewExit->getTerminator();
IRBuilder<> B(InsertPt);
Value *BrLoopExit = B.CreateIsNotNull(ModVal, "lcmp.mod");
@@ -308,7 +330,7 @@ static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit,
SmallVector<BasicBlock*, 4> Preds(predecessors(Exit));
SplitBlockPredecessors(Exit, Preds, ".epilog-lcssa", DT, LI, nullptr,
PreserveLCSSA);
- // Add the branch to the exit block (around the unrolling loop)
+ // Add the branch to the exit block (around the epilog loop)
MDNode *BranchWeights = nullptr;
if (hasBranchWeightMD(*Latch->getTerminator())) {
// Assume equal distribution in interval [0, Count).
@@ -322,10 +344,11 @@ static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit,
DT->changeImmediateDominator(Exit, NewDom);
}
- // Split the main loop exit to maintain canonicalization guarantees.
- SmallVector<BasicBlock*, 4> NewExitPreds{Latch};
- SplitBlockPredecessors(NewExit, NewExitPreds, ".loopexit", DT, LI, nullptr,
- PreserveLCSSA);
+ // In EpilogPreHeader, assume extra iters is non-zero.
+ IRBuilder<> B2(EpilogPreHeader, EpilogPreHeader->getFirstNonPHIIt());
+ Value *ModIsNotNull = B2.CreateIsNotNull(ModVal, "lcmp.mod");
+ AssumeInst *AI = cast<AssumeInst>(B2.CreateAssumption(ModIsNotNull));
+ AC.registerAssumption(AI);
}
/// Create a clone of the blocks in a loop and connect them together. A new
@@ -795,7 +818,8 @@ bool llvm::UnrollRuntimeLoopRemainder(
ConstantInt::get(BECount->getType(),
Count - 1)) :
B.CreateIsNotNull(ModVal, "lcmp.mod");
- BasicBlock *RemainderLoop = UseEpilogRemainder ? NewExit : PrologPreHeader;
+ BasicBlock *RemainderLoop =
+ UseEpilogRemainder ? EpilogPreHeader : PrologPreHeader;
BasicBlock *UnrollingLoop = UseEpilogRemainder ? NewPreHeader : PrologExit;
// Branch to either remainder (extra iterations) loop or unrolling loop.
MDNode *BranchWeights = nullptr;
@@ -808,7 +832,7 @@ bool llvm::UnrollRuntimeLoopRemainder(
PreHeaderBR->eraseFromParent();
if (DT) {
if (UseEpilogRemainder)
- DT->changeImmediateDominator(NewExit, PreHeader);
+ DT->changeImmediateDominator(EpilogPreHeader, PreHeader);
else
DT->changeImmediateDominator(PrologExit, PreHeader);
}
@@ -880,7 +904,8 @@ bool llvm::UnrollRuntimeLoopRemainder(
// from both the original loop and the remainder code reaching the exit
// blocks. While the IDom of these exit blocks were from the original loop,
// now the IDom is the preheader (which decides whether the original loop or
- // remainder code should run).
+ // remainder code should run) unless the block still has just the original
+ // predecessor (such as NewExit in the case of an epilog remainder).
if (DT && !L->getExitingBlock()) {
SmallVector<BasicBlock *, 16> ChildrenToUpdate;
// NB! We have to examine the dom children of all loop blocks, not just
@@ -891,7 +916,8 @@ bool llvm::UnrollRuntimeLoopRemainder(
auto *DomNodeBB = DT->getNode(BB);
for (auto *DomChild : DomNodeBB->children()) {
auto *DomChildBB = DomChild->getBlock();
- if (!L->contains(LI->getLoopFor(DomChildBB)))
+ if (!L->contains(LI->getLoopFor(DomChildBB)) &&
+ DomChildBB->getUniquePredecessor() != BB)
ChildrenToUpdate.push_back(DomChildBB);
}
}
@@ -930,7 +956,7 @@ bool llvm::UnrollRuntimeLoopRemainder(
// Connect the epilog code to the original loop and update the
// PHI functions.
ConnectEpilog(L, ModVal, NewExit, LatchExit, PreHeader, EpilogPreHeader,
- NewPreHeader, VMap, DT, LI, PreserveLCSSA, *SE, Count);
+ NewPreHeader, VMap, DT, LI, PreserveLCSSA, *SE, Count, *AC);
// Update counter in loop for unrolling.
// Use an incrementing IV. Pre-incr/post-incr is backedge/trip count.
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index c167dd7..fb696be 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -2263,8 +2263,7 @@ public:
/// debug location \p DL.
VPWidenPHIRecipe(PHINode *Phi, VPValue *Start = nullptr,
DebugLoc DL = DebugLoc::getUnknown(), const Twine &Name = "")
- : VPSingleDefRecipe(VPDef::VPWidenPHISC, ArrayRef<VPValue *>(), Phi, DL),
- Name(Name.str()) {
+ : VPSingleDefRecipe(VPDef::VPWidenPHISC, {}, Phi, DL), Name(Name.str()) {
if (Start)
addOperand(Start);
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
index c8212af..b36298f 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
@@ -763,6 +763,8 @@ void VPlanTransforms::addMinimumVectorEpilogueIterationCheck(
// Add the minimum iteration check for the epilogue vector loop.
VPValue *TC = Plan.getOrAddLiveIn(TripCount);
VPBuilder Builder(cast<VPBasicBlock>(Plan.getEntry()));
+ VPValue *VFxUF = Builder.createExpandSCEV(SE.getElementCount(
+ TripCount->getType(), (EpilogueVF * EpilogueUF), SCEV::FlagNUW));
VPValue *Count = Builder.createNaryOp(
Instruction::Sub, {TC, Plan.getOrAddLiveIn(VectorTripCount)},
DebugLoc::getUnknown(), "n.vec.remaining");
@@ -770,9 +772,6 @@ void VPlanTransforms::addMinimumVectorEpilogueIterationCheck(
// Generate code to check if the loop's trip count is less than VF * UF of
// the vector epilogue loop.
auto P = RequiresScalarEpilogue ? ICmpInst::ICMP_ULE : ICmpInst::ICMP_ULT;
- VPValue *VFxUF = Builder.createExpandSCEV(SE.getElementCount(
- TripCount->getType(), (EpilogueVF * EpilogueUF), SCEV::FlagNUW));
-
auto *CheckMinIters = Builder.createICmp(
P, Count, VFxUF, DebugLoc::getUnknown(), "min.epilog.iters.check");
VPInstruction *Branch =
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index ebf833e..c8a2d84 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -3180,9 +3180,8 @@ expandVPWidenIntOrFpInduction(VPWidenIntOrFpInductionRecipe *WidenIVR,
DebugLoc::getUnknown(), "induction");
// Create the widened phi of the vector IV.
- auto *WidePHI = new VPWidenPHIRecipe(WidenIVR->getPHINode(), nullptr,
+ auto *WidePHI = new VPWidenPHIRecipe(WidenIVR->getPHINode(), Init,
WidenIVR->getDebugLoc(), "vec.ind");
- WidePHI->addOperand(Init);
WidePHI->insertBefore(WidenIVR);
// Create the backedge value for the vector IV.
@@ -3545,8 +3544,7 @@ tryToMatchAndCreateMulAccumulateReduction(VPReductionRecipe *Red,
VPValue *A, *B;
VPValue *Tmp = nullptr;
// Sub reductions could have a sub between the add reduction and vec op.
- if (match(VecOp,
- m_Binary<Instruction::Sub>(m_SpecificInt(0), m_VPValue(Tmp)))) {
+ if (match(VecOp, m_Sub(m_ZeroInt(), m_VPValue(Tmp)))) {
Sub = VecOp->getDefiningRecipe();
VecOp = Tmp;
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
index 0599930..66748c5 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
@@ -71,8 +71,8 @@ bool vputils::isHeaderMask(const VPValue *V, VPlan &Plan) {
m_Specific(&Plan.getVF()))) ||
IsWideCanonicalIV(A));
- return match(V, m_Binary<Instruction::ICmp>(m_VPValue(A), m_VPValue(B))) &&
- IsWideCanonicalIV(A) && B == Plan.getOrCreateBackedgeTakenCount();
+ return match(V, m_ICmp(m_VPValue(A), m_VPValue(B))) && IsWideCanonicalIV(A) &&
+ B == Plan.getOrCreateBackedgeTakenCount();
}
const SCEV *vputils::getSCEVExprForVPValue(VPValue *V, ScalarEvolution &SE) {