Diffstat (limited to 'llvm/lib/Transforms')
-rw-r--r-- | llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp | 5
-rw-r--r-- | llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp | 10
-rw-r--r-- | llvm/lib/Transforms/Instrumentation/AllocToken.cpp | 494
-rw-r--r-- | llvm/lib/Transforms/Instrumentation/CMakeLists.txt | 1
-rw-r--r-- | llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp | 14
-rw-r--r-- | llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp | 108
-rw-r--r-- | llvm/lib/Transforms/Utils/CodeExtractor.cpp | 1
-rw-r--r-- | llvm/lib/Transforms/Utils/Local.cpp | 7
-rw-r--r-- | llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp | 92
-rw-r--r-- | llvm/lib/Transforms/Vectorize/VPlan.h | 3
-rw-r--r-- | llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp | 5
-rw-r--r-- | llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 6
-rw-r--r-- | llvm/lib/Transforms/Vectorize/VPlanUtils.cpp | 4 |
13 files changed, 658 insertions, 92 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp index 9ca8194..56194fe 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -137,13 +137,10 @@ InstCombinerImpl::isEliminableCastPair(const CastInst *CI1, Instruction::CastOps secondOp = CI2->getOpcode(); Type *SrcIntPtrTy = SrcTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(SrcTy) : nullptr; - Type *MidIntPtrTy = - MidTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(MidTy) : nullptr; Type *DstIntPtrTy = DstTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(DstTy) : nullptr; unsigned Res = CastInst::isEliminableCastPair(firstOp, secondOp, SrcTy, MidTy, - DstTy, SrcIntPtrTy, MidIntPtrTy, - DstIntPtrTy); + DstTy, &DL); // We don't want to form an inttoptr or ptrtoint that converts to an integer // type that differs from the pointer size. diff --git a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp index cdae9a7..3704ad7 100644 --- a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -2662,7 +2662,7 @@ void ModuleAddressSanitizer::instrumentGlobals(IRBuilder<> &IRB, G->eraseFromParent(); NewGlobals[i] = NewGlobal; - Constant *ODRIndicator = ConstantPointerNull::get(PtrTy); + Constant *ODRIndicator = Constant::getNullValue(IntptrTy); GlobalValue *InstrumentedGlobal = NewGlobal; bool CanUsePrivateAliases = @@ -2677,8 +2677,7 @@ void ModuleAddressSanitizer::instrumentGlobals(IRBuilder<> &IRB, // ODR should not happen for local linkage. if (NewGlobal->hasLocalLinkage()) { - ODRIndicator = - ConstantExpr::getIntToPtr(ConstantInt::get(IntptrTy, -1), PtrTy); + ODRIndicator = ConstantInt::get(IntptrTy, -1); } else if (UseOdrIndicator) { // With local aliases, we need to provide another externally visible // symbol __odr_asan_XXX to detect ODR violation. @@ -2692,7 +2691,7 @@ void ModuleAddressSanitizer::instrumentGlobals(IRBuilder<> &IRB, ODRIndicatorSym->setVisibility(NewGlobal->getVisibility()); ODRIndicatorSym->setDLLStorageClass(NewGlobal->getDLLStorageClass()); ODRIndicatorSym->setAlignment(Align(1)); - ODRIndicator = ODRIndicatorSym; + ODRIndicator = ConstantExpr::getPtrToInt(ODRIndicatorSym, IntptrTy); } Constant *Initializer = ConstantStruct::get( @@ -2703,8 +2702,7 @@ void ModuleAddressSanitizer::instrumentGlobals(IRBuilder<> &IRB, ConstantExpr::getPointerCast(Name, IntptrTy), ConstantExpr::getPointerCast(getOrCreateModuleName(), IntptrTy), ConstantInt::get(IntptrTy, MD.IsDynInit), - Constant::getNullValue(IntptrTy), - ConstantExpr::getPointerCast(ODRIndicator, IntptrTy)); + Constant::getNullValue(IntptrTy), ODRIndicator); LLVM_DEBUG(dbgs() << "NEW GLOBAL: " << *NewGlobal << "\n"); diff --git a/llvm/lib/Transforms/Instrumentation/AllocToken.cpp b/llvm/lib/Transforms/Instrumentation/AllocToken.cpp new file mode 100644 index 0000000..782d5a1 --- /dev/null +++ b/llvm/lib/Transforms/Instrumentation/AllocToken.cpp @@ -0,0 +1,494 @@ +//===- AllocToken.cpp - Allocation token instrumentation ------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements AllocToken, an instrumentation pass that +// replaces allocation calls with token-enabled versions. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Instrumentation/AllocToken.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Analysis/MemoryBuiltins.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/IR/Analysis.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InstIterator.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Metadata.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/PassManager.h" +#include "llvm/IR/Type.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/RandomNumberGenerator.h" +#include "llvm/Support/SipHash.h" +#include "llvm/Support/raw_ostream.h" +#include <cassert> +#include <cstddef> +#include <cstdint> +#include <limits> +#include <memory> +#include <optional> +#include <string> +#include <utility> +#include <variant> + +using namespace llvm; + +#define DEBUG_TYPE "alloc-token" + +namespace { + +//===--- Constants --------------------------------------------------------===// + +enum class TokenMode : unsigned { + /// Incrementally increasing token ID. + Increment = 0, + + /// Simple mode that returns a statically-assigned random token ID. + Random = 1, + + /// Token ID based on allocated type hash. + TypeHash = 2, +}; + +//===--- Command-line options ---------------------------------------------===// + +cl::opt<TokenMode> + ClMode("alloc-token-mode", cl::Hidden, cl::desc("Token assignment mode"), + cl::init(TokenMode::TypeHash), + cl::values(clEnumValN(TokenMode::Increment, "increment", + "Incrementally increasing token ID"), + clEnumValN(TokenMode::Random, "random", + "Statically-assigned random token ID"), + clEnumValN(TokenMode::TypeHash, "typehash", + "Token ID based on allocated type hash"))); + +cl::opt<std::string> ClFuncPrefix("alloc-token-prefix", + cl::desc("The allocation function prefix"), + cl::Hidden, cl::init("__alloc_token_")); + +cl::opt<uint64_t> ClMaxTokens("alloc-token-max", + cl::desc("Maximum number of tokens (0 = no max)"), + cl::Hidden, cl::init(0)); + +cl::opt<bool> + ClFastABI("alloc-token-fast-abi", + cl::desc("The token ID is encoded in the function name"), + cl::Hidden, cl::init(false)); + +// Instrument libcalls only by default - compatible allocators only need to take +// care of providing standard allocation functions. With extended coverage, also +// instrument non-libcall allocation function calls with !alloc_token +// metadata. +cl::opt<bool> + ClExtended("alloc-token-extended", + cl::desc("Extend coverage to custom allocation functions"), + cl::Hidden, cl::init(false)); + +// C++ defines ::operator new (and variants) as replaceable (vs. 
standard +// library versions), which are nobuiltin, and are therefore not covered by +// isAllocationFn(). Cover by default, as users of AllocToken are already +// required to provide token-aware allocation functions (no defaults). +cl::opt<bool> ClCoverReplaceableNew("alloc-token-cover-replaceable-new", + cl::desc("Cover replaceable operator new"), + cl::Hidden, cl::init(true)); + +cl::opt<uint64_t> ClFallbackToken( + "alloc-token-fallback", + cl::desc("The default fallback token where none could be determined"), + cl::Hidden, cl::init(0)); + +//===--- Statistics -------------------------------------------------------===// + +STATISTIC(NumFunctionsInstrumented, "Functions instrumented"); +STATISTIC(NumAllocationsInstrumented, "Allocations instrumented"); + +//===----------------------------------------------------------------------===// + +/// Returns the !alloc_token metadata if available. +/// +/// Expected format is: !{<type-name>} +MDNode *getAllocTokenMetadata(const CallBase &CB) { + MDNode *Ret = CB.getMetadata(LLVMContext::MD_alloc_token); + if (!Ret) + return nullptr; + assert(Ret->getNumOperands() == 1 && "bad !alloc_token"); + assert(isa<MDString>(Ret->getOperand(0))); + return Ret; +} + +class ModeBase { +public: + explicit ModeBase(const IntegerType &TokenTy, uint64_t MaxTokens) + : MaxTokens(MaxTokens ? MaxTokens : TokenTy.getBitMask()) { + assert(MaxTokens <= TokenTy.getBitMask()); + } + +protected: + uint64_t boundedToken(uint64_t Val) const { + assert(MaxTokens != 0); + return Val % MaxTokens; + } + + const uint64_t MaxTokens; +}; + +/// Implementation for TokenMode::Increment. +class IncrementMode : public ModeBase { +public: + using ModeBase::ModeBase; + + uint64_t operator()(const CallBase &CB, OptimizationRemarkEmitter &) { + return boundedToken(Counter++); + } + +private: + uint64_t Counter = 0; +}; + +/// Implementation for TokenMode::Random. +class RandomMode : public ModeBase { +public: + RandomMode(const IntegerType &TokenTy, uint64_t MaxTokens, + std::unique_ptr<RandomNumberGenerator> RNG) + : ModeBase(TokenTy, MaxTokens), RNG(std::move(RNG)) {} + uint64_t operator()(const CallBase &CB, OptimizationRemarkEmitter &) { + return boundedToken((*RNG)()); + } + +private: + std::unique_ptr<RandomNumberGenerator> RNG; +}; + +/// Implementation for TokenMode::TypeHash. The implementation ensures +/// hashes are stable across different compiler invocations. Uses SipHash as the +/// hash function. +class TypeHashMode : public ModeBase { +public: + using ModeBase::ModeBase; + + uint64_t operator()(const CallBase &CB, OptimizationRemarkEmitter &ORE) { + if (MDNode *N = getAllocTokenMetadata(CB)) { + MDString *S = cast<MDString>(N->getOperand(0)); + return boundedToken(getStableSipHash(S->getString())); + } + remarkNoMetadata(CB, ORE); + return ClFallbackToken; + } + + /// Remark that there was no precise type information. + static void remarkNoMetadata(const CallBase &CB, + OptimizationRemarkEmitter &ORE) { + ORE.emit([&] { + ore::NV FuncNV("Function", CB.getParent()->getParent()); + const Function *Callee = CB.getCalledFunction(); + ore::NV CalleeNV("Callee", Callee ? Callee->getName() : "<unknown>"); + return OptimizationRemark(DEBUG_TYPE, "NoAllocToken", &CB) + << "Call to '" << CalleeNV << "' in '" << FuncNV + << "' without source-level type token"; + }); + } +}; + +// Apply opt overrides. 
+AllocTokenOptions transformOptionsFromCl(AllocTokenOptions Opts) { + if (!Opts.MaxTokens.has_value()) + Opts.MaxTokens = ClMaxTokens; + Opts.FastABI |= ClFastABI; + Opts.Extended |= ClExtended; + return Opts; +} + +class AllocToken { +public: + explicit AllocToken(AllocTokenOptions Opts, Module &M, + ModuleAnalysisManager &MAM) + : Options(transformOptionsFromCl(std::move(Opts))), Mod(M), + FAM(MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager()), + Mode(IncrementMode(*IntPtrTy, *Options.MaxTokens)) { + switch (ClMode.getValue()) { + case TokenMode::Increment: + break; + case TokenMode::Random: + Mode.emplace<RandomMode>(*IntPtrTy, *Options.MaxTokens, + M.createRNG(DEBUG_TYPE)); + break; + case TokenMode::TypeHash: + Mode.emplace<TypeHashMode>(*IntPtrTy, *Options.MaxTokens); + break; + } + } + + bool instrumentFunction(Function &F); + +private: + /// Returns the LibFunc (or NotLibFunc) if this call should be instrumented. + std::optional<LibFunc> + shouldInstrumentCall(const CallBase &CB, const TargetLibraryInfo &TLI) const; + + /// Returns true for functions that are eligible for instrumentation. + static bool isInstrumentableLibFunc(LibFunc Func, const CallBase &CB, + const TargetLibraryInfo &TLI); + + /// Returns true for isAllocationFn() functions that we should ignore. + static bool ignoreInstrumentableLibFunc(LibFunc Func); + + /// Replace a call/invoke with a call/invoke to the allocation function + /// with token ID. + bool replaceAllocationCall(CallBase *CB, LibFunc Func, + OptimizationRemarkEmitter &ORE, + const TargetLibraryInfo &TLI); + + /// Return replacement function for a LibFunc that takes a token ID. + FunctionCallee getTokenAllocFunction(const CallBase &CB, uint64_t TokenID, + LibFunc OriginalFunc); + + /// Return the token ID from metadata in the call. + uint64_t getToken(const CallBase &CB, OptimizationRemarkEmitter &ORE) { + return std::visit([&](auto &&Mode) { return Mode(CB, ORE); }, Mode); + } + + const AllocTokenOptions Options; + Module &Mod; + IntegerType *IntPtrTy = Mod.getDataLayout().getIntPtrType(Mod.getContext()); + FunctionAnalysisManager &FAM; + // Cache for replacement functions. + DenseMap<std::pair<LibFunc, uint64_t>, FunctionCallee> TokenAllocFunctions; + // Selected mode. + std::variant<IncrementMode, RandomMode, TypeHashMode> Mode; +}; + +bool AllocToken::instrumentFunction(Function &F) { + // Do not apply any instrumentation for naked functions. + if (F.hasFnAttribute(Attribute::Naked)) + return false; + if (F.hasFnAttribute(Attribute::DisableSanitizerInstrumentation)) + return false; + // Don't touch available_externally functions, their actual body is elsewhere. + if (F.getLinkage() == GlobalValue::AvailableExternallyLinkage) + return false; + // Only instrument functions that have the sanitize_alloc_token attribute. + if (!F.hasFnAttribute(Attribute::SanitizeAllocToken)) + return false; + + auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F); + auto &TLI = FAM.getResult<TargetLibraryAnalysis>(F); + SmallVector<std::pair<CallBase *, LibFunc>, 4> AllocCalls; + + // Collect all allocation calls to avoid iterator invalidation. 
+ for (Instruction &I : instructions(F)) { + auto *CB = dyn_cast<CallBase>(&I); + if (!CB) + continue; + if (std::optional<LibFunc> Func = shouldInstrumentCall(*CB, TLI)) + AllocCalls.emplace_back(CB, Func.value()); + } + + bool Modified = false; + for (auto &[CB, Func] : AllocCalls) + Modified |= replaceAllocationCall(CB, Func, ORE, TLI); + + if (Modified) + NumFunctionsInstrumented++; + return Modified; +} + +std::optional<LibFunc> +AllocToken::shouldInstrumentCall(const CallBase &CB, + const TargetLibraryInfo &TLI) const { + const Function *Callee = CB.getCalledFunction(); + if (!Callee) + return std::nullopt; + + // Ignore nobuiltin of the CallBase, so that we can cover nobuiltin libcalls + // if requested via isInstrumentableLibFunc(). Note that isAllocationFn() is + // returning false for nobuiltin calls. + LibFunc Func; + if (TLI.getLibFunc(*Callee, Func)) { + if (isInstrumentableLibFunc(Func, CB, TLI)) + return Func; + } else if (Options.Extended && getAllocTokenMetadata(CB)) { + return NotLibFunc; + } + + return std::nullopt; +} + +bool AllocToken::isInstrumentableLibFunc(LibFunc Func, const CallBase &CB, + const TargetLibraryInfo &TLI) { + if (ignoreInstrumentableLibFunc(Func)) + return false; + + if (isAllocationFn(&CB, &TLI)) + return true; + + switch (Func) { + // These libfuncs don't return normal pointers, and are therefore not handled + // by isAllocationFn(). + case LibFunc_posix_memalign: + case LibFunc_size_returning_new: + case LibFunc_size_returning_new_hot_cold: + case LibFunc_size_returning_new_aligned: + case LibFunc_size_returning_new_aligned_hot_cold: + return true; + + // See comment above ClCoverReplaceableNew. + case LibFunc_Znwj: + case LibFunc_ZnwjRKSt9nothrow_t: + case LibFunc_ZnwjSt11align_val_t: + case LibFunc_ZnwjSt11align_val_tRKSt9nothrow_t: + case LibFunc_Znwm: + case LibFunc_Znwm12__hot_cold_t: + case LibFunc_ZnwmRKSt9nothrow_t: + case LibFunc_ZnwmRKSt9nothrow_t12__hot_cold_t: + case LibFunc_ZnwmSt11align_val_t: + case LibFunc_ZnwmSt11align_val_t12__hot_cold_t: + case LibFunc_ZnwmSt11align_val_tRKSt9nothrow_t: + case LibFunc_ZnwmSt11align_val_tRKSt9nothrow_t12__hot_cold_t: + case LibFunc_Znaj: + case LibFunc_ZnajRKSt9nothrow_t: + case LibFunc_ZnajSt11align_val_t: + case LibFunc_ZnajSt11align_val_tRKSt9nothrow_t: + case LibFunc_Znam: + case LibFunc_Znam12__hot_cold_t: + case LibFunc_ZnamRKSt9nothrow_t: + case LibFunc_ZnamRKSt9nothrow_t12__hot_cold_t: + case LibFunc_ZnamSt11align_val_t: + case LibFunc_ZnamSt11align_val_t12__hot_cold_t: + case LibFunc_ZnamSt11align_val_tRKSt9nothrow_t: + case LibFunc_ZnamSt11align_val_tRKSt9nothrow_t12__hot_cold_t: + return ClCoverReplaceableNew; + + default: + return false; + } +} + +bool AllocToken::ignoreInstrumentableLibFunc(LibFunc Func) { + switch (Func) { + case LibFunc_strdup: + case LibFunc_dunder_strdup: + case LibFunc_strndup: + case LibFunc_dunder_strndup: + return true; + default: + return false; + } +} + +bool AllocToken::replaceAllocationCall(CallBase *CB, LibFunc Func, + OptimizationRemarkEmitter &ORE, + const TargetLibraryInfo &TLI) { + uint64_t TokenID = getToken(*CB, ORE); + + FunctionCallee TokenAlloc = getTokenAllocFunction(*CB, TokenID, Func); + if (!TokenAlloc) + return false; + NumAllocationsInstrumented++; + + if (Options.FastABI) { + assert(TokenAlloc.getFunctionType()->getNumParams() == CB->arg_size()); + CB->setCalledFunction(TokenAlloc); + return true; + } + + IRBuilder<> IRB(CB); + // Original args. 
+ SmallVector<Value *, 4> NewArgs{CB->args()}; + // Add token ID, truncated to IntPtrTy width. + NewArgs.push_back(ConstantInt::get(IntPtrTy, TokenID)); + assert(TokenAlloc.getFunctionType()->getNumParams() == NewArgs.size()); + + // Preserve invoke vs call semantics for exception handling. + CallBase *NewCall; + if (auto *II = dyn_cast<InvokeInst>(CB)) { + NewCall = IRB.CreateInvoke(TokenAlloc, II->getNormalDest(), + II->getUnwindDest(), NewArgs); + } else { + NewCall = IRB.CreateCall(TokenAlloc, NewArgs); + cast<CallInst>(NewCall)->setTailCall(CB->isTailCall()); + } + NewCall->setCallingConv(CB->getCallingConv()); + NewCall->copyMetadata(*CB); + NewCall->setAttributes(CB->getAttributes()); + + // Replace all uses and delete the old call. + CB->replaceAllUsesWith(NewCall); + CB->eraseFromParent(); + return true; +} + +FunctionCallee AllocToken::getTokenAllocFunction(const CallBase &CB, + uint64_t TokenID, + LibFunc OriginalFunc) { + std::optional<std::pair<LibFunc, uint64_t>> Key; + if (OriginalFunc != NotLibFunc) { + Key = std::make_pair(OriginalFunc, Options.FastABI ? TokenID : 0); + auto It = TokenAllocFunctions.find(*Key); + if (It != TokenAllocFunctions.end()) + return It->second; + } + + const Function *Callee = CB.getCalledFunction(); + if (!Callee) + return FunctionCallee(); + const FunctionType *OldFTy = Callee->getFunctionType(); + if (OldFTy->isVarArg()) + return FunctionCallee(); + // Copy params, and append token ID type. + Type *RetTy = OldFTy->getReturnType(); + SmallVector<Type *, 4> NewParams{OldFTy->params()}; + std::string TokenAllocName = ClFuncPrefix; + if (Options.FastABI) + TokenAllocName += utostr(TokenID) + "_"; + else + NewParams.push_back(IntPtrTy); // token ID + TokenAllocName += Callee->getName(); + FunctionType *NewFTy = FunctionType::get(RetTy, NewParams, false); + FunctionCallee TokenAlloc = Mod.getOrInsertFunction(TokenAllocName, NewFTy); + if (Function *F = dyn_cast<Function>(TokenAlloc.getCallee())) + F->copyAttributesFrom(Callee); // preserve attrs + + if (Key.has_value()) + TokenAllocFunctions[*Key] = TokenAlloc; + return TokenAlloc; +} + +} // namespace + +AllocTokenPass::AllocTokenPass(AllocTokenOptions Opts) + : Options(std::move(Opts)) {} + +PreservedAnalyses AllocTokenPass::run(Module &M, ModuleAnalysisManager &MAM) { + AllocToken Pass(Options, M, MAM); + bool Modified = false; + + for (Function &F : M) { + if (F.empty()) + continue; // declaration + Modified |= Pass.instrumentFunction(F); + } + + return Modified ? 
PreservedAnalyses::none().preserveSet<CFGAnalyses>() + : PreservedAnalyses::all(); +} diff --git a/llvm/lib/Transforms/Instrumentation/CMakeLists.txt b/llvm/lib/Transforms/Instrumentation/CMakeLists.txt index 15fd421..80576c6 100644 --- a/llvm/lib/Transforms/Instrumentation/CMakeLists.txt +++ b/llvm/lib/Transforms/Instrumentation/CMakeLists.txt @@ -1,5 +1,6 @@ add_llvm_component_library(LLVMInstrumentation AddressSanitizer.cpp + AllocToken.cpp BoundsChecking.cpp CGProfile.cpp ControlHeightReduction.cpp diff --git a/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp index 480ff4a..5ba2167 100644 --- a/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp @@ -261,6 +261,11 @@ static cl::opt<bool> ClIgnorePersonalityRoutine( "list, do not create a wrapper for it."), cl::Hidden, cl::init(false)); +static cl::opt<bool> ClAddGlobalNameSuffix( + "dfsan-add-global-name-suffix", + cl::desc("Whether to add .dfsan suffix to global names"), cl::Hidden, + cl::init(true)); + static StringRef getGlobalTypeString(const GlobalValue &G) { // Types of GlobalVariables are always pointer types. Type *GType = G.getValueType(); @@ -1256,6 +1261,9 @@ DataFlowSanitizer::WrapperKind DataFlowSanitizer::getWrapperKind(Function *F) { } void DataFlowSanitizer::addGlobalNameSuffix(GlobalValue *GV) { + if (!ClAddGlobalNameSuffix) + return; + std::string GVName = std::string(GV->getName()), Suffix = ".dfsan"; GV->setName(GVName + Suffix); @@ -1784,10 +1792,8 @@ bool DataFlowSanitizer::runImpl( } Value *DFSanFunction::getArgTLS(Type *T, unsigned ArgOffset, IRBuilder<> &IRB) { - Value *Base = IRB.CreatePointerCast(DFS.ArgTLS, DFS.IntptrTy); - if (ArgOffset) - Base = IRB.CreateAdd(Base, ConstantInt::get(DFS.IntptrTy, ArgOffset)); - return IRB.CreateIntToPtr(Base, PointerType::get(*DFS.Ctx, 0), "_dfsarg"); + return IRB.CreatePtrAdd(DFS.ArgTLS, ConstantInt::get(DFS.IntptrTy, ArgOffset), + "_dfsarg"); } Value *DFSanFunction::getRetvalTLS(Type *T, IRBuilder<> &IRB) { diff --git a/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp b/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp index e9a3e98..584cdad 100644 --- a/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp +++ b/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp @@ -89,6 +89,7 @@ STATISTIC(NumTransforms, "Number of transformations done"); STATISTIC(NumCloned, "Number of blocks cloned"); STATISTIC(NumPaths, "Number of individual paths threaded"); +namespace llvm { static cl::opt<bool> ClViewCfgBefore("dfa-jump-view-cfg-before", cl::desc("View the CFG before DFA Jump Threading"), @@ -120,8 +121,17 @@ static cl::opt<unsigned> cl::desc("Maximum cost accepted for the transformation"), cl::Hidden, cl::init(50)); -namespace { +extern cl::opt<bool> ProfcheckDisableMetadataFixes; + +} // namespace llvm + +static cl::opt<double> MaxClonedRate( + "dfa-max-cloned-rate", + cl::desc( + "Maximum cloned instructions rate accepted for the transformation"), + cl::Hidden, cl::init(7.5)); +namespace { class SelectInstToUnfold { SelectInst *SI; PHINode *SIUse; @@ -135,10 +145,6 @@ public: explicit operator bool() const { return SI && SIUse; } }; -void unfold(DomTreeUpdater *DTU, LoopInfo *LI, SelectInstToUnfold SIToUnfold, - std::vector<SelectInstToUnfold> *NewSIsToUnfold, - std::vector<BasicBlock *> *NewBBs); - class DFAJumpThreading { public: DFAJumpThreading(AssumptionCache *AC, DominatorTree *DT, LoopInfo *LI, @@ -152,7 +158,8 @@ private: void 
unfoldSelectInstrs(DominatorTree *DT, const SmallVector<SelectInstToUnfold, 4> &SelectInsts) { - DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager); + // TODO: Have everything use a single lazy DTU + DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy); SmallVector<SelectInstToUnfold, 4> Stack(SelectInsts); while (!Stack.empty()) { @@ -167,16 +174,18 @@ private: } } + static void unfold(DomTreeUpdater *DTU, LoopInfo *LI, + SelectInstToUnfold SIToUnfold, + std::vector<SelectInstToUnfold> *NewSIsToUnfold, + std::vector<BasicBlock *> *NewBBs); + AssumptionCache *AC; DominatorTree *DT; LoopInfo *LI; TargetTransformInfo *TTI; OptimizationRemarkEmitter *ORE; }; - -} // end anonymous namespace - -namespace { +} // namespace /// Unfold the select instruction held in \p SIToUnfold by replacing it with /// control flow. @@ -185,9 +194,10 @@ namespace { /// created basic blocks into \p NewBBs. /// /// TODO: merge it with CodeGenPrepare::optimizeSelectInst() if possible. -void unfold(DomTreeUpdater *DTU, LoopInfo *LI, SelectInstToUnfold SIToUnfold, - std::vector<SelectInstToUnfold> *NewSIsToUnfold, - std::vector<BasicBlock *> *NewBBs) { +void DFAJumpThreading::unfold(DomTreeUpdater *DTU, LoopInfo *LI, + SelectInstToUnfold SIToUnfold, + std::vector<SelectInstToUnfold> *NewSIsToUnfold, + std::vector<BasicBlock *> *NewBBs) { SelectInst *SI = SIToUnfold.getInst(); PHINode *SIUse = SIToUnfold.getUse(); assert(SI->hasOneUse()); @@ -253,7 +263,11 @@ void unfold(DomTreeUpdater *DTU, LoopInfo *LI, SelectInstToUnfold SIToUnfold, // Insert the real conditional branch based on the original condition. StartBlockTerm->eraseFromParent(); - BranchInst::Create(EndBlock, NewBlock, SI->getCondition(), StartBlock); + auto *BI = + BranchInst::Create(EndBlock, NewBlock, SI->getCondition(), StartBlock); + if (!ProfcheckDisableMetadataFixes) + BI->setMetadata(LLVMContext::MD_prof, + SI->getMetadata(LLVMContext::MD_prof)); DTU->applyUpdates({{DominatorTree::Insert, StartBlock, EndBlock}, {DominatorTree::Insert, StartBlock, NewBlock}}); } else { @@ -288,7 +302,11 @@ void unfold(DomTreeUpdater *DTU, LoopInfo *LI, SelectInstToUnfold SIToUnfold, // (Use) BranchInst::Create(EndBlock, NewBlockF); // Insert the real conditional branch based on the original condition. - BranchInst::Create(EndBlock, NewBlockF, SI->getCondition(), NewBlockT); + auto *BI = + BranchInst::Create(EndBlock, NewBlockF, SI->getCondition(), NewBlockT); + if (!ProfcheckDisableMetadataFixes) + BI->setMetadata(LLVMContext::MD_prof, + SI->getMetadata(LLVMContext::MD_prof)); DTU->applyUpdates({{DominatorTree::Insert, NewBlockT, NewBlockF}, {DominatorTree::Insert, NewBlockT, EndBlock}, {DominatorTree::Insert, NewBlockF, EndBlock}}); @@ -342,10 +360,12 @@ void unfold(DomTreeUpdater *DTU, LoopInfo *LI, SelectInstToUnfold SIToUnfold, SI->eraseFromParent(); } +namespace { struct ClonedBlock { BasicBlock *BB; APInt State; ///< \p State corresponds to the next value of a switch stmnt. }; +} // namespace typedef std::deque<BasicBlock *> PathType; typedef std::vector<PathType> PathsType; @@ -375,6 +395,7 @@ inline raw_ostream &operator<<(raw_ostream &OS, const PathType &Path) { return OS; } +namespace { /// ThreadingPath is a path in the control flow of a loop that can be threaded /// by cloning necessary basic blocks and replacing conditional branches with /// unconditional ones. 
A threading path includes a list of basic blocks, the @@ -814,11 +835,13 @@ struct TransformDFA { : SwitchPaths(SwitchPaths), DT(DT), AC(AC), TTI(TTI), ORE(ORE), EphValues(EphValues) {} - void run() { + bool run() { if (isLegalAndProfitableToTransform()) { createAllExitPaths(); NumTransforms++; + return true; } + return false; } private: @@ -828,6 +851,7 @@ private: /// also returns false if it is illegal to clone some required block. bool isLegalAndProfitableToTransform() { CodeMetrics Metrics; + uint64_t NumClonedInst = 0; SwitchInst *Switch = SwitchPaths->getSwitchInst(); // Don't thread switch without multiple successors. @@ -837,7 +861,6 @@ private: // Note that DuplicateBlockMap is not being used as intended here. It is // just being used to ensure (BB, State) pairs are only counted once. DuplicateBlockMap DuplicateMap; - for (ThreadingPath &TPath : SwitchPaths->getThreadingPaths()) { PathType PathBBs = TPath.getPath(); APInt NextState = TPath.getExitValue(); @@ -848,6 +871,7 @@ private: BasicBlock *VisitedBB = getClonedBB(BB, NextState, DuplicateMap); if (!VisitedBB) { Metrics.analyzeBasicBlock(BB, *TTI, EphValues); + NumClonedInst += BB->sizeWithoutDebug(); DuplicateMap[BB].push_back({BB, NextState}); } @@ -865,6 +889,7 @@ private: if (VisitedBB) continue; Metrics.analyzeBasicBlock(BB, *TTI, EphValues); + NumClonedInst += BB->sizeWithoutDebug(); DuplicateMap[BB].push_back({BB, NextState}); } @@ -901,6 +926,22 @@ private: } } + // Too many cloned instructions slow down later optimizations, especially + // SLPVectorizer. + // TODO: Thread the switch partially before reaching the threshold. + uint64_t NumOrigInst = 0; + for (auto *BB : DuplicateMap.keys()) + NumOrigInst += BB->sizeWithoutDebug(); + if (double(NumClonedInst) / double(NumOrigInst) > MaxClonedRate) { + LLVM_DEBUG(dbgs() << "DFA Jump Threading: Not jump threading, too many " "instructions will be cloned\n"); + ORE->emit([&]() { + return OptimizationRemarkMissed(DEBUG_TYPE, "NotProfitable", Switch) + << "Too many instructions will be cloned."; + }); + return false; + } + InstructionCost DuplicationCost = 0; unsigned JumpTableSize = 0; @@ -951,8 +992,6 @@ private: /// Transform each threading path to effectively jump thread the DFA.
void createAllExitPaths() { - DomTreeUpdater DTU(*DT, DomTreeUpdater::UpdateStrategy::Eager); - // Move the switch block to the end of the path, since it will be duplicated BasicBlock *SwitchBlock = SwitchPaths->getSwitchBlock(); for (ThreadingPath &TPath : SwitchPaths->getThreadingPaths()) { @@ -969,15 +1008,18 @@ private: SmallPtrSet<BasicBlock *, 16> BlocksToClean; BlocksToClean.insert_range(successors(SwitchBlock)); - for (ThreadingPath &TPath : SwitchPaths->getThreadingPaths()) { - createExitPath(NewDefs, TPath, DuplicateMap, BlocksToClean, &DTU); - NumPaths++; - } + { + DomTreeUpdater DTU(*DT, DomTreeUpdater::UpdateStrategy::Lazy); + for (const ThreadingPath &TPath : SwitchPaths->getThreadingPaths()) { + createExitPath(NewDefs, TPath, DuplicateMap, BlocksToClean, &DTU); + NumPaths++; + } - // After all paths are cloned, now update the last successor of the cloned - // path so it skips over the switch statement - for (ThreadingPath &TPath : SwitchPaths->getThreadingPaths()) - updateLastSuccessor(TPath, DuplicateMap, &DTU); + // After all paths are cloned, now update the last successor of the cloned + // path so it skips over the switch statement + for (const ThreadingPath &TPath : SwitchPaths->getThreadingPaths()) + updateLastSuccessor(TPath, DuplicateMap, &DTU); + } // For each instruction that was cloned and used outside, update its uses updateSSA(NewDefs); @@ -993,7 +1035,7 @@ private: /// To remember the correct destination, we have to duplicate blocks /// corresponding to each state. Also update the terminating instruction of /// the predecessors, and phis in the successor blocks. - void createExitPath(DefMap &NewDefs, ThreadingPath &Path, + void createExitPath(DefMap &NewDefs, const ThreadingPath &Path, DuplicateBlockMap &DuplicateMap, SmallPtrSet<BasicBlock *, 16> &BlocksToClean, DomTreeUpdater *DTU) { @@ -1239,7 +1281,7 @@ private: /// /// Note that this is an optional step and would have been done in later /// optimizations, but it makes the CFG significantly easier to work with. 
- void updateLastSuccessor(ThreadingPath &TPath, + void updateLastSuccessor(const ThreadingPath &TPath, DuplicateBlockMap &DuplicateMap, DomTreeUpdater *DTU) { APInt NextState = TPath.getExitValue(); @@ -1336,6 +1378,7 @@ private: SmallPtrSet<const Value *, 32> EphValues; std::vector<ThreadingPath> TPaths; }; +} // namespace bool DFAJumpThreading::run(Function &F) { LLVM_DEBUG(dbgs() << "\nDFA Jump threading: " << F.getName() << "\n"); @@ -1402,9 +1445,8 @@ bool DFAJumpThreading::run(Function &F) { for (AllSwitchPaths SwitchPaths : ThreadableLoops) { TransformDFA Transform(&SwitchPaths, DT, AC, TTI, ORE, EphValues); - Transform.run(); - MadeChanges = true; - LoopInfoBroken = true; + if (Transform.run()) + MadeChanges = LoopInfoBroken = true; } #ifdef EXPENSIVE_CHECKS @@ -1415,8 +1457,6 @@ bool DFAJumpThreading::run(Function &F) { return MadeChanges; } -} // end anonymous namespace - /// Integrate with the new Pass Manager PreservedAnalyses DFAJumpThreadingPass::run(Function &F, FunctionAnalysisManager &AM) { diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index bbd1ed6..5ba6f95f 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -970,6 +970,7 @@ Function *CodeExtractor::constructFunctionDeclaration( case Attribute::SanitizeMemTag: case Attribute::SanitizeRealtime: case Attribute::SanitizeRealtimeBlocking: + case Attribute::SanitizeAllocToken: case Attribute::SpeculativeLoadHardening: case Attribute::StackProtect: case Attribute::StackProtectReq: diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp index 21b2652..b6ca52e 100644 --- a/llvm/lib/Transforms/Utils/Local.cpp +++ b/llvm/lib/Transforms/Utils/Local.cpp @@ -3031,6 +3031,13 @@ static void combineMetadata(Instruction *K, const Instruction *J, K->getContext(), MDNode::toCaptureComponents(JMD) | MDNode::toCaptureComponents(KMD))); break; + case LLVMContext::MD_alloc_token: + // Preserve !alloc_token if both K and J have it, and they are equal. + if (KMD == JMD) + K->setMetadata(Kind, JMD); + else + K->setMetadata(Kind, nullptr); + break; } } // Set !invariant.group from J if J has it. If both instructions have it diff --git a/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp index bf882d7..6312831 100644 --- a/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp +++ b/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp @@ -201,18 +201,27 @@ static void ConnectProlog(Loop *L, Value *BECount, unsigned Count, /// unroll count is non-zero. /// /// This function performs the following: -/// - Update PHI nodes at the unrolling loop exit and epilog loop exit -/// - Create PHI nodes at the unrolling loop exit to combine -/// values that exit the unrolling loop code and jump around it. +/// - Update PHI nodes at the epilog loop exit +/// - Create PHI nodes at the unrolling loop exit and epilog preheader to +/// combine values that exit the unrolling loop code and jump around it. /// - Update PHI operands in the epilog loop by the new PHI nodes -/// - Branch around the epilog loop if extra iters (ModVal) is zero. +/// - At the unrolling loop exit, branch around the epilog loop if extra iters +/// (ModVal) is zero. +/// - At the epilog preheader, add an llvm.assume call that extra iters is +/// non-zero. If the unrolling loop exit is the predecessor, the above new +/// branch guarantees that assumption.
If the unrolling loop preheader is the +/// predecessor, then the required first iteration from the original loop has +/// yet to be executed, so it must be executed in the epilog loop. If we +/// later unroll the epilog loop, that llvm.assume call somehow enables +/// ScalarEvolution to compute an epilog loop maximum trip count, which enables +/// eliminating the branch at the end of the final unrolled epilog iteration. /// static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit, BasicBlock *Exit, BasicBlock *PreHeader, BasicBlock *EpilogPreHeader, BasicBlock *NewPreHeader, ValueToValueMapTy &VMap, DominatorTree *DT, LoopInfo *LI, bool PreserveLCSSA, ScalarEvolution &SE, - unsigned Count) { + unsigned Count, AssumptionCache &AC) { BasicBlock *Latch = L->getLoopLatch(); assert(Latch && "Loop must have a latch"); BasicBlock *EpilogLatch = cast<BasicBlock>(VMap[Latch]); @@ -231,7 +240,7 @@ static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit, // EpilogLatch // Exit (EpilogPN) - // Update PHI nodes at NewExit and Exit. + // Update PHI nodes at Exit. for (PHINode &PN : NewExit->phis()) { // PN should be used in another PHI located in Exit block as // Exit was split by SplitBlockPredecessors into Exit and NewExit @@ -246,15 +255,11 @@ static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit, // epilogue edges have already been added. // // There is EpilogPreHeader incoming block instead of NewExit as - // NewExit was spilt 1 more time to get EpilogPreHeader. + // NewExit was split 1 more time to get EpilogPreHeader. assert(PN.hasOneUse() && "The phi should have 1 use"); PHINode *EpilogPN = cast<PHINode>(PN.use_begin()->getUser()); assert(EpilogPN->getParent() == Exit && "EpilogPN should be in Exit block"); - // Add incoming PreHeader from branch around the Loop - PN.addIncoming(PoisonValue::get(PN.getType()), PreHeader); - SE.forgetValue(&PN); - Value *V = PN.getIncomingValueForBlock(Latch); Instruction *I = dyn_cast<Instruction>(V); if (I && L->contains(I)) @@ -271,35 +276,52 @@ static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit, NewExit); // Now PHIs should look like: // NewExit: - // PN = PHI [I, Latch], [poison, PreHeader] + // PN = PHI [I, Latch] // ... // Exit: // EpilogPN = PHI [PN, NewExit], [VMap[I], EpilogLatch] } - // Create PHI nodes at NewExit (from the unrolling loop Latch and PreHeader). - // Update corresponding PHI nodes in epilog loop. + // Create PHI nodes at NewExit (from the unrolling loop Latch) and at + // EpilogPreHeader (from PreHeader and NewExit). Update corresponding PHI + // nodes in epilog loop. for (BasicBlock *Succ : successors(Latch)) { // Skip this as we already updated phis in exit blocks. if (!L->contains(Succ)) continue; + + // Succ here appears to always be just L->getHeader(). Otherwise, how do we + // know its corresponding epilog block (from VMap) is EpilogHeader and thus + // EpilogPreHeader is the right incoming block for VPN, as set below? + // TODO: Can we thus avoid the enclosing loop over successors? + assert(Succ == L->getHeader() && + "Expect the only in-loop successor of latch to be the loop header"); + for (PHINode &PN : Succ->phis()) { - // Add new PHI nodes to the loop exit block and update epilog - // PHIs with the new PHI values. - PHINode *NewPN = PHINode::Create(PN.getType(), 2, PN.getName() + ".unr"); - NewPN->insertBefore(NewExit->getFirstNonPHIIt()); - // Adding a value to the new PHI node from the unrolling loop preheader.
- NewPN->addIncoming(PN.getIncomingValueForBlock(NewPreHeader), PreHeader); - // Adding a value to the new PHI node from the unrolling loop latch. - NewPN->addIncoming(PN.getIncomingValueForBlock(Latch), Latch); + // Add new PHI nodes to the loop exit block. + PHINode *NewPN0 = PHINode::Create(PN.getType(), /*NumReservedValues=*/1, + PN.getName() + ".unr"); + NewPN0->insertBefore(NewExit->getFirstNonPHIIt()); + // Add value to the new PHI node from the unrolling loop latch. + NewPN0->addIncoming(PN.getIncomingValueForBlock(Latch), Latch); + + // Add new PHI nodes to EpilogPreHeader. + PHINode *NewPN1 = PHINode::Create(PN.getType(), /*NumReservedValues=*/2, + PN.getName() + ".epil.init"); + NewPN1->insertBefore(EpilogPreHeader->getFirstNonPHIIt()); + // Add value to the new PHI node from the unrolling loop preheader. + NewPN1->addIncoming(PN.getIncomingValueForBlock(NewPreHeader), PreHeader); + // Add value to the new PHI node from the epilog loop guard. + NewPN1->addIncoming(NewPN0, NewExit); // Update the existing PHI node operand with the value from the new PHI // node. Corresponding instruction in epilog loop should be PHI. PHINode *VPN = cast<PHINode>(VMap[&PN]); - VPN->setIncomingValueForBlock(EpilogPreHeader, NewPN); + VPN->setIncomingValueForBlock(EpilogPreHeader, NewPN1); } } + // In NewExit, branch around the epilog loop if no extra iters. Instruction *InsertPt = NewExit->getTerminator(); IRBuilder<> B(InsertPt); Value *BrLoopExit = B.CreateIsNotNull(ModVal, "lcmp.mod"); @@ -308,7 +330,7 @@ static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit, SmallVector<BasicBlock*, 4> Preds(predecessors(Exit)); SplitBlockPredecessors(Exit, Preds, ".epilog-lcssa", DT, LI, nullptr, PreserveLCSSA); - // Add the branch to the exit block (around the unrolling loop) + // Add the branch to the exit block (around the epilog loop) MDNode *BranchWeights = nullptr; if (hasBranchWeightMD(*Latch->getTerminator())) { // Assume equal distribution in interval [0, Count). @@ -322,10 +344,11 @@ static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit, DT->changeImmediateDominator(Exit, NewDom); } - // Split the main loop exit to maintain canonicalization guarantees. - SmallVector<BasicBlock*, 4> NewExitPreds{Latch}; - SplitBlockPredecessors(NewExit, NewExitPreds, ".loopexit", DT, LI, nullptr, - PreserveLCSSA); + // In EpilogPreHeader, assume extra iters is non-zero. + IRBuilder<> B2(EpilogPreHeader, EpilogPreHeader->getFirstNonPHIIt()); + Value *ModIsNotNull = B2.CreateIsNotNull(ModVal, "lcmp.mod"); + AssumeInst *AI = cast<AssumeInst>(B2.CreateAssumption(ModIsNotNull)); + AC.registerAssumption(AI); } /// Create a clone of the blocks in a loop and connect them together. A new @@ -795,7 +818,8 @@ bool llvm::UnrollRuntimeLoopRemainder( ConstantInt::get(BECount->getType(), Count - 1)) : B.CreateIsNotNull(ModVal, "lcmp.mod"); - BasicBlock *RemainderLoop = UseEpilogRemainder ? NewExit : PrologPreHeader; + BasicBlock *RemainderLoop = + UseEpilogRemainder ? EpilogPreHeader : PrologPreHeader; BasicBlock *UnrollingLoop = UseEpilogRemainder ? NewPreHeader : PrologExit; // Branch to either remainder (extra iterations) loop or unrolling loop. 
MDNode *BranchWeights = nullptr; @@ -808,7 +832,7 @@ bool llvm::UnrollRuntimeLoopRemainder( PreHeaderBR->eraseFromParent(); if (DT) { if (UseEpilogRemainder) - DT->changeImmediateDominator(NewExit, PreHeader); + DT->changeImmediateDominator(EpilogPreHeader, PreHeader); else DT->changeImmediateDominator(PrologExit, PreHeader); } @@ -880,7 +904,8 @@ bool llvm::UnrollRuntimeLoopRemainder( // from both the original loop and the remainder code reaching the exit // blocks. While the IDom of these exit blocks were from the original loop, // now the IDom is the preheader (which decides whether the original loop or - // remainder code should run). + // remainder code should run) unless the block still has just the original + // predecessor (such as NewExit in the case of an epilog remainder). if (DT && !L->getExitingBlock()) { SmallVector<BasicBlock *, 16> ChildrenToUpdate; // NB! We have to examine the dom children of all loop blocks, not just @@ -891,7 +916,8 @@ bool llvm::UnrollRuntimeLoopRemainder( auto *DomNodeBB = DT->getNode(BB); for (auto *DomChild : DomNodeBB->children()) { auto *DomChildBB = DomChild->getBlock(); - if (!L->contains(LI->getLoopFor(DomChildBB))) + if (!L->contains(LI->getLoopFor(DomChildBB)) && + DomChildBB->getUniquePredecessor() != BB) ChildrenToUpdate.push_back(DomChildBB); } } @@ -930,7 +956,7 @@ bool llvm::UnrollRuntimeLoopRemainder( // Connect the epilog code to the original loop and update the // PHI functions. ConnectEpilog(L, ModVal, NewExit, LatchExit, PreHeader, EpilogPreHeader, - NewPreHeader, VMap, DT, LI, PreserveLCSSA, *SE, Count); + NewPreHeader, VMap, DT, LI, PreserveLCSSA, *SE, Count, *AC); // Update counter in loop for unrolling. // Use an incrementing IV. Pre-incr/post-incr is backedge/trip count. diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index c167dd7..fb696be 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -2263,8 +2263,7 @@ public: /// debug location \p DL. VPWidenPHIRecipe(PHINode *Phi, VPValue *Start = nullptr, DebugLoc DL = DebugLoc::getUnknown(), const Twine &Name = "") - : VPSingleDefRecipe(VPDef::VPWidenPHISC, ArrayRef<VPValue *>(), Phi, DL), - Name(Name.str()) { + : VPSingleDefRecipe(VPDef::VPWidenPHISC, {}, Phi, DL), Name(Name.str()) { if (Start) addOperand(Start); } diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp index c8212af..b36298f 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp @@ -763,6 +763,8 @@ void VPlanTransforms::addMinimumVectorEpilogueIterationCheck( // Add the minimum iteration check for the epilogue vector loop. VPValue *TC = Plan.getOrAddLiveIn(TripCount); VPBuilder Builder(cast<VPBasicBlock>(Plan.getEntry())); + VPValue *VFxUF = Builder.createExpandSCEV(SE.getElementCount( + TripCount->getType(), (EpilogueVF * EpilogueUF), SCEV::FlagNUW)); VPValue *Count = Builder.createNaryOp( Instruction::Sub, {TC, Plan.getOrAddLiveIn(VectorTripCount)}, DebugLoc::getUnknown(), "n.vec.remaining"); @@ -770,9 +772,6 @@ void VPlanTransforms::addMinimumVectorEpilogueIterationCheck( // Generate code to check if the loop's trip count is less than VF * UF of // the vector epilogue loop. auto P = RequiresScalarEpilogue ? 
ICmpInst::ICMP_ULE : ICmpInst::ICMP_ULT; - VPValue *VFxUF = Builder.createExpandSCEV(SE.getElementCount( - TripCount->getType(), (EpilogueVF * EpilogueUF), SCEV::FlagNUW)); - auto *CheckMinIters = Builder.createICmp( P, Count, VFxUF, DebugLoc::getUnknown(), "min.epilog.iters.check"); VPInstruction *Branch = diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index ebf833e..c8a2d84 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -3180,9 +3180,8 @@ expandVPWidenIntOrFpInduction(VPWidenIntOrFpInductionRecipe *WidenIVR, DebugLoc::getUnknown(), "induction"); // Create the widened phi of the vector IV. - auto *WidePHI = new VPWidenPHIRecipe(WidenIVR->getPHINode(), nullptr, + auto *WidePHI = new VPWidenPHIRecipe(WidenIVR->getPHINode(), Init, WidenIVR->getDebugLoc(), "vec.ind"); - WidePHI->addOperand(Init); WidePHI->insertBefore(WidenIVR); // Create the backedge value for the vector IV. @@ -3545,8 +3544,7 @@ tryToMatchAndCreateMulAccumulateReduction(VPReductionRecipe *Red, VPValue *A, *B; VPValue *Tmp = nullptr; // Sub reductions could have a sub between the add reduction and vec op. - if (match(VecOp, - m_Binary<Instruction::Sub>(m_SpecificInt(0), m_VPValue(Tmp)))) { + if (match(VecOp, m_Sub(m_ZeroInt(), m_VPValue(Tmp)))) { Sub = VecOp->getDefiningRecipe(); VecOp = Tmp; } diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp index 0599930..66748c5 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp @@ -71,8 +71,8 @@ bool vputils::isHeaderMask(const VPValue *V, VPlan &Plan) { m_Specific(&Plan.getVF()))) || IsWideCanonicalIV(A)); - return match(V, m_Binary<Instruction::ICmp>(m_VPValue(A), m_VPValue(B))) && - IsWideCanonicalIV(A) && B == Plan.getOrCreateBackedgeTakenCount(); + return match(V, m_ICmp(m_VPValue(A), m_VPValue(B))) && IsWideCanonicalIV(A) && B == Plan.getOrCreateBackedgeTakenCount(); } const SCEV *vputils::getSCEVExprForVPValue(VPValue *V, ScalarEvolution &SE) {
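
Note on the new AllocToken ABI: getTokenAllocFunction() above builds the replacement callee name from ClFuncPrefix (default "__alloc_token_") plus the original allocation function's name, and appends the token ID as a trailing pointer-sized integer argument; with -alloc-token-fast-abi the token ID is instead encoded into the name and the signature is unchanged. A minimal sketch of the runtime-side declarations a token-aware allocator would provide follows — which functions are covered and the token value 42 are illustrative assumptions; only the naming and parameter conventions come from this patch:

#include <cstddef>
#include <cstdint>

extern "C" {
// Default ABI: original parameter list plus a trailing pointer-sized token
// ID appended by the pass (IntPtrTy in the pass, uintptr_t here).
void *__alloc_token_malloc(size_t size, uintptr_t token_id);
void *__alloc_token_calloc(size_t nmemb, size_t size, uintptr_t token_id);

// Fast ABI (-alloc-token-fast-abi): the token ID (hypothetically 42) is
// encoded in the function name, so the signature matches the original.
void *__alloc_token_42_malloc(size_t size);
} // extern "C"

The token value itself comes from the selected -alloc-token-mode: an incrementing counter, a statically assigned random value, or a SipHash of the type name carried by !alloc_token metadata, bounded by -alloc-token-max when set.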