Diffstat (limited to 'llvm/lib/Transforms/Instrumentation')
14 files changed, 394 insertions, 169 deletions
diff --git a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp index 7c364f8..3ea290a7 100644 --- a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -20,6 +20,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringExtras.h" @@ -248,6 +249,11 @@ static cl::opt<bool> "platforms that support this"), cl::Hidden, cl::init(true)); +static cl::opt<int> + ClShadowAddrSpace("asan-shadow-addr-space", + cl::desc("Address space for pointers to the shadow map"), + cl::Hidden, cl::init(0)); + static cl::opt<bool> ClWithIfuncSuppressRemat( "asan-with-ifunc-suppress-remat", cl::desc("Suppress rematerialization of dynamic shadow address by passing " @@ -436,6 +442,15 @@ static cl::opt<AsanDtorKind> ClOverrideDestructorKind( "Use global destructors")), cl::init(AsanDtorKind::Invalid), cl::Hidden); +static SmallSet<unsigned, 8> SrcAddrSpaces; +static cl::list<unsigned> ClAddrSpaces( + "asan-instrument-address-spaces", + cl::desc("Only instrument variables in the specified address spaces."), + cl::Hidden, cl::CommaSeparated, cl::ZeroOrMore, + cl::callback([](const unsigned &AddrSpace) { + SrcAddrSpaces.insert(AddrSpace); + })); + // Debug flags. static cl::opt<int> ClDebug("asan-debug", cl::desc("debug"), cl::Hidden, @@ -503,6 +518,7 @@ static ShadowMapping getShadowMapping(const Triple &TargetTriple, int LongSize, bool IsAMDGPU = TargetTriple.isAMDGPU(); bool IsHaiku = TargetTriple.isOSHaiku(); bool IsWasm = TargetTriple.isWasm(); + bool IsBPF = TargetTriple.isBPF(); ShadowMapping Mapping; @@ -579,6 +595,8 @@ static ShadowMapping getShadowMapping(const Triple &TargetTriple, int LongSize, else if (IsHaiku && IsX86_64) Mapping.Offset = (kSmallX86_64ShadowOffsetBase & (kSmallX86_64ShadowOffsetAlignMask << Mapping.Scale)); + else if (IsBPF) + Mapping.Offset = kDynamicShadowSentinel; else Mapping.Offset = kDefaultShadowOffset64; } @@ -1355,11 +1373,25 @@ static bool GlobalWasGeneratedByCompiler(GlobalVariable *G) { static bool isUnsupportedAMDGPUAddrspace(Value *Addr) { Type *PtrTy = cast<PointerType>(Addr->getType()->getScalarType()); unsigned int AddrSpace = PtrTy->getPointerAddressSpace(); + // Globals in address space 1 and 4 are supported for AMDGPU. if (AddrSpace == 3 || AddrSpace == 5) return true; return false; } +static bool isSupportedAddrspace(const Triple &TargetTriple, Value *Addr) { + Type *PtrTy = cast<PointerType>(Addr->getType()->getScalarType()); + unsigned int AddrSpace = PtrTy->getPointerAddressSpace(); + + if (!SrcAddrSpaces.empty()) + return SrcAddrSpaces.count(AddrSpace); + + if (TargetTriple.isAMDGPU()) + return !isUnsupportedAMDGPUAddrspace(Addr); + + return AddrSpace == 0; +} + Value *AddressSanitizer::memToShadow(Value *Shadow, IRBuilder<> &IRB) { // Shadow >> scale Shadow = IRB.CreateLShr(Shadow, Mapping.Scale); @@ -1423,10 +1455,9 @@ bool AddressSanitizer::isInterestingAlloca(const AllocaInst &AI) { } bool AddressSanitizer::ignoreAccess(Instruction *Inst, Value *Ptr) { - // Instrument accesses from different address spaces only for AMDGPU. 
- Type *PtrTy = cast<PointerType>(Ptr->getType()->getScalarType()); - if (PtrTy->getPointerAddressSpace() != 0 && - !(TargetTriple.isAMDGPU() && !isUnsupportedAMDGPUAddrspace(Ptr))) + // Check whether the target supports sanitizing the address space + // of the pointer. + if (!isSupportedAddrspace(TargetTriple, Ptr)) return true; // Ignore swifterror addresses. @@ -1942,7 +1973,7 @@ void AddressSanitizer::instrumentAddress(Instruction *OrigIns, Type *ShadowTy = IntegerType::get(*C, std::max(8U, TypeStoreSize >> Mapping.Scale)); - Type *ShadowPtrTy = PointerType::get(*C, 0); + Type *ShadowPtrTy = PointerType::get(*C, ClShadowAddrSpace); Value *ShadowPtr = memToShadow(AddrLong, IRB); const uint64_t ShadowAlign = std::max<uint64_t>(Alignment.valueOrOne().value() >> Mapping.Scale, 1); @@ -2089,9 +2120,7 @@ bool ModuleAddressSanitizer::shouldInstrumentGlobal(GlobalVariable *G) const { return false; if (!Ty->isSized()) return false; if (!G->hasInitializer()) return false; - // Globals in address space 1 and 4 are supported for AMDGPU. - if (G->getAddressSpace() && - !(TargetTriple.isAMDGPU() && !isUnsupportedAMDGPUAddrspace(G))) + if (!isSupportedAddrspace(TargetTriple, G)) return false; if (GlobalWasGeneratedByCompiler(G)) return false; // Our own globals. // Two problems with thread-locals: @@ -2669,7 +2698,7 @@ void ModuleAddressSanitizer::instrumentGlobals(IRBuilder<> &IRB, // ODR should not happen for local linkage. if (NewGlobal->hasLocalLinkage()) { - ODRIndicator = ConstantInt::get(IntptrTy, -1); + ODRIndicator = ConstantInt::getAllOnesValue(IntptrTy); } else if (UseOdrIndicator) { // With local aliases, we need to provide another externally visible // symbol __odr_asan_XXX to detect ODR violation. diff --git a/llvm/lib/Transforms/Instrumentation/AllocToken.cpp b/llvm/lib/Transforms/Instrumentation/AllocToken.cpp index 8181e4e..38eeee2 100644 --- a/llvm/lib/Transforms/Instrumentation/AllocToken.cpp +++ b/llvm/lib/Transforms/Instrumentation/AllocToken.cpp @@ -67,9 +67,10 @@ cl::opt<std::string> ClFuncPrefix("alloc-token-prefix", cl::desc("The allocation function prefix"), cl::Hidden, cl::init("__alloc_token_")); -cl::opt<uint64_t> ClMaxTokens("alloc-token-max", - cl::desc("Maximum number of tokens (0 = no max)"), - cl::Hidden, cl::init(0)); +cl::opt<uint64_t> + ClMaxTokens("alloc-token-max", + cl::desc("Maximum number of tokens (0 = target SIZE_MAX)"), + cl::Hidden, cl::init(0)); cl::opt<bool> ClFastABI("alloc-token-fast-abi", @@ -233,12 +234,31 @@ public: } }; -// Apply opt overrides. -AllocTokenOptions transformOptionsFromCl(AllocTokenOptions Opts) { - if (!Opts.MaxTokens.has_value()) +// Apply opt overrides and module flags. +static AllocTokenOptions resolveOptions(AllocTokenOptions Opts, + const Module &M) { + auto IntModuleFlagOrNull = [&](StringRef Key) { + return mdconst::extract_or_null<ConstantInt>(M.getModuleFlag(Key)); + }; + + if (auto *S = dyn_cast_or_null<MDString>(M.getModuleFlag("alloc-token-mode"))) + if (auto Mode = getAllocTokenModeFromString(S->getString())) + Opts.Mode = *Mode; + if (auto *Val = IntModuleFlagOrNull("alloc-token-max")) + Opts.MaxTokens = Val->getZExtValue(); + if (auto *Val = IntModuleFlagOrNull("alloc-token-fast-abi")) + Opts.FastABI |= Val->isOne(); + if (auto *Val = IntModuleFlagOrNull("alloc-token-extended")) + Opts.Extended |= Val->isOne(); + + // Allow overriding options from command line options. 
+ if (ClMaxTokens.getNumOccurrences()) Opts.MaxTokens = ClMaxTokens; - Opts.FastABI |= ClFastABI; - Opts.Extended |= ClExtended; + if (ClFastABI.getNumOccurrences()) + Opts.FastABI = ClFastABI; + if (ClExtended.getNumOccurrences()) + Opts.Extended = ClExtended; + return Opts; } @@ -246,21 +266,21 @@ class AllocToken { public: explicit AllocToken(AllocTokenOptions Opts, Module &M, ModuleAnalysisManager &MAM) - : Options(transformOptionsFromCl(std::move(Opts))), Mod(M), + : Options(resolveOptions(std::move(Opts), M)), Mod(M), FAM(MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager()), - Mode(IncrementMode(*IntPtrTy, *Options.MaxTokens)) { + Mode(IncrementMode(*IntPtrTy, Options.MaxTokens)) { switch (Options.Mode) { case TokenMode::Increment: break; case TokenMode::Random: - Mode.emplace<RandomMode>(*IntPtrTy, *Options.MaxTokens, + Mode.emplace<RandomMode>(*IntPtrTy, Options.MaxTokens, M.createRNG(DEBUG_TYPE)); break; case TokenMode::TypeHash: - Mode.emplace<TypeHashMode>(*IntPtrTy, *Options.MaxTokens); + Mode.emplace<TypeHashMode>(*IntPtrTy, Options.MaxTokens); break; case TokenMode::TypeHashPointerSplit: - Mode.emplace<TypeHashPointerSplitMode>(*IntPtrTy, *Options.MaxTokens); + Mode.emplace<TypeHashPointerSplitMode>(*IntPtrTy, Options.MaxTokens); break; } } @@ -317,8 +337,6 @@ bool AllocToken::instrumentFunction(Function &F) { if (F.getLinkage() == GlobalValue::AvailableExternallyLinkage) return false; - auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F); - auto &TLI = FAM.getResult<TargetLibraryAnalysis>(F); SmallVector<std::pair<CallBase *, LibFunc>, 4> AllocCalls; SmallVector<IntrinsicInst *, 4> IntrinsicInsts; @@ -327,6 +345,10 @@ bool AllocToken::instrumentFunction(Function &F) { F.hasFnAttribute(Attribute::SanitizeAllocToken) && !F.hasFnAttribute(Attribute::DisableSanitizerInstrumentation); + // Get TLI only when required. + const TargetLibraryInfo *TLI = + InstrumentFunction ? &FAM.getResult<TargetLibraryAnalysis>(F) : nullptr; + // Collect all allocation calls to avoid iterator invalidation. for (Instruction &I : instructions(F)) { // Collect all alloc_token_* intrinsics. @@ -342,26 +364,28 @@ bool AllocToken::instrumentFunction(Function &F) { auto *CB = dyn_cast<CallBase>(&I); if (!CB) continue; - if (std::optional<LibFunc> Func = shouldInstrumentCall(*CB, TLI)) + if (std::optional<LibFunc> Func = shouldInstrumentCall(*CB, *TLI)) AllocCalls.emplace_back(CB, Func.value()); } + // Return early to avoid unnecessarily instantiating the ORE. 
+ if (AllocCalls.empty() && IntrinsicInsts.empty()) + return false; + + auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F); bool Modified = false; - if (!AllocCalls.empty()) { - for (auto &[CB, Func] : AllocCalls) - Modified |= replaceAllocationCall(CB, Func, ORE, TLI); - if (Modified) - NumFunctionsModified++; - } + for (auto &[CB, Func] : AllocCalls) + Modified |= replaceAllocationCall(CB, Func, ORE, *TLI); - if (!IntrinsicInsts.empty()) { - for (auto *II : IntrinsicInsts) - replaceIntrinsicInst(II, ORE); + for (auto *II : IntrinsicInsts) { + replaceIntrinsicInst(II, ORE); Modified = true; - NumFunctionsModified++; } + if (Modified) + NumFunctionsModified++; + return Modified; } diff --git a/llvm/lib/Transforms/Instrumentation/BoundsChecking.cpp b/llvm/lib/Transforms/Instrumentation/BoundsChecking.cpp index 9239ae8..b5a8f79 100644 --- a/llvm/lib/Transforms/Instrumentation/BoundsChecking.cpp +++ b/llvm/lib/Transforms/Instrumentation/BoundsChecking.cpp @@ -178,6 +178,8 @@ getRuntimeCallName(const BoundsCheckingPass::Options::Runtime &Opts) { Name += "_minimal"; if (!Opts.MayReturn) Name += "_abort"; + else if (Opts.HandlerPreserveAllRegs) + Name += "_preserve"; return Name; } @@ -267,7 +269,10 @@ static bool addBoundsChecking(Function &F, TargetLibraryInfo &TLI, TrapCall->setDoesNotReturn(); IRB.CreateUnreachable(); } - + // The preserve-all logic is somewhat duplicated in CGExpr.cpp for + // local-bounds. Make sure to change that too. + if (Opts.Rt && Opts.Rt->HandlerPreserveAllRegs && MayReturn) + TrapCall->setCallingConv(CallingConv::PreserveAll); if (!MayReturn && SingleTrapBB && !DebugTrapBB) ReuseTrapBB = TrapBB; diff --git a/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp b/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp index 0688bc7..726d94b 100644 --- a/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp +++ b/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp @@ -1992,6 +1992,8 @@ void CHR::addToMergedCondition(bool IsTrueBiased, Value *Cond, // Use logical and to avoid propagating poison from later conditions. MergedCondition = IRB.CreateLogicalAnd(MergedCondition, Cond); + setExplicitlyUnknownBranchWeightsIfProfiled( + *cast<Instruction>(MergedCondition), DEBUG_TYPE); } void CHR::transformScopes(SmallVectorImpl<CHRScope *> &CHRScopes) { diff --git a/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp index cc53ec2..e984ac4 100644 --- a/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp @@ -2191,8 +2191,16 @@ std::pair<Value *, Value *> DFSanFunction::loadShadowFast( // and then the entire shadow for the second origin pointer (which will be // chosen by combineOrigins() iff the least-significant half of the wide // shadow was empty but the other half was not). - Value *WideShadowLo = IRB.CreateShl( - WideShadow, ConstantInt::get(WideShadowTy, WideShadowBitWidth / 2)); + Value *WideShadowLo = + F->getParent()->getDataLayout().isLittleEndian() + ? 
IRB.CreateShl( + WideShadow, + ConstantInt::get(WideShadowTy, WideShadowBitWidth / 2)) + : IRB.CreateAnd( + WideShadow, + ConstantInt::get(WideShadowTy, + (1 - (1 << (WideShadowBitWidth / 2))) + << (WideShadowBitWidth / 2))); Shadows.push_back(WideShadow); Origins.push_back(DFS.loadNextOrigin(Pos, OriginAlign, &OriginAddr)); diff --git a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp index b5548d4..8c8d16a6 100644 --- a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp +++ b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp @@ -1944,6 +1944,10 @@ void InstrLowerer::emitNameData() { NamesVar = new GlobalVariable(M, NamesVal->getType(), true, GlobalValue::PrivateLinkage, NamesVal, getInstrProfNamesVarName()); + if (isGPUProfTarget(M)) { + NamesVar->setLinkage(GlobalValue::ExternalLinkage); + NamesVar->setVisibility(GlobalValue::ProtectedVisibility); + } NamesSize = CompressedNameStr.size(); setGlobalVariableLargeSection(TT, *NamesVar); diff --git a/llvm/lib/Transforms/Instrumentation/KCFI.cpp b/llvm/lib/Transforms/Instrumentation/KCFI.cpp index f4cb4e2..f06b1d3 100644 --- a/llvm/lib/Transforms/Instrumentation/KCFI.cpp +++ b/llvm/lib/Transforms/Instrumentation/KCFI.cpp @@ -23,6 +23,7 @@ #include "llvm/IR/Intrinsics.h" #include "llvm/IR/MDBuilder.h" #include "llvm/IR/Module.h" +#include "llvm/Support/xxhash.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" diff --git a/llvm/lib/Transforms/Instrumentation/MemProfUse.cpp b/llvm/lib/Transforms/Instrumentation/MemProfUse.cpp index b72d41a..25953f4 100644 --- a/llvm/lib/Transforms/Instrumentation/MemProfUse.cpp +++ b/llvm/lib/Transforms/Instrumentation/MemProfUse.cpp @@ -63,6 +63,11 @@ static cl::opt<bool> cl::Hidden, cl::init(false)); static cl::opt<bool> + PrintFunctionGuids("memprof-print-function-guids", + cl::desc("Print function GUIDs computed for matching"), + cl::Hidden, cl::init(false)); + +static cl::opt<bool> SalvageStaleProfile("memprof-salvage-stale-profile", cl::desc("Salvage stale MemProf profile"), cl::init(false), cl::Hidden); @@ -454,6 +459,15 @@ handleAllocSite(Instruction &I, CallBase *CI, InlinedCallStack.size())] = { AllocInfo->Info.getTotalSize(), AllocType}; } + ORE.emit( + OptimizationRemark(DEBUG_TYPE, "MemProfUse", CI) + << ore::NV("AllocationCall", CI) << " in function " + << ore::NV("Caller", CI->getFunction()) + << " matched alloc context with alloc type " + << ore::NV("Attribute", getAllocTypeAttributeString(AllocType)) + << " total size " << ore::NV("Size", AllocInfo->Info.getTotalSize()) + << " full context id " << ore::NV("Context", FullStackId) + << " frame count " << ore::NV("Frames", InlinedCallStack.size())); } } // If the threshold for the percent of cold bytes is less than 100%, @@ -495,53 +509,59 @@ struct CallSiteEntry { ArrayRef<Frame> Frames; // Potential targets for indirect calls. ArrayRef<GlobalValue::GUID> CalleeGuids; - - // Only compare Frame contents. - // Use pointer-based equality instead of ArrayRef's operator== which does - // element-wise comparison. We want to check if it's the same slice of the - // underlying array, not just equivalent content. 
- bool operator==(const CallSiteEntry &Other) const { - return Frames.data() == Other.Frames.data() && - Frames.size() == Other.Frames.size(); - } }; -struct CallSiteEntryHash { - size_t operator()(const CallSiteEntry &Entry) const { - return computeFullStackId(Entry.Frames); - } -}; - -static void handleCallSite( - Instruction &I, const Function *CalledFunction, - ArrayRef<uint64_t> InlinedCallStack, - const std::unordered_set<CallSiteEntry, CallSiteEntryHash> &CallSiteEntries, - Module &M, std::set<std::vector<uint64_t>> &MatchedCallSites) { +static void handleCallSite(Instruction &I, const Function *CalledFunction, + ArrayRef<uint64_t> InlinedCallStack, + const std::vector<CallSiteEntry> &CallSiteEntries, + Module &M, + std::set<std::vector<uint64_t>> &MatchedCallSites, + OptimizationRemarkEmitter &ORE) { auto &Ctx = M.getContext(); + // Set of Callee GUIDs to attach to indirect calls. We accumulate all of them + // to support cases where the instuction's inlined frames match multiple call + // site entries, which can happen if the profile was collected from a binary + // where this instruction was eventually inlined into multiple callers. + SetVector<GlobalValue::GUID> CalleeGuids; + bool CallsiteMDAdded = false; for (const auto &CallSiteEntry : CallSiteEntries) { // If we found and thus matched all frames on the call, create and // attach call stack metadata. if (stackFrameIncludesInlinedCallStack(CallSiteEntry.Frames, InlinedCallStack)) { NumOfMemProfMatchedCallSites++; - addCallsiteMetadata(I, InlinedCallStack, Ctx); - - // Try to attach indirect call metadata if possible. - if (!CalledFunction) - addVPMetadata(M, I, CallSiteEntry.CalleeGuids); - // Only need to find one with a matching call stack and add a single // callsite metadata. - - // Accumulate call site matching information upon request. - if (ClPrintMemProfMatchInfo) { - std::vector<uint64_t> CallStack; - append_range(CallStack, InlinedCallStack); - MatchedCallSites.insert(std::move(CallStack)); + if (!CallsiteMDAdded) { + addCallsiteMetadata(I, InlinedCallStack, Ctx); + + // Accumulate call site matching information upon request. + if (ClPrintMemProfMatchInfo) { + std::vector<uint64_t> CallStack; + append_range(CallStack, InlinedCallStack); + MatchedCallSites.insert(std::move(CallStack)); + } + ORE.emit(OptimizationRemark(DEBUG_TYPE, "MemProfUse", &I) + << ore::NV("CallSite", &I) << " in function " + << ore::NV("Caller", I.getFunction()) + << " matched callsite with frame count " + << ore::NV("Frames", InlinedCallStack.size())); + + // If this is a direct call, we're done. + if (CalledFunction) + break; + CallsiteMDAdded = true; } - break; + + assert(!CalledFunction && "Didn't expect direct call"); + + // Collect Callee GUIDs from all matching CallSiteEntries. + CalleeGuids.insert(CallSiteEntry.CalleeGuids.begin(), + CallSiteEntry.CalleeGuids.end()); } } + // Try to attach indirect call metadata if possible. + addVPMetadata(M, I, CalleeGuids.getArrayRef()); } static void readMemprof(Module &M, Function &F, @@ -562,6 +582,9 @@ static void readMemprof(Module &M, Function &F, // linkage function. 
auto FuncName = F.getName(); auto FuncGUID = Function::getGUIDAssumingExternalLinkage(FuncName); + if (PrintFunctionGuids) + errs() << "MemProf: Function GUID " << FuncGUID << " is " << FuncName + << "\n"; std::optional<memprof::MemProfRecord> MemProfRec; auto Err = MemProfReader->getMemProfRecord(FuncGUID).moveInto(MemProfRec); if (Err) { @@ -616,8 +639,7 @@ static void readMemprof(Module &M, Function &F, // For the callsites we need to record slices of the frame array (see comments // below where the map entries are added) along with their CalleeGuids. - std::map<uint64_t, std::unordered_set<CallSiteEntry, CallSiteEntryHash>> - LocHashToCallSites; + std::map<uint64_t, std::vector<CallSiteEntry>> LocHashToCallSites; for (auto &AI : MemProfRec->AllocSites) { NumOfMemProfAllocContextProfiles++; // Associate the allocation info with the leaf frame. The later matching @@ -636,7 +658,7 @@ static void readMemprof(Module &M, Function &F, uint64_t StackId = computeStackId(StackFrame); ArrayRef<Frame> FrameSlice = ArrayRef<Frame>(CS.Frames).drop_front(Idx++); ArrayRef<GlobalValue::GUID> CalleeGuids(CS.CalleeGuids); - LocHashToCallSites[StackId].insert({FrameSlice, CalleeGuids}); + LocHashToCallSites[StackId].push_back({FrameSlice, CalleeGuids}); ProfileHasColumns |= StackFrame.Column; // Once we find this function, we can stop recording. @@ -719,7 +741,7 @@ static void readMemprof(Module &M, Function &F, // instruction's leaf location in the callsites map and not the // allocation map. handleCallSite(I, CalledFunction, InlinedCallStack, - CallSitesIter->second, M, MatchedCallSites); + CallSitesIter->second, M, MatchedCallSites, ORE); } } } diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp index 471c6ec..32ee16c 100644 --- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -2720,34 +2720,55 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { // of elements. // // For example, suppose we have: - // VectorA: <a1, a2, a3, a4, a5, a6> - // VectorB: <b1, b2, b3, b4, b5, b6> - // ReductionFactor: 3. + // VectorA: <a0, a1, a2, a3, a4, a5> + // VectorB: <b0, b1, b2, b3, b4, b5> + // ReductionFactor: 3 + // Shards: 1 // The output would be: - // <a1|a2|a3, a4|a5|a6, b1|b2|b3, b4|b5|b6> + // <a0|a1|a2, a3|a4|a5, b0|b1|b2, b3|b4|b5> + // + // If we have: + // VectorA: <a0, a1, a2, a3, a4, a5, a6, a7> + // VectorB: <b0, b1, b2, b3, b4, b5, b6, b7> + // ReductionFactor: 2 + // Shards: 2 + // then a and be each have 2 "shards", resulting in the output being + // interleaved: + // <a0|a1, a2|a3, b0|b1, b2|b3, a4|a5, a6|a7, b4|b5, b6|b7> // // This is convenient for instrumenting horizontal add/sub. // For bitwise OR on "vertical" pairs, see maybeHandleSimpleNomemIntrinsic(). 
Value *horizontalReduce(IntrinsicInst &I, unsigned ReductionFactor, - Value *VectorA, Value *VectorB) { + unsigned Shards, Value *VectorA, Value *VectorB) { assert(isa<FixedVectorType>(VectorA->getType())); - unsigned TotalNumElems = + unsigned NumElems = cast<FixedVectorType>(VectorA->getType())->getNumElements(); + [[maybe_unused]] unsigned TotalNumElems = NumElems; if (VectorB) { assert(VectorA->getType() == VectorB->getType()); - TotalNumElems = TotalNumElems * 2; + TotalNumElems *= 2; } - assert(TotalNumElems % ReductionFactor == 0); + assert(NumElems % (ReductionFactor * Shards) == 0); Value *Or = nullptr; IRBuilder<> IRB(&I); for (unsigned i = 0; i < ReductionFactor; i++) { SmallVector<int, 16> Mask; - for (unsigned X = 0; X < TotalNumElems; X += ReductionFactor) - Mask.push_back(X + i); + + for (unsigned j = 0; j < Shards; j++) { + unsigned Offset = NumElems / Shards * j; + + for (unsigned X = 0; X < NumElems / Shards; X += ReductionFactor) + Mask.push_back(Offset + X + i); + + if (VectorB) { + for (unsigned X = 0; X < NumElems / Shards; X += ReductionFactor) + Mask.push_back(NumElems + Offset + X + i); + } + } Value *Masked; if (VectorB) @@ -2769,7 +2790,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { /// /// e.g., <2 x i32> @llvm.aarch64.neon.saddlp.v2i32.v4i16(<4 x i16>) /// <16 x i8> @llvm.aarch64.neon.addp.v16i8(<16 x i8>, <16 x i8>) - void handlePairwiseShadowOrIntrinsic(IntrinsicInst &I) { + void handlePairwiseShadowOrIntrinsic(IntrinsicInst &I, unsigned Shards) { assert(I.arg_size() == 1 || I.arg_size() == 2); assert(I.getType()->isVectorTy()); @@ -2792,8 +2813,8 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { if (I.arg_size() == 2) SecondArgShadow = getShadow(&I, 1); - Value *OrShadow = horizontalReduce(I, /*ReductionFactor=*/2, FirstArgShadow, - SecondArgShadow); + Value *OrShadow = horizontalReduce(I, /*ReductionFactor=*/2, Shards, + FirstArgShadow, SecondArgShadow); OrShadow = CreateShadowCast(IRB, OrShadow, getShadowTy(&I)); @@ -2808,7 +2829,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { /// conceptually operates on /// (<4 x i16> [[VAR1]], <4 x i16> [[VAR2]]) /// and can be handled with ReinterpretElemWidth == 16. - void handlePairwiseShadowOrIntrinsic(IntrinsicInst &I, + void handlePairwiseShadowOrIntrinsic(IntrinsicInst &I, unsigned Shards, int ReinterpretElemWidth) { assert(I.arg_size() == 1 || I.arg_size() == 2); @@ -2852,8 +2873,8 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { SecondArgShadow = IRB.CreateBitCast(SecondArgShadow, ReinterpretShadowTy); } - Value *OrShadow = horizontalReduce(I, /*ReductionFactor=*/2, FirstArgShadow, - SecondArgShadow); + Value *OrShadow = horizontalReduce(I, /*ReductionFactor=*/2, Shards, + FirstArgShadow, SecondArgShadow); OrShadow = CreateShadowCast(IRB, OrShadow, getShadowTy(&I)); @@ -3903,7 +3924,12 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { // adding/"accumulating" %s. "Accumulation" stores the result in one // of the source registers, but this accumulate vs. add distinction // is lost when dealing with LLVM intrinsics.) + // + // ZeroPurifies means that multiplying a known-zero with an uninitialized + // value results in an initialized value. This is applicable for integer + // multiplication, but not floating-point (counter-example: NaN). 
void handleVectorPmaddIntrinsic(IntrinsicInst &I, unsigned ReductionFactor, + bool ZeroPurifies, unsigned EltSizeInBits = 0) { IRBuilder<> IRB(&I); @@ -3945,7 +3971,8 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { assert(AccumulatorType == ReturnType); } - FixedVectorType *ImplicitReturnType = ReturnType; + FixedVectorType *ImplicitReturnType = + cast<FixedVectorType>(getShadowTy(ReturnType)); // Step 1: instrument multiplication of corresponding vector elements if (EltSizeInBits) { ImplicitReturnType = cast<FixedVectorType>( @@ -3964,30 +3991,40 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { ReturnType->getNumElements() * ReductionFactor); } - // Multiplying an *initialized* zero by an uninitialized element results in - // an initialized zero element. - // - // This is analogous to bitwise AND, where "AND" of 0 and a poisoned value - // results in an unpoisoned value. We can therefore adapt the visitAnd() - // instrumentation: - // OutShadow = (SaNonZero & SbNonZero) - // | (VaNonZero & SbNonZero) - // | (SaNonZero & VbNonZero) - // where non-zero is checked on a per-element basis (not per bit). - Value *SZero = Constant::getNullValue(Va->getType()); - Value *VZero = Constant::getNullValue(Sa->getType()); - Value *SaNonZero = IRB.CreateICmpNE(Sa, SZero); - Value *SbNonZero = IRB.CreateICmpNE(Sb, SZero); - Value *VaNonZero = IRB.CreateICmpNE(Va, VZero); - Value *VbNonZero = IRB.CreateICmpNE(Vb, VZero); - - Value *SaAndSbNonZero = IRB.CreateAnd(SaNonZero, SbNonZero); - Value *VaAndSbNonZero = IRB.CreateAnd(VaNonZero, SbNonZero); - Value *SaAndVbNonZero = IRB.CreateAnd(SaNonZero, VbNonZero); - // Each element of the vector is represented by a single bit (poisoned or // not) e.g., <8 x i1>. - Value *And = IRB.CreateOr({SaAndSbNonZero, VaAndSbNonZero, SaAndVbNonZero}); + Value *SaNonZero = IRB.CreateIsNotNull(Sa); + Value *SbNonZero = IRB.CreateIsNotNull(Sb); + Value *And; + if (ZeroPurifies) { + // Multiplying an *initialized* zero by an uninitialized element results + // in an initialized zero element. + // + // This is analogous to bitwise AND, where "AND" of 0 and a poisoned value + // results in an unpoisoned value. We can therefore adapt the visitAnd() + // instrumentation: + // OutShadow = (SaNonZero & SbNonZero) + // | (VaNonZero & SbNonZero) + // | (SaNonZero & VbNonZero) + // where non-zero is checked on a per-element basis (not per bit). + Value *VaInt = Va; + Value *VbInt = Vb; + if (!Va->getType()->isIntegerTy()) { + VaInt = CreateAppToShadowCast(IRB, Va); + VbInt = CreateAppToShadowCast(IRB, Vb); + } + + Value *VaNonZero = IRB.CreateIsNotNull(VaInt); + Value *VbNonZero = IRB.CreateIsNotNull(VbInt); + + Value *SaAndSbNonZero = IRB.CreateAnd(SaNonZero, SbNonZero); + Value *VaAndSbNonZero = IRB.CreateAnd(VaNonZero, SbNonZero); + Value *SaAndVbNonZero = IRB.CreateAnd(SaNonZero, VbNonZero); + + And = IRB.CreateOr({SaAndSbNonZero, VaAndSbNonZero, SaAndVbNonZero}); + } else { + And = IRB.CreateOr({SaNonZero, SbNonZero}); + } // Extend <8 x i1> to <8 x i16>. 
// (The real pmadd intrinsic would have computed intermediate values of @@ -5752,17 +5789,20 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { case Intrinsic::x86_ssse3_pmadd_ub_sw_128: case Intrinsic::x86_avx2_pmadd_ub_sw: case Intrinsic::x86_avx512_pmaddubs_w_512: - handleVectorPmaddIntrinsic(I, /*ReductionFactor=*/2); + handleVectorPmaddIntrinsic(I, /*ReductionFactor=*/2, + /*ZeroPurifies=*/true); break; // <1 x i64> @llvm.x86.ssse3.pmadd.ub.sw(<1 x i64>, <1 x i64>) case Intrinsic::x86_ssse3_pmadd_ub_sw: - handleVectorPmaddIntrinsic(I, /*ReductionFactor=*/2, /*EltSize=*/8); + handleVectorPmaddIntrinsic(I, /*ReductionFactor=*/2, + /*ZeroPurifies=*/true, /*EltSizeInBits=*/8); break; // <1 x i64> @llvm.x86.mmx.pmadd.wd(<1 x i64>, <1 x i64>) case Intrinsic::x86_mmx_pmadd_wd: - handleVectorPmaddIntrinsic(I, /*ReductionFactor=*/2, /*EltSize=*/16); + handleVectorPmaddIntrinsic(I, /*ReductionFactor=*/2, + /*ZeroPurifies=*/true, /*EltSizeInBits=*/16); break; // AVX Vector Neural Network Instructions: bytes @@ -5848,71 +5888,144 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { case Intrinsic::x86_avx2_vpdpbuuds_128: case Intrinsic::x86_avx2_vpdpbuuds_256: case Intrinsic::x86_avx10_vpdpbuuds_512: - handleVectorPmaddIntrinsic(I, /*ReductionFactor=*/4, /*EltSize=*/8); + handleVectorPmaddIntrinsic(I, /*ReductionFactor=*/4, + /*ZeroPurifies=*/true, /*EltSizeInBits=*/8); break; // AVX Vector Neural Network Instructions: words // // Multiply and Add Signed Word Integers // < 4 x i32> @llvm.x86.avx512.vpdpwssd.128 - // (< 4 x i32>, < 4 x i32>, < 4 x i32>) + // (< 4 x i32>, < 8 x i16>, < 8 x i16>) // < 8 x i32> @llvm.x86.avx512.vpdpwssd.256 - // (< 8 x i32>, < 8 x i32>, < 8 x i32>) + // (< 8 x i32>, <16 x i16>, <16 x i16>) // <16 x i32> @llvm.x86.avx512.vpdpwssd.512 - // (<16 x i32>, <16 x i32>, <16 x i32>) + // (<16 x i32>, <32 x i16>, <32 x i16>) // // Multiply and Add Signed Word Integers With Saturation // < 4 x i32> @llvm.x86.avx512.vpdpwssds.128 - // (< 4 x i32>, < 4 x i32>, < 4 x i32>) + // (< 4 x i32>, < 8 x i16>, < 8 x i16>) // < 8 x i32> @llvm.x86.avx512.vpdpwssds.256 - // (< 8 x i32>, < 8 x i32>, < 8 x i32>) + // (< 8 x i32>, <16 x i16>, <16 x i16>) // <16 x i32> @llvm.x86.avx512.vpdpwssds.512 - // (<16 x i32>, <16 x i32>, <16 x i32>) + // (<16 x i32>, <32 x i16>, <32 x i16>) + // + // Multiply and Add Signed and Unsigned Word Integers + // < 4 x i32> @llvm.x86.avx2.vpdpwsud.128 + // (< 4 x i32>, < 8 x i16>, < 8 x i16>) + // < 8 x i32> @llvm.x86.avx2.vpdpwsud.256 + // (< 8 x i32>, <16 x i16>, <16 x i16>) + // <16 x i32> @llvm.x86.avx10.vpdpwsud.512 + // (<16 x i32>, <32 x i16>, <32 x i16>) + // + // Multiply and Add Signed and Unsigned Word Integers With Saturation + // < 4 x i32> @llvm.x86.avx2.vpdpwsuds.128 + // (< 4 x i32>, < 8 x i16>, < 8 x i16>) + // < 8 x i32> @llvm.x86.avx2.vpdpwsuds.256 + // (< 8 x i32>, <16 x i16>, <16 x i16>) + // <16 x i32> @llvm.x86.avx10.vpdpwsuds.512 + // (<16 x i32>, <32 x i16>, <32 x i16>) + // + // Multiply and Add Unsigned and Signed Word Integers + // < 4 x i32> @llvm.x86.avx2.vpdpwusd.128 + // (< 4 x i32>, < 8 x i16>, < 8 x i16>) + // < 8 x i32> @llvm.x86.avx2.vpdpwusd.256 + // (< 8 x i32>, <16 x i16>, <16 x i16>) + // <16 x i32> @llvm.x86.avx10.vpdpwusd.512 + // (<16 x i32>, <32 x i16>, <32 x i16>) + // + // Multiply and Add Unsigned and Signed Word Integers With Saturation + // < 4 x i32> @llvm.x86.avx2.vpdpwusds.128 + // (< 4 x i32>, < 8 x i16>, < 8 x i16>) + // < 8 x i32> 
@llvm.x86.avx2.vpdpwusds.256 + // (< 8 x i32>, <16 x i16>, <16 x i16>) + // <16 x i32> @llvm.x86.avx10.vpdpwusds.512 + // (<16 x i32>, <32 x i16>, <32 x i16>) + // + // Multiply and Add Unsigned and Unsigned Word Integers + // < 4 x i32> @llvm.x86.avx2.vpdpwuud.128 + // (< 4 x i32>, < 8 x i16>, < 8 x i16>) + // < 8 x i32> @llvm.x86.avx2.vpdpwuud.256 + // (< 8 x i32>, <16 x i16>, <16 x i16>) + // <16 x i32> @llvm.x86.avx10.vpdpwuud.512 + // (<16 x i32>, <32 x i16>, <32 x i16>) + // + // Multiply and Add Unsigned and Unsigned Word Integers With Saturation + // < 4 x i32> @llvm.x86.avx2.vpdpwuuds.128 + // (< 4 x i32>, < 8 x i16>, < 8 x i16>) + // < 8 x i32> @llvm.x86.avx2.vpdpwuuds.256 + // (< 8 x i32>, <16 x i16>, <16 x i16>) + // <16 x i32> @llvm.x86.avx10.vpdpwuuds.512 + // (<16 x i32>, <32 x i16>, <32 x i16>) // // These intrinsics are auto-upgraded into non-masked forms: // <4 x i32> @llvm.x86.avx512.mask.vpdpwssd.128 - // (<4 x i32>, <4 x i32>, <4 x i32>, i8) + // (<4 x i32>, <8 x i16>, <8 x i16>, i8) // <4 x i32> @llvm.x86.avx512.maskz.vpdpwssd.128 - // (<4 x i32>, <4 x i32>, <4 x i32>, i8) + // (<4 x i32>, <8 x i16>, <8 x i16>, i8) // <8 x i32> @llvm.x86.avx512.mask.vpdpwssd.256 - // (<8 x i32>, <8 x i32>, <8 x i32>, i8) + // (<8 x i32>, <16 x i16>, <16 x i16>, i8) // <8 x i32> @llvm.x86.avx512.maskz.vpdpwssd.256 - // (<8 x i32>, <8 x i32>, <8 x i32>, i8) + // (<8 x i32>, <16 x i16>, <16 x i16>, i8) // <16 x i32> @llvm.x86.avx512.mask.vpdpwssd.512 - // (<16 x i32>, <16 x i32>, <16 x i32>, i16) + // (<16 x i32>, <32 x i16>, <32 x i16>, i16) // <16 x i32> @llvm.x86.avx512.maskz.vpdpwssd.512 - // (<16 x i32>, <16 x i32>, <16 x i32>, i16) + // (<16 x i32>, <32 x i16>, <32 x i16>, i16) // // <4 x i32> @llvm.x86.avx512.mask.vpdpwssds.128 - // (<4 x i32>, <4 x i32>, <4 x i32>, i8) + // (<4 x i32>, <8 x i16>, <8 x i16>, i8) // <4 x i32> @llvm.x86.avx512.maskz.vpdpwssds.128 - // (<4 x i32>, <4 x i32>, <4 x i32>, i8) + // (<4 x i32>, <8 x i16>, <8 x i16>, i8) // <8 x i32> @llvm.x86.avx512.mask.vpdpwssds.256 - // (<8 x i32>, <8 x i32>, <8 x i32>, i8) + // (<8 x i32>, <16 x i16>, <16 x i16>, i8) // <8 x i32> @llvm.x86.avx512.maskz.vpdpwssds.256 - // (<8 x i32>, <8 x i32>, <8 x i32>, i8) + // (<8 x i32>, <16 x i16>, <16 x i16>, i8) // <16 x i32> @llvm.x86.avx512.mask.vpdpwssds.512 - // (<16 x i32>, <16 x i32>, <16 x i32>, i16) + // (<16 x i32>, <32 x i16>, <32 x i16>, i16) // <16 x i32> @llvm.x86.avx512.maskz.vpdpwssds.512 - // (<16 x i32>, <16 x i32>, <16 x i32>, i16) + // (<16 x i32>, <32 x i16>, <32 x i16>, i16) case Intrinsic::x86_avx512_vpdpwssd_128: case Intrinsic::x86_avx512_vpdpwssd_256: case Intrinsic::x86_avx512_vpdpwssd_512: case Intrinsic::x86_avx512_vpdpwssds_128: case Intrinsic::x86_avx512_vpdpwssds_256: case Intrinsic::x86_avx512_vpdpwssds_512: - handleVectorPmaddIntrinsic(I, /*ReductionFactor=*/2, /*EltSize=*/16); + case Intrinsic::x86_avx2_vpdpwsud_128: + case Intrinsic::x86_avx2_vpdpwsud_256: + case Intrinsic::x86_avx10_vpdpwsud_512: + case Intrinsic::x86_avx2_vpdpwsuds_128: + case Intrinsic::x86_avx2_vpdpwsuds_256: + case Intrinsic::x86_avx10_vpdpwsuds_512: + case Intrinsic::x86_avx2_vpdpwusd_128: + case Intrinsic::x86_avx2_vpdpwusd_256: + case Intrinsic::x86_avx10_vpdpwusd_512: + case Intrinsic::x86_avx2_vpdpwusds_128: + case Intrinsic::x86_avx2_vpdpwusds_256: + case Intrinsic::x86_avx10_vpdpwusds_512: + case Intrinsic::x86_avx2_vpdpwuud_128: + case Intrinsic::x86_avx2_vpdpwuud_256: + case Intrinsic::x86_avx10_vpdpwuud_512: + case Intrinsic::x86_avx2_vpdpwuuds_128: + case 
Intrinsic::x86_avx2_vpdpwuuds_256: + case Intrinsic::x86_avx10_vpdpwuuds_512: + handleVectorPmaddIntrinsic(I, /*ReductionFactor=*/2, + /*ZeroPurifies=*/true, /*EltSizeInBits=*/16); break; - // TODO: Dot Product of BF16 Pairs Accumulated Into Packed Single - // Precision - // <4 x float> @llvm.x86.avx512bf16.dpbf16ps.128 - // (<4 x float>, <8 x bfloat>, <8 x bfloat>) - // <8 x float> @llvm.x86.avx512bf16.dpbf16ps.256 - // (<8 x float>, <16 x bfloat>, <16 x bfloat>) - // <16 x float> @llvm.x86.avx512bf16.dpbf16ps.512 - // (<16 x float>, <32 x bfloat>, <32 x bfloat>) - // handleVectorPmaddIntrinsic() currently only handles integer types. + // Dot Product of BF16 Pairs Accumulated Into Packed Single + // Precision + // <4 x float> @llvm.x86.avx512bf16.dpbf16ps.128 + // (<4 x float>, <8 x bfloat>, <8 x bfloat>) + // <8 x float> @llvm.x86.avx512bf16.dpbf16ps.256 + // (<8 x float>, <16 x bfloat>, <16 x bfloat>) + // <16 x float> @llvm.x86.avx512bf16.dpbf16ps.512 + // (<16 x float>, <32 x bfloat>, <32 x bfloat>) + case Intrinsic::x86_avx512bf16_dpbf16ps_128: + case Intrinsic::x86_avx512bf16_dpbf16ps_256: + case Intrinsic::x86_avx512bf16_dpbf16ps_512: + handleVectorPmaddIntrinsic(I, /*ReductionFactor=*/2, + /*ZeroPurifies=*/false); + break; case Intrinsic::x86_sse_cmp_ss: case Intrinsic::x86_sse2_cmp_sd: @@ -6010,48 +6123,62 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { // Packed Horizontal Add/Subtract case Intrinsic::x86_ssse3_phadd_w: case Intrinsic::x86_ssse3_phadd_w_128: - case Intrinsic::x86_avx2_phadd_w: case Intrinsic::x86_ssse3_phsub_w: case Intrinsic::x86_ssse3_phsub_w_128: - case Intrinsic::x86_avx2_phsub_w: { - handlePairwiseShadowOrIntrinsic(I, /*ReinterpretElemWidth=*/16); + handlePairwiseShadowOrIntrinsic(I, /*Shards=*/1, + /*ReinterpretElemWidth=*/16); + break; + + case Intrinsic::x86_avx2_phadd_w: + case Intrinsic::x86_avx2_phsub_w: + handlePairwiseShadowOrIntrinsic(I, /*Shards=*/2, + /*ReinterpretElemWidth=*/16); break; - } // Packed Horizontal Add/Subtract case Intrinsic::x86_ssse3_phadd_d: case Intrinsic::x86_ssse3_phadd_d_128: - case Intrinsic::x86_avx2_phadd_d: case Intrinsic::x86_ssse3_phsub_d: case Intrinsic::x86_ssse3_phsub_d_128: - case Intrinsic::x86_avx2_phsub_d: { - handlePairwiseShadowOrIntrinsic(I, /*ReinterpretElemWidth=*/32); + handlePairwiseShadowOrIntrinsic(I, /*Shards=*/1, + /*ReinterpretElemWidth=*/32); + break; + + case Intrinsic::x86_avx2_phadd_d: + case Intrinsic::x86_avx2_phsub_d: + handlePairwiseShadowOrIntrinsic(I, /*Shards=*/2, + /*ReinterpretElemWidth=*/32); break; - } // Packed Horizontal Add/Subtract and Saturate case Intrinsic::x86_ssse3_phadd_sw: case Intrinsic::x86_ssse3_phadd_sw_128: - case Intrinsic::x86_avx2_phadd_sw: case Intrinsic::x86_ssse3_phsub_sw: case Intrinsic::x86_ssse3_phsub_sw_128: - case Intrinsic::x86_avx2_phsub_sw: { - handlePairwiseShadowOrIntrinsic(I, /*ReinterpretElemWidth=*/16); + handlePairwiseShadowOrIntrinsic(I, /*Shards=*/1, + /*ReinterpretElemWidth=*/16); + break; + + case Intrinsic::x86_avx2_phadd_sw: + case Intrinsic::x86_avx2_phsub_sw: + handlePairwiseShadowOrIntrinsic(I, /*Shards=*/2, + /*ReinterpretElemWidth=*/16); break; - } // Packed Single/Double Precision Floating-Point Horizontal Add case Intrinsic::x86_sse3_hadd_ps: case Intrinsic::x86_sse3_hadd_pd: - case Intrinsic::x86_avx_hadd_pd_256: - case Intrinsic::x86_avx_hadd_ps_256: case Intrinsic::x86_sse3_hsub_ps: case Intrinsic::x86_sse3_hsub_pd: + handlePairwiseShadowOrIntrinsic(I, /*Shards=*/1); + break; + + case 
Intrinsic::x86_avx_hadd_pd_256: + case Intrinsic::x86_avx_hadd_ps_256: case Intrinsic::x86_avx_hsub_pd_256: - case Intrinsic::x86_avx_hsub_ps_256: { - handlePairwiseShadowOrIntrinsic(I); + case Intrinsic::x86_avx_hsub_ps_256: + handlePairwiseShadowOrIntrinsic(I, /*Shards=*/2); break; - } case Intrinsic::x86_avx_maskstore_ps: case Intrinsic::x86_avx_maskstore_pd: @@ -6434,7 +6561,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { // Add Long Pairwise case Intrinsic::aarch64_neon_saddlp: case Intrinsic::aarch64_neon_uaddlp: { - handlePairwiseShadowOrIntrinsic(I); + handlePairwiseShadowOrIntrinsic(I, /*Shards=*/1); break; } diff --git a/llvm/lib/Transforms/Instrumentation/NumericalStabilitySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/NumericalStabilitySanitizer.cpp index 80e77e09..66d570b 100644 --- a/llvm/lib/Transforms/Instrumentation/NumericalStabilitySanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/NumericalStabilitySanitizer.cpp @@ -161,7 +161,7 @@ template <char NsanTypeId> class ShadowTypeConfigImpl : public ShadowTypeConfig { public: char getNsanTypeId() const override { return NsanTypeId; } - static constexpr const char kNsanTypeId = NsanTypeId; + static constexpr char kNsanTypeId = NsanTypeId; }; // `double` (`d`) shadow type. @@ -811,7 +811,7 @@ static bool shouldCheckArgs(CallBase &CI, const TargetLibraryInfo &TLI, return false; const auto ID = Fn->getIntrinsicID(); - LibFunc LFunc = LibFunc::NumLibFuncs; + LibFunc LFunc = LibFunc::NotLibFunc; // Always check args of unknown functions. if (ID == Intrinsic::ID() && !TLI.getLibFunc(*Fn, LFunc)) return true; diff --git a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp index af53fa0..02f06be 100644 --- a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp +++ b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp @@ -734,7 +734,7 @@ void FuncPGOInstrumentation<Edge, BBInfo>::computeCFGHash() { FunctionHash = (((uint64_t)JCH.getCRC()) << 28) + JC.getCRC(); // Reserve bit 60-63 for other information purpose. - FunctionHash &= 0x0FFFFFFFFFFFFFFF; + FunctionHash &= NamedInstrProfRecord::FUNC_HASH_MASK; if (IsCS) NamedInstrProfRecord::setCSFlagInHash(FunctionHash); LLVM_DEBUG(dbgs() << "Function Hash Computation for " << F.getName() << ":\n" diff --git a/llvm/lib/Transforms/Instrumentation/RealtimeSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/RealtimeSanitizer.cpp index 5ef6ffb..667fdb7 100644 --- a/llvm/lib/Transforms/Instrumentation/RealtimeSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/RealtimeSanitizer.cpp @@ -90,6 +90,9 @@ PreservedAnalyses RealtimeSanitizerPass::run(Module &M, [&](Function *Ctor, FunctionCallee) { appendToGlobalCtors(M, Ctor, 0); }); for (Function &F : M) { + if (F.empty()) + continue; + if (F.hasFnAttribute(Attribute::SanitizeRealtime)) runSanitizeRealtime(F); diff --git a/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp b/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp index 09abf6a..d72d216 100644 --- a/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp +++ b/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp @@ -1226,7 +1226,7 @@ void ModuleSanitizerCoverage::createFunctionControlFlow(Function &F) { if (CB->isIndirectCall()) { // TODO(navidem): handle indirect calls, for now mark its existence. 
CFs.push_back((Constant *)IRB.CreateIntToPtr( - ConstantInt::get(IntptrTy, -1), PtrTy)); + ConstantInt::getAllOnesValue(IntptrTy), PtrTy)); } else { auto CalledF = CB->getCalledFunction(); if (CalledF && !CalledF->isIntrinsic()) diff --git a/llvm/lib/Transforms/Instrumentation/TypeSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/TypeSanitizer.cpp index 87eba5f..1c91d83 100644 --- a/llvm/lib/Transforms/Instrumentation/TypeSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/TypeSanitizer.cpp @@ -62,7 +62,7 @@ static cl::opt<bool> ClOutlineInstrumentation( "tysan-outline-instrumentation", cl::desc("Uses function calls for all TySan instrumentation, reducing " "ELF size"), - cl::Hidden, cl::init(false)); + cl::Hidden, cl::init(true)); static cl::opt<bool> ClVerifyOutlinedInstrumentation( "tysan-verify-outlined-instrumentation", |
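
Two of the MemorySanitizer changes above are easier to follow with small worked examples. Both sketches below are illustrative only; the main() drivers and helper names are not part of the patch.

First, the Shards parameter added to horizontalReduce(): for the two-operand case with NumElems = 8, ReductionFactor = 2 and Shards = 2, the mask-building loops from the hunk, run standalone, produce exactly the interleaved ordering quoted in the comment.

#include <cstdio>
#include <vector>

// Mirrors the mask-building loops added to horizontalReduce(): for each i in
// [0, ReductionFactor), pick the i-th element of every group, walking each
// shard of A and then the matching shard of B. MSan ORs the resulting
// ReductionFactor shuffles together.
int main() {
  const unsigned NumElems = 8, ReductionFactor = 2, Shards = 2;
  for (unsigned i = 0; i < ReductionFactor; i++) {
    std::vector<int> Mask;
    for (unsigned j = 0; j < Shards; j++) {
      unsigned Offset = NumElems / Shards * j;
      for (unsigned X = 0; X < NumElems / Shards; X += ReductionFactor)
        Mask.push_back(Offset + X + i);            // elements of A
      for (unsigned X = 0; X < NumElems / Shards; X += ReductionFactor)
        Mask.push_back(NumElems + Offset + X + i); // elements of B
    }
    std::printf("mask %u:", i);
    for (int M : Mask)
      std::printf(" %d", M);
    std::printf("\n");
  }
  // Prints:
  //   mask 0: 0 2 8 10 4 6 12 14
  //   mask 1: 1 3 9 11 5 7 13 15
  // ORing the two shuffles element-wise gives
  // <a0|a1, a2|a3, b0|b1, b2|b3, a4|a5, a6|a7, b4|b5, b6|b7>,
  // matching the Shards = 2 example in the comment.
}

Second, the ZeroPurifies flag added to handleVectorPmaddIntrinsic(): the per-element OutShadow formula quoted in the hunk can be checked in isolation. productShadow() below is a hypothetical helper written only to demonstrate that formula.

#include <cassert>
#include <cstdint>

// Per-element shadow of a product feeding a horizontal add. When ZeroPurifies
// is true (integer multiply), a known-initialized zero operand purifies the
// result; when it is false (e.g. the bf16 dot products, where 0 * NaN == NaN),
// any uninitialized operand poisons the result.
static bool productShadow(uint64_t Va, uint64_t Sa, uint64_t Vb, uint64_t Sb,
                          bool ZeroPurifies) {
  bool SaNonZero = Sa != 0, SbNonZero = Sb != 0;
  if (!ZeroPurifies)
    return SaNonZero || SbNonZero;
  bool VaNonZero = Va != 0, VbNonZero = Vb != 0;
  return (SaNonZero && SbNonZero) || (VaNonZero && SbNonZero) ||
         (SaNonZero && VbNonZero);
}

int main() {
  // Integer case: an initialized zero times a fully uninitialized element is
  // an initialized (zero) element.
  assert(!productShadow(/*Va=*/0, /*Sa=*/0, /*Vb=*/123, /*Sb=*/~0ull, true));
  // Floating-point case: the same operands stay poisoned.
  assert(productShadow(0, 0, 123, ~0ull, false));
}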
