Diffstat (limited to 'llvm')
29 files changed, 2117 insertions, 584 deletions
diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index 8e86393..22b58bf 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -1489,6 +1489,8 @@ Currently, only the following parameter attributes are defined: function, returning a pointer to allocated storage disjoint from the storage for any other object accessible to the caller. +.. _captures_attr: + ``captures(...)`` This attribute restricts the ways in which the callee may capture the pointer. This is not a valid attribute for return values. This attribute @@ -7543,6 +7545,33 @@ The number of bytes known to be dereferenceable is specified by the integer value in the metadata node. This is analogous to the ''dereferenceable_or_null'' attribute on parameters and return values. +'``captures``' Metadata +^^^^^^^^^^^^^^^^^^^^^^^ + +The ``!captures`` metadata can only be applied to ``store`` instructions with +a pointer-typed value operand. It restricts the capturing behavior of the store +value operand in the same way the ``captures(...)`` attribute would on a +call. See the :ref:`pointer capture section <pointercapture>` for a detailed +discussion of capture semantics. + +The ``!captures`` metadata accepts a non-empty list of strings from the same +set as the :ref:`captures attribute <captures_attr>`: +``!"address"``, ``!"address_is_null"``, ``!"provenance"`` and +``!"read_provenance"``. ``!"none"`` is not supported. + +For example, ``store ptr %x, ptr %y, !captures !{!"address"}`` indicates that +the copy of pointer ``%x`` stored to location ``%y`` will only be used to +inspect its integral address value, and not dereferenced. Dereferencing the +pointer would result in undefined behavior. + +Similarly, ``store ptr %x, ptr %y, !captures !{!"address", !"read_provenance"}`` +indicates that while reads through the stored pointer are allowed, writes would +result in undefined behavior. + +The ``!captures`` metadata makes no statement about other uses of ``%x``, or +uses of the stored-to memory location after it has been overwritten with a +different value. + .. _llvm.loop: '``llvm.loop``' diff --git a/llvm/include/llvm/ADT/SmallVector.h b/llvm/include/llvm/ADT/SmallVector.h index 77805f5..efae6f3 100644 --- a/llvm/include/llvm/ADT/SmallVector.h +++ b/llvm/include/llvm/ADT/SmallVector.h @@ -502,25 +502,22 @@ protected: /// Copy the range [I, E) onto the uninitialized memory /// starting with "Dest", constructing elements into it as needed. - template<typename It1, typename It2> + template <typename It1, typename It2> static void uninitialized_copy(It1 I, It1 E, It2 Dest) { - // Arbitrary iterator types; just use the basic implementation. - std::uninitialized_copy(I, E, Dest); - } - - /// Copy the range [I, E) onto the uninitialized memory - /// starting with "Dest", constructing elements into it as needed. - template <typename T1, typename T2> - static void uninitialized_copy( - T1 *I, T1 *E, T2 *Dest, - std::enable_if_t<std::is_same<std::remove_const_t<T1>, T2>::value> * = - nullptr) { - // Use memcpy for PODs iterated by pointers (which includes SmallVector - // iterators): std::uninitialized_copy optimizes to memmove, but we can - // use memcpy here. Note that I and E are iterators and thus might be - // invalid for memcpy if they are equal.
- if (I != E) - std::memcpy(reinterpret_cast<void *>(Dest), I, (E - I) * sizeof(T)); + if constexpr (std::is_pointer_v<It1> && std::is_pointer_v<It2> && + std::is_same_v< + std::remove_const_t<std::remove_pointer_t<It1>>, + std::remove_pointer_t<It2>>) { + // Use memcpy for PODs iterated by pointers (which includes SmallVector + // iterators): std::uninitialized_copy optimizes to memmove, but we can + // use memcpy here. Note that I and E are iterators and thus might be + // invalid for memcpy if they are equal. + if (I != E) + std::memcpy(reinterpret_cast<void *>(Dest), I, (E - I) * sizeof(T)); + } else { + // Arbitrary iterator types; just use the basic implementation. + std::uninitialized_copy(I, E, Dest); + } } /// Double the size of the allocated memory, guaranteeing space for at diff --git a/llvm/include/llvm/IR/FixedMetadataKinds.def b/llvm/include/llvm/IR/FixedMetadataKinds.def index d09cc15..0603abc 100644 --- a/llvm/include/llvm/IR/FixedMetadataKinds.def +++ b/llvm/include/llvm/IR/FixedMetadataKinds.def @@ -55,3 +55,4 @@ LLVM_FIXED_MD_KIND(MD_mmra, "mmra", 40) LLVM_FIXED_MD_KIND(MD_noalias_addrspace, "noalias.addrspace", 41) LLVM_FIXED_MD_KIND(MD_callee_type, "callee_type", 42) LLVM_FIXED_MD_KIND(MD_nofree, "nofree", 43) +LLVM_FIXED_MD_KIND(MD_captures, "captures", 44) diff --git a/llvm/include/llvm/IR/Metadata.h b/llvm/include/llvm/IR/Metadata.h index 990bdc6..85a7f8f 100644 --- a/llvm/include/llvm/IR/Metadata.h +++ b/llvm/include/llvm/IR/Metadata.h @@ -41,6 +41,7 @@ namespace llvm { +enum class CaptureComponents : uint8_t; class Module; class ModuleSlotTracker; class raw_ostream; @@ -1480,6 +1481,13 @@ public: LLVM_ABI static MDNode *getMergedCallsiteMetadata(MDNode *A, MDNode *B); LLVM_ABI static MDNode *getMergedCalleeTypeMetadata(const MDNode *A, const MDNode *B); + + /// Convert !captures metadata to CaptureComponents. MD may be nullptr. + LLVM_ABI static CaptureComponents toCaptureComponents(const MDNode *MD); + /// Convert CaptureComponents to !captures metadata. The return value may be + /// nullptr. + LLVM_ABI static MDNode *fromCaptureComponents(LLVMContext &Ctx, + CaptureComponents CC); }; /// Tuple of metadata. diff --git a/llvm/lib/Analysis/CaptureTracking.cpp b/llvm/lib/Analysis/CaptureTracking.cpp index a0fe7f9..22229d9 100644 --- a/llvm/lib/Analysis/CaptureTracking.cpp +++ b/llvm/lib/Analysis/CaptureTracking.cpp @@ -320,8 +320,12 @@ UseCaptureInfo llvm::DetermineUseCaptureKind(const Use &U, const Value *Base) { return CaptureComponents::None; case Instruction::Store: // Stored the pointer - conservatively assume it may be captured. + if (U.getOperandNo() == 0) + return MDNode::toCaptureComponents( + I->getMetadata(LLVMContext::MD_captures)); + // Volatile stores make the address observable. 
- if (U.getOperandNo() == 0 || cast<StoreInst>(I)->isVolatile()) + if (cast<StoreInst>(I)->isVolatile()) return CaptureComponents::All; return CaptureComponents::None; case Instruction::AtomicRMW: { diff --git a/llvm/lib/Analysis/LazyValueInfo.cpp b/llvm/lib/Analysis/LazyValueInfo.cpp index 6fb2807..0e5bc48 100644 --- a/llvm/lib/Analysis/LazyValueInfo.cpp +++ b/llvm/lib/Analysis/LazyValueInfo.cpp @@ -1632,19 +1632,25 @@ LazyValueInfoImpl::getEdgeValueLocal(Value *Val, BasicBlock *BBFrom, *getValueFromCondition(Usr->getOperand(0), Condition, isTrueDest, /*UseBlockValue*/ false); - if (!OpLatticeVal.isConstantRange()) - return OpLatticeVal; + if (OpLatticeVal.isConstantRange()) { + const unsigned ResultBitWidth = + Usr->getType()->getScalarSizeInBits(); + if (auto *Trunc = dyn_cast<TruncInst>(Usr)) + return ValueLatticeElement::getRange( + OpLatticeVal.getConstantRange().truncate( + ResultBitWidth, Trunc->getNoWrapKind())); - const unsigned ResultBitWidth = - Usr->getType()->getScalarSizeInBits(); - if (auto *Trunc = dyn_cast<TruncInst>(Usr)) return ValueLatticeElement::getRange( - OpLatticeVal.getConstantRange().truncate( - ResultBitWidth, Trunc->getNoWrapKind())); - - return ValueLatticeElement::getRange( - OpLatticeVal.getConstantRange().castOp( - cast<CastInst>(Usr)->getOpcode(), ResultBitWidth)); + OpLatticeVal.getConstantRange().castOp( + cast<CastInst>(Usr)->getOpcode(), ResultBitWidth)); + } + if (OpLatticeVal.isConstant()) { + Constant *C = OpLatticeVal.getConstant(); + if (auto *CastC = ConstantFoldCastOperand( + cast<CastInst>(Usr)->getOpcode(), C, Usr->getType(), DL)) + return ValueLatticeElement::get(CastC); + } + return ValueLatticeElement::getOverdefined(); } else { // If one of Val's operand has an inferred value, we may be able to // infer the value of Val. 
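For reference, the new attachment can be seen end to end in a short IR sketch (the function and value names here are illustrative, not part of this patch). Per the LangRef text above, the store publishes only the integral address of %p, so the updated DetermineUseCaptureKind reports CaptureComponents::Address for the store's value operand instead of CaptureComponents::All:

define void @stash_address(ptr %p, ptr %slot) {
  ; Only the address of %p escapes through this store; dereferencing the
  ; stored copy would be undefined behavior. The verifier change below
  ; rejects empty lists and unknown strings.
  store ptr %p, ptr %slot, !captures !0
  ret void
}

!0 = !{!"address"}

When two stores carrying !captures metadata are merged, the Local.cpp hunk later in this patch unions their component sets via MDNode::toCaptureComponents and MDNode::fromCaptureComponents.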
diff --git a/llvm/lib/IR/Metadata.cpp b/llvm/lib/IR/Metadata.cpp index 9cfb0ff..1add0c7 100644 --- a/llvm/lib/IR/Metadata.cpp +++ b/llvm/lib/IR/Metadata.cpp @@ -48,6 +48,7 @@ #include "llvm/Support/Casting.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" +#include "llvm/Support/ModRef.h" #include <cassert> #include <cstddef> #include <cstdint> @@ -1435,6 +1436,40 @@ MDNode *MDNode::getMostGenericAlignmentOrDereferenceable(MDNode *A, MDNode *B) { return B; } +CaptureComponents MDNode::toCaptureComponents(const MDNode *MD) { + if (!MD) + return CaptureComponents::All; + + CaptureComponents CC = CaptureComponents::None; + for (Metadata *Op : MD->operands()) { + CaptureComponents Component = + StringSwitch<CaptureComponents>(cast<MDString>(Op)->getString()) + .Case("address", CaptureComponents::Address) + .Case("address_is_null", CaptureComponents::AddressIsNull) + .Case("provenance", CaptureComponents::Provenance) + .Case("read_provenance", CaptureComponents::ReadProvenance); + CC |= Component; + } + return CC; +} + +MDNode *MDNode::fromCaptureComponents(LLVMContext &Ctx, CaptureComponents CC) { + assert(!capturesNothing(CC) && "Can't encode captures(none)"); + if (capturesAll(CC)) + return nullptr; + + SmallVector<Metadata *> Components; + if (capturesAddressIsNullOnly(CC)) + Components.push_back(MDString::get(Ctx, "address_is_null")); + else if (capturesAddress(CC)) + Components.push_back(MDString::get(Ctx, "address")); + if (capturesReadProvenanceOnly(CC)) + Components.push_back(MDString::get(Ctx, "read_provenance")); + else if (capturesFullProvenance(CC)) + Components.push_back(MDString::get(Ctx, "provenance")); + return MDNode::get(Ctx, Components); +} + //===----------------------------------------------------------------------===// // NamedMDNode implementation. // diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp index 8c03d6f..6b3cd27 100644 --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -542,6 +542,7 @@ private: void visitAliasScopeMetadata(const MDNode *MD); void visitAliasScopeListMetadata(const MDNode *MD); void visitAccessGroupMetadata(const MDNode *MD); + void visitCapturesMetadata(Instruction &I, const MDNode *Captures); template <class Ty> bool isValidMetadataArray(const MDTuple &N); #define HANDLE_SPECIALIZED_MDNODE_LEAF(CLASS) void visit##CLASS(const CLASS &N); @@ -5373,6 +5374,27 @@ void Verifier::visitAccessGroupMetadata(const MDNode *MD) { } } +void Verifier::visitCapturesMetadata(Instruction &I, const MDNode *Captures) { + static const char *ValidArgs[] = {"address_is_null", "address", + "read_provenance", "provenance"}; + + auto *SI = dyn_cast<StoreInst>(&I); + Check(SI, "!captures metadata can only be applied to store instructions", &I); + Check(SI->getValueOperand()->getType()->isPointerTy(), + "!captures metadata can only be applied to store with value operand of " + "pointer type", + &I); + Check(Captures->getNumOperands() != 0, "!captures metadata cannot be empty", + &I); + + for (Metadata *Op : Captures->operands()) { + auto *Str = dyn_cast<MDString>(Op); + Check(Str, "!captures metadata must be a list of strings", &I); + Check(is_contained(ValidArgs, Str->getString()), + "invalid entry in !captures metadata", &I, Str); + } +} + /// verifyInstruction - Verify that an instruction is well formed. 
/// void Verifier::visitInstruction(Instruction &I) { @@ -5600,6 +5622,9 @@ void Verifier::visitInstruction(Instruction &I) { if (MDNode *Annotation = I.getMetadata(LLVMContext::MD_annotation)) visitAnnotationMetadata(Annotation); + if (MDNode *Captures = I.getMetadata(LLVMContext::MD_captures)) + visitCapturesMetadata(I, Captures); + if (MDNode *N = I.getDebugLoc().getAsMDNode()) { CheckDI(isa<DILocation>(N), "invalid !dbg metadata attachment", &I, N); visitMDNode(*N, AreDebugLocsAllowed::Yes); diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp index f291191..3f9a1f4 100644 --- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp +++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp @@ -495,13 +495,6 @@ public: bool isVMEMOrFlatVMEM(const MachineInstr &MI) const; bool run(MachineFunction &MF); - bool isForceEmitWaitcnt() const { - for (auto T : inst_counter_types()) - if (ForceEmitWaitcnt[T]) - return true; - return false; - } - void setForceEmitWaitcnt() { // For non-debug builds, ForceEmitWaitcnt has been initialized to false; // For debug builds, get the debug counter info and adjust if need be @@ -570,10 +563,6 @@ public: return VmemReadMapping[getVmemType(Inst)]; } - bool hasXcnt() const { return ST->hasWaitXCnt(); } - - bool mayAccessVMEMThroughFlat(const MachineInstr &MI) const; - bool mayAccessLDSThroughFlat(const MachineInstr &MI) const; bool isVmemAccess(const MachineInstr &MI) const; bool generateWaitcntInstBefore(MachineInstr &MI, WaitcntBrackets &ScoreBrackets, @@ -591,7 +580,6 @@ public: WaitcntBrackets &ScoreBrackets); bool insertWaitcntInBlock(MachineFunction &MF, MachineBasicBlock &Block, WaitcntBrackets &ScoreBrackets); - static bool asynchronouslyWritesSCC(unsigned Opcode); }; // This objects maintains the current score brackets of each wait counter, and @@ -1109,7 +1097,7 @@ void WaitcntBrackets::updateByEvent(const SIInstrInfo *TII, setRegScore(FIRST_LDS_VGPR, T, CurrScore); } - if (Context->asynchronouslyWritesSCC(Inst.getOpcode())) { + if (SIInstrInfo::isSBarrierSCCWrite(Inst.getOpcode())) { setRegScore(SCC, T, CurrScore); PendingSCCWrite = &Inst; } @@ -1831,12 +1819,6 @@ bool WaitcntGeneratorGFX12Plus::createNewWaitcnt( return Modified; } -static bool readsVCCZ(const MachineInstr &MI) { - unsigned Opc = MI.getOpcode(); - return (Opc == AMDGPU::S_CBRANCH_VCCNZ || Opc == AMDGPU::S_CBRANCH_VCCZ) && - !MI.getOperand(1).isUndef(); -} - /// \returns true if the callee inserts an s_waitcnt 0 on function entry. static bool callWaitsOnFunctionEntry(const MachineInstr &MI) { // Currently all conventions wait, but this may not always be the case. @@ -1871,26 +1853,24 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(MachineInstr &MI, assert(!MI.isMetaInstruction()); AMDGPU::Waitcnt Wait; + const unsigned Opc = MI.getOpcode(); // FIXME: This should have already been handled by the memory legalizer. // Removing this currently doesn't affect any lit tests, but we need to // verify that nothing was relying on this. The number of buffer invalidates // being handled here should not be expanded. 
- if (MI.getOpcode() == AMDGPU::BUFFER_WBINVL1 || - MI.getOpcode() == AMDGPU::BUFFER_WBINVL1_SC || - MI.getOpcode() == AMDGPU::BUFFER_WBINVL1_VOL || - MI.getOpcode() == AMDGPU::BUFFER_GL0_INV || - MI.getOpcode() == AMDGPU::BUFFER_GL1_INV) { + if (Opc == AMDGPU::BUFFER_WBINVL1 || Opc == AMDGPU::BUFFER_WBINVL1_SC || + Opc == AMDGPU::BUFFER_WBINVL1_VOL || Opc == AMDGPU::BUFFER_GL0_INV || + Opc == AMDGPU::BUFFER_GL1_INV) { Wait.LoadCnt = 0; } // All waits must be resolved at call return. // NOTE: this could be improved with knowledge of all call sites or // with knowledge of the called routines. - if (MI.getOpcode() == AMDGPU::SI_RETURN_TO_EPILOG || - MI.getOpcode() == AMDGPU::SI_RETURN || - MI.getOpcode() == AMDGPU::SI_WHOLE_WAVE_FUNC_RETURN || - MI.getOpcode() == AMDGPU::S_SETPC_B64_return || + if (Opc == AMDGPU::SI_RETURN_TO_EPILOG || Opc == AMDGPU::SI_RETURN || + Opc == AMDGPU::SI_WHOLE_WAVE_FUNC_RETURN || + Opc == AMDGPU::S_SETPC_B64_return || (MI.isReturn() && MI.isCall() && !callWaitsOnFunctionEntry(MI))) { Wait = Wait.combined(WCG->getAllZeroWaitcnt(/*IncludeVSCnt=*/false)); } @@ -1902,8 +1882,7 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(MachineInstr &MI, // send a message to explicitly release all VGPRs before the stores have // completed, but it is only safe to do this if there are no outstanding // scratch stores. - else if (MI.getOpcode() == AMDGPU::S_ENDPGM || - MI.getOpcode() == AMDGPU::S_ENDPGM_SAVED) { + else if (Opc == AMDGPU::S_ENDPGM || Opc == AMDGPU::S_ENDPGM_SAVED) { if (!WCG->isOptNone() && (MI.getMF()->getInfo<SIMachineFunctionInfo>()->isDynamicVGPREnabled() || (ST->getGeneration() >= AMDGPUSubtarget::GFX11 && @@ -1912,8 +1891,7 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(MachineInstr &MI, ReleaseVGPRInsts.insert(&MI); } // Resolve vm waits before gs-done. - else if ((MI.getOpcode() == AMDGPU::S_SENDMSG || - MI.getOpcode() == AMDGPU::S_SENDMSGHALT) && + else if ((Opc == AMDGPU::S_SENDMSG || Opc == AMDGPU::S_SENDMSGHALT) && ST->hasLegacyGeometry() && ((MI.getOperand(0).getImm() & AMDGPU::SendMsg::ID_MASK_PreGFX11_) == AMDGPU::SendMsg::ID_GS_DONE_PreGFX11)) { @@ -1938,7 +1916,7 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(MachineInstr &MI, // Wait for any pending GDS instruction to complete before any // "Always GDS" instruction. - if (TII->isAlwaysGDS(MI.getOpcode()) && ScoreBrackets.hasPendingGDS()) + if (TII->isAlwaysGDS(Opc) && ScoreBrackets.hasPendingGDS()) addWait(Wait, DS_CNT, ScoreBrackets.getPendingGDSWait()); if (MI.isCall() && callWaitsOnFunctionEntry(MI)) { @@ -1964,7 +1942,7 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(MachineInstr &MI, Wait); } } - } else if (MI.getOpcode() == AMDGPU::S_BARRIER_WAIT) { + } else if (Opc == AMDGPU::S_BARRIER_WAIT) { ScoreBrackets.tryClearSCCWriteEvent(&MI); } else { // FIXME: Should not be relying on memoperands. @@ -2061,7 +2039,7 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(MachineInstr &MI, ScoreBrackets.determineWait(SmemAccessCounter, Interval, Wait); } - if (hasXcnt() && Op.isDef()) + if (ST->hasWaitXCnt() && Op.isDef()) ScoreBrackets.determineWait(X_CNT, Interval, Wait); } } @@ -2079,18 +2057,17 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(MachineInstr &MI, // // In all other cases, ensure safety by ensuring that there are no outstanding // memory operations. 
- if (MI.getOpcode() == AMDGPU::S_BARRIER && - !ST->hasAutoWaitcntBeforeBarrier() && !ST->supportsBackOffBarrier()) { + if (Opc == AMDGPU::S_BARRIER && !ST->hasAutoWaitcntBeforeBarrier() && + !ST->supportsBackOffBarrier()) { Wait = Wait.combined(WCG->getAllZeroWaitcnt(/*IncludeVSCnt=*/true)); } // TODO: Remove this work-around, enable the assert for Bug 457939 // after fixing the scheduler. Also, the Shader Compiler code is // independent of target. - if (readsVCCZ(MI) && ST->hasReadVCCZBug()) { - if (ScoreBrackets.hasPendingEvent(SMEM_ACCESS)) { - Wait.DsCnt = 0; - } + if (SIInstrInfo::isCBranchVCCZRead(MI) && ST->hasReadVCCZBug() && + ScoreBrackets.hasPendingEvent(SMEM_ACCESS)) { + Wait.DsCnt = 0; } // Verify that the wait is actually needed. @@ -2165,19 +2142,19 @@ bool SIInsertWaitcnts::generateWaitcnt(AMDGPU::Waitcnt Wait, } // XCnt may be already consumed by a load wait. - if (Wait.KmCnt == 0 && Wait.XCnt != ~0u && - !ScoreBrackets.hasPendingEvent(SMEM_GROUP)) - Wait.XCnt = ~0u; + if (Wait.XCnt != ~0u) { + if (Wait.KmCnt == 0 && !ScoreBrackets.hasPendingEvent(SMEM_GROUP)) + Wait.XCnt = ~0u; - if (Wait.LoadCnt == 0 && Wait.XCnt != ~0u && - !ScoreBrackets.hasPendingEvent(VMEM_GROUP)) - Wait.XCnt = ~0u; + if (Wait.LoadCnt == 0 && !ScoreBrackets.hasPendingEvent(VMEM_GROUP)) + Wait.XCnt = ~0u; - // Since the translation for VMEM addresses occur in-order, we can skip the - // XCnt if the current instruction is of VMEM type and has a memory dependency - // with another VMEM instruction in flight. - if (Wait.XCnt != ~0u && isVmemAccess(*It)) - Wait.XCnt = ~0u; + // Since the translation for VMEM addresses occurs in-order, we can skip the + // XCnt if the current instruction is of VMEM type and has a memory + // dependency with another VMEM instruction in flight. + if (isVmemAccess(*It)) + Wait.XCnt = ~0u; + } if (WCG->createNewWaitcnt(Block, It, Wait)) Modified = true; @@ -2185,75 +2162,11 @@ bool SIInsertWaitcnts::generateWaitcnt(AMDGPU::Waitcnt Wait, return Modified; } -// This is a flat memory operation. Check to see if it has memory tokens other -// than LDS. Other address spaces supported by flat memory operations involve -// global memory. -bool SIInsertWaitcnts::mayAccessVMEMThroughFlat(const MachineInstr &MI) const { - assert(TII->isFLAT(MI)); - - // All flat instructions use the VMEM counter except prefetch. - if (!TII->usesVM_CNT(MI)) - return false; - - // If there are no memory operands then conservatively assume the flat - // operation may access VMEM. - if (MI.memoperands_empty()) - return true; - - // See if any memory operand specifies an address space that involves VMEM. - // Flat operations only supported FLAT, LOCAL (LDS), or address spaces - // involving VMEM such as GLOBAL, CONSTANT, PRIVATE (SCRATCH), etc. The REGION - // (GDS) address space is not supported by flat operations. Therefore, simply - // return true unless only the LDS address space is found. - for (const MachineMemOperand *Memop : MI.memoperands()) { - unsigned AS = Memop->getAddrSpace(); - assert(AS != AMDGPUAS::REGION_ADDRESS); - if (AS != AMDGPUAS::LOCAL_ADDRESS) - return true; - } - - return false; -} - -// This is a flat memory operation. Check to see if it has memory tokens for -// either LDS or FLAT. -bool SIInsertWaitcnts::mayAccessLDSThroughFlat(const MachineInstr &MI) const { - assert(TII->isFLAT(MI)); - - // Flat instruction such as SCRATCH and GLOBAL do not use the lgkm counter. - if (!TII->usesLGKM_CNT(MI)) - return false; - - // If in tgsplit mode then there can be no use of LDS.
- if (ST->isTgSplitEnabled()) - return false; - - // If there are no memory operands then conservatively assume the flat - // operation may access LDS. - if (MI.memoperands_empty()) - return true; - - // See if any memory operand specifies an address space that involves LDS. - for (const MachineMemOperand *Memop : MI.memoperands()) { - unsigned AS = Memop->getAddrSpace(); - if (AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::FLAT_ADDRESS) - return true; - } - - return false; -} - bool SIInsertWaitcnts::isVmemAccess(const MachineInstr &MI) const { - return (TII->isFLAT(MI) && mayAccessVMEMThroughFlat(MI)) || + return (TII->isFLAT(MI) && TII->mayAccessVMEMThroughFlat(MI)) || (TII->isVMEM(MI) && !AMDGPU::getMUBUFIsBufferInv(MI.getOpcode())); } -static bool isGFX12CacheInvOrWBInst(MachineInstr &Inst) { - auto Opc = Inst.getOpcode(); - return Opc == AMDGPU::GLOBAL_INV || Opc == AMDGPU::GLOBAL_WB || - Opc == AMDGPU::GLOBAL_WBINV; -} - // Return true if the next instruction is S_ENDPGM, following fallthrough // blocks if necessary. bool SIInsertWaitcnts::isNextENDPGM(MachineBasicBlock::instr_iterator It, @@ -2331,7 +2244,7 @@ void SIInsertWaitcnts::updateEventWaitcntAfter(MachineInstr &Inst, ScoreBrackets->updateByEvent(TII, TRI, MRI, LDS_ACCESS, Inst); } } else if (TII->isFLAT(Inst)) { - if (isGFX12CacheInvOrWBInst(Inst)) { + if (SIInstrInfo::isGFX12CacheInvOrWBInst(Inst.getOpcode())) { ScoreBrackets->updateByEvent(TII, TRI, MRI, getVmemWaitEventType(Inst), Inst); return; @@ -2341,14 +2254,14 @@ void SIInsertWaitcnts::updateEventWaitcntAfter(MachineInstr &Inst, int FlatASCount = 0; - if (mayAccessVMEMThroughFlat(Inst)) { + if (TII->mayAccessVMEMThroughFlat(Inst)) { ++FlatASCount; IsVMEMAccess = true; ScoreBrackets->updateByEvent(TII, TRI, MRI, getVmemWaitEventType(Inst), Inst); } - if (mayAccessLDSThroughFlat(Inst)) { + if (TII->mayAccessLDSThroughFlat(Inst)) { ++FlatASCount; ScoreBrackets->updateByEvent(TII, TRI, MRI, LDS_ACCESS, Inst); } @@ -2394,7 +2307,7 @@ void SIInsertWaitcnts::updateEventWaitcntAfter(MachineInstr &Inst, ScoreBrackets->updateByEvent(TII, TRI, MRI, EXP_POS_ACCESS, Inst); else ScoreBrackets->updateByEvent(TII, TRI, MRI, EXP_GPR_LOCK, Inst); - } else if (asynchronouslyWritesSCC(Inst.getOpcode())) { + } else if (SIInstrInfo::isSBarrierSCCWrite(Inst.getOpcode())) { ScoreBrackets->updateByEvent(TII, TRI, MRI, SCC_WRITE, Inst); } else { switch (Inst.getOpcode()) { @@ -2413,7 +2326,7 @@ void SIInsertWaitcnts::updateEventWaitcntAfter(MachineInstr &Inst, } } - if (!hasXcnt()) + if (!ST->hasWaitXCnt()) return; if (IsVMEMAccess) @@ -2478,9 +2391,8 @@ bool WaitcntBrackets::merge(const WaitcntBrackets &Other) { unsigned OldEventsHasSCCWrite = OldEvents & (1 << SCC_WRITE); if (!OldEventsHasSCCWrite) { PendingSCCWrite = Other.PendingSCCWrite; - } else { - if (PendingSCCWrite != Other.PendingSCCWrite) - PendingSCCWrite = nullptr; + } else if (PendingSCCWrite != Other.PendingSCCWrite) { + PendingSCCWrite = nullptr; } } } @@ -2516,12 +2428,6 @@ static bool isWaitInstr(MachineInstr &Inst) { counterTypeForInstr(Opcode).has_value(); } -bool SIInsertWaitcnts::asynchronouslyWritesSCC(unsigned Opcode) { - return Opcode == AMDGPU::S_BARRIER_LEAVE || - Opcode == AMDGPU::S_BARRIER_SIGNAL_ISFIRST_IMM || - Opcode == AMDGPU::S_BARRIER_SIGNAL_ISFIRST_M0; -} - // Generate s_waitcnt instructions where needed. 
bool SIInsertWaitcnts::insertWaitcntInBlock(MachineFunction &MF, MachineBasicBlock &Block, @@ -2578,7 +2484,7 @@ bool SIInsertWaitcnts::insertWaitcntInBlock(MachineFunction &MF, OldWaitcntInstr = nullptr; // Restore vccz if it's not known to be correct already. - bool RestoreVCCZ = !VCCZCorrect && readsVCCZ(Inst); + bool RestoreVCCZ = !VCCZCorrect && SIInstrInfo::isCBranchVCCZRead(Inst); // Don't examine operands unless we need to track vccz correctness. if (ST->hasReadVCCZBug() || !ST->partialVCCWritesUpdateVCCZ()) { @@ -2701,7 +2607,7 @@ bool SIInsertWaitcnts::isPreheaderToFlush( bool SIInsertWaitcnts::isVMEMOrFlatVMEM(const MachineInstr &MI) const { if (SIInstrInfo::isFLAT(MI)) - return mayAccessVMEMThroughFlat(MI); + return TII->mayAccessVMEMThroughFlat(MI); return SIInstrInfo::isVMEM(MI); } @@ -2724,11 +2630,10 @@ bool SIInsertWaitcnts::shouldFlushVmCnt(MachineLoop *ML, for (MachineBasicBlock *MBB : ML->blocks()) { for (MachineInstr &MI : *MBB) { if (isVMEMOrFlatVMEM(MI)) { - if (MI.mayLoad()) - HasVMemLoad = true; - if (MI.mayStore()) - HasVMemStore = true; + HasVMemLoad |= MI.mayLoad(); + HasVMemStore |= MI.mayStore(); } + for (const MachineOperand &Op : MI.all_uses()) { if (Op.isDebug() || !TRI->isVectorRegister(*MRI, Op.getReg())) continue; diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 044ea86..56435a5 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -4344,6 +4344,59 @@ bool SIInstrInfo::mayAccessScratchThroughFlat(const MachineInstr &MI) const { }); } +bool SIInstrInfo::mayAccessVMEMThroughFlat(const MachineInstr &MI) const { + assert(isFLAT(MI)); + + // All flat instructions use the VMEM counter except prefetch. + if (!usesVM_CNT(MI)) + return false; + + // If there are no memory operands then conservatively assume the flat + // operation may access VMEM. + if (MI.memoperands_empty()) + return true; + + // See if any memory operand specifies an address space that involves VMEM. + // Flat operations only support FLAT, LOCAL (LDS), or address spaces + // involving VMEM such as GLOBAL, CONSTANT, PRIVATE (SCRATCH), etc. The REGION + // (GDS) address space is not supported by flat operations. Therefore, simply + // return true unless only the LDS address space is found. + for (const MachineMemOperand *Memop : MI.memoperands()) { + unsigned AS = Memop->getAddrSpace(); + assert(AS != AMDGPUAS::REGION_ADDRESS); + if (AS != AMDGPUAS::LOCAL_ADDRESS) + return true; + } + + return false; +} + +bool SIInstrInfo::mayAccessLDSThroughFlat(const MachineInstr &MI) const { + assert(isFLAT(MI)); + + // Flat instructions such as SCRATCH and GLOBAL do not use the lgkm counter. + if (!usesLGKM_CNT(MI)) + return false; + + // If in tgsplit mode then there can be no use of LDS. + if (ST.isTgSplitEnabled()) + return false; + + // If there are no memory operands then conservatively assume the flat + // operation may access LDS. + if (MI.memoperands_empty()) + return true; + + // See if any memory operand specifies an address space that involves LDS. + for (const MachineMemOperand *Memop : MI.memoperands()) { + unsigned AS = Memop->getAddrSpace(); + if (AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::FLAT_ADDRESS) + return true; + } + + return false; +} + bool SIInstrInfo::modifiesModeRegister(const MachineInstr &MI) { // Skip the full operand and register alias search modifiesRegister // does.
There's only a handful of instructions that touch this, it's only an diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h index c2252af..754f52a 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h @@ -688,6 +688,12 @@ public: /// to not hit scratch. bool mayAccessScratchThroughFlat(const MachineInstr &MI) const; + /// \returns true for FLAT instructions that can access VMEM. + bool mayAccessVMEMThroughFlat(const MachineInstr &MI) const; + + /// \returns true for FLAT instructions that can access LDS. + bool mayAccessLDSThroughFlat(const MachineInstr &MI) const; + static bool isBlockLoadStore(uint16_t Opcode) { switch (Opcode) { case AMDGPU::SI_BLOCK_SPILL_V1024_SAVE: @@ -748,6 +754,18 @@ public: return isLDSDMA(MI) && MI.getOpcode() != AMDGPU::BUFFER_STORE_LDS_DWORD; } + static bool isSBarrierSCCWrite(unsigned Opcode) { + return Opcode == AMDGPU::S_BARRIER_LEAVE || + Opcode == AMDGPU::S_BARRIER_SIGNAL_ISFIRST_IMM || + Opcode == AMDGPU::S_BARRIER_SIGNAL_ISFIRST_M0; + } + + static bool isCBranchVCCZRead(const MachineInstr &MI) { + unsigned Opc = MI.getOpcode(); + return (Opc == AMDGPU::S_CBRANCH_VCCNZ || Opc == AMDGPU::S_CBRANCH_VCCZ) && + !MI.getOperand(1).isUndef(); + } + static bool isWQM(const MachineInstr &MI) { return MI.getDesc().TSFlags & SIInstrFlags::WQM; } @@ -1010,6 +1028,11 @@ public: Opcode == AMDGPU::DS_GWS_BARRIER; } + static bool isGFX12CacheInvOrWBInst(unsigned Opc) { + return Opc == AMDGPU::GLOBAL_INV || Opc == AMDGPU::GLOBAL_WB || + Opc == AMDGPU::GLOBAL_WBINV; + } + static bool isF16PseudoScalarTrans(unsigned Opcode) { return Opcode == AMDGPU::V_S_EXP_F16_e64 || Opcode == AMDGPU::V_S_LOG_F16_e64 || diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp index 70b6c7e..1e6b04f8 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp @@ -3793,6 +3793,11 @@ bool RISCVInstrInfo::findCommutedOpIndices(const MachineInstr &MI, return false; // Operands 1 and 2 are commutable, if we switch the opcode.
return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 1, 2); + case RISCV::QC_SELECTIEQ: + case RISCV::QC_SELECTINE: + case RISCV::QC_SELECTIIEQ: + case RISCV::QC_SELECTIINE: + return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 1, 2); case RISCV::QC_MVEQ: case RISCV::QC_MVNE: case RISCV::QC_MVLT: @@ -4018,6 +4023,11 @@ MachineInstr *RISCVInstrInfo::commuteInstructionImpl(MachineInstr &MI, return TargetInstrInfo::commuteInstructionImpl(WorkingMI, false, OpIdx1, OpIdx2); } + case RISCV::QC_SELECTIEQ: + case RISCV::QC_SELECTINE: + case RISCV::QC_SELECTIIEQ: + case RISCV::QC_SELECTIINE: + return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2); case RISCV::QC_MVEQ: case RISCV::QC_MVNE: case RISCV::QC_MVLT: diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td index ff4a040..5407868 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td @@ -524,7 +524,7 @@ class QCIRVInstRI<bits<1> funct1, DAGOperand InTyImm11, let Inst{30-20} = imm11; } -let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in +let hasSideEffects = 0, mayLoad = 0, mayStore = 0, isCommutable = 1 in class QCISELECTIICC<bits<3> funct3, string opcodestr> : RVInstR4<0b00, funct3, OPC_CUSTOM_2, (outs GPRNoX0:$rd_wb), (ins GPRNoX0:$rd, GPRNoX0:$rs1, simm5:$simm1, simm5:$simm2), @@ -537,7 +537,7 @@ class QCISELECTIICC<bits<3> funct3, string opcodestr> let rs2 = simm1; } -let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in +let hasSideEffects = 0, mayLoad = 0, mayStore = 0, isCommutable = 1 in class QCISELECTICC<bits<3> funct3, string opcodestr> : RVInstR4<0b01, funct3, OPC_CUSTOM_2, (outs GPRNoX0:$rd_wb), (ins GPRNoX0:$rd, GPRNoX0:$rs1, GPRNoX0:$rs2, simm5:$simm2), diff --git a/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/llvm/lib/Transforms/IPO/GlobalOpt.cpp index f88d51f..99c4982 100644 --- a/llvm/lib/Transforms/IPO/GlobalOpt.cpp +++ b/llvm/lib/Transforms/IPO/GlobalOpt.cpp @@ -1680,7 +1680,9 @@ processGlobal(GlobalValue &GV, /// FastCC. 
static void ChangeCalleesToFastCall(Function *F) { for (User *U : F->users()) - cast<CallBase>(U)->setCallingConv(CallingConv::Fast); + if (auto *Call = dyn_cast<CallBase>(U)) + if (Call->getCalledOperand() == F) + Call->setCallingConv(CallingConv::Fast); } static AttributeList StripAttr(LLVMContext &C, AttributeList Attrs, @@ -1766,10 +1768,12 @@ isValidCandidateForColdCC(Function &F, return false; for (User *U : F.users()) { - CallBase &CB = cast<CallBase>(*U); - Function *CallerFunc = CB.getParent()->getParent(); + CallBase *CB = dyn_cast<CallBase>(U); + if (!CB || CB->getCalledOperand() != &F) + continue; + Function *CallerFunc = CB->getParent()->getParent(); BlockFrequencyInfo &CallerBFI = GetBFI(*CallerFunc); - if (!isColdCallSite(CB, CallerBFI)) + if (!isColdCallSite(*CB, CallerBFI)) return false; if (!llvm::is_contained(AllCallsCold, CallerFunc)) return false; @@ -1779,7 +1783,9 @@ isValidCandidateForColdCC(Function &F, static void changeCallSitesToColdCC(Function *F) { for (User *U : F->users()) - cast<CallBase>(U)->setCallingConv(CallingConv::Cold); + if (auto *Call = dyn_cast<CallBase>(U)) + if (Call->getCalledOperand() == F) + Call->setCallingConv(CallingConv::Cold); } // This function iterates over all the call instructions in the input Function diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp index 8fbaf68..ff063f9 100644 --- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -5169,6 +5169,7 @@ Instruction *InstCombinerImpl::visitFreeze(FreezeInst &I) { // - or: pick -1 // - select's condition: if the true value is constant, choose it by making // the condition true. + // - phi: pick the common constant across operands // - default: pick 0 // // Note that this transform is intentionally done here rather than @@ -5179,9 +5180,32 @@ Instruction *InstCombinerImpl::visitFreeze(FreezeInst &I) { // TODO: This could use getBinopAbsorber() / getBinopIdentity() to avoid // duplicating logic for binops at least. auto getUndefReplacement = [&](Type *Ty) { - Value *BestValue = nullptr; + auto pickCommonConstantFromPHI = [](PHINode &PN) -> Value * { + // phi(freeze(undef), C, C). Choose C for freeze so the PHI can be + // removed. 
+ Constant *BestValue = nullptr; + for (Value *V : PN.incoming_values()) { + if (match(V, m_Freeze(m_Undef()))) + continue; + + Constant *C = dyn_cast<Constant>(V); + if (!C) + return nullptr; + + if (!isGuaranteedNotToBeUndefOrPoison(C)) + return nullptr; + + if (BestValue && BestValue != C) + return nullptr; + + BestValue = C; + } + return BestValue; + }; + Value *NullValue = Constant::getNullValue(Ty); - for (const auto *U : I.users()) { + Value *BestValue = nullptr; + for (auto *U : I.users()) { Value *V = NullValue; if (match(U, m_Or(m_Value(), m_Value()))) V = ConstantInt::getAllOnesValue(Ty); @@ -5190,6 +5214,9 @@ Instruction *InstCombinerImpl::visitFreeze(FreezeInst &I) { else if (match(U, m_c_Select(m_Specific(&I), m_Value(V)))) { if (!isGuaranteedNotToBeUndefOrPoison(V, &AC, &I, &DT)) V = NullValue; + } else if (auto *PHI = dyn_cast<PHINode>(U)) { + if (Value *MaybeV = pickCommonConstantFromPHI(*PHI)) + V = MaybeV; } if (!BestValue) diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp index 123881e..21b2652 100644 --- a/llvm/lib/Transforms/Utils/Local.cpp +++ b/llvm/lib/Transforms/Utils/Local.cpp @@ -3025,6 +3025,12 @@ static void combineMetadata(Instruction *K, const Instruction *J, // Preserve !nosanitize if both K and J have it. K->setMetadata(Kind, JMD); break; + case LLVMContext::MD_captures: + K->setMetadata( + Kind, MDNode::fromCaptureComponents( + K->getContext(), MDNode::toCaptureComponents(JMD) | + MDNode::toCaptureComponents(KMD))); + break; } } // Set !invariant.group from J if J has it. If both instructions have it diff --git a/llvm/test/CodeGen/RISCV/xqcics.ll b/llvm/test/CodeGen/RISCV/xqcics.ll index 5b7ca9e7..60fc98c 100644 --- a/llvm/test/CodeGen/RISCV/xqcics.ll +++ b/llvm/test/CodeGen/RISCV/xqcics.ll @@ -690,3 +690,127 @@ entry: ret i32 %sel } +define i32 @select_cc_example_eq1(i32 %a, i32 %b, i32 %x, i32 %y) { +; RV32I-LABEL: select_cc_example_eq1: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: beq a1, a0, .LBB21_2 +; RV32I-NEXT: # %bb.1: # %entry +; RV32I-NEXT: li a2, 11 +; RV32I-NEXT: .LBB21_2: # %entry +; RV32I-NEXT: mv a0, a2 +; RV32I-NEXT: ret +; +; RV32IXQCICS-LABEL: select_cc_example_eq1: +; RV32IXQCICS: # %bb.0: # %entry +; RV32IXQCICS-NEXT: qc.selectieq a0, a1, a2, 11 +; RV32IXQCICS-NEXT: ret +; +; RV32IXQCICM-LABEL: select_cc_example_eq1: +; RV32IXQCICM: # %bb.0: # %entry +; RV32IXQCICM-NEXT: qc.selectieq a0, a1, a2, 11 +; RV32IXQCICM-NEXT: ret +; +; RV32IXQCI-LABEL: select_cc_example_eq1: +; RV32IXQCI: # %bb.0: # %entry +; RV32IXQCI-NEXT: qc.line a2, a1, a0, 11 +; RV32IXQCI-NEXT: mv a0, a2 +; RV32IXQCI-NEXT: ret +entry: + %cmp = icmp eq i32 %b, %a + %sel = select i1 %cmp, i32 %x, i32 11 + ret i32 %sel +} + +define i32 @select_cc_example_ne1(i32 %a, i32 %b, i32 %x, i32 %y) { +; RV32I-LABEL: select_cc_example_ne1: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: bne a1, a0, .LBB22_2 +; RV32I-NEXT: # %bb.1: # %entry +; RV32I-NEXT: li a2, 11 +; RV32I-NEXT: .LBB22_2: # %entry +; RV32I-NEXT: mv a0, a2 +; RV32I-NEXT: ret +; +; RV32IXQCICS-LABEL: select_cc_example_ne1: +; RV32IXQCICS: # %bb.0: # %entry +; RV32IXQCICS-NEXT: qc.selectine a0, a1, a2, 11 +; RV32IXQCICS-NEXT: ret +; +; RV32IXQCICM-LABEL: select_cc_example_ne1: +; RV32IXQCICM: # %bb.0: # %entry +; RV32IXQCICM-NEXT: qc.selectine a0, a1, a2, 11 +; RV32IXQCICM-NEXT: ret +; +; RV32IXQCI-LABEL: select_cc_example_ne1: +; RV32IXQCI: # %bb.0: # %entry +; RV32IXQCI-NEXT: qc.lieq a2, a1, a0, 11 +; RV32IXQCI-NEXT: mv a0, a2 +; RV32IXQCI-NEXT: ret +entry: + %cmp = icmp ne i32 %b, %a + 
%sel = select i1 %cmp, i32 %x, i32 11 + ret i32 %sel +} + + +define i32 @select_cc_example_eq2(i32 %a, i32 %b, i32 %x, i32 %y) { +; RV32I-LABEL: select_cc_example_eq2: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: beq a1, a0, .LBB23_2 +; RV32I-NEXT: # %bb.1: # %entry +; RV32I-NEXT: li a0, 11 +; RV32I-NEXT: ret +; RV32I-NEXT: .LBB23_2: +; RV32I-NEXT: li a0, 15 +; RV32I-NEXT: ret +; +; RV32IXQCICS-LABEL: select_cc_example_eq2: +; RV32IXQCICS: # %bb.0: # %entry +; RV32IXQCICS-NEXT: qc.selectiieq a0, a1, 15, 11 +; RV32IXQCICS-NEXT: ret +; +; RV32IXQCICM-LABEL: select_cc_example_eq2: +; RV32IXQCICM: # %bb.0: # %entry +; RV32IXQCICM-NEXT: qc.selectiieq a0, a1, 15, 11 +; RV32IXQCICM-NEXT: ret +; +; RV32IXQCI-LABEL: select_cc_example_eq2: +; RV32IXQCI: # %bb.0: # %entry +; RV32IXQCI-NEXT: qc.selectiieq a0, a1, 15, 11 +; RV32IXQCI-NEXT: ret +entry: + %cmp = icmp eq i32 %b, %a + %sel = select i1 %cmp, i32 15, i32 11 + ret i32 %sel +} + +define i32 @select_cc_example_ne2(i32 %a, i32 %b, i32 %x, i32 %y) { +; RV32I-LABEL: select_cc_example_ne2: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: bne a1, a0, .LBB24_2 +; RV32I-NEXT: # %bb.1: # %entry +; RV32I-NEXT: li a0, 11 +; RV32I-NEXT: ret +; RV32I-NEXT: .LBB24_2: +; RV32I-NEXT: li a0, 15 +; RV32I-NEXT: ret +; +; RV32IXQCICS-LABEL: select_cc_example_ne2: +; RV32IXQCICS: # %bb.0: # %entry +; RV32IXQCICS-NEXT: qc.selectiine a0, a1, 15, 11 +; RV32IXQCICS-NEXT: ret +; +; RV32IXQCICM-LABEL: select_cc_example_ne2: +; RV32IXQCICM: # %bb.0: # %entry +; RV32IXQCICM-NEXT: qc.selectiine a0, a1, 15, 11 +; RV32IXQCICM-NEXT: ret +; +; RV32IXQCI-LABEL: select_cc_example_ne2: +; RV32IXQCI: # %bb.0: # %entry +; RV32IXQCI-NEXT: qc.selectiine a0, a1, 15, 11 +; RV32IXQCI-NEXT: ret +entry: + %cmp = icmp ne i32 %b, %a + %sel = select i1 %cmp, i32 15, i32 11 + ret i32 %sel +} diff --git a/llvm/test/Instrumentation/MemorySanitizer/LoongArch/vararg-loongarch64.ll b/llvm/test/Instrumentation/MemorySanitizer/LoongArch/vararg-loongarch64.ll index e6d3a4b..4d4fc1b 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/LoongArch/vararg-loongarch64.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/LoongArch/vararg-loongarch64.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 ; RUN: opt < %s -S -passes=msan 2>&1 | FileCheck %s target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128" @@ -9,12 +10,36 @@ declare void @llvm.va_start(ptr) #2 declare void @llvm.va_end(ptr) #2 declare void @llvm.lifetime.end.p0(ptr nocapture) #1 define i32 @foo(i32 %guard, ...) { -; CHECK-LABEL: @foo -; CHECK: [[TMP1:%.*]] = load {{.*}} @__msan_va_arg_overflow_size_tls -; CHECK: [[TMP3:%.*]] = alloca {{.*}} [[TMP1]] -; CHECK: call void @llvm.memset.p0.i64(ptr align 8 [[TMP3]], i8 0, i64 [[TMP1]], i1 false) -; CHECK: [[TMP4:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP1]], i64 800) -; CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP3]], ptr align 8 @__msan_va_arg_tls, i64 [[TMP4]], i1 false) +; CHECK-LABEL: define i32 @foo( +; CHECK-SAME: i32 [[GUARD:%.*]], ...) 
{ +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = alloca i8, i64 [[TMP1]], align 8 +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP2]], i8 0, i64 [[TMP1]], i1 false) +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP1]], i64 800) +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP2]], ptr align 8 @__msan_va_arg_tls, i64 [[TMP3]], i1 false) +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[VL:%.*]] = alloca ptr, align 8 +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[VL]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 87960930222080 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP6]], i8 0, i64 8, i1 false) +; CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr [[VL]]) +; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[VL]] to i64 +; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080 +; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP9]], i8 0, i64 8, i1 false) +; CHECK-NEXT: call void @llvm.va_start.p0(ptr [[VL]]) +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[VL]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP10]] to ptr +; CHECK-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +; CHECK-NEXT: [[TMP13:%.*]] = ptrtoint ptr [[TMP12]] to i64 +; CHECK-NEXT: [[TMP14:%.*]] = xor i64 [[TMP13]], 87960930222080 +; CHECK-NEXT: [[TMP15:%.*]] = inttoptr i64 [[TMP14]] to ptr +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP15]], ptr align 8 [[TMP2]], i64 [[TMP1]], i1 false) +; CHECK-NEXT: call void @llvm.va_end.p0(ptr [[VL]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(ptr [[VL]]) +; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 0 ; %vl = alloca ptr, align 8 call void @llvm.lifetime.start.p0(ptr %vl) @@ -27,11 +52,22 @@ define i32 @foo(i32 %guard, ...) { ;; Save the incoming shadow value from the arguments in the __msan_va_arg_tls ;; array. 
define i32 @bar() { -; CHECK-LABEL: @bar -; CHECK: store i32 0, ptr @__msan_va_arg_tls, align 8 -; CHECK: store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 8) to ptr), align 8 -; CHECK: store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 16) to ptr), align 8 -; CHECK: store {{.*}} 24, {{.*}} @__msan_va_arg_overflow_size_tls +; CHECK-LABEL: define i32 @bar() { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: store i32 0, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: store i32 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8 +; CHECK-NEXT: store i32 0, ptr @__msan_va_arg_tls, align 8 +; CHECK-NEXT: store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: store i64 24, ptr @__msan_va_arg_overflow_size_tls, align 8 +; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = call i32 (i32, ...) @foo(i32 0, i32 1, i64 2, double 3.000000e+00) +; CHECK-NEXT: [[_MSRET:%.*]] = load i32, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[TMP2]] ; %1 = call i32 (i32, ...) @foo(i32 0, i32 1, i64 2, double 3.000000e+00) ret i32 %1 @@ -40,15 +76,28 @@ define i32 @bar() { ;; Check multiple fixed arguments. declare i32 @foo2(i32 %g1, i32 %g2, ...) define i32 @bar2() { -; CHECK-LABEL: @bar2 -; CHECK: store i64 0, ptr @__msan_va_arg_tls, align 8 -; CHECK: store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 8) to ptr), align 8 -; CHECK: store {{.*}} 16, {{.*}} @__msan_va_arg_overflow_size_tls +; CHECK-LABEL: define i32 @bar2() { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: store i32 0, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: store i32 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8 +; CHECK-NEXT: store i64 0, ptr @__msan_va_arg_tls, align 8 +; CHECK-NEXT: store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: store i64 16, ptr @__msan_va_arg_overflow_size_tls, align 8 +; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = call i32 (i32, i32, ...) @foo2(i32 0, i32 1, i64 2, double 3.000000e+00) +; CHECK-NEXT: [[_MSRET:%.*]] = load i32, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[TMP2]] ; %1 = call i32 (i32, i32, ...) 
@foo2(i32 0, i32 1, i64 2, double 3.000000e+00) ret i32 %1 } +; UTC_ARGS: --disable + ;; Test that MSan doesn't generate code overflowing __msan_va_arg_tls when too many arguments are ;; passed to a variadic function. declare i64 @sum(i64 %n, ...) diff --git a/llvm/test/Instrumentation/MemorySanitizer/Mips/vararg-mips64.ll b/llvm/test/Instrumentation/MemorySanitizer/Mips/vararg-mips64.ll index 69a74a3..9f3f10e 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/Mips/vararg-mips64.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/Mips/vararg-mips64.ll @@ -1,9 +1,41 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 ; RUN: opt < %s -S -passes=msan 2>&1 | FileCheck %s target datalayout = "E-m:m-i8:8:32-i16:16:32-i64:64-n32:64-S128" target triple = "mips64--linux" define i32 @foo(i32 %guard, ...) { +; CHECK-LABEL: define i32 @foo( +; CHECK-SAME: i32 [[GUARD:%.*]], ...) { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = alloca i8, i64 [[TMP1]], align 8 +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP2]], i8 0, i64 [[TMP1]], i1 false) +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP1]], i64 800) +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP2]], ptr align 8 @__msan_va_arg_tls, i64 [[TMP3]], i1 false) +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[VL:%.*]] = alloca ptr, align 8 +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[VL]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 549755813888 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP6]], i8 0, i64 8, i1 false) +; CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr [[VL]]) +; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[VL]] to i64 +; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 549755813888 +; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP9]], i8 0, i64 8, i1 false) +; CHECK-NEXT: call void @llvm.va_start.p0(ptr [[VL]]) +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[VL]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP10]] to ptr +; CHECK-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +; CHECK-NEXT: [[TMP13:%.*]] = ptrtoint ptr [[TMP12]] to i64 +; CHECK-NEXT: [[TMP14:%.*]] = xor i64 [[TMP13]], 549755813888 +; CHECK-NEXT: [[TMP15:%.*]] = inttoptr i64 [[TMP14]] to ptr +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP15]], ptr align 8 [[TMP2]], i64 [[TMP1]], i1 false) +; CHECK-NEXT: call void @llvm.va_end.p0(ptr [[VL]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(ptr [[VL]]) +; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 0 +; %vl = alloca ptr, align 8 call void @llvm.lifetime.start.p0(ptr %vl) call void @llvm.va_start(ptr %vl) @@ -12,23 +44,29 @@ define i32 @foo(i32 %guard, ...) { ret i32 0 } -; First, check allocation of the save area. 
- -; CHECK-LABEL: @foo -; CHECK: [[A:%.*]] = load {{.*}} @__msan_va_arg_overflow_size_tls -; CHECK: [[C:%.*]] = alloca {{.*}} [[A]] - -; CHECK: call void @llvm.memset.p0.i64(ptr align 8 [[C]], i8 0, i64 [[A]], i1 false) - -; CHECK: [[D:%.*]] = call i64 @llvm.umin.i64(i64 [[A]], i64 800) -; CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[C]], ptr align 8 @__msan_va_arg_tls, i64 [[D]], i1 false) - declare void @llvm.lifetime.start.p0(ptr nocapture) #1 declare void @llvm.va_start(ptr) #2 declare void @llvm.va_end(ptr) #2 declare void @llvm.lifetime.end.p0(ptr nocapture) #1 define i32 @bar() { +; CHECK-LABEL: define i32 @bar() { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: store i32 0, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: store i32 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8 +; CHECK-NEXT: store i32 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 4) to ptr), align 8 +; CHECK-NEXT: store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: store i64 24, ptr @__msan_va_arg_overflow_size_tls, align 8 +; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = call i32 (i32, ...) @foo(i32 0, i32 1, i64 2, double 3.000000e+00) +; CHECK-NEXT: [[_MSRET:%.*]] = load i32, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[TMP2]] +; %1 = call i32 (i32, ...) @foo(i32 0, i32 1, i64 2, double 3.000000e+00) ret i32 %1 } @@ -36,23 +74,32 @@ define i32 @bar() { ; Save the incoming shadow value from the arguments in the __msan_va_arg_tls ; array. The first argument is stored at position 4, since it's right ; justified. -; CHECK-LABEL: @bar -; CHECK: store i32 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 4) to ptr), align 8 -; CHECK: store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 8) to ptr), align 8 -; CHECK: store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 16) to ptr), align 8 -; CHECK: store {{.*}} 24, {{.*}} @__msan_va_arg_overflow_size_tls ; Check multiple fixed arguments. declare i32 @foo2(i32 %g1, i32 %g2, ...) 
define i32 @bar2() { +; CHECK-LABEL: define i32 @bar2() { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: store i32 0, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: store i32 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8 +; CHECK-NEXT: store i64 0, ptr @__msan_va_arg_tls, align 8 +; CHECK-NEXT: store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: store i64 16, ptr @__msan_va_arg_overflow_size_tls, align 8 +; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = call i32 (i32, i32, ...) @foo2(i32 0, i32 1, i64 2, double 3.000000e+00) +; CHECK-NEXT: [[_MSRET:%.*]] = load i32, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[TMP2]] +; %1 = call i32 (i32, i32, ...) @foo2(i32 0, i32 1, i64 2, double 3.000000e+00) ret i32 %1 } -; CHECK-LABEL: @bar2 -; CHECK: store i64 0, ptr @__msan_va_arg_tls, align 8 -; CHECK: store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 8) to ptr), align 8 -; CHECK: store {{.*}} 16, {{.*}} @__msan_va_arg_overflow_size_tls + +; UTC_ARGS: --disable ; Test that MSan doesn't generate code overflowing __msan_va_arg_tls when too many arguments are ; passed to a variadic function. diff --git a/llvm/test/Instrumentation/MemorySanitizer/Mips/vararg-mips64el.ll b/llvm/test/Instrumentation/MemorySanitizer/Mips/vararg-mips64el.ll index b19da8e..41fb975 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/Mips/vararg-mips64el.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/Mips/vararg-mips64el.ll @@ -1,9 +1,41 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 ; RUN: opt < %s -S -passes=msan 2>&1 | FileCheck %s target datalayout = "e-m:m-i8:8:32-i16:16:32-i64:64-n32:64-S128" target triple = "mips64el--linux" define i32 @foo(i32 %guard, ...) { +; CHECK-LABEL: define i32 @foo( +; CHECK-SAME: i32 [[GUARD:%.*]], ...) 
{ +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = alloca i8, i64 [[TMP1]], align 8 +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP2]], i8 0, i64 [[TMP1]], i1 false) +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP1]], i64 800) +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP2]], ptr align 8 @__msan_va_arg_tls, i64 [[TMP3]], i1 false) +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[VL:%.*]] = alloca ptr, align 8 +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[VL]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 549755813888 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP6]], i8 0, i64 8, i1 false) +; CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr [[VL]]) +; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[VL]] to i64 +; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 549755813888 +; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP9]], i8 0, i64 8, i1 false) +; CHECK-NEXT: call void @llvm.va_start.p0(ptr [[VL]]) +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[VL]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP10]] to ptr +; CHECK-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +; CHECK-NEXT: [[TMP13:%.*]] = ptrtoint ptr [[TMP12]] to i64 +; CHECK-NEXT: [[TMP14:%.*]] = xor i64 [[TMP13]], 549755813888 +; CHECK-NEXT: [[TMP15:%.*]] = inttoptr i64 [[TMP14]] to ptr +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP15]], ptr align 8 [[TMP2]], i64 [[TMP1]], i1 false) +; CHECK-NEXT: call void @llvm.va_end.p0(ptr [[VL]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(ptr [[VL]]) +; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 0 +; %vl = alloca ptr, align 8 call void @llvm.lifetime.start.p0(ptr %vl) call void @llvm.va_start(ptr %vl) @@ -12,46 +44,60 @@ define i32 @foo(i32 %guard, ...) { ret i32 0 } -; First, check allocation of the save area. 
- -; CHECK-LABEL: @foo -; CHECK: [[A:%.*]] = load {{.*}} @__msan_va_arg_overflow_size_tls -; CHECK: [[C:%.*]] = alloca {{.*}} [[A]] - -; CHECK: call void @llvm.memset.p0.i64(ptr align 8 [[C]], i8 0, i64 [[A]], i1 false) - -; CHECK: [[D:%.*]] = call i64 @llvm.umin.i64(i64 [[A]], i64 800) -; CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[C]], ptr align 8 @__msan_va_arg_tls, i64 [[D]], i1 false) - declare void @llvm.lifetime.start.p0(ptr nocapture) #1 declare void @llvm.va_start(ptr) #2 declare void @llvm.va_end(ptr) #2 declare void @llvm.lifetime.end.p0(ptr nocapture) #1 define i32 @bar() { +; CHECK-LABEL: define i32 @bar() { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: store i32 0, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: store i32 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8 +; CHECK-NEXT: store i32 0, ptr @__msan_va_arg_tls, align 8 +; CHECK-NEXT: store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: store i64 24, ptr @__msan_va_arg_overflow_size_tls, align 8 +; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = call i32 (i32, ...) @foo(i32 0, i32 1, i64 2, double 3.000000e+00) +; CHECK-NEXT: [[_MSRET:%.*]] = load i32, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[TMP2]] +; %1 = call i32 (i32, ...) @foo(i32 0, i32 1, i64 2, double 3.000000e+00) ret i32 %1 } ; Save the incoming shadow value from the arguments in the __msan_va_arg_tls ; array. -; CHECK-LABEL: @bar -; CHECK: store i32 0, ptr @__msan_va_arg_tls, align 8 -; CHECK: store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 8) to ptr), align 8 -; CHECK: store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 16) to ptr), align 8 -; CHECK: store {{.*}} 24, {{.*}} @__msan_va_arg_overflow_size_tls ; Check multiple fixed arguments. declare i32 @foo2(i32 %g1, i32 %g2, ...) define i32 @bar2() { +; CHECK-LABEL: define i32 @bar2() { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: store i32 0, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: store i32 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8 +; CHECK-NEXT: store i64 0, ptr @__msan_va_arg_tls, align 8 +; CHECK-NEXT: store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: store i64 16, ptr @__msan_va_arg_overflow_size_tls, align 8 +; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = call i32 (i32, i32, ...) 
@foo2(i32 0, i32 1, i64 2, double 3.000000e+00) +; CHECK-NEXT: [[_MSRET:%.*]] = load i32, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[TMP2]] +; %1 = call i32 (i32, i32, ...) @foo2(i32 0, i32 1, i64 2, double 3.000000e+00) ret i32 %1 } -; CHECK-LABEL: @bar2 -; CHECK: store i64 0, ptr @__msan_va_arg_tls, align 8 -; CHECK: store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 8) to ptr), align 8 -; CHECK: store {{.*}} 16, {{.*}} @__msan_va_arg_overflow_size_tls +; UTC_ARGS: --disable ; Test that MSan doesn't generate code overflowing __msan_va_arg_tls when too many arguments are ; passed to a variadic function. diff --git a/llvm/test/Instrumentation/MemorySanitizer/PowerPC/vararg-ppc64.ll b/llvm/test/Instrumentation/MemorySanitizer/PowerPC/vararg-ppc64.ll index 9351067..19b07e1 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/PowerPC/vararg-ppc64.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/PowerPC/vararg-ppc64.ll @@ -1,9 +1,47 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 ; RUN: opt < %s -S -passes=msan 2>&1 | FileCheck %s target datalayout = "E-m:e-i64:64-n32:64" target triple = "powerpc64--linux" define i32 @foo(i32 %guard, ...) { +; CHECK-LABEL: define i32 @foo( +; CHECK-SAME: i32 [[GUARD:%.*]], ...) { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = alloca i8, i64 [[TMP1]], align 8 +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP2]], i8 0, i64 [[TMP1]], i1 false) +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP1]], i64 800) +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP2]], ptr align 8 @__msan_va_arg_tls, i64 [[TMP3]], i1 false) +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[VL:%.*]] = alloca ptr, align 8 +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[VL]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], -246290604621825 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 17592186044416 +; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[TMP6]], 8796093022208 +; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP8]], i8 0, i64 8, i1 false) +; CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr [[VL]]) +; CHECK-NEXT: [[TMP9:%.*]] = ptrtoint ptr [[VL]] to i64 +; CHECK-NEXT: [[TMP10:%.*]] = and i64 [[TMP9]], -246290604621825 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 17592186044416 +; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[TMP11]], 8796093022208 +; CHECK-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP12]] to ptr +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP13]], i8 0, i64 8, i1 false) +; CHECK-NEXT: call void @llvm.va_start.p0(ptr [[VL]]) +; CHECK-NEXT: [[TMP14:%.*]] = ptrtoint ptr [[VL]] to i64 +; CHECK-NEXT: [[TMP15:%.*]] = inttoptr i64 [[TMP14]] to ptr +; CHECK-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 8 +; CHECK-NEXT: [[TMP17:%.*]] = ptrtoint ptr [[TMP16]] to i64 +; CHECK-NEXT: [[TMP18:%.*]] = and i64 [[TMP17]], -246290604621825 +; CHECK-NEXT: [[TMP19:%.*]] = xor i64 [[TMP18]], 17592186044416 +; CHECK-NEXT: [[TMP20:%.*]] = add i64 [[TMP19]], 8796093022208 +; CHECK-NEXT: [[TMP21:%.*]] = inttoptr i64 [[TMP20]] to ptr +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP21]], ptr align 8 [[TMP2]], i64 [[TMP1]], i1 false) +; CHECK-NEXT: call void @llvm.va_end.p0(ptr [[VL]]) +; CHECK-NEXT: call void 
@llvm.lifetime.end.p0(ptr [[VL]]) +; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 0 +; %vl = alloca ptr, align 8 call void @llvm.lifetime.start.p0(ptr %vl) call void @llvm.va_start(ptr %vl) @@ -12,23 +50,29 @@ define i32 @foo(i32 %guard, ...) { ret i32 0 } -; First, check allocation of the save area. - -; CHECK-LABEL: @foo -; CHECK: [[A:%.*]] = load {{.*}} @__msan_va_arg_overflow_size_tls -; CHECK: [[C:%.*]] = alloca {{.*}} [[A]] - -; CHECK: call void @llvm.memset.p0.i64(ptr align 8 [[C]], i8 0, i64 [[A]], i1 false) - -; CHECK: [[D:%.*]] = call i64 @llvm.umin.i64(i64 [[A]], i64 800) -; CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[C]], ptr align 8 @__msan_va_arg_tls, i64 [[D]], i1 false) - declare void @llvm.lifetime.start.p0(ptr nocapture) #1 declare void @llvm.va_start(ptr) #2 declare void @llvm.va_end(ptr) #2 declare void @llvm.lifetime.end.p0(ptr nocapture) #1 define i32 @bar() { +; CHECK-LABEL: define i32 @bar() { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: store i32 0, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: store i32 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8 +; CHECK-NEXT: store i32 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 4) to ptr), align 8 +; CHECK-NEXT: store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: store i64 24, ptr @__msan_va_arg_overflow_size_tls, align 8 +; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = call i32 (i32, ...) @foo(i32 0, i32 1, i64 2, double 3.000000e+00) +; CHECK-NEXT: [[_MSRET:%.*]] = load i32, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[TMP2]] +; %1 = call i32 (i32, ...) @foo(i32 0, i32 1, i64 2, double 3.000000e+00) ret i32 %1 } @@ -36,14 +80,22 @@ define i32 @bar() { ; Save the incoming shadow value from the arguments in the __msan_va_arg_tls ; array. The first argument is stored at position 4, since it's right ; justified. -; CHECK-LABEL: @bar -; CHECK: store i32 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 4) to ptr), align 8 -; CHECK: store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 8) to ptr), align 8 -; CHECK: store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 16) to ptr), align 8 -; CHECK: store {{.*}} 24, {{.*}} @__msan_va_arg_overflow_size_tls ; Check vector argument. 
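; (Aside, before the vector-argument case: the recurring and/xor/add sequence
; in the checks above is, as far as these constants suggest, MSan's
; application-to-shadow address mapping for ppc64. A hedged reading of the
; generated constants, which are target parameters and may change:
;
;   %a = ptrtoint ptr %p to i64
;   %m = and i64 %a, -246290604621825 ; clear bits 45-47 of the address
;   %x = xor i64 %m, 17592186044416   ; xor with 2^44
;   %s = add i64 %x, 8796093022208    ; add the shadow base, 2^43
;   %shadow = inttoptr i64 %s to ptr
;
; The value names here are illustrative, not emitted by the pass.)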
define i32 @bar2() { +; CHECK-LABEL: define i32 @bar2() { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: store i32 0, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: store i64 24, ptr @__msan_va_arg_overflow_size_tls, align 8 +; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = call i32 (i32, ...) @foo(i32 0, <2 x i64> <i64 1, i64 2>) +; CHECK-NEXT: [[_MSRET:%.*]] = load i32, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[TMP2]] +; %1 = call i32 (i32, ...) @foo(i32 0, <2 x i64> <i64 1, i64 2>) ret i32 %1 } @@ -51,50 +103,110 @@ define i32 @bar2() { ; The vector is at offset 16 of parameter save area, but __msan_va_arg_tls ; corresponds to offset 8+ of parameter save area - so the offset from ; __msan_va_arg_tls is actually misaligned. -; CHECK-LABEL: @bar2 -; CHECK: store <2 x i64> zeroinitializer, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 8) to ptr), align 8 -; CHECK: store {{.*}} 24, {{.*}} @__msan_va_arg_overflow_size_tls ; Check i64 array. define i32 @bar4() { +; CHECK-LABEL: define i32 @bar4() { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: store i32 0, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: store [2 x i64] zeroinitializer, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: store [2 x i64] zeroinitializer, ptr @__msan_va_arg_tls, align 8 +; CHECK-NEXT: store i64 16, ptr @__msan_va_arg_overflow_size_tls, align 8 +; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = call i32 (i32, ...) @foo(i32 0, [2 x i64] [i64 1, i64 2]) +; CHECK-NEXT: [[_MSRET:%.*]] = load i32, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[TMP2]] +; %1 = call i32 (i32, ...) @foo(i32 0, [2 x i64] [i64 1, i64 2]) ret i32 %1 } -; CHECK-LABEL: @bar4 -; CHECK: store [2 x i64] zeroinitializer, ptr @__msan_va_arg_tls, align 8 -; CHECK: store {{.*}} 16, {{.*}} @__msan_va_arg_overflow_size_tls ; Check i128 array. define i32 @bar5() { +; CHECK-LABEL: define i32 @bar5() { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: store i32 0, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: store [2 x i128] zeroinitializer, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: store [2 x i128] zeroinitializer, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: store i64 40, ptr @__msan_va_arg_overflow_size_tls, align 8 +; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = call i32 (i32, ...) @foo(i32 0, [2 x i128] [i128 1, i128 2]) +; CHECK-NEXT: [[_MSRET:%.*]] = load i32, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[TMP2]] +; %1 = call i32 (i32, ...) 
@foo(i32 0, [2 x i128] [i128 1, i128 2]) ret i32 %1 } -; CHECK-LABEL: @bar5 -; CHECK: store [2 x i128] zeroinitializer, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 8) to ptr), align 8 -; CHECK: store {{.*}} 40, {{.*}} @__msan_va_arg_overflow_size_tls ; Check 8-aligned byval. define i32 @bar6(ptr %arg) { +; CHECK-LABEL: define i32 @bar6( +; CHECK-SAME: ptr [[ARG:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: store i32 0, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[ARG]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = and i64 [[TMP2]], -246290604621825 +; CHECK-NEXT: [[TMP4:%.*]] = xor i64 [[TMP3]], 17592186044416 +; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[TMP4]], 8796093022208 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), i8 0, i64 16, i1 false) +; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[ARG]] to i64 +; CHECK-NEXT: [[TMP8:%.*]] = and i64 [[TMP7]], -246290604621825 +; CHECK-NEXT: [[TMP9:%.*]] = xor i64 [[TMP8]], 17592186044416 +; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[TMP9]], 8796093022208 +; CHECK-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP10]] to ptr +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 @__msan_va_arg_tls, ptr align 8 [[TMP11]], i64 16, i1 false) +; CHECK-NEXT: store i64 16, ptr @__msan_va_arg_overflow_size_tls, align 8 +; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: [[TMP12:%.*]] = call i32 (i32, ...) @foo(i32 0, ptr byval([2 x i64]) align 8 [[ARG]]) +; CHECK-NEXT: [[_MSRET:%.*]] = load i32, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[TMP12]] +; %1 = call i32 (i32, ...) @foo(i32 0, ptr byval([2 x i64]) align 8 %arg) ret i32 %1 } -; CHECK-LABEL: @bar6 -; CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 @__msan_va_arg_tls, ptr align 8 {{.*}}, i64 16, i1 false) -; CHECK: store {{.*}} 16, {{.*}} @__msan_va_arg_overflow_size_tls ; Check 16-aligned byval. 
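; A byval vararg is passed as a block of memory, so its shadow cannot be
; stored as a single immediate. Instead, the caller maps %arg through the
; same and/xor/add sequence to find the shadow of the pointed-to bytes and
; copies that into the vararg shadow area. Roughly (illustrative names, with
; the shadow-address computation abbreviated):
;
;   %s = inttoptr i64 <shadow of %arg, computed as above> to ptr
;   call void @llvm.memcpy.p0.p0.i64(ptr align 8 @__msan_va_arg_tls,
;                                    ptr align 8 %s, i64 16, i1 false)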
define i32 @bar7(ptr %arg) { +; CHECK-LABEL: define i32 @bar7( +; CHECK-SAME: ptr [[ARG:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: store i32 0, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[ARG]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = and i64 [[TMP2]], -246290604621825 +; CHECK-NEXT: [[TMP4:%.*]] = xor i64 [[TMP3]], 17592186044416 +; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[TMP4]], 8796093022208 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), i8 0, i64 32, i1 false) +; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[ARG]] to i64 +; CHECK-NEXT: [[TMP8:%.*]] = and i64 [[TMP7]], -246290604621825 +; CHECK-NEXT: [[TMP9:%.*]] = xor i64 [[TMP8]], 17592186044416 +; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[TMP9]], 8796093022208 +; CHECK-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP10]] to ptr +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 8) to ptr), ptr align 8 [[TMP11]], i64 32, i1 false) +; CHECK-NEXT: store i64 40, ptr @__msan_va_arg_overflow_size_tls, align 8 +; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: [[TMP12:%.*]] = call i32 (i32, ...) @foo(i32 0, ptr byval([4 x i64]) align 16 [[ARG]]) +; CHECK-NEXT: [[_MSRET:%.*]] = load i32, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[TMP12]] +; %1 = call i32 (i32, ...) @foo(i32 0, ptr byval([4 x i64]) align 16 %arg) ret i32 %1 } -; CHECK-LABEL: @bar7 -; CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 8) to ptr), ptr align 8 {{.*}}, i64 32, i1 false) -; CHECK: store {{.*}} 40, {{.*}} @__msan_va_arg_overflow_size_tls +; UTC_ARGS: --disable ; Test that MSan doesn't generate code overflowing __msan_va_arg_tls when too many arguments are ; passed to a variadic function. diff --git a/llvm/test/Instrumentation/MemorySanitizer/PowerPC/vararg-ppc64le.ll b/llvm/test/Instrumentation/MemorySanitizer/PowerPC/vararg-ppc64le.ll index 4151f3b..1fe6385 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/PowerPC/vararg-ppc64le.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/PowerPC/vararg-ppc64le.ll @@ -1,9 +1,47 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 ; RUN: opt < %s -S -passes=msan 2>&1 | FileCheck %s target datalayout = "e-m:e-i64:64-n32:64" target triple = "powerpc64le--linux" define i32 @foo(i32 %guard, ...) { +; CHECK-LABEL: define i32 @foo( +; CHECK-SAME: i32 [[GUARD:%.*]], ...) 
{ +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = alloca i8, i64 [[TMP1]], align 8 +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP2]], i8 0, i64 [[TMP1]], i1 false) +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP1]], i64 800) +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP2]], ptr align 8 @__msan_va_arg_tls, i64 [[TMP3]], i1 false) +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[VL:%.*]] = alloca ptr, align 8 +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[VL]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], -246290604621825 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 17592186044416 +; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[TMP6]], 8796093022208 +; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP8]], i8 0, i64 8, i1 false) +; CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr [[VL]]) +; CHECK-NEXT: [[TMP9:%.*]] = ptrtoint ptr [[VL]] to i64 +; CHECK-NEXT: [[TMP10:%.*]] = and i64 [[TMP9]], -246290604621825 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 17592186044416 +; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[TMP11]], 8796093022208 +; CHECK-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP12]] to ptr +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP13]], i8 0, i64 8, i1 false) +; CHECK-NEXT: call void @llvm.va_start.p0(ptr [[VL]]) +; CHECK-NEXT: [[TMP14:%.*]] = ptrtoint ptr [[VL]] to i64 +; CHECK-NEXT: [[TMP15:%.*]] = inttoptr i64 [[TMP14]] to ptr +; CHECK-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 8 +; CHECK-NEXT: [[TMP17:%.*]] = ptrtoint ptr [[TMP16]] to i64 +; CHECK-NEXT: [[TMP18:%.*]] = and i64 [[TMP17]], -246290604621825 +; CHECK-NEXT: [[TMP19:%.*]] = xor i64 [[TMP18]], 17592186044416 +; CHECK-NEXT: [[TMP20:%.*]] = add i64 [[TMP19]], 8796093022208 +; CHECK-NEXT: [[TMP21:%.*]] = inttoptr i64 [[TMP20]] to ptr +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP21]], ptr align 8 [[TMP2]], i64 [[TMP1]], i1 false) +; CHECK-NEXT: call void @llvm.va_end.p0(ptr [[VL]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(ptr [[VL]]) +; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 0 +; %vl = alloca ptr, align 8 call void @llvm.lifetime.start.p0(ptr %vl) call void @llvm.va_start(ptr %vl) @@ -12,37 +50,51 @@ define i32 @foo(i32 %guard, ...) { ret i32 0 } -; First, check allocation of the save area. 
- -; CHECK-LABEL: @foo -; CHECK: [[A:%.*]] = load {{.*}} @__msan_va_arg_overflow_size_tls -; CHECK: [[C:%.*]] = alloca {{.*}} [[A]] - -; CHECK: call void @llvm.memset.p0.i64(ptr align 8 [[C]], i8 0, i64 [[A]], i1 false) - -; CHECK: [[D:%.*]] = call i64 @llvm.umin.i64(i64 [[A]], i64 800) -; CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[C]], ptr align 8 @__msan_va_arg_tls, i64 [[D]], i1 false) - declare void @llvm.lifetime.start.p0(ptr nocapture) #1 declare void @llvm.va_start(ptr) #2 declare void @llvm.va_end(ptr) #2 declare void @llvm.lifetime.end.p0(ptr nocapture) #1 define i32 @bar() { +; CHECK-LABEL: define i32 @bar() { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: store i32 0, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: store i32 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8 +; CHECK-NEXT: store i32 0, ptr @__msan_va_arg_tls, align 8 +; CHECK-NEXT: store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: store i64 24, ptr @__msan_va_arg_overflow_size_tls, align 8 +; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = call i32 (i32, ...) @foo(i32 0, i32 1, i64 2, double 3.000000e+00) +; CHECK-NEXT: [[_MSRET:%.*]] = load i32, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[TMP2]] +; %1 = call i32 (i32, ...) @foo(i32 0, i32 1, i64 2, double 3.000000e+00) ret i32 %1 } ; Save the incoming shadow value from the arguments in the __msan_va_arg_tls ; array. -; CHECK-LABEL: @bar -; CHECK: store i32 0, ptr @__msan_va_arg_tls, align 8 -; CHECK: store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 8) to ptr), align 8 -; CHECK: store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 16) to ptr), align 8 -; CHECK: store {{.*}} 24, {{.*}} @__msan_va_arg_overflow_size_tls ; Check vector argument. define i32 @bar2() { +; CHECK-LABEL: define i32 @bar2() { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: store i32 0, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: store i64 24, ptr @__msan_va_arg_overflow_size_tls, align 8 +; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = call i32 (i32, ...) @foo(i32 0, <2 x i64> <i64 1, i64 2>) +; CHECK-NEXT: [[_MSRET:%.*]] = load i32, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[TMP2]] +; %1 = call i32 (i32, ...) 
@foo(i32 0, <2 x i64> <i64 1, i64 2>) ret i32 %1 } @@ -50,49 +102,110 @@ define i32 @bar2() { ; The vector is at offset 16 of parameter save area, but __msan_va_arg_tls ; corresponds to offset 8+ of parameter save area - so the offset from ; __msan_va_arg_tls is actually misaligned. -; CHECK-LABEL: @bar2 -; CHECK: store <2 x i64> zeroinitializer, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 8) to ptr), align 8 -; CHECK: store {{.*}} 24, {{.*}} @__msan_va_arg_overflow_size_tls ; Check i64 array. define i32 @bar4() { +; CHECK-LABEL: define i32 @bar4() { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: store i32 0, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: store [2 x i64] zeroinitializer, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: store [2 x i64] zeroinitializer, ptr @__msan_va_arg_tls, align 8 +; CHECK-NEXT: store i64 16, ptr @__msan_va_arg_overflow_size_tls, align 8 +; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = call i32 (i32, ...) @foo(i32 0, [2 x i64] [i64 1, i64 2]) +; CHECK-NEXT: [[_MSRET:%.*]] = load i32, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[TMP2]] +; %1 = call i32 (i32, ...) @foo(i32 0, [2 x i64] [i64 1, i64 2]) ret i32 %1 } -; CHECK-LABEL: @bar4 -; CHECK: store [2 x i64] zeroinitializer, ptr @__msan_va_arg_tls, align 8 -; CHECK: store {{.*}} 16, {{.*}} @__msan_va_arg_overflow_size_tls ; Check i128 array. define i32 @bar5() { +; CHECK-LABEL: define i32 @bar5() { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: store i32 0, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: store [2 x i128] zeroinitializer, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: store [2 x i128] zeroinitializer, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: store i64 40, ptr @__msan_va_arg_overflow_size_tls, align 8 +; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = call i32 (i32, ...) @foo(i32 0, [2 x i128] [i128 1, i128 2]) +; CHECK-NEXT: [[_MSRET:%.*]] = load i32, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[TMP2]] +; %1 = call i32 (i32, ...) @foo(i32 0, [2 x i128] [i128 1, i128 2]) ret i32 %1 } -; CHECK-LABEL: @bar5 -; CHECK: store [2 x i128] zeroinitializer, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 8) to ptr), align 8 -; CHECK: store {{.*}} 40, {{.*}} @__msan_va_arg_overflow_size_tls ; Check 8-aligned byval. 
define i32 @bar6(ptr %arg) { +; CHECK-LABEL: define i32 @bar6( +; CHECK-SAME: ptr [[ARG:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: store i32 0, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[ARG]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = and i64 [[TMP2]], -246290604621825 +; CHECK-NEXT: [[TMP4:%.*]] = xor i64 [[TMP3]], 17592186044416 +; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[TMP4]], 8796093022208 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), i8 0, i64 16, i1 false) +; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[ARG]] to i64 +; CHECK-NEXT: [[TMP8:%.*]] = and i64 [[TMP7]], -246290604621825 +; CHECK-NEXT: [[TMP9:%.*]] = xor i64 [[TMP8]], 17592186044416 +; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[TMP9]], 8796093022208 +; CHECK-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP10]] to ptr +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 @__msan_va_arg_tls, ptr align 8 [[TMP11]], i64 16, i1 false) +; CHECK-NEXT: store i64 16, ptr @__msan_va_arg_overflow_size_tls, align 8 +; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: [[TMP12:%.*]] = call i32 (i32, ...) @foo(i32 0, ptr byval([2 x i64]) align 8 [[ARG]]) +; CHECK-NEXT: [[_MSRET:%.*]] = load i32, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[TMP12]] +; %1 = call i32 (i32, ...) @foo(i32 0, ptr byval([2 x i64]) align 8 %arg) ret i32 %1 } -; CHECK-LABEL: @bar6 -; CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 @__msan_va_arg_tls, ptr align 8 {{.*}}, i64 16, i1 false) -; CHECK: store {{.*}} 16, {{.*}} @__msan_va_arg_overflow_size_tls ; Check 16-aligned byval. define i32 @bar7(ptr %arg) { +; CHECK-LABEL: define i32 @bar7( +; CHECK-SAME: ptr [[ARG:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: store i32 0, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[ARG]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = and i64 [[TMP2]], -246290604621825 +; CHECK-NEXT: [[TMP4:%.*]] = xor i64 [[TMP3]], 17592186044416 +; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[TMP4]], 8796093022208 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), i8 0, i64 32, i1 false) +; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[ARG]] to i64 +; CHECK-NEXT: [[TMP8:%.*]] = and i64 [[TMP7]], -246290604621825 +; CHECK-NEXT: [[TMP9:%.*]] = xor i64 [[TMP8]], 17592186044416 +; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[TMP9]], 8796093022208 +; CHECK-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP10]] to ptr +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 8) to ptr), ptr align 8 [[TMP11]], i64 32, i1 false) +; CHECK-NEXT: store i64 40, ptr @__msan_va_arg_overflow_size_tls, align 8 +; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: [[TMP12:%.*]] = call i32 (i32, ...) 
@foo(i32 0, ptr byval([4 x i64]) align 16 [[ARG]]) +; CHECK-NEXT: [[_MSRET:%.*]] = load i32, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[TMP12]] +; %1 = call i32 (i32, ...) @foo(i32 0, ptr byval([4 x i64]) align 16 %arg) ret i32 %1 } -; CHECK-LABEL: @bar7 -; CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 8) to ptr), ptr align 8 {{.*}}, i64 32, i1 false) -; CHECK: store {{.*}} 40, {{.*}} @__msan_va_arg_overflow_size_tls + +; UTC_ARGS: --disable ; Test that MSan doesn't generate code overflowing __msan_va_arg_tls when too many arguments are ; passed to a variadic function. diff --git a/llvm/test/Instrumentation/MemorySanitizer/msan_kernel_basic.ll b/llvm/test/Instrumentation/MemorySanitizer/msan_kernel_basic.ll index 4b7a910..a7209de 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/msan_kernel_basic.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/msan_kernel_basic.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 ; KMSAN instrumentation tests ; RUN: opt < %s -msan-kernel=1 -S -passes=msan 2>&1 | FileCheck %s -check-prefixes=CHECK @@ -6,309 +7,495 @@ target triple = "x86_64-unknown-linux-gnu" ; Check the instrumentation prologue. define void @Empty() nounwind uwtable sanitize_memory { +; CHECK-LABEL: define void @Empty( +; CHECK-SAME: ) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = call ptr @__msan_get_context_state() +; CHECK-NEXT: [[PARAM_SHADOW:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 0 +; CHECK-NEXT: [[RETVAL_SHADOW:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 1 +; CHECK-NEXT: [[VA_ARG_SHADOW:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 2 +; CHECK-NEXT: [[VA_ARG_ORIGIN:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 3 +; CHECK-NEXT: [[VA_ARG_OVERFLOW_SIZE:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 4 +; CHECK-NEXT: [[PARAM_ORIGIN:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 5 +; CHECK-NEXT: [[RETVAL_ORIGIN:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 6 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: ret void +; entry: ret void } -; CHECK-LABEL: @Empty -; CHECK: entry: -; CHECK: @__msan_get_context_state() -; %param_shadow: -; CHECK: getelementptr {{.*}} i32 0, i32 0 -; %retval_shadow: -; CHECK: getelementptr {{.*}} i32 0, i32 1 -; %va_arg_shadow: -; CHECK: getelementptr {{.*}} i32 0, i32 2 -; %va_arg_origin: -; CHECK: getelementptr {{.*}} i32 0, i32 3 -; %va_arg_overflow_size: -; CHECK: getelementptr {{.*}} i32 0, i32 4 -; %param_origin: -; CHECK: getelementptr {{.*}} i32 0, i32 5 -; %retval_origin: -; CHECK: getelementptr {{.*}} i32 0, i32 6 - ; Check instrumentation of stores - define void @Store1(ptr nocapture %p, i8 %x) nounwind uwtable sanitize_memory { +; CHECK-LABEL: define void @Store1( +; CHECK-SAME: ptr captures(none) 
[[P:%.*]], i8 [[X:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = call ptr @__msan_get_context_state() +; CHECK-NEXT: [[PARAM_SHADOW:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 0 +; CHECK-NEXT: [[RETVAL_SHADOW:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 1 +; CHECK-NEXT: [[VA_ARG_SHADOW:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 2 +; CHECK-NEXT: [[VA_ARG_ORIGIN:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 3 +; CHECK-NEXT: [[VA_ARG_OVERFLOW_SIZE:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 4 +; CHECK-NEXT: [[PARAM_ORIGIN:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 5 +; CHECK-NEXT: [[RETVAL_ORIGIN:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 6 +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[PARAM_SHADOW]] to i64 +; CHECK-NEXT: [[_MSARG:%.*]] = inttoptr i64 [[TMP1]] to ptr +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[_MSARG]], align 8 +; CHECK-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[PARAM_ORIGIN]] to i64 +; CHECK-NEXT: [[_MSARG_O:%.*]] = inttoptr i64 [[TMP3]] to ptr +; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[_MSARG_O]], align 4 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[PARAM_SHADOW]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[TMP5]], 8 +; CHECK-NEXT: [[_MSARG1:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[TMP7:%.*]] = load i8, ptr [[_MSARG1]], align 8 +; CHECK-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[PARAM_ORIGIN]] to i64 +; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[TMP8]], 8 +; CHECK-NEXT: [[_MSARG_O2:%.*]] = inttoptr i64 [[TMP9]] to ptr +; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[_MSARG_O2]], align 4 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB11:.*]], label %[[BB12:.*]], !prof [[PROF1:![0-9]+]] +; CHECK: [[BB11]]: +; CHECK-NEXT: call void @__msan_warning(i32 [[TMP4]]) #[[ATTR8:[0-9]+]] +; CHECK-NEXT: br label %[[BB12]] +; CHECK: [[BB12]]: +; CHECK-NEXT: [[TMP13:%.*]] = call { ptr, ptr } @__msan_metadata_ptr_for_store_1(ptr [[P]]) +; CHECK-NEXT: [[TMP14:%.*]] = extractvalue { ptr, ptr } [[TMP13]], 0 +; CHECK-NEXT: [[TMP15:%.*]] = extractvalue { ptr, ptr } [[TMP13]], 1 +; CHECK-NEXT: store i8 [[TMP7]], ptr [[TMP14]], align 1 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i8 [[TMP7]], 0 +; CHECK-NEXT: br i1 [[_MSCMP3]], label %[[BB16:.*]], label %[[BB18:.*]], !prof [[PROF1]] +; CHECK: [[BB16]]: +; CHECK-NEXT: [[TMP17:%.*]] = call i32 @__msan_chain_origin(i32 [[TMP10]]) +; CHECK-NEXT: store i32 [[TMP17]], ptr [[TMP15]], align 4 +; CHECK-NEXT: br label %[[BB18]] +; CHECK: [[BB18]]: +; CHECK-NEXT: store i8 [[X]], ptr [[P]], align 1 +; CHECK-NEXT: ret void +; entry: store i8 %x, ptr %p ret void } -; CHECK-LABEL: @Store1 -; CHECK: entry: -; CHECK: @__msan_get_context_state() -; CHECK: [[PARAM_SHADOW:%[a-z0-9_]+]] = getelementptr {{.*}} i32 0, i32 0 -; CHECK: [[BASE:%[0-9]+]] = ptrtoint {{.*}} [[PARAM_SHADOW]] -; CHECK: [[SHADOW_PTR:%[a-z0-9_]+]] = inttoptr {{.*}} [[BASE]] -; 
CHECK: [[SHADOW:%[a-z0-9]+]] = load i64, ptr [[SHADOW_PTR]] -; CHECK: [[BASE2:%[0-9]+]] = ptrtoint {{.*}} [[PARAM_SHADOW]] -; Load the shadow of %p and check it -; CHECK: icmp ne i64 [[SHADOW]] -; CHECK: br i1 -; CHECK: {{^[0-9]+}}: -; CHECK: @__msan_metadata_ptr_for_store_1(ptr %p) -; CHECK: store i8 -; If the new shadow is non-zero, jump to __msan_chain_origin() -; CHECK: icmp -; CHECK: br i1 -; CHECK: {{^[0-9]+}}: -; CHECK: @__msan_chain_origin -; Storing origin here: -; CHECK: store i32 -; CHECK: br label -; CHECK: {{^[0-9]+}}: -; CHECK: store i8 -; CHECK: ret void - define void @Store2(ptr nocapture %p, i16 %x) nounwind uwtable sanitize_memory { +; CHECK-LABEL: define void @Store2( +; CHECK-SAME: ptr captures(none) [[P:%.*]], i16 [[X:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = call ptr @__msan_get_context_state() +; CHECK-NEXT: [[PARAM_SHADOW:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 0 +; CHECK-NEXT: [[RETVAL_SHADOW:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 1 +; CHECK-NEXT: [[VA_ARG_SHADOW:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 2 +; CHECK-NEXT: [[VA_ARG_ORIGIN:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 3 +; CHECK-NEXT: [[VA_ARG_OVERFLOW_SIZE:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 4 +; CHECK-NEXT: [[PARAM_ORIGIN:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 5 +; CHECK-NEXT: [[RETVAL_ORIGIN:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 6 +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[PARAM_SHADOW]] to i64 +; CHECK-NEXT: [[_MSARG:%.*]] = inttoptr i64 [[TMP1]] to ptr +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[_MSARG]], align 8 +; CHECK-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[PARAM_ORIGIN]] to i64 +; CHECK-NEXT: [[_MSARG_O:%.*]] = inttoptr i64 [[TMP3]] to ptr +; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[_MSARG_O]], align 4 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[PARAM_SHADOW]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[TMP5]], 8 +; CHECK-NEXT: [[_MSARG1:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[TMP7:%.*]] = load i16, ptr [[_MSARG1]], align 8 +; CHECK-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[PARAM_ORIGIN]] to i64 +; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[TMP8]], 8 +; CHECK-NEXT: [[_MSARG_O2:%.*]] = inttoptr i64 [[TMP9]] to ptr +; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[_MSARG_O2]], align 4 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB11:.*]], label %[[BB12:.*]], !prof [[PROF1]] +; CHECK: [[BB11]]: +; CHECK-NEXT: call void @__msan_warning(i32 [[TMP4]]) #[[ATTR8]] +; CHECK-NEXT: br label %[[BB12]] +; CHECK: [[BB12]]: +; CHECK-NEXT: [[TMP13:%.*]] = call { ptr, ptr } @__msan_metadata_ptr_for_store_2(ptr [[P]]) +; CHECK-NEXT: [[TMP14:%.*]] = extractvalue { ptr, ptr } [[TMP13]], 0 +; CHECK-NEXT: [[TMP15:%.*]] = extractvalue { ptr, ptr } [[TMP13]], 1 +; CHECK-NEXT: store i16 [[TMP7]], ptr [[TMP14]], align 2 +; CHECK-NEXT: [[_MSCMP3:%.*]] = 
icmp ne i16 [[TMP7]], 0 +; CHECK-NEXT: br i1 [[_MSCMP3]], label %[[BB16:.*]], label %[[BB18:.*]], !prof [[PROF1]] +; CHECK: [[BB16]]: +; CHECK-NEXT: [[TMP17:%.*]] = call i32 @__msan_chain_origin(i32 [[TMP10]]) +; CHECK-NEXT: store i32 [[TMP17]], ptr [[TMP15]], align 4 +; CHECK-NEXT: br label %[[BB18]] +; CHECK: [[BB18]]: +; CHECK-NEXT: store i16 [[X]], ptr [[P]], align 2 +; CHECK-NEXT: ret void +; entry: store i16 %x, ptr %p ret void } -; CHECK-LABEL: @Store2 -; CHECK: entry: -; CHECK: @__msan_get_context_state() -; CHECK: [[PARAM_SHADOW:%[a-z0-9_]+]] = getelementptr {{.*}} i32 0, i32 0 -; CHECK: ptrtoint {{.*}} [[PARAM_SHADOW]] -; Load the shadow of %p and check it -; CHECK: load i64 -; CHECK: icmp -; CHECK: br i1 -; CHECK: {{^[0-9]+}}: -; CHECK: @__msan_metadata_ptr_for_store_2(ptr %p) -; CHECK: store i16 -; If the new shadow is non-zero, jump to __msan_chain_origin() -; CHECK: icmp -; CHECK: br i1 -; CHECK: {{^[0-9]+}}: -; CHECK: @__msan_chain_origin -; Storing origin here: -; CHECK: store i32 -; CHECK: br label -; CHECK: {{^[0-9]+}}: -; CHECK: store i16 -; CHECK: ret void - - define void @Store4(ptr nocapture %p, i32 %x) nounwind uwtable sanitize_memory { +; CHECK-LABEL: define void @Store4( +; CHECK-SAME: ptr captures(none) [[P:%.*]], i32 [[X:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = call ptr @__msan_get_context_state() +; CHECK-NEXT: [[PARAM_SHADOW:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 0 +; CHECK-NEXT: [[RETVAL_SHADOW:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 1 +; CHECK-NEXT: [[VA_ARG_SHADOW:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 2 +; CHECK-NEXT: [[VA_ARG_ORIGIN:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 3 +; CHECK-NEXT: [[VA_ARG_OVERFLOW_SIZE:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 4 +; CHECK-NEXT: [[PARAM_ORIGIN:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 5 +; CHECK-NEXT: [[RETVAL_ORIGIN:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 6 +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[PARAM_SHADOW]] to i64 +; CHECK-NEXT: [[_MSARG:%.*]] = inttoptr i64 [[TMP1]] to ptr +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[_MSARG]], align 8 +; CHECK-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[PARAM_ORIGIN]] to i64 +; CHECK-NEXT: [[_MSARG_O:%.*]] = inttoptr i64 [[TMP3]] to ptr +; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[_MSARG_O]], align 4 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[PARAM_SHADOW]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[TMP5]], 8 +; CHECK-NEXT: [[_MSARG1:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[_MSARG1]], align 8 +; CHECK-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[PARAM_ORIGIN]] to i64 +; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[TMP8]], 8 +; CHECK-NEXT: [[_MSARG_O2:%.*]] = inttoptr i64 [[TMP9]] to ptr +; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[_MSARG_O2]], align 4 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], 
label %[[BB11:.*]], label %[[BB12:.*]], !prof [[PROF1]] +; CHECK: [[BB11]]: +; CHECK-NEXT: call void @__msan_warning(i32 [[TMP4]]) #[[ATTR8]] +; CHECK-NEXT: br label %[[BB12]] +; CHECK: [[BB12]]: +; CHECK-NEXT: [[TMP13:%.*]] = call { ptr, ptr } @__msan_metadata_ptr_for_store_4(ptr [[P]]) +; CHECK-NEXT: [[TMP14:%.*]] = extractvalue { ptr, ptr } [[TMP13]], 0 +; CHECK-NEXT: [[TMP15:%.*]] = extractvalue { ptr, ptr } [[TMP13]], 1 +; CHECK-NEXT: store i32 [[TMP7]], ptr [[TMP14]], align 4 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i32 [[TMP7]], 0 +; CHECK-NEXT: br i1 [[_MSCMP3]], label %[[BB16:.*]], label %[[BB18:.*]], !prof [[PROF1]] +; CHECK: [[BB16]]: +; CHECK-NEXT: [[TMP17:%.*]] = call i32 @__msan_chain_origin(i32 [[TMP10]]) +; CHECK-NEXT: store i32 [[TMP17]], ptr [[TMP15]], align 4 +; CHECK-NEXT: br label %[[BB18]] +; CHECK: [[BB18]]: +; CHECK-NEXT: store i32 [[X]], ptr [[P]], align 4 +; CHECK-NEXT: ret void +; entry: store i32 %x, ptr %p ret void } -; CHECK-LABEL: @Store4 -; CHECK: entry: -; CHECK: @__msan_get_context_state() -; CHECK: [[PARAM_SHADOW:%[a-z0-9_]+]] = getelementptr {{.*}} i32 0, i32 0 -; CHECK: ptrtoint {{.*}} [[PARAM_SHADOW]] -; Load the shadow of %p and check it -; CHECK: load i32 -; CHECK: icmp -; CHECK: br i1 -; CHECK: {{^[0-9]+}}: -; CHECK: @__msan_metadata_ptr_for_store_4(ptr %p) -; CHECK: store i32 -; If the new shadow is non-zero, jump to __msan_chain_origin() -; CHECK: icmp -; CHECK: br i1 -; CHECK: {{^[0-9]+}}: -; CHECK: @__msan_chain_origin -; Storing origin here: -; CHECK: store i32 -; CHECK: br label -; CHECK: {{^[0-9]+}}: -; CHECK: store i32 -; CHECK: ret void - define void @Store8(ptr nocapture %p, i64 %x) nounwind uwtable sanitize_memory { +; CHECK-LABEL: define void @Store8( +; CHECK-SAME: ptr captures(none) [[P:%.*]], i64 [[X:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = call ptr @__msan_get_context_state() +; CHECK-NEXT: [[PARAM_SHADOW:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 0 +; CHECK-NEXT: [[RETVAL_SHADOW:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 1 +; CHECK-NEXT: [[VA_ARG_SHADOW:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 2 +; CHECK-NEXT: [[VA_ARG_ORIGIN:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 3 +; CHECK-NEXT: [[VA_ARG_OVERFLOW_SIZE:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 4 +; CHECK-NEXT: [[PARAM_ORIGIN:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 5 +; CHECK-NEXT: [[RETVAL_ORIGIN:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 6 +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[PARAM_SHADOW]] to i64 +; CHECK-NEXT: [[_MSARG:%.*]] = inttoptr i64 [[TMP1]] to ptr +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[_MSARG]], align 8 +; CHECK-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[PARAM_ORIGIN]] to i64 +; CHECK-NEXT: [[_MSARG_O:%.*]] = inttoptr i64 [[TMP3]] to ptr +; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[_MSARG_O]], align 4 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[PARAM_SHADOW]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = add i64 
[[TMP5]], 8 +; CHECK-NEXT: [[_MSARG1:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[_MSARG1]], align 8 +; CHECK-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[PARAM_ORIGIN]] to i64 +; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[TMP8]], 8 +; CHECK-NEXT: [[_MSARG_O2:%.*]] = inttoptr i64 [[TMP9]] to ptr +; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[_MSARG_O2]], align 4 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB11:.*]], label %[[BB12:.*]], !prof [[PROF1]] +; CHECK: [[BB11]]: +; CHECK-NEXT: call void @__msan_warning(i32 [[TMP4]]) #[[ATTR8]] +; CHECK-NEXT: br label %[[BB12]] +; CHECK: [[BB12]]: +; CHECK-NEXT: [[TMP13:%.*]] = call { ptr, ptr } @__msan_metadata_ptr_for_store_8(ptr [[P]]) +; CHECK-NEXT: [[TMP14:%.*]] = extractvalue { ptr, ptr } [[TMP13]], 0 +; CHECK-NEXT: [[TMP15:%.*]] = extractvalue { ptr, ptr } [[TMP13]], 1 +; CHECK-NEXT: store i64 [[TMP7]], ptr [[TMP14]], align 8 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP7]], 0 +; CHECK-NEXT: br i1 [[_MSCMP3]], label %[[BB16:.*]], label %[[BB21:.*]], !prof [[PROF1]] +; CHECK: [[BB16]]: +; CHECK-NEXT: [[TMP17:%.*]] = call i32 @__msan_chain_origin(i32 [[TMP10]]) +; CHECK-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 +; CHECK-NEXT: [[TMP19:%.*]] = shl i64 [[TMP18]], 32 +; CHECK-NEXT: [[TMP20:%.*]] = or i64 [[TMP18]], [[TMP19]] +; CHECK-NEXT: store i64 [[TMP20]], ptr [[TMP15]], align 8 +; CHECK-NEXT: br label %[[BB21]] +; CHECK: [[BB21]]: +; CHECK-NEXT: store i64 [[X]], ptr [[P]], align 8 +; CHECK-NEXT: ret void +; entry: store i64 %x, ptr %p ret void } -; CHECK-LABEL: @Store8 -; CHECK: entry: -; CHECK: @__msan_get_context_state() -; CHECK: [[PARAM_SHADOW:%[a-z0-9_]+]] = getelementptr {{.*}} i32 0, i32 0 -; CHECK: ptrtoint {{.*}} [[PARAM_SHADOW]] -; Load the shadow of %p and check it -; CHECK: load i64 -; CHECK: icmp -; CHECK: br i1 -; CHECK: {{^[0-9]+}}: -; CHECK: @__msan_metadata_ptr_for_store_8(ptr %p) -; CHECK: store i64 -; If the new shadow is non-zero, jump to __msan_chain_origin() -; CHECK: icmp -; CHECK: br i1 -; CHECK: {{^[0-9]+}}: -; CHECK: @__msan_chain_origin -; Storing origin here: -; CHECK: store i64 -; CHECK: br label -; CHECK: {{^[0-9]+}}: -; CHECK: store i64 -; CHECK: ret void - define void @Store16(ptr nocapture %p, i128 %x) nounwind uwtable sanitize_memory { +; CHECK-LABEL: define void @Store16( +; CHECK-SAME: ptr captures(none) [[P:%.*]], i128 [[X:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = call ptr @__msan_get_context_state() +; CHECK-NEXT: [[PARAM_SHADOW:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 0 +; CHECK-NEXT: [[RETVAL_SHADOW:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 1 +; CHECK-NEXT: [[VA_ARG_SHADOW:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 2 +; CHECK-NEXT: [[VA_ARG_ORIGIN:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 3 +; CHECK-NEXT: [[VA_ARG_OVERFLOW_SIZE:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 4 +; CHECK-NEXT: [[PARAM_ORIGIN:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 
}, ptr [[TMP0]], i32 0, i32 5 +; CHECK-NEXT: [[RETVAL_ORIGIN:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 6 +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[PARAM_SHADOW]] to i64 +; CHECK-NEXT: [[_MSARG:%.*]] = inttoptr i64 [[TMP1]] to ptr +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[_MSARG]], align 8 +; CHECK-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[PARAM_ORIGIN]] to i64 +; CHECK-NEXT: [[_MSARG_O:%.*]] = inttoptr i64 [[TMP3]] to ptr +; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[_MSARG_O]], align 4 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[PARAM_SHADOW]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[TMP5]], 8 +; CHECK-NEXT: [[_MSARG1:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[TMP7:%.*]] = load i128, ptr [[_MSARG1]], align 8 +; CHECK-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[PARAM_ORIGIN]] to i64 +; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[TMP8]], 8 +; CHECK-NEXT: [[_MSARG_O2:%.*]] = inttoptr i64 [[TMP9]] to ptr +; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[_MSARG_O2]], align 4 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB11:.*]], label %[[BB12:.*]], !prof [[PROF1]] +; CHECK: [[BB11]]: +; CHECK-NEXT: call void @__msan_warning(i32 [[TMP4]]) #[[ATTR8]] +; CHECK-NEXT: br label %[[BB12]] +; CHECK: [[BB12]]: +; CHECK-NEXT: [[TMP13:%.*]] = call { ptr, ptr } @__msan_metadata_ptr_for_store_n(ptr [[P]], i64 16) +; CHECK-NEXT: [[TMP14:%.*]] = extractvalue { ptr, ptr } [[TMP13]], 0 +; CHECK-NEXT: [[TMP15:%.*]] = extractvalue { ptr, ptr } [[TMP13]], 1 +; CHECK-NEXT: store i128 [[TMP7]], ptr [[TMP14]], align 8 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP7]], 0 +; CHECK-NEXT: br i1 [[_MSCMP3]], label %[[BB16:.*]], label %[[BB22:.*]], !prof [[PROF1]] +; CHECK: [[BB16]]: +; CHECK-NEXT: [[TMP17:%.*]] = call i32 @__msan_chain_origin(i32 [[TMP10]]) +; CHECK-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 +; CHECK-NEXT: [[TMP19:%.*]] = shl i64 [[TMP18]], 32 +; CHECK-NEXT: [[TMP20:%.*]] = or i64 [[TMP18]], [[TMP19]] +; CHECK-NEXT: store i64 [[TMP20]], ptr [[TMP15]], align 8 +; CHECK-NEXT: [[TMP21:%.*]] = getelementptr i64, ptr [[TMP15]], i32 1 +; CHECK-NEXT: store i64 [[TMP20]], ptr [[TMP21]], align 8 +; CHECK-NEXT: br label %[[BB22]] +; CHECK: [[BB22]]: +; CHECK-NEXT: store i128 [[X]], ptr [[P]], align 8 +; CHECK-NEXT: ret void +; entry: store i128 %x, ptr %p ret void } -; CHECK-LABEL: @Store16 -; CHECK: entry: -; CHECK: @__msan_get_context_state() -; CHECK: [[PARAM_SHADOW:%[a-z0-9_]+]] = getelementptr {{.*}} i32 0, i32 0 -; CHECK: ptrtoint {{.*}} [[PARAM_SHADOW]] -; Load the shadow of %p and check it -; CHECK: load i64 -; CHECK: icmp -; CHECK: br i1 -; CHECK: {{^[0-9]+}}: -; CHECK: @__msan_metadata_ptr_for_store_n(ptr %p, i64 16) -; CHECK: store i128 -; If the new shadow is non-zero, jump to __msan_chain_origin() -; CHECK: icmp -; CHECK: br i1 -; CHECK: {{^[0-9]+}}: -; CHECK: @__msan_chain_origin -; Storing origin here: -; CHECK: store i64 -; CHECK: br label -; CHECK: {{^[0-9]+}}: -; CHECK: store i128 -; CHECK: ret void - - ; Check instrumentation of loads define i8 @Load1(ptr nocapture %p) nounwind uwtable sanitize_memory { +; CHECK-LABEL: define i8 @Load1( +; CHECK-SAME: ptr captures(none) [[P:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = call ptr @__msan_get_context_state() +; CHECK-NEXT: [[PARAM_SHADOW:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 
x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 0 +; CHECK-NEXT: [[RETVAL_SHADOW:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 1 +; CHECK-NEXT: [[VA_ARG_SHADOW:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 2 +; CHECK-NEXT: [[VA_ARG_ORIGIN:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 3 +; CHECK-NEXT: [[VA_ARG_OVERFLOW_SIZE:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 4 +; CHECK-NEXT: [[PARAM_ORIGIN:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 5 +; CHECK-NEXT: [[RETVAL_ORIGIN:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 6 +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[PARAM_SHADOW]] to i64 +; CHECK-NEXT: [[_MSARG:%.*]] = inttoptr i64 [[TMP1]] to ptr +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[_MSARG]], align 8 +; CHECK-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[PARAM_ORIGIN]] to i64 +; CHECK-NEXT: [[_MSARG_O:%.*]] = inttoptr i64 [[TMP3]] to ptr +; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[_MSARG_O]], align 4 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB5:.*]], label %[[BB6:.*]], !prof [[PROF1]] +; CHECK: [[BB5]]: +; CHECK-NEXT: call void @__msan_warning(i32 [[TMP4]]) #[[ATTR8]] +; CHECK-NEXT: br label %[[BB6]] +; CHECK: [[BB6]]: +; CHECK-NEXT: [[TMP7:%.*]] = load i8, ptr [[P]], align 1 +; CHECK-NEXT: [[TMP8:%.*]] = call { ptr, ptr } @__msan_metadata_ptr_for_load_1(ptr [[P]]) +; CHECK-NEXT: [[TMP9:%.*]] = extractvalue { ptr, ptr } [[TMP8]], 0 +; CHECK-NEXT: [[TMP10:%.*]] = extractvalue { ptr, ptr } [[TMP8]], 1 +; CHECK-NEXT: [[_MSLD:%.*]] = load i8, ptr [[TMP9]], align 1 +; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +; CHECK-NEXT: store i8 [[_MSLD]], ptr [[RETVAL_SHADOW]], align 8 +; CHECK-NEXT: store i32 [[TMP11]], ptr [[RETVAL_ORIGIN]], align 4 +; CHECK-NEXT: ret i8 [[TMP7]] +; entry: %0 = load i8, ptr %p ret i8 %0 } -; CHECK-LABEL: @Load1 -; CHECK: entry: -; CHECK: @__msan_get_context_state() -; CHECK: [[PARAM_SHADOW:%[a-z0-9_]+]] = getelementptr {{.*}} i32 0, i32 0 -; CHECK: ptrtoint {{.*}} [[PARAM_SHADOW]] -; Load the shadow of %p and check it -; CHECK: load i64 -; CHECK: icmp -; CHECK: br i1 -; CHECK: {{^[0-9]+}}: -; Load the value from %p. This is done before accessing the shadow -; to ease atomic handling. -; CHECK: load i8 -; CHECK: @__msan_metadata_ptr_for_load_1(ptr %p) -; Load the shadow and origin. 
-; CHECK: load i8 -; CHECK: load i32 - - define i16 @Load2(ptr nocapture %p) nounwind uwtable sanitize_memory { +; CHECK-LABEL: define i16 @Load2( +; CHECK-SAME: ptr captures(none) [[P:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = call ptr @__msan_get_context_state() +; CHECK-NEXT: [[PARAM_SHADOW:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 0 +; CHECK-NEXT: [[RETVAL_SHADOW:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 1 +; CHECK-NEXT: [[VA_ARG_SHADOW:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 2 +; CHECK-NEXT: [[VA_ARG_ORIGIN:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 3 +; CHECK-NEXT: [[VA_ARG_OVERFLOW_SIZE:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 4 +; CHECK-NEXT: [[PARAM_ORIGIN:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 5 +; CHECK-NEXT: [[RETVAL_ORIGIN:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 6 +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[PARAM_SHADOW]] to i64 +; CHECK-NEXT: [[_MSARG:%.*]] = inttoptr i64 [[TMP1]] to ptr +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[_MSARG]], align 8 +; CHECK-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[PARAM_ORIGIN]] to i64 +; CHECK-NEXT: [[_MSARG_O:%.*]] = inttoptr i64 [[TMP3]] to ptr +; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[_MSARG_O]], align 4 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB5:.*]], label %[[BB6:.*]], !prof [[PROF1]] +; CHECK: [[BB5]]: +; CHECK-NEXT: call void @__msan_warning(i32 [[TMP4]]) #[[ATTR8]] +; CHECK-NEXT: br label %[[BB6]] +; CHECK: [[BB6]]: +; CHECK-NEXT: [[TMP7:%.*]] = load i16, ptr [[P]], align 2 +; CHECK-NEXT: [[TMP8:%.*]] = call { ptr, ptr } @__msan_metadata_ptr_for_load_2(ptr [[P]]) +; CHECK-NEXT: [[TMP9:%.*]] = extractvalue { ptr, ptr } [[TMP8]], 0 +; CHECK-NEXT: [[TMP10:%.*]] = extractvalue { ptr, ptr } [[TMP8]], 1 +; CHECK-NEXT: [[_MSLD:%.*]] = load i16, ptr [[TMP9]], align 2 +; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +; CHECK-NEXT: store i16 [[_MSLD]], ptr [[RETVAL_SHADOW]], align 8 +; CHECK-NEXT: store i32 [[TMP11]], ptr [[RETVAL_ORIGIN]], align 4 +; CHECK-NEXT: ret i16 [[TMP7]] +; entry: %0 = load i16, ptr %p ret i16 %0 } -; CHECK-LABEL: @Load2 -; CHECK: entry: -; CHECK: @__msan_get_context_state() -; CHECK: [[PARAM_SHADOW:%[a-z0-9_]+]] = getelementptr {{.*}} i32 0, i32 0 -; CHECK: ptrtoint {{.*}} [[PARAM_SHADOW]] -; Load the shadow of %p and check it -; CHECK: load i64 -; CHECK: icmp -; CHECK: br i1 -; CHECK: {{^[0-9]+}}: -; Load the value from %p. This is done before accessing the shadow -; to ease atomic handling. -; CHECK: load i16 -; CHECK: @__msan_metadata_ptr_for_load_2(ptr %p) -; Load the shadow and origin. 
-; CHECK: load i16 -; CHECK: load i32 - - define i32 @Load4(ptr nocapture %p) nounwind uwtable sanitize_memory { +; CHECK-LABEL: define i32 @Load4( +; CHECK-SAME: ptr captures(none) [[P:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = call ptr @__msan_get_context_state() +; CHECK-NEXT: [[PARAM_SHADOW:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 0 +; CHECK-NEXT: [[RETVAL_SHADOW:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 1 +; CHECK-NEXT: [[VA_ARG_SHADOW:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 2 +; CHECK-NEXT: [[VA_ARG_ORIGIN:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 3 +; CHECK-NEXT: [[VA_ARG_OVERFLOW_SIZE:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 4 +; CHECK-NEXT: [[PARAM_ORIGIN:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 5 +; CHECK-NEXT: [[RETVAL_ORIGIN:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 6 +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[PARAM_SHADOW]] to i64 +; CHECK-NEXT: [[_MSARG:%.*]] = inttoptr i64 [[TMP1]] to ptr +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[_MSARG]], align 8 +; CHECK-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[PARAM_ORIGIN]] to i64 +; CHECK-NEXT: [[_MSARG_O:%.*]] = inttoptr i64 [[TMP3]] to ptr +; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[_MSARG_O]], align 4 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB5:.*]], label %[[BB6:.*]], !prof [[PROF1]] +; CHECK: [[BB5]]: +; CHECK-NEXT: call void @__msan_warning(i32 [[TMP4]]) #[[ATTR8]] +; CHECK-NEXT: br label %[[BB6]] +; CHECK: [[BB6]]: +; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[P]], align 4 +; CHECK-NEXT: [[TMP8:%.*]] = call { ptr, ptr } @__msan_metadata_ptr_for_load_4(ptr [[P]]) +; CHECK-NEXT: [[TMP9:%.*]] = extractvalue { ptr, ptr } [[TMP8]], 0 +; CHECK-NEXT: [[TMP10:%.*]] = extractvalue { ptr, ptr } [[TMP8]], 1 +; CHECK-NEXT: [[_MSLD:%.*]] = load i32, ptr [[TMP9]], align 4 +; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +; CHECK-NEXT: store i32 [[_MSLD]], ptr [[RETVAL_SHADOW]], align 8 +; CHECK-NEXT: store i32 [[TMP11]], ptr [[RETVAL_ORIGIN]], align 4 +; CHECK-NEXT: ret i32 [[TMP7]] +; entry: %0 = load i32, ptr %p ret i32 %0 } -; CHECK-LABEL: @Load4 -; CHECK: entry: -; CHECK: @__msan_get_context_state() -; CHECK: [[PARAM_SHADOW:%[a-z0-9_]+]] = getelementptr {{.*}} i32 0, i32 0 -; CHECK: ptrtoint {{.*}} [[PARAM_SHADOW]] -; Load the shadow of %p and check it -; CHECK: load i64 -; CHECK: icmp -; CHECK: br i1 -; CHECK: {{^[0-9]+}}: -; Load the value from %p. This is done before accessing the shadow -; to ease atomic handling. -; CHECK: load i32 -; CHECK: @__msan_metadata_ptr_for_load_4(ptr %p) -; Load the shadow and origin. 
-; CHECK: load i32 -; CHECK: load i32 - define i64 @Load8(ptr nocapture %p) nounwind uwtable sanitize_memory { +; CHECK-LABEL: define i64 @Load8( +; CHECK-SAME: ptr captures(none) [[P:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = call ptr @__msan_get_context_state() +; CHECK-NEXT: [[PARAM_SHADOW:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 0 +; CHECK-NEXT: [[RETVAL_SHADOW:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 1 +; CHECK-NEXT: [[VA_ARG_SHADOW:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 2 +; CHECK-NEXT: [[VA_ARG_ORIGIN:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 3 +; CHECK-NEXT: [[VA_ARG_OVERFLOW_SIZE:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 4 +; CHECK-NEXT: [[PARAM_ORIGIN:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 5 +; CHECK-NEXT: [[RETVAL_ORIGIN:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 6 +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[PARAM_SHADOW]] to i64 +; CHECK-NEXT: [[_MSARG:%.*]] = inttoptr i64 [[TMP1]] to ptr +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[_MSARG]], align 8 +; CHECK-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[PARAM_ORIGIN]] to i64 +; CHECK-NEXT: [[_MSARG_O:%.*]] = inttoptr i64 [[TMP3]] to ptr +; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[_MSARG_O]], align 4 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB5:.*]], label %[[BB6:.*]], !prof [[PROF1]] +; CHECK: [[BB5]]: +; CHECK-NEXT: call void @__msan_warning(i32 [[TMP4]]) #[[ATTR8]] +; CHECK-NEXT: br label %[[BB6]] +; CHECK: [[BB6]]: +; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[P]], align 8 +; CHECK-NEXT: [[TMP8:%.*]] = call { ptr, ptr } @__msan_metadata_ptr_for_load_8(ptr [[P]]) +; CHECK-NEXT: [[TMP9:%.*]] = extractvalue { ptr, ptr } [[TMP8]], 0 +; CHECK-NEXT: [[TMP10:%.*]] = extractvalue { ptr, ptr } [[TMP8]], 1 +; CHECK-NEXT: [[_MSLD:%.*]] = load i64, ptr [[TMP9]], align 8 +; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 8 +; CHECK-NEXT: store i64 [[_MSLD]], ptr [[RETVAL_SHADOW]], align 8 +; CHECK-NEXT: store i32 [[TMP11]], ptr [[RETVAL_ORIGIN]], align 4 +; CHECK-NEXT: ret i64 [[TMP7]] +; entry: %0 = load i64, ptr %p ret i64 %0 } -; CHECK-LABEL: @Load8 -; CHECK: entry: -; CHECK: @__msan_get_context_state() -; CHECK: [[PARAM_SHADOW:%[a-z0-9_]+]] = getelementptr {{.*}} i32 0, i32 0 -; CHECK: ptrtoint {{.*}} [[PARAM_SHADOW]] -; Load the shadow of %p and check it -; CHECK: load i64 -; CHECK: icmp -; CHECK: br i1 -; CHECK: {{^[0-9]+}}: -; Load the value from %p. This is done before accessing the shadow -; to ease atomic handling. -; CHECK: load i64 -; CHECK: @__msan_metadata_ptr_for_load_8(ptr %p) -; Load the shadow and origin. 
-; CHECK: load i64 -; CHECK: load i32 - define i128 @Load16(ptr nocapture %p) nounwind uwtable sanitize_memory { +; CHECK-LABEL: define i128 @Load16( +; CHECK-SAME: ptr captures(none) [[P:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = call ptr @__msan_get_context_state() +; CHECK-NEXT: [[PARAM_SHADOW:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 0 +; CHECK-NEXT: [[RETVAL_SHADOW:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 1 +; CHECK-NEXT: [[VA_ARG_SHADOW:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 2 +; CHECK-NEXT: [[VA_ARG_ORIGIN:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 3 +; CHECK-NEXT: [[VA_ARG_OVERFLOW_SIZE:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 4 +; CHECK-NEXT: [[PARAM_ORIGIN:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 5 +; CHECK-NEXT: [[RETVAL_ORIGIN:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 6 +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[PARAM_SHADOW]] to i64 +; CHECK-NEXT: [[_MSARG:%.*]] = inttoptr i64 [[TMP1]] to ptr +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[_MSARG]], align 8 +; CHECK-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[PARAM_ORIGIN]] to i64 +; CHECK-NEXT: [[_MSARG_O:%.*]] = inttoptr i64 [[TMP3]] to ptr +; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[_MSARG_O]], align 4 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB5:.*]], label %[[BB6:.*]], !prof [[PROF1]] +; CHECK: [[BB5]]: +; CHECK-NEXT: call void @__msan_warning(i32 [[TMP4]]) #[[ATTR8]] +; CHECK-NEXT: br label %[[BB6]] +; CHECK: [[BB6]]: +; CHECK-NEXT: [[TMP7:%.*]] = load i128, ptr [[P]], align 8 +; CHECK-NEXT: [[TMP8:%.*]] = call { ptr, ptr } @__msan_metadata_ptr_for_load_n(ptr [[P]], i64 16) +; CHECK-NEXT: [[TMP9:%.*]] = extractvalue { ptr, ptr } [[TMP8]], 0 +; CHECK-NEXT: [[TMP10:%.*]] = extractvalue { ptr, ptr } [[TMP8]], 1 +; CHECK-NEXT: [[_MSLD:%.*]] = load i128, ptr [[TMP9]], align 8 +; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 8 +; CHECK-NEXT: store i128 [[_MSLD]], ptr [[RETVAL_SHADOW]], align 8 +; CHECK-NEXT: store i32 [[TMP11]], ptr [[RETVAL_ORIGIN]], align 4 +; CHECK-NEXT: ret i128 [[TMP7]] +; entry: %0 = load i128, ptr %p ret i128 %0 } -; CHECK-LABEL: @Load16 -; CHECK: entry: -; CHECK: @__msan_get_context_state() -; CHECK: [[PARAM_SHADOW:%[a-z0-9_]+]] = getelementptr {{.*}} i32 0, i32 0 -; CHECK: ptrtoint {{.*}} [[PARAM_SHADOW]] -; Load the shadow of %p and check it -; CHECK: load i64 -; CHECK: icmp -; CHECK: br i1 -; CHECK: {{^[0-9]+}}: -; Load the value from %p. This is done before accessing the shadow -; to ease atomic handling. -; CHECK: load i128 -; CHECK: @__msan_metadata_ptr_for_load_n(ptr %p, i64 16) -; Load the shadow and origin. 
-; CHECK: load i128 -; CHECK: load i32 - - ; Test kernel-specific va_list instrumentation %struct.__va_list_tag = type { i32, i32, ptr, ptr } @@ -319,6 +506,78 @@ declare dso_local i32 @VAListFn(ptr, ptr) local_unnamed_addr ; Function Attrs: nounwind uwtable define dso_local i32 @VarArgFn(ptr %fmt, ...) local_unnamed_addr sanitize_memory #0 { +; CHECK-LABEL: define dso_local i32 @VarArgFn( +; CHECK-SAME: ptr [[FMT:%.*]], ...) local_unnamed_addr #[[ATTR1:[0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = call ptr @__msan_get_context_state() +; CHECK-NEXT: [[PARAM_SHADOW:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 0 +; CHECK-NEXT: [[RETVAL_SHADOW:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 1 +; CHECK-NEXT: [[VA_ARG_SHADOW:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 2 +; CHECK-NEXT: [[VA_ARG_ORIGIN:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 3 +; CHECK-NEXT: [[VA_ARG_OVERFLOW_SIZE:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 4 +; CHECK-NEXT: [[PARAM_ORIGIN:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 5 +; CHECK-NEXT: [[RETVAL_ORIGIN:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 6 +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[PARAM_SHADOW]] to i64 +; CHECK-NEXT: [[_MSARG:%.*]] = inttoptr i64 [[TMP1]] to ptr +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[_MSARG]], align 8 +; CHECK-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[PARAM_ORIGIN]] to i64 +; CHECK-NEXT: [[_MSARG_O:%.*]] = inttoptr i64 [[TMP3]] to ptr +; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[_MSARG_O]], align 4 +; CHECK-NEXT: [[TMP5:%.*]] = load i64, ptr [[VA_ARG_OVERFLOW_SIZE]], align 8 +; CHECK-NEXT: [[TMP6:%.*]] = add i64 48, [[TMP5]] +; CHECK-NEXT: [[TMP7:%.*]] = alloca i8, i64 [[TMP6]], align 8 +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP7]], i8 0, i64 [[TMP6]], i1 false) +; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP6]], i64 800) +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP7]], ptr align 8 [[VA_ARG_SHADOW]], i64 [[TMP8]], i1 false) +; CHECK-NEXT: [[TMP9:%.*]] = alloca i8, i64 [[TMP6]], align 8 +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP9]], ptr align 8 [[VA_ARG_ORIGIN]], i64 [[TMP8]], i1 false) +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[ARGS:%.*]] = alloca [1 x %struct.__va_list_tag], align 16 +; CHECK-NEXT: call void @__msan_poison_alloca(ptr [[ARGS]], i64 24, ptr @[[GLOB0:[0-9]+]]) +; CHECK-NEXT: [[TMP10:%.*]] = call { ptr, ptr } @__msan_metadata_ptr_for_store_1(ptr [[ARGS]]) +; CHECK-NEXT: [[TMP11:%.*]] = extractvalue { ptr, ptr } [[TMP10]], 0 +; CHECK-NEXT: [[TMP12:%.*]] = extractvalue { ptr, ptr } [[TMP10]], 1 +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP11]], i8 0, i64 24, i1 false) +; CHECK-NEXT: call void @llvm.va_start.p0(ptr nonnull [[ARGS]]) +; CHECK-NEXT: [[TMP13:%.*]] = ptrtoint ptr [[ARGS]] to i64 +; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[TMP13]], 16 +; CHECK-NEXT: [[TMP15:%.*]] = inttoptr i64 [[TMP14]] 
to ptr +; CHECK-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 8 +; CHECK-NEXT: [[TMP17:%.*]] = call { ptr, ptr } @__msan_metadata_ptr_for_store_1(ptr [[TMP16]]) +; CHECK-NEXT: [[TMP18:%.*]] = extractvalue { ptr, ptr } [[TMP17]], 0 +; CHECK-NEXT: [[TMP19:%.*]] = extractvalue { ptr, ptr } [[TMP17]], 1 +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[TMP18]], ptr align 16 [[TMP7]], i64 48, i1 false) +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[TMP19]], ptr align 16 [[TMP9]], i64 48, i1 false) +; CHECK-NEXT: [[TMP20:%.*]] = ptrtoint ptr [[ARGS]] to i64 +; CHECK-NEXT: [[TMP21:%.*]] = add i64 [[TMP20]], 8 +; CHECK-NEXT: [[TMP22:%.*]] = inttoptr i64 [[TMP21]] to ptr +; CHECK-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP22]], align 8 +; CHECK-NEXT: [[TMP24:%.*]] = call { ptr, ptr } @__msan_metadata_ptr_for_store_1(ptr [[TMP23]]) +; CHECK-NEXT: [[TMP25:%.*]] = extractvalue { ptr, ptr } [[TMP24]], 0 +; CHECK-NEXT: [[TMP26:%.*]] = extractvalue { ptr, ptr } [[TMP24]], 1 +; CHECK-NEXT: [[TMP27:%.*]] = getelementptr i8, ptr [[TMP7]], i32 48 +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[TMP25]], ptr align 16 [[TMP27]], i64 [[TMP5]], i1 false) +; CHECK-NEXT: [[TMP28:%.*]] = getelementptr i8, ptr [[TMP9]], i32 48 +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[TMP26]], ptr align 16 [[TMP28]], i64 [[TMP5]], i1 false) +; CHECK-NEXT: [[TMP29:%.*]] = ptrtoint ptr [[PARAM_SHADOW]] to i64 +; CHECK-NEXT: [[_MSARG1:%.*]] = inttoptr i64 [[TMP29]] to ptr +; CHECK-NEXT: store i64 [[TMP2]], ptr [[_MSARG1]], align 8 +; CHECK-NEXT: [[TMP30:%.*]] = ptrtoint ptr [[PARAM_ORIGIN]] to i64 +; CHECK-NEXT: [[_MSARG_O2:%.*]] = inttoptr i64 [[TMP30]] to ptr +; CHECK-NEXT: store i32 [[TMP4]], ptr [[_MSARG_O2]], align 4 +; CHECK-NEXT: [[TMP31:%.*]] = ptrtoint ptr [[PARAM_SHADOW]] to i64 +; CHECK-NEXT: [[TMP32:%.*]] = add i64 [[TMP31]], 8 +; CHECK-NEXT: [[_MSARG3:%.*]] = inttoptr i64 [[TMP32]] to ptr +; CHECK-NEXT: store i64 0, ptr [[_MSARG3]], align 8 +; CHECK-NEXT: store i32 0, ptr [[RETVAL_SHADOW]], align 8 +; CHECK-NEXT: [[CALL:%.*]] = call i32 @VAListFn(ptr [[FMT]], ptr nonnull [[ARGS]]) +; CHECK-NEXT: [[_MSRET:%.*]] = load i32, ptr [[RETVAL_SHADOW]], align 8 +; CHECK-NEXT: [[TMP33:%.*]] = load i32, ptr [[RETVAL_ORIGIN]], align 4 +; CHECK-NEXT: call void @llvm.va_end.p0(ptr nonnull [[ARGS]]) +; CHECK-NEXT: store i32 [[_MSRET]], ptr [[RETVAL_SHADOW]], align 8 +; CHECK-NEXT: store i32 [[TMP33]], ptr [[RETVAL_ORIGIN]], align 4 +; CHECK-NEXT: ret i32 [[CALL]] +; entry: %args = alloca [1 x %struct.__va_list_tag], align 16 call void @llvm.va_start(ptr nonnull %args) @@ -330,52 +589,56 @@ entry: ; Kernel is built without SSE support. attributes #0 = { "target-features"="+fxsr,+x87,-sse" } -; CHECK-LABEL: @VarArgFn -; CHECK: @__msan_get_context_state() -; CHECK: [[VA_ARG_SHADOW:%[a-z0-9_]+]] = getelementptr {{.*}} i32 0, i32 2 -; CHECK: [[VA_ARG_ORIGIN:%[a-z0-9_]+]] = getelementptr {{.*}} i32 0, i32 3 -; CHECK: [[VA_ARG_OVERFLOW_SIZE:%[a-z0-9_]+]] = getelementptr {{.*}} i32 0, i32 4 -; CHECK: [[OSIZE:%[0-9]+]] = load i64, ptr [[VA_ARG_OVERFLOW_SIZE]] ; Register save area is 48 bytes for non-SSE builds. 
-; CHECK: [[SIZE:%[0-9]+]] = add i64 48, [[OSIZE]] -; CHECK: [[SHADOWS:%[0-9]+]] = alloca i8, i64 [[SIZE]] -; CHECK: call void @llvm.memset{{.*}}(ptr align 8 [[SHADOWS]], i8 0, i64 [[SIZE]], i1 false) -; CHECK: [[COPYSZ:%[0-9]+]] = call i64 @llvm.umin.i64(i64 [[SIZE]], i64 800) -; CHECK: call void @llvm.memcpy{{.*}}(ptr align 8 [[SHADOWS]], ptr align 8 [[VA_ARG_SHADOW]], i64 [[COPYSZ]] -; CHECK: [[ORIGINS:%[0-9]+]] = alloca i8, i64 [[SIZE]] -; CHECK: call void @llvm.memcpy{{.*}}(ptr align 8 [[ORIGINS]], ptr align 8 [[VA_ARG_ORIGIN]], i64 [[COPYSZ]] -; CHECK: call i32 @VAListFn ; Function Attrs: nounwind uwtable define dso_local void @VarArgCaller() local_unnamed_addr sanitize_memory { +; CHECK-LABEL: define dso_local void @VarArgCaller( +; CHECK-SAME: ) local_unnamed_addr #[[ATTR2:[0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = call ptr @__msan_get_context_state() +; CHECK-NEXT: [[PARAM_SHADOW:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 0 +; CHECK-NEXT: [[RETVAL_SHADOW:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 1 +; CHECK-NEXT: [[VA_ARG_SHADOW:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 2 +; CHECK-NEXT: [[VA_ARG_ORIGIN:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 3 +; CHECK-NEXT: [[VA_ARG_OVERFLOW_SIZE:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 4 +; CHECK-NEXT: [[PARAM_ORIGIN:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 5 +; CHECK-NEXT: [[RETVAL_ORIGIN:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i32 0, i32 6 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[PARAM_SHADOW]] to i64 +; CHECK-NEXT: [[_MSARG:%.*]] = inttoptr i64 [[TMP1]] to ptr +; CHECK-NEXT: store i64 0, ptr [[_MSARG]], align 8 +; CHECK-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[PARAM_SHADOW]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[TMP2]], 8 +; CHECK-NEXT: [[_MSARG1:%.*]] = inttoptr i64 [[TMP3]] to ptr +; CHECK-NEXT: store i32 0, ptr [[_MSARG1]], align 8 +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[VA_ARG_SHADOW]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[TMP4]], 0 +; CHECK-NEXT: [[_MSARG_VA_S:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[VA_ARG_ORIGIN]] to i64 +; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[TMP6]], 0 +; CHECK-NEXT: [[_MSARG_VA_O:%.*]] = inttoptr i64 [[TMP7]] to ptr +; CHECK-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[VA_ARG_SHADOW]] to i64 +; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[TMP8]], 8 +; CHECK-NEXT: [[_MSARG_VA_S2:%.*]] = inttoptr i64 [[TMP9]] to ptr +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[VA_ARG_ORIGIN]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[TMP10]], 8 +; CHECK-NEXT: [[_MSARG_VA_O3:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: store i32 0, ptr [[_MSARG_VA_S2]], align 8 +; CHECK-NEXT: store i32 0, ptr [[_MSARG_VA_O3]], align 8 +; CHECK-NEXT: store i64 0, ptr [[VA_ARG_OVERFLOW_SIZE]], align 8 +; CHECK-NEXT: store i32 0, ptr [[RETVAL_SHADOW]], align 8 +; CHECK-NEXT: [[CALL:%.*]] = tail call i32 (ptr, ...) 
@VarArgFn(ptr @.str, i32 123) +; CHECK-NEXT: [[_MSRET:%.*]] = load i32, ptr [[RETVAL_SHADOW]], align 8 +; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[RETVAL_ORIGIN]], align 4 +; CHECK-NEXT: ret void +; entry: %call = tail call i32 (ptr, ...) @VarArgFn(ptr @.str, i32 123) ret void } -; CHECK-LABEL: @VarArgCaller - -; CHECK: entry: -; CHECK: @__msan_get_context_state() -; CHECK: [[PARAM_SHADOW:%[a-z0-9_]+]] = getelementptr {{.*}} i32 0, i32 0 -; CHECK: [[VA_ARG_SHADOW:%[a-z0-9_]+]] = getelementptr {{.*}} i32 0, i32 2 -; CHECK: [[VA_ARG_OVERFLOW_SIZE:%[a-z0-9_]+]] = getelementptr {{.*}} i32 0, i32 4 - -; CHECK: [[PARAM_SI:%[_a-z0-9]+]] = ptrtoint {{.*}} [[PARAM_SHADOW]] -; CHECK: [[ARG1_S:%[_a-z0-9]+]] = inttoptr i64 [[PARAM_SI]] to ptr -; First argument is initialized -; CHECK: store i64 0, ptr [[ARG1_S]] - -; Dangling cast of va_arg_shadow[0], unused because the first argument is fixed. -; CHECK: [[VA_CAST0:%[_a-z0-9]+]] = ptrtoint {{.*}} [[VA_ARG_SHADOW]] to i64 - -; CHECK: [[VA_CAST1:%[_a-z0-9]+]] = ptrtoint {{.*}} [[VA_ARG_SHADOW]] to i64 -; CHECK: [[ARG1_SI:%[_a-z0-9]+]] = add i64 [[VA_CAST1]], 8 -; CHECK: [[PARG1_S:%[_a-z0-9]+]] = inttoptr i64 [[ARG1_SI]] to ptr - -; Shadow for 123 is 0. -; CHECK: store i32 0, ptr [[ARG1_S]] - -; CHECK: store i64 0, ptr [[VA_ARG_OVERFLOW_SIZE]] -; CHECK: call i32 (ptr, ...) @VarArgFn({{.*}} @.str{{.*}} i32 123) +;. +; CHECK: [[PROF1]] = !{!"branch_weights", i32 1, i32 1048575} +;. diff --git a/llvm/test/Transforms/CorrelatedValuePropagation/pr161367.ll b/llvm/test/Transforms/CorrelatedValuePropagation/pr161367.ll new file mode 100644 index 0000000..346eaea --- /dev/null +++ b/llvm/test/Transforms/CorrelatedValuePropagation/pr161367.ll @@ -0,0 +1,31 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 +; RUN: opt < %s -passes=correlated-propagation -S | FileCheck %s + +; Make sure that we apply trunc to the edge value of %x. 
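+; On the %entry -> %exit edge, %x is known to equal
+; sub (i64 0, i64 ptrtoint (ptr @g to i64)), so the propagated incoming value
+; must be wrapped in a trunc-to-i16 constant expression rather than being
+; substituted into the i16 phi at i64 width.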
+@g = external global i8 + +define i16 @pr161367(i64 %x) { +; CHECK-LABEL: define i16 @pr161367( +; CHECK-SAME: i64 [[X:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[TRUNC:%.*]] = trunc i64 [[X]] to i16 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[X]], sub (i64 0, i64 ptrtoint (ptr @g to i64)) +; CHECK-NEXT: br i1 [[EXITCOND]], label %[[EXIT:.*]], label %[[ELSE:.*]] +; CHECK: [[ELSE]]: +; CHECK-NEXT: br label %[[EXIT]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[RET:%.*]] = phi i16 [ trunc (i64 sub (i64 0, i64 ptrtoint (ptr @g to i64)) to i16), %[[ENTRY]] ], [ 0, %[[ELSE]] ] +; CHECK-NEXT: ret i16 [[RET]] +; +entry: + %trunc = trunc i64 %x to i16 + %exitcond = icmp eq i64 %x, sub (i64 0, i64 ptrtoint (ptr @g to i64)) + br i1 %exitcond, label %exit, label %else + +else: + br label %exit + +exit: + %ret = phi i16 [ %trunc, %entry ], [ 0, %else ] + ret i16 %ret +} diff --git a/llvm/test/Transforms/FunctionAttrs/nocapture.ll b/llvm/test/Transforms/FunctionAttrs/nocapture.ll index 60a4214..8113ba65 100644 --- a/llvm/test/Transforms/FunctionAttrs/nocapture.ll +++ b/llvm/test/Transforms/FunctionAttrs/nocapture.ll @@ -1398,5 +1398,73 @@ define void @assume_nonnull(ptr %p) { ret void } +define void @captures_metadata_address_is_null(ptr %x, ptr %y) { +; FNATTRS: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: write) +; FNATTRS-LABEL: define void @captures_metadata_address_is_null +; FNATTRS-SAME: (ptr captures(address_is_null) [[X:%.*]], ptr writeonly captures(none) initializes((0, 8)) [[Y:%.*]]) #[[ATTR17]] { +; FNATTRS-NEXT: store ptr [[X]], ptr [[Y]], align 8, !captures [[META0:![0-9]+]] +; FNATTRS-NEXT: ret void +; +; ATTRIBUTOR: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: write) +; ATTRIBUTOR-LABEL: define void @captures_metadata_address_is_null +; ATTRIBUTOR-SAME: (ptr nofree writeonly [[X:%.*]], ptr nofree nonnull writeonly captures(none) [[Y:%.*]]) #[[ATTR13]] { +; ATTRIBUTOR-NEXT: store ptr [[X]], ptr [[Y]], align 8, !captures [[META0:![0-9]+]] +; ATTRIBUTOR-NEXT: ret void +; + store ptr %x, ptr %y, !captures !{!"address_is_null"} + ret void +} + +define void @captures_metadata_address(ptr %x, ptr %y) { +; FNATTRS: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: write) +; FNATTRS-LABEL: define void @captures_metadata_address +; FNATTRS-SAME: (ptr captures(address) [[X:%.*]], ptr writeonly captures(none) initializes((0, 8)) [[Y:%.*]]) #[[ATTR17]] { +; FNATTRS-NEXT: store ptr [[X]], ptr [[Y]], align 8, !captures [[META1:![0-9]+]] +; FNATTRS-NEXT: ret void +; +; ATTRIBUTOR: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: write) +; ATTRIBUTOR-LABEL: define void @captures_metadata_address +; ATTRIBUTOR-SAME: (ptr nofree writeonly [[X:%.*]], ptr nofree nonnull writeonly captures(none) [[Y:%.*]]) #[[ATTR13]] { +; ATTRIBUTOR-NEXT: store ptr [[X]], ptr [[Y]], align 8, !captures [[META1:![0-9]+]] +; ATTRIBUTOR-NEXT: ret void +; + store ptr %x, ptr %y, !captures !{!"address"} + ret void +} + +define void @captures_metadata_address_read_provenance(ptr %x, ptr %y) { +; FNATTRS: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: write) +; FNATTRS-LABEL: define void @captures_metadata_address_read_provenance +; FNATTRS-SAME: (ptr captures(address, read_provenance) [[X:%.*]], ptr writeonly captures(none) initializes((0, 8)) [[Y:%.*]]) #[[ATTR17]] { +; FNATTRS-NEXT: store ptr [[X]], ptr [[Y]], align 8, 
!captures [[META2:![0-9]+]] +; FNATTRS-NEXT: ret void +; +; ATTRIBUTOR: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: write) +; ATTRIBUTOR-LABEL: define void @captures_metadata_address_read_provenance +; ATTRIBUTOR-SAME: (ptr nofree writeonly [[X:%.*]], ptr nofree nonnull writeonly captures(none) [[Y:%.*]]) #[[ATTR13]] { +; ATTRIBUTOR-NEXT: store ptr [[X]], ptr [[Y]], align 8, !captures [[META2:![0-9]+]] +; ATTRIBUTOR-NEXT: ret void +; + store ptr %x, ptr %y, !captures !{!"address", !"read_provenance"} + ret void +} + +define void @captures_metadata_provenance(ptr %x, ptr %y) { +; FNATTRS: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: write) +; FNATTRS-LABEL: define void @captures_metadata_provenance +; FNATTRS-SAME: (ptr captures(provenance) [[X:%.*]], ptr writeonly captures(none) initializes((0, 8)) [[Y:%.*]]) #[[ATTR17]] { +; FNATTRS-NEXT: store ptr [[X]], ptr [[Y]], align 8, !captures [[META3:![0-9]+]] +; FNATTRS-NEXT: ret void +; +; ATTRIBUTOR: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: write) +; ATTRIBUTOR-LABEL: define void @captures_metadata_provenance +; ATTRIBUTOR-SAME: (ptr nofree writeonly [[X:%.*]], ptr nofree nonnull writeonly captures(none) [[Y:%.*]]) #[[ATTR13]] { +; ATTRIBUTOR-NEXT: store ptr [[X]], ptr [[Y]], align 8, !captures [[META3:![0-9]+]] +; ATTRIBUTOR-NEXT: ret void +; + store ptr %x, ptr %y, !captures !{!"provenance"} + ret void +} + declare ptr @llvm.launder.invariant.group.p0(ptr) declare ptr @llvm.strip.invariant.group.p0(ptr) diff --git a/llvm/test/Transforms/GlobalOpt/fastcc.ll b/llvm/test/Transforms/GlobalOpt/fastcc.ll index 854357e..edbd602 100644 --- a/llvm/test/Transforms/GlobalOpt/fastcc.ll +++ b/llvm/test/Transforms/GlobalOpt/fastcc.ll @@ -1,16 +1,25 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 ; RUN: opt < %s -passes=globalopt -S | FileCheck %s declare token @llvm.call.preallocated.setup(i32) declare ptr @llvm.call.preallocated.arg(token, i32) define internal i32 @f(ptr %m) { -; CHECK-LABEL: define internal fastcc i32 @f +; CHECK-LABEL: define internal fastcc i32 @f( +; CHECK-SAME: ptr [[M:%.*]]) unnamed_addr { +; CHECK-NEXT: [[V:%.*]] = load i32, ptr [[M]], align 4 +; CHECK-NEXT: ret i32 [[V]] +; %v = load i32, ptr %m ret i32 %v } define internal x86_thiscallcc i32 @g(ptr %m) { -; CHECK-LABEL: define internal fastcc i32 @g +; CHECK-LABEL: define internal fastcc i32 @g( +; CHECK-SAME: ptr [[M:%.*]]) unnamed_addr { +; CHECK-NEXT: [[V:%.*]] = load i32, ptr [[M]], align 4 +; CHECK-NEXT: ret i32 [[V]] +; %v = load i32, ptr %m ret i32 %v } @@ -18,41 +27,80 @@ define internal x86_thiscallcc i32 @g(ptr %m) { ; Leave this one alone, because the user went out of their way to request this ; convention. 
define internal coldcc i32 @h(ptr %m) { -; CHECK-LABEL: define internal coldcc i32 @h +; CHECK-LABEL: define internal coldcc i32 @h( +; CHECK-SAME: ptr [[M:%.*]]) unnamed_addr { +; CHECK-NEXT: [[V:%.*]] = load i32, ptr [[M]], align 4 +; CHECK-NEXT: ret i32 [[V]] +; %v = load i32, ptr %m ret i32 %v } define internal i32 @j(ptr %m) { -; CHECK-LABEL: define internal i32 @j +; CHECK-LABEL: define internal i32 @j( +; CHECK-SAME: ptr [[M:%.*]]) { +; CHECK-NEXT: [[V:%.*]] = load i32, ptr [[M]], align 4 +; CHECK-NEXT: ret i32 [[V]] +; %v = load i32, ptr %m ret i32 %v } define internal i32 @inalloca(ptr inalloca(i32) %p) { -; CHECK-LABEL: define internal fastcc i32 @inalloca(ptr %p) +; CHECK-LABEL: define internal fastcc i32 @inalloca( +; CHECK-SAME: ptr [[P:%.*]]) unnamed_addr { +; CHECK-NEXT: [[RV:%.*]] = load i32, ptr [[P]], align 4 +; CHECK-NEXT: ret i32 [[RV]] +; %rv = load i32, ptr %p ret i32 %rv } define i32 @inalloca2_caller(ptr inalloca(i32) %p) { +; CHECK-LABEL: define i32 @inalloca2_caller( +; CHECK-SAME: ptr inalloca(i32) [[P:%.*]]) local_unnamed_addr { +; CHECK-NEXT: [[RV:%.*]] = musttail call i32 @inalloca2(ptr inalloca(i32) [[P]]) +; CHECK-NEXT: ret i32 [[RV]] +; %rv = musttail call i32 @inalloca2(ptr inalloca(i32) %p) ret i32 %rv } define internal i32 @inalloca2(ptr inalloca(i32) %p) { ; Because of the musttail caller, this inalloca cannot be dropped. -; CHECK-LABEL: define internal i32 @inalloca2(ptr inalloca(i32) %p) +; CHECK-LABEL: define internal i32 @inalloca2( +; CHECK-SAME: ptr inalloca(i32) [[P:%.*]]) unnamed_addr { +; CHECK-NEXT: [[RV:%.*]] = load i32, ptr [[P]], align 4 +; CHECK-NEXT: ret i32 [[RV]] +; %rv = load i32, ptr %p ret i32 %rv } define internal i32 @preallocated(ptr preallocated(i32) %p) { -; CHECK-LABEL: define internal fastcc i32 @preallocated(ptr %p) +; CHECK-LABEL: define internal fastcc i32 @preallocated( +; CHECK-SAME: ptr [[P:%.*]]) unnamed_addr { +; CHECK-NEXT: [[RV:%.*]] = load i32, ptr [[P]], align 4 +; CHECK-NEXT: ret i32 [[RV]] +; %rv = load i32, ptr %p ret i32 %rv } define void @call_things() { +; CHECK-LABEL: define void @call_things() local_unnamed_addr { +; CHECK-NEXT: [[M:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP1:%.*]] = call fastcc i32 @f(ptr [[M]]) +; CHECK-NEXT: [[TMP2:%.*]] = call fastcc i32 @g(ptr [[M]]) +; CHECK-NEXT: [[TMP3:%.*]] = call coldcc i32 @h(ptr [[M]]) +; CHECK-NEXT: [[TMP4:%.*]] = call i32 @j(ptr [[M]]) +; CHECK-NEXT: [[ARGS:%.*]] = alloca inalloca i32, align 4 +; CHECK-NEXT: [[TMP5:%.*]] = call fastcc i32 @inalloca(ptr [[ARGS]]) +; CHECK-NEXT: [[TMP6:%.*]] = call ptr @llvm.stacksave.p0() +; CHECK-NEXT: [[PAARG:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP7:%.*]] = call fastcc i32 @preallocated(ptr [[PAARG]]) +; CHECK-NEXT: call void @llvm.stackrestore.p0(ptr [[TMP6]]) +; CHECK-NEXT: ret void +; %m = alloca i32 call i32 @f(ptr %m) call x86_thiscallcc i32 @g(ptr %m) @@ -65,15 +113,25 @@ define void @call_things() { call i32 @preallocated(ptr preallocated(i32) %N) ["preallocated"(token %c)] ret void } -; CHECK-LABEL: define void @call_things() -; CHECK: call fastcc i32 @f -; CHECK: call fastcc i32 @g -; CHECK: call coldcc i32 @h -; CHECK: call i32 @j -; CHECK: call fastcc i32 @inalloca(ptr %args) -; CHECK-NOT: llvm.call.preallocated -; CHECK: call fastcc i32 @preallocated(ptr %paarg) @llvm.used = appending global [1 x ptr] [ - ptr @j + ptr @j ], section "llvm.metadata" + +define internal i32 @assume_fastcc() { +; CHECK-LABEL: define internal fastcc i32 @assume_fastcc() { +; CHECK-NEXT: [[OBJSIZE:%.*]] = call i32 
@llvm.objectsize.i32.p0(ptr @assume_fastcc, i1 false, i1 false, i1 false) +; CHECK-NEXT: ret i32 [[OBJSIZE]] +; + %objsize = call i32 @llvm.objectsize.i32.p0(ptr @assume_fastcc, i1 false, i1 false, i1 false) + ret i32 %objsize +} + +define internal i32 @constexpr_self_user() addrspace(1) { +; CHECK-LABEL: define internal fastcc i32 @constexpr_self_user() addrspace(1) { +; CHECK-NEXT: [[OBJSIZE:%.*]] = call i32 @llvm.objectsize.i32.p0(ptr addrspacecast (ptr addrspace(1) @constexpr_self_user to ptr), i1 false, i1 false, i1 false) +; CHECK-NEXT: ret i32 [[OBJSIZE]] +; + %objsize = call i32 @llvm.objectsize.i32.p0(ptr addrspacecast (ptr addrspace(1) @constexpr_self_user to ptr), i1 false, i1 false, i1 false) + ret i32 %objsize +} diff --git a/llvm/test/Transforms/InstCombine/in-freeze-phi.ll b/llvm/test/Transforms/InstCombine/in-freeze-phi.ll new file mode 100644 index 0000000..917d81b --- /dev/null +++ b/llvm/test/Transforms/InstCombine/in-freeze-phi.ll @@ -0,0 +1,274 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -passes=instcombine -S < %s | FileCheck %s + +define i32 @phi_freeze_same_consts(i1 %c0, i1 %c1) { +; CHECK-LABEL: define i32 @phi_freeze_same_consts( +; CHECK-SAME: i1 [[C0:%.*]], i1 [[C1:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: br i1 [[C0]], label %[[BB_FREEZE:.*]], label %[[BB_OTHER:.*]] +; CHECK: [[BB_FREEZE]]: +; CHECK-NEXT: br label %[[FINAL:.*]] +; CHECK: [[BB_OTHER]]: +; CHECK-NEXT: br i1 [[C1]], label %[[CA:.*]], label %[[CB:.*]] +; CHECK: [[CA]]: +; CHECK-NEXT: br label %[[FINAL]] +; CHECK: [[CB]]: +; CHECK-NEXT: br label %[[FINAL]] +; CHECK: [[FINAL]]: +; CHECK-NEXT: ret i32 42 +; +entry: + br i1 %c0, label %bb_freeze, label %bb_other + +bb_freeze: + %f = freeze i32 undef + br label %final + +bb_other: + br i1 %c1, label %cA, label %cB +cA: + br label %final +cB: + br label %final + +final: + %phi = phi i32 [ %f, %bb_freeze ], [ 42, %cA ], [ 42, %cB ] + ret i32 %phi +} + +define i32 @phi_freeze_mixed_consts(i1 %c0, i1 %c1) { +; CHECK-LABEL: define i32 @phi_freeze_mixed_consts( +; CHECK-SAME: i1 [[C0:%.*]], i1 [[C1:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: br i1 [[C0]], label %[[BB_FREEZE:.*]], label %[[BB_OTHER:.*]] +; CHECK: [[BB_FREEZE]]: +; CHECK-NEXT: br label %[[FINAL:.*]] +; CHECK: [[BB_OTHER]]: +; CHECK-NEXT: br i1 [[C1]], label %[[CA:.*]], label %[[CB:.*]] +; CHECK: [[CA]]: +; CHECK-NEXT: br label %[[FINAL]] +; CHECK: [[CB]]: +; CHECK-NEXT: br label %[[FINAL]] +; CHECK: [[FINAL]]: +; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ 0, %[[BB_FREEZE]] ], [ 42, %[[CA]] ], [ 7, %[[CB]] ] +; CHECK-NEXT: ret i32 [[PHI]] +; +entry: + br i1 %c0, label %bb_freeze, label %bb_other + +bb_freeze: + %f = freeze i32 undef + br label %final + +bb_other: + br i1 %c1, label %cA, label %cB +cA: + br label %final +cB: + br label %final + +final: + %phi = phi i32 [ %f, %bb_freeze ], [ 42, %cA ], [ 7, %cB ] + ret i32 %phi +} + +define i32 @phi_freeze_with_nonconst_incoming(i32 %x, i1 %c0, i1 %c1) { +; CHECK-LABEL: define i32 @phi_freeze_with_nonconst_incoming( +; CHECK-SAME: i32 [[X:%.*]], i1 [[C0:%.*]], i1 [[C1:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: br i1 [[C0]], label %[[BB_FREEZE:.*]], label %[[BB_OTHER:.*]] +; CHECK: [[BB_FREEZE]]: +; CHECK-NEXT: br label %[[FINAL:.*]] +; CHECK: [[BB_OTHER]]: +; CHECK-NEXT: br i1 [[C1]], label %[[CA:.*]], label %[[CB:.*]] +; CHECK: [[CA]]: +; CHECK-NEXT: br label %[[FINAL]] +; CHECK: [[CB]]: +; CHECK-NEXT: br label %[[FINAL]] +; CHECK: [[FINAL]]: +; 
CHECK-NEXT: [[PHI:%.*]] = phi i32 [ 0, %[[BB_FREEZE]] ], [ [[X]], %[[CA]] ], [ 13, %[[CB]] ] +; CHECK-NEXT: ret i32 [[PHI]] +; +entry: + br i1 %c0, label %bb_freeze, label %bb_other + +bb_freeze: + %f = freeze i32 undef + br label %final + +bb_other: + br i1 %c1, label %cA, label %cB +cA: + br label %final +cB: + br label %final + +final: + %phi = phi i32 [ %f, %bb_freeze ], [ %x, %cA ], [ 13, %cB ] + ret i32 %phi +} + +define <4 x i8> @phi_freeze_vector(i1 %c0, i1 %c1) { +; CHECK-LABEL: define <4 x i8> @phi_freeze_vector( +; CHECK-SAME: i1 [[C0:%.*]], i1 [[C1:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: br i1 [[C0]], label %[[BB_FREEZE:.*]], label %[[BB_OTHER:.*]] +; CHECK: [[BB_FREEZE]]: +; CHECK-NEXT: br label %[[FINAL:.*]] +; CHECK: [[BB_OTHER]]: +; CHECK-NEXT: br i1 [[C1]], label %[[CA:.*]], label %[[CB:.*]] +; CHECK: [[CA]]: +; CHECK-NEXT: br label %[[FINAL]] +; CHECK: [[CB]]: +; CHECK-NEXT: br label %[[FINAL]] +; CHECK: [[FINAL]]: +; CHECK-NEXT: ret <4 x i8> splat (i8 9) +; +entry: + br i1 %c0, label %bb_freeze, label %bb_other + +bb_freeze: + %f = freeze <4 x i8> undef + br label %final + +bb_other: + br i1 %c1, label %cA, label %cB + +cA: + br label %final + +cB: + br label %final + +final: + %phi = phi <4 x i8> [ %f, %bb_freeze ], + [<i8 9, i8 9, i8 9, i8 9>, %cA ], + [<i8 9, i8 9, i8 9, i8 9>, %cB ] + ret <4 x i8> %phi +} + +define i32 @multi_use_one_folds_one_not_zero(i1 %c0, i1 %c1, i1 %c2) { +; CHECK-LABEL: define i32 @multi_use_one_folds_one_not_zero( +; CHECK-SAME: i1 [[C0:%.*]], i1 [[C1:%.*]], i1 [[C2:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: br i1 [[C0]], label %[[BB_OTHER3:.*]], label %[[CC1:.*]] +; CHECK: [[BB_OTHER3]]: +; CHECK-NEXT: br label %[[MID:.*]] +; CHECK: [[CC1]]: +; CHECK-NEXT: br i1 [[C1]], label %[[CA:.*]], label %[[CB:.*]] +; CHECK: [[CA]]: +; CHECK-NEXT: br label %[[MID]] +; CHECK: [[CB]]: +; CHECK-NEXT: br label %[[MID]] +; CHECK: [[MID]]: +; CHECK-NEXT: [[PHI_FOLD:%.*]] = phi i32 [ 0, %[[BB_OTHER3]] ], [ 1, %[[CA]] ], [ 1, %[[CB]] ] +; CHECK-NEXT: br i1 [[C2]], label %[[BB_FREEZE2:.*]], label %[[CD:.*]] +; CHECK: [[BB_FREEZE2]]: +; CHECK-NEXT: br label %[[FINAL:.*]] +; CHECK: [[BB_OTHER2:.*:]] +; CHECK-NEXT: br i1 true, label %[[CA]], label %[[CB]] +; CHECK: [[CC:.*:]] +; CHECK-NEXT: br label %[[FINAL]] +; CHECK: [[CD]]: +; CHECK-NEXT: br label %[[FINAL]] +; CHECK: [[FINAL]]: +; CHECK-NEXT: ret i32 [[PHI_FOLD]] +; +entry: + %f = freeze i32 undef + br i1 %c0, label %bb_freeze, label %bb_other +bb_freeze: + br label %mid +bb_other: + br i1 %c1, label %cA, label %cB +cA: + br label %mid +cB: + br label %mid +mid: + %phi_no_fold = phi i32 [ %f, %bb_freeze ], [ 1, %cA ], [ 1, %cB ] + br i1 %c2, label %bb_freeze2, label %cD +bb_freeze2: + br label %final +bb_other2: + br i1 %c1, label %cA, label %cB +cC: + br label %final +cD: + br label %final +final: + %phi_fold = phi i32 [ %f, %bb_freeze2 ], [ 0, %cC ], [ 0, %cD ] + %a = add i32 %phi_fold, %phi_no_fold + ret i32 %a +} + +define i32 @phi_freeze_poison(i1 %c0, i1 %c1) { +; CHECK-LABEL: define i32 @phi_freeze_poison( +; CHECK-SAME: i1 [[C0:%.*]], i1 [[C1:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: br i1 [[C0]], label %[[BB_FREEZE:.*]], label %[[BB_OTHER:.*]] +; CHECK: [[BB_FREEZE]]: +; CHECK-NEXT: br label %[[FINAL:.*]] +; CHECK: [[BB_OTHER]]: +; CHECK-NEXT: br i1 [[C1]], label %[[CA:.*]], label %[[CB:.*]] +; CHECK: [[CA]]: +; CHECK-NEXT: br label %[[FINAL]] +; CHECK: [[CB]]: +; CHECK-NEXT: br label %[[FINAL]] +; CHECK: [[FINAL]]: +; CHECK-NEXT: ret i32 0 +; +entry: + br i1 %c0, 
label %bb_freeze, label %bb_other + +bb_freeze: + %f = freeze i32 undef + br label %final + +bb_other: + br i1 %c1, label %cA, label %cB +cA: + br label %final +cB: + br label %final + +final: + %phi = phi i32 [ %f, %bb_freeze ], [ poison, %cA ], [ poison, %cB ] + ret i32 %phi +} + +define <2 x i32> @phi_freeze_poison_vec(i1 %c0, i1 %c1) { +; CHECK-LABEL: define <2 x i32> @phi_freeze_poison_vec( +; CHECK-SAME: i1 [[C0:%.*]], i1 [[C1:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: br i1 [[C0]], label %[[BB_FREEZE:.*]], label %[[BB_OTHER:.*]] +; CHECK: [[BB_FREEZE]]: +; CHECK-NEXT: br label %[[FINAL:.*]] +; CHECK: [[BB_OTHER]]: +; CHECK-NEXT: br i1 [[C1]], label %[[CA:.*]], label %[[CB:.*]] +; CHECK: [[CA]]: +; CHECK-NEXT: br label %[[FINAL]] +; CHECK: [[CB]]: +; CHECK-NEXT: br label %[[FINAL]] +; CHECK: [[FINAL]]: +; CHECK-NEXT: [[PHI:%.*]] = phi <2 x i32> [ zeroinitializer, %[[BB_FREEZE]] ], [ <i32 poison, i32 1>, %[[CA]] ], [ <i32 poison, i32 1>, %[[CB]] ] +; CHECK-NEXT: ret <2 x i32> [[PHI]] +; +entry: + br i1 %c0, label %bb_freeze, label %bb_other + +bb_freeze: + %f = freeze <2 x i32> undef + br label %final + +bb_other: + br i1 %c1, label %cA, label %cB +cA: + br label %final +cB: + br label %final + +final: + %phi = phi <2 x i32> [ %f, %bb_freeze ], [ <i32 poison, i32 1>, %cA ], [ <i32 poison, i32 1>, %cB ] + ret <2 x i32> %phi +} diff --git a/llvm/test/Transforms/SimplifyCFG/hoist-with-metadata.ll b/llvm/test/Transforms/SimplifyCFG/hoist-with-metadata.ll index d34ac2b..85c8ed2 100644 --- a/llvm/test/Transforms/SimplifyCFG/hoist-with-metadata.ll +++ b/llvm/test/Transforms/SimplifyCFG/hoist-with-metadata.ll @@ -424,6 +424,174 @@ join: ret ptr %phi } +define void @hoist_captures_same(i1 %c, ptr %x, ptr %y) { +; CHECK-LABEL: @hoist_captures_same( +; CHECK-NEXT: if: +; CHECK-NEXT: store ptr [[X:%.*]], ptr [[Y:%.*]], align 8, !captures [[META9:![0-9]+]] +; CHECK-NEXT: ret void +; +if: + br i1 %c, label %then, label %else + +then: + store ptr %x, ptr %y, !captures !{!"address"} + br label %out + +else: + store ptr %x, ptr %y, !captures !{!"address"} + br label %out + +out: + ret void +} + +define void @hoist_captures_different(i1 %c, ptr %x, ptr %y) { +; CHECK-LABEL: @hoist_captures_different( +; CHECK-NEXT: if: +; CHECK-NEXT: store ptr [[X:%.*]], ptr [[Y:%.*]], align 8, !captures [[META10:![0-9]+]] +; CHECK-NEXT: ret void +; +if: + br i1 %c, label %then, label %else + +then: + store ptr %x, ptr %y, !captures !{!"address"} + br label %out + +else: + store ptr %x, ptr %y, !captures !{!"read_provenance"} + br label %out + +out: + ret void +} + +define void @hoist_captures_overlap(i1 %c, ptr %x, ptr %y) { +; CHECK-LABEL: @hoist_captures_overlap( +; CHECK-NEXT: if: +; CHECK-NEXT: store ptr [[X:%.*]], ptr [[Y:%.*]], align 8, !captures [[META10]] +; CHECK-NEXT: ret void +; +if: + br i1 %c, label %then, label %else + +then: + store ptr %x, ptr %y, !captures !{!"address"} + br label %out + +else: + store ptr %x, ptr %y, !captures !{!"address", !"read_provenance"} + br label %out + +out: + ret void +} + +define void @hoist_captures_subsume1(i1 %c, ptr %x, ptr %y) { +; CHECK-LABEL: @hoist_captures_subsume1( +; CHECK-NEXT: if: +; CHECK-NEXT: store ptr [[X:%.*]], ptr [[Y:%.*]], align 8, !captures [[META9]] +; CHECK-NEXT: ret void +; +if: + br i1 %c, label %then, label %else + +then: + store ptr %x, ptr %y, !captures !{!"address_is_null"} + br label %out + +else: + store ptr %x, ptr %y, !captures !{!"address"} + br label %out + +out: + ret void +} + +define void @hoist_captures_subsume2(i1 %c, ptr 
%x, ptr %y) { +; CHECK-LABEL: @hoist_captures_subsume2( +; CHECK-NEXT: if: +; CHECK-NEXT: store ptr [[X:%.*]], ptr [[Y:%.*]], align 8, !captures [[META11:![0-9]+]] +; CHECK-NEXT: ret void +; +if: + br i1 %c, label %then, label %else + +then: + store ptr %x, ptr %y, !captures !{!"provenance"} + br label %out + +else: + store ptr %x, ptr %y, !captures !{!"read_provenance"} + br label %out + +out: + ret void +} + +define void @hoist_captures_full_set(i1 %c, ptr %x, ptr %y) { +; CHECK-LABEL: @hoist_captures_full_set( +; CHECK-NEXT: if: +; CHECK-NEXT: store ptr [[X:%.*]], ptr [[Y:%.*]], align 8 +; CHECK-NEXT: ret void +; +if: + br i1 %c, label %then, label %else + +then: + store ptr %x, ptr %y, !captures !{!"address"} + br label %out + +else: + store ptr %x, ptr %y, !captures !{!"provenance"} + br label %out + +out: + ret void +} + +define void @hoist_captures_only_one1(i1 %c, ptr %x, ptr %y) { +; CHECK-LABEL: @hoist_captures_only_one1( +; CHECK-NEXT: if: +; CHECK-NEXT: store ptr [[X:%.*]], ptr [[Y:%.*]], align 8 +; CHECK-NEXT: ret void +; +if: + br i1 %c, label %then, label %else + +then: + store ptr %x, ptr %y, !captures !{!"address"} + br label %out + +else: + store ptr %x, ptr %y + br label %out + +out: + ret void +} + +define void @hoist_captures_only_one2(i1 %c, ptr %x, ptr %y) { +; CHECK-LABEL: @hoist_captures_only_one2( +; CHECK-NEXT: if: +; CHECK-NEXT: store ptr [[X:%.*]], ptr [[Y:%.*]], align 8 +; CHECK-NEXT: ret void +; +if: + br i1 %c, label %then, label %else + +then: + store ptr %x, ptr %y + br label %out + +else: + store ptr %x, ptr %y, !captures !{!"address"} + br label %out + +out: + ret void +} + !0 = !{ i8 0, i8 1 } !1 = !{ i8 3, i8 5 } !2 = !{} @@ -445,4 +613,7 @@ join: ; CHECK: [[META6]] = !{float 2.500000e+00} ; CHECK: [[META7]] = !{i32 5, i32 6} ; CHECK: [[META8]] = !{i32 4, i32 5} +; CHECK: [[META9]] = !{!"address"} +; CHECK: [[META10]] = !{!"address", !"read_provenance"} +; CHECK: [[META11]] = !{!"provenance"} ;. diff --git a/llvm/test/Verifier/captures-metadata.ll b/llvm/test/Verifier/captures-metadata.ll new file mode 100644 index 0000000..ae08ddd --- /dev/null +++ b/llvm/test/Verifier/captures-metadata.ll @@ -0,0 +1,37 @@ +; RUN: not opt -passes=verify < %s 2>&1 | FileCheck %s + +; CHECK: !captures metadata can only be applied to store instructions +define void @wrong_instr_type(ptr %x) { + load ptr, ptr %x, !captures !{!"address"} + ret void +} + +; CHECK: captures metadata can only be applied to store with value operand of pointer type +define void @wrong_op_type(i32 %x, ptr %y) { + store i32 %x, ptr %y, !captures !{!"address"} + ret void +} + +; CHECK: !captures metadata cannot be empty +define void @empty(ptr %x, ptr %y) { + store ptr %x, ptr %y, !captures !{} + ret void +} + +; CHECK: !captures metadata must be a list of strings +define void @not_string(ptr %x, ptr %y) { + store ptr %x, ptr %y, !captures !{!{}} + ret void +} + +; CHECK: invalid entry in !captures metadata +define void @invalid_str(ptr %x, ptr %y) { + store ptr %x, ptr %y, !captures !{!"foo"} + ret void +} + +; CHECK: invalid entry in !captures metadata +define void @invalid_none(ptr %x, ptr %y) { + store ptr %x, ptr %y, !captures !{!"none"} + ret void +} |
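
For contrast with the invalid cases above, a well-formed use passes the verifier: the value operand is pointer-typed and the metadata is a non-empty list of recognized strings. A minimal sketch (the function name is illustrative, not part of the commit):

; Accepted by the verifier: pointer-typed value operand and a non-empty
; list drawn from the recognized capture components.
define void @valid_captures(ptr %x, ptr %y) {
  store ptr %x, ptr %y, !captures !{!"address", !"read_provenance"}
  ret void
}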